diff --git a/OpenCL/datamining/covariance/covariance.c b/OpenCL/datamining/covariance/covariance.c index b8812c2..80d3cab 100644 --- a/OpenCL/datamining/covariance/covariance.c +++ b/OpenCL/datamining/covariance/covariance.c @@ -201,6 +201,7 @@ void cl_launch_kernel(int m, int n) localWorkSize_Kernel1[1] = DIM_LOCAL_WORK_GROUP_KERNEL_1_Y; globalWorkSize_Kernel1[0] = (size_t)ceil(((float)M) / ((float)DIM_LOCAL_WORK_GROUP_KERNEL_1_X)) * DIM_LOCAL_WORK_GROUP_KERNEL_1_X; globalWorkSize_Kernel1[1] = 1; + printf("local: %zu, global: %zu", localWorkSize_Kernel1[0], globalWorkSize_Kernel1[0]); localWorkSize_Kernel2[0] = DIM_LOCAL_WORK_GROUP_KERNEL_2_X; localWorkSize_Kernel2[1] = DIM_LOCAL_WORK_GROUP_KERNEL_2_Y; diff --git a/create-ir-and-cfg.sh b/create-ir-and-cfg.sh index f4b8046..490c1c6 100755 --- a/create-ir-and-cfg.sh +++ b/create-ir-and-cfg.sh @@ -3,7 +3,7 @@ export POCL_KERNEL_CACHE=1 opt_cfg_command="opt -dot-cfg " dot_command="dot -Tpdf " -targer_dir="irs" +targer_dir="master-pocl-11-llvm" cwd=$(pwd) target_path=$cwd/$targer_dir diff --git a/if-removed-conditional-parallel/2DConvolution.pdf b/if-removed-conditional-parallel/2DConvolution.pdf new file mode 100644 index 0000000..6faac56 Binary files /dev/null and b/if-removed-conditional-parallel/2DConvolution.pdf differ diff --git a/if-removed-conditional-parallel/2mm_kernel1.pdf b/if-removed-conditional-parallel/2mm_kernel1.pdf new file mode 100644 index 0000000..34f20dd Binary files /dev/null and b/if-removed-conditional-parallel/2mm_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/2mm_kernel2.pdf b/if-removed-conditional-parallel/2mm_kernel2.pdf new file mode 100644 index 0000000..5d9a097 Binary files /dev/null and b/if-removed-conditional-parallel/2mm_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/3DConvolution.pdf b/if-removed-conditional-parallel/3DConvolution.pdf new file mode 100644 index 0000000..fa7ba83 Binary files /dev/null and b/if-removed-conditional-parallel/3DConvolution.pdf differ diff --git a/if-removed-conditional-parallel/3mm_kernel1.pdf b/if-removed-conditional-parallel/3mm_kernel1.pdf new file mode 100644 index 0000000..6d9db47 Binary files /dev/null and b/if-removed-conditional-parallel/3mm_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/3mm_kernel2.pdf b/if-removed-conditional-parallel/3mm_kernel2.pdf new file mode 100644 index 0000000..78f29c7 Binary files /dev/null and b/if-removed-conditional-parallel/3mm_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/3mm_kernel3.pdf b/if-removed-conditional-parallel/3mm_kernel3.pdf new file mode 100644 index 0000000..01d2470 Binary files /dev/null and b/if-removed-conditional-parallel/3mm_kernel3.pdf differ diff --git a/if-removed-conditional-parallel/adi_kernel1.pdf b/if-removed-conditional-parallel/adi_kernel1.pdf new file mode 100644 index 0000000..568334e Binary files /dev/null and b/if-removed-conditional-parallel/adi_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/adi_kernel2.pdf b/if-removed-conditional-parallel/adi_kernel2.pdf new file mode 100644 index 0000000..f7f9988 Binary files /dev/null and b/if-removed-conditional-parallel/adi_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/adi_kernel3.pdf b/if-removed-conditional-parallel/adi_kernel3.pdf new file mode 100644 index 0000000..08fa642 Binary files /dev/null and b/if-removed-conditional-parallel/adi_kernel3.pdf differ diff --git a/if-removed-conditional-parallel/adi_kernel4.pdf b/if-removed-conditional-parallel/adi_kernel4.pdf new file mode 100644 index 0000000..9e42620 Binary files /dev/null and b/if-removed-conditional-parallel/adi_kernel4.pdf differ diff --git a/if-removed-conditional-parallel/adi_kernel5.pdf b/if-removed-conditional-parallel/adi_kernel5.pdf new file mode 100644 index 0000000..2cc046e Binary files /dev/null and b/if-removed-conditional-parallel/adi_kernel5.pdf differ diff --git a/if-removed-conditional-parallel/adi_kernel6.pdf b/if-removed-conditional-parallel/adi_kernel6.pdf new file mode 100644 index 0000000..10d48ca Binary files /dev/null and b/if-removed-conditional-parallel/adi_kernel6.pdf differ diff --git a/if-removed-conditional-parallel/atax_kernel1.pdf b/if-removed-conditional-parallel/atax_kernel1.pdf new file mode 100644 index 0000000..8c8a8a6 Binary files /dev/null and b/if-removed-conditional-parallel/atax_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/atax_kernel2.pdf b/if-removed-conditional-parallel/atax_kernel2.pdf new file mode 100644 index 0000000..01601a0 Binary files /dev/null and b/if-removed-conditional-parallel/atax_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/bicg_kernel1.pdf b/if-removed-conditional-parallel/bicg_kernel1.pdf new file mode 100644 index 0000000..41df5df Binary files /dev/null and b/if-removed-conditional-parallel/bicg_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/bicg_kernel2.pdf b/if-removed-conditional-parallel/bicg_kernel2.pdf new file mode 100644 index 0000000..7f9d722 Binary files /dev/null and b/if-removed-conditional-parallel/bicg_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/correlation_corr.pdf b/if-removed-conditional-parallel/correlation_corr.pdf new file mode 100644 index 0000000..831c904 Binary files /dev/null and b/if-removed-conditional-parallel/correlation_corr.pdf differ diff --git a/if-removed-conditional-parallel/correlation_mean.pdf b/if-removed-conditional-parallel/correlation_mean.pdf new file mode 100644 index 0000000..27a49e7 Binary files /dev/null and b/if-removed-conditional-parallel/correlation_mean.pdf differ diff --git a/if-removed-conditional-parallel/correlation_reduce.pdf b/if-removed-conditional-parallel/correlation_reduce.pdf new file mode 100644 index 0000000..1205f57 Binary files /dev/null and b/if-removed-conditional-parallel/correlation_reduce.pdf differ diff --git a/if-removed-conditional-parallel/correlation_std.pdf b/if-removed-conditional-parallel/correlation_std.pdf new file mode 100644 index 0000000..9b6cc98 Binary files /dev/null and b/if-removed-conditional-parallel/correlation_std.pdf differ diff --git a/if-removed-conditional-parallel/covariance_covar.pdf b/if-removed-conditional-parallel/covariance_covar.pdf new file mode 100644 index 0000000..89bcce8 Binary files /dev/null and b/if-removed-conditional-parallel/covariance_covar.pdf differ diff --git a/if-removed-conditional-parallel/covariance_mean.pdf b/if-removed-conditional-parallel/covariance_mean.pdf new file mode 100644 index 0000000..53219eb Binary files /dev/null and b/if-removed-conditional-parallel/covariance_mean.pdf differ diff --git a/if-removed-conditional-parallel/covariance_reduce.pdf b/if-removed-conditional-parallel/covariance_reduce.pdf new file mode 100644 index 0000000..d8f45da Binary files /dev/null and b/if-removed-conditional-parallel/covariance_reduce.pdf differ diff --git a/if-removed-conditional-parallel/doitgen_kernel1.pdf b/if-removed-conditional-parallel/doitgen_kernel1.pdf new file mode 100644 index 0000000..eb8acc2 Binary files /dev/null and b/if-removed-conditional-parallel/doitgen_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/doitgen_kernel2.pdf b/if-removed-conditional-parallel/doitgen_kernel2.pdf new file mode 100644 index 0000000..e9d0dcc Binary files /dev/null and b/if-removed-conditional-parallel/doitgen_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/fdtd2d_kernel1.pdf b/if-removed-conditional-parallel/fdtd2d_kernel1.pdf new file mode 100644 index 0000000..6139fb8 Binary files /dev/null and b/if-removed-conditional-parallel/fdtd2d_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/fdtd2d_kernel2.pdf b/if-removed-conditional-parallel/fdtd2d_kernel2.pdf new file mode 100644 index 0000000..0434caa Binary files /dev/null and b/if-removed-conditional-parallel/fdtd2d_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/fdtd2d_kernel3.pdf b/if-removed-conditional-parallel/fdtd2d_kernel3.pdf new file mode 100644 index 0000000..511afb9 Binary files /dev/null and b/if-removed-conditional-parallel/fdtd2d_kernel3.pdf differ diff --git a/if-removed-conditional-parallel/gemm.pdf b/if-removed-conditional-parallel/gemm.pdf new file mode 100644 index 0000000..effe03b Binary files /dev/null and b/if-removed-conditional-parallel/gemm.pdf differ diff --git a/if-removed-conditional-parallel/gemver_kernel1.pdf b/if-removed-conditional-parallel/gemver_kernel1.pdf new file mode 100644 index 0000000..4380380 Binary files /dev/null and b/if-removed-conditional-parallel/gemver_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/gemver_kernel2.pdf b/if-removed-conditional-parallel/gemver_kernel2.pdf new file mode 100644 index 0000000..34b3526 Binary files /dev/null and b/if-removed-conditional-parallel/gemver_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/gemver_kernel3.pdf b/if-removed-conditional-parallel/gemver_kernel3.pdf new file mode 100644 index 0000000..0d1e44b Binary files /dev/null and b/if-removed-conditional-parallel/gemver_kernel3.pdf differ diff --git a/if-removed-conditional-parallel/gesummv.pdf b/if-removed-conditional-parallel/gesummv.pdf new file mode 100644 index 0000000..3e1e3e1 Binary files /dev/null and b/if-removed-conditional-parallel/gesummv.pdf differ diff --git a/if-removed-conditional-parallel/gramschmidt_kernel1.pdf b/if-removed-conditional-parallel/gramschmidt_kernel1.pdf new file mode 100644 index 0000000..6935a28 Binary files /dev/null and b/if-removed-conditional-parallel/gramschmidt_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/gramschmidt_kernel2.pdf b/if-removed-conditional-parallel/gramschmidt_kernel2.pdf new file mode 100644 index 0000000..eb1276f Binary files /dev/null and b/if-removed-conditional-parallel/gramschmidt_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/gramschmidt_kernel3.pdf b/if-removed-conditional-parallel/gramschmidt_kernel3.pdf new file mode 100644 index 0000000..08690d0 Binary files /dev/null and b/if-removed-conditional-parallel/gramschmidt_kernel3.pdf differ diff --git a/if-removed-conditional-parallel/jacobi1D_kernel1.pdf b/if-removed-conditional-parallel/jacobi1D_kernel1.pdf new file mode 100644 index 0000000..e3185f6 Binary files /dev/null and b/if-removed-conditional-parallel/jacobi1D_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/jacobi1D_kernel2.pdf b/if-removed-conditional-parallel/jacobi1D_kernel2.pdf new file mode 100644 index 0000000..c3f1954 Binary files /dev/null and b/if-removed-conditional-parallel/jacobi1D_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/jacobi2D_kernel1.pdf b/if-removed-conditional-parallel/jacobi2D_kernel1.pdf new file mode 100644 index 0000000..eb3aa03 Binary files /dev/null and b/if-removed-conditional-parallel/jacobi2D_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/jacobi2D_kernel2.pdf b/if-removed-conditional-parallel/jacobi2D_kernel2.pdf new file mode 100644 index 0000000..2142275 Binary files /dev/null and b/if-removed-conditional-parallel/jacobi2D_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/lu_kernel1.pdf b/if-removed-conditional-parallel/lu_kernel1.pdf new file mode 100644 index 0000000..0276f91 Binary files /dev/null and b/if-removed-conditional-parallel/lu_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/lu_kernel2.pdf b/if-removed-conditional-parallel/lu_kernel2.pdf new file mode 100644 index 0000000..8a23c14 Binary files /dev/null and b/if-removed-conditional-parallel/lu_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/mvt_kernel1.pdf b/if-removed-conditional-parallel/mvt_kernel1.pdf new file mode 100644 index 0000000..bb442f0 Binary files /dev/null and b/if-removed-conditional-parallel/mvt_kernel1.pdf differ diff --git a/if-removed-conditional-parallel/mvt_kernel2.pdf b/if-removed-conditional-parallel/mvt_kernel2.pdf new file mode 100644 index 0000000..7636f90 Binary files /dev/null and b/if-removed-conditional-parallel/mvt_kernel2.pdf differ diff --git a/if-removed-conditional-parallel/syr2k.pdf b/if-removed-conditional-parallel/syr2k.pdf new file mode 100644 index 0000000..1d84f85 Binary files /dev/null and b/if-removed-conditional-parallel/syr2k.pdf differ diff --git a/if-removed-conditional-parallel/syrk.pdf b/if-removed-conditional-parallel/syrk.pdf new file mode 100644 index 0000000..d56a11d Binary files /dev/null and b/if-removed-conditional-parallel/syrk.pdf differ diff --git a/if-removed-unconditional-parallel/2DConvolution.pdf b/if-removed-unconditional-parallel/2DConvolution.pdf new file mode 100644 index 0000000..0d9d5fd Binary files /dev/null and b/if-removed-unconditional-parallel/2DConvolution.pdf differ diff --git a/if-removed-unconditional-parallel/2mm_kernel1.pdf b/if-removed-unconditional-parallel/2mm_kernel1.pdf new file mode 100644 index 0000000..4e5b6cc Binary files /dev/null and b/if-removed-unconditional-parallel/2mm_kernel1.pdf differ diff --git a/if-removed-unconditional-parallel/2mm_kernel2.pdf b/if-removed-unconditional-parallel/2mm_kernel2.pdf new file mode 100644 index 0000000..2b29b2f Binary files /dev/null and b/if-removed-unconditional-parallel/2mm_kernel2.pdf differ diff --git a/if-removed-unconditional-parallel/3DConvolution.pdf b/if-removed-unconditional-parallel/3DConvolution.pdf new file mode 100644 index 0000000..d2012f7 Binary files /dev/null and b/if-removed-unconditional-parallel/3DConvolution.pdf differ diff --git a/if-removed-unconditional-parallel/3mm_kernel1.pdf b/if-removed-unconditional-parallel/3mm_kernel1.pdf new file mode 100644 index 0000000..12a6f0a Binary files /dev/null and b/if-removed-unconditional-parallel/3mm_kernel1.pdf differ diff --git a/if-removed-unconditional-parallel/3mm_kernel2.pdf b/if-removed-unconditional-parallel/3mm_kernel2.pdf new file mode 100644 index 0000000..c522e35 Binary files /dev/null and b/if-removed-unconditional-parallel/3mm_kernel2.pdf differ diff --git a/if-removed-unconditional-parallel/3mm_kernel3.pdf b/if-removed-unconditional-parallel/3mm_kernel3.pdf new file mode 100644 index 0000000..59093e8 Binary files /dev/null and b/if-removed-unconditional-parallel/3mm_kernel3.pdf differ diff --git a/if-removed-unconditional-parallel/adi_kernel1.pdf b/if-removed-unconditional-parallel/adi_kernel1.pdf new file mode 100644 index 0000000..b3ee5c6 Binary files /dev/null and b/if-removed-unconditional-parallel/adi_kernel1.pdf differ diff --git a/if-removed-unconditional-parallel/adi_kernel2.pdf b/if-removed-unconditional-parallel/adi_kernel2.pdf new file mode 100644 index 0000000..5fe08c8 Binary files /dev/null and b/if-removed-unconditional-parallel/adi_kernel2.pdf differ diff --git a/if-removed-unconditional-parallel/adi_kernel3.pdf b/if-removed-unconditional-parallel/adi_kernel3.pdf new file mode 100644 index 0000000..b39309a Binary files /dev/null and b/if-removed-unconditional-parallel/adi_kernel3.pdf differ diff --git a/if-removed-unconditional-parallel/adi_kernel4.pdf b/if-removed-unconditional-parallel/adi_kernel4.pdf new file mode 100644 index 0000000..9690f2b Binary files /dev/null and b/if-removed-unconditional-parallel/adi_kernel4.pdf differ diff --git a/if-removed-unconditional-parallel/adi_kernel5.pdf b/if-removed-unconditional-parallel/adi_kernel5.pdf new file mode 100644 index 0000000..276d51f Binary files /dev/null and b/if-removed-unconditional-parallel/adi_kernel5.pdf differ diff --git a/if-removed-unconditional-parallel/adi_kernel6.pdf b/if-removed-unconditional-parallel/adi_kernel6.pdf new file mode 100644 index 0000000..9e9bda6 Binary files /dev/null and b/if-removed-unconditional-parallel/adi_kernel6.pdf differ diff --git a/if-removed-unconditional-parallel/atax_kernel1.pdf b/if-removed-unconditional-parallel/atax_kernel1.pdf new file mode 100644 index 0000000..a044967 Binary files /dev/null and b/if-removed-unconditional-parallel/atax_kernel1.pdf differ diff --git a/if-removed-unconditional-parallel/atax_kernel2.pdf b/if-removed-unconditional-parallel/atax_kernel2.pdf new file mode 100644 index 0000000..c02ae93 Binary files /dev/null and b/if-removed-unconditional-parallel/atax_kernel2.pdf differ diff --git a/if-removed-unconditional-parallel/bicg_kernel1.pdf b/if-removed-unconditional-parallel/bicg_kernel1.pdf new file mode 100644 index 0000000..cc80de7 Binary files /dev/null and b/if-removed-unconditional-parallel/bicg_kernel1.pdf differ diff --git a/if-removed-unconditional-parallel/bicg_kernel2.pdf b/if-removed-unconditional-parallel/bicg_kernel2.pdf new file mode 100644 index 0000000..346ff8c Binary files /dev/null and b/if-removed-unconditional-parallel/bicg_kernel2.pdf differ diff --git a/if-removed-unconditional-parallel/correlation_corr.pdf b/if-removed-unconditional-parallel/correlation_corr.pdf new file mode 100644 index 0000000..5edf3e3 Binary files /dev/null and b/if-removed-unconditional-parallel/correlation_corr.pdf differ diff --git a/if-removed-unconditional-parallel/correlation_mean.pdf b/if-removed-unconditional-parallel/correlation_mean.pdf new file mode 100644 index 0000000..dd33798 Binary files /dev/null and b/if-removed-unconditional-parallel/correlation_mean.pdf differ diff --git a/if-removed-unconditional-parallel/correlation_reduce.pdf b/if-removed-unconditional-parallel/correlation_reduce.pdf new file mode 100644 index 0000000..ba8ddc1 Binary files /dev/null and b/if-removed-unconditional-parallel/correlation_reduce.pdf differ diff --git a/if-removed-unconditional-parallel/correlation_std.pdf b/if-removed-unconditional-parallel/correlation_std.pdf new file mode 100644 index 0000000..78860a5 Binary files /dev/null and b/if-removed-unconditional-parallel/correlation_std.pdf differ diff --git a/if-removed-unconditional-parallel/covariance_covar.pdf b/if-removed-unconditional-parallel/covariance_covar.pdf new file mode 100644 index 0000000..c31d2df Binary files /dev/null and b/if-removed-unconditional-parallel/covariance_covar.pdf differ diff --git a/if-removed-unconditional-parallel/covariance_mean.pdf b/if-removed-unconditional-parallel/covariance_mean.pdf new file mode 100644 index 0000000..98a5439 Binary files /dev/null and b/if-removed-unconditional-parallel/covariance_mean.pdf differ diff --git a/if-removed-unconditional-parallel/covariance_reduce.pdf b/if-removed-unconditional-parallel/covariance_reduce.pdf new file mode 100644 index 0000000..24a6316 Binary files /dev/null and b/if-removed-unconditional-parallel/covariance_reduce.pdf differ diff --git a/if-removed-unconditional-parallel/doitgen_kernel1.pdf b/if-removed-unconditional-parallel/doitgen_kernel1.pdf new file mode 100644 index 0000000..7308bb3 Binary files /dev/null and b/if-removed-unconditional-parallel/doitgen_kernel1.pdf differ diff --git a/if-removed-unconditional-parallel/doitgen_kernel2.pdf b/if-removed-unconditional-parallel/doitgen_kernel2.pdf new file mode 100644 index 0000000..17c0ca5 Binary files /dev/null and b/if-removed-unconditional-parallel/doitgen_kernel2.pdf differ diff --git a/if-removed-unconditional-parallel/fdtd2d_kernel1.pdf b/if-removed-unconditional-parallel/fdtd2d_kernel1.pdf new file mode 100644 index 0000000..f3db20c Binary files /dev/null and b/if-removed-unconditional-parallel/fdtd2d_kernel1.pdf differ diff --git a/if-removed-unconditional-parallel/fdtd2d_kernel2.pdf b/if-removed-unconditional-parallel/fdtd2d_kernel2.pdf new file mode 100644 index 0000000..c34da45 Binary files /dev/null and b/if-removed-unconditional-parallel/fdtd2d_kernel2.pdf differ diff --git a/if-removed-unconditional-parallel/fdtd2d_kernel3.pdf b/if-removed-unconditional-parallel/fdtd2d_kernel3.pdf new file mode 100644 index 0000000..7bdc3b1 Binary files /dev/null and b/if-removed-unconditional-parallel/fdtd2d_kernel3.pdf differ diff --git a/if-removed-unconditional-parallel/gemm.pdf b/if-removed-unconditional-parallel/gemm.pdf new file mode 100644 index 0000000..df7c3b0 Binary files /dev/null and b/if-removed-unconditional-parallel/gemm.pdf differ diff --git a/if-removed-unconditional-parallel/gemver_kernel1.pdf b/if-removed-unconditional-parallel/gemver_kernel1.pdf new file mode 100644 index 0000000..18d4f08 Binary files /dev/null and b/if-removed-unconditional-parallel/gemver_kernel1.pdf differ diff --git a/if-removed-unconditional-parallel/gemver_kernel2.pdf b/if-removed-unconditional-parallel/gemver_kernel2.pdf new file mode 100644 index 0000000..b3c4a82 Binary files /dev/null and b/if-removed-unconditional-parallel/gemver_kernel2.pdf differ diff --git a/if-removed-unconditional-parallel/gemver_kernel3.pdf b/if-removed-unconditional-parallel/gemver_kernel3.pdf new file mode 100644 index 0000000..96e9096 Binary files /dev/null and b/if-removed-unconditional-parallel/gemver_kernel3.pdf differ diff --git a/if-removed-unconditional-parallel/gesummv.pdf b/if-removed-unconditional-parallel/gesummv.pdf new file mode 100644 index 0000000..c44ac97 Binary files /dev/null and b/if-removed-unconditional-parallel/gesummv.pdf differ diff --git a/unroll-cfgs/gramschmidt_kernel1.pdf b/if-removed-unconditional-parallel/gramschmidt_kernel1.pdf similarity index 65% rename from unroll-cfgs/gramschmidt_kernel1.pdf rename to if-removed-unconditional-parallel/gramschmidt_kernel1.pdf index 395fa40..afb5086 100644 Binary files a/unroll-cfgs/gramschmidt_kernel1.pdf and b/if-removed-unconditional-parallel/gramschmidt_kernel1.pdf differ diff --git a/if-removed-unconditional-parallel/gramschmidt_kernel2.pdf b/if-removed-unconditional-parallel/gramschmidt_kernel2.pdf new file mode 100644 index 0000000..65b44a5 Binary files /dev/null and b/if-removed-unconditional-parallel/gramschmidt_kernel2.pdf differ diff --git a/unroll-cfgs/gramschmidt_kernel3.pdf b/if-removed-unconditional-parallel/gramschmidt_kernel3.pdf similarity index 55% rename from unroll-cfgs/gramschmidt_kernel3.pdf rename to if-removed-unconditional-parallel/gramschmidt_kernel3.pdf index 30b2d74..f78a8bb 100644 Binary files a/unroll-cfgs/gramschmidt_kernel3.pdf and b/if-removed-unconditional-parallel/gramschmidt_kernel3.pdf differ diff --git a/unroll-cfgs/jacobi1D_kernel1.pdf b/if-removed-unconditional-parallel/jacobi1D_kernel1.pdf similarity index 59% rename from unroll-cfgs/jacobi1D_kernel1.pdf rename to if-removed-unconditional-parallel/jacobi1D_kernel1.pdf index 50f60c1..eb4568f 100644 Binary files a/unroll-cfgs/jacobi1D_kernel1.pdf and b/if-removed-unconditional-parallel/jacobi1D_kernel1.pdf differ diff --git a/unroll-cfgs/jacobi1D_kernel2.pdf b/if-removed-unconditional-parallel/jacobi1D_kernel2.pdf similarity index 77% rename from unroll-cfgs/jacobi1D_kernel2.pdf rename to if-removed-unconditional-parallel/jacobi1D_kernel2.pdf index 0e85e55..948da47 100644 Binary files a/unroll-cfgs/jacobi1D_kernel2.pdf and b/if-removed-unconditional-parallel/jacobi1D_kernel2.pdf differ diff --git a/if-removed-unconditional-parallel/jacobi2D_kernel1.pdf b/if-removed-unconditional-parallel/jacobi2D_kernel1.pdf new file mode 100644 index 0000000..1786461 Binary files /dev/null and b/if-removed-unconditional-parallel/jacobi2D_kernel1.pdf differ diff --git a/if-removed-unconditional-parallel/jacobi2D_kernel2.pdf b/if-removed-unconditional-parallel/jacobi2D_kernel2.pdf new file mode 100644 index 0000000..48a9884 Binary files /dev/null and b/if-removed-unconditional-parallel/jacobi2D_kernel2.pdf differ diff --git a/unroll-cfgs/lu_kernel1.pdf b/if-removed-unconditional-parallel/lu_kernel1.pdf similarity index 64% rename from unroll-cfgs/lu_kernel1.pdf rename to if-removed-unconditional-parallel/lu_kernel1.pdf index 036e7ae..403a8d4 100644 Binary files a/unroll-cfgs/lu_kernel1.pdf and b/if-removed-unconditional-parallel/lu_kernel1.pdf differ diff --git a/if-removed-unconditional-parallel/lu_kernel2.pdf b/if-removed-unconditional-parallel/lu_kernel2.pdf new file mode 100644 index 0000000..20b6c28 Binary files /dev/null and b/if-removed-unconditional-parallel/lu_kernel2.pdf differ diff --git a/if-removed-unconditional-parallel/mvt_kernel1.pdf b/if-removed-unconditional-parallel/mvt_kernel1.pdf new file mode 100644 index 0000000..8c329d4 Binary files /dev/null and b/if-removed-unconditional-parallel/mvt_kernel1.pdf differ diff --git a/if-removed-unconditional-parallel/mvt_kernel2.pdf b/if-removed-unconditional-parallel/mvt_kernel2.pdf new file mode 100644 index 0000000..479774b Binary files /dev/null and b/if-removed-unconditional-parallel/mvt_kernel2.pdf differ diff --git a/if-removed-unconditional-parallel/syr2k.pdf b/if-removed-unconditional-parallel/syr2k.pdf new file mode 100644 index 0000000..92270f3 Binary files /dev/null and b/if-removed-unconditional-parallel/syr2k.pdf differ diff --git a/if-removed-unconditional-parallel/syrk.pdf b/if-removed-unconditional-parallel/syrk.pdf new file mode 100644 index 0000000..6351a2c Binary files /dev/null and b/if-removed-unconditional-parallel/syrk.pdf differ diff --git a/master-pocl-11-llvm/2DConvolution.pdf b/master-pocl-11-llvm/2DConvolution.pdf new file mode 100644 index 0000000..c4aece4 Binary files /dev/null and b/master-pocl-11-llvm/2DConvolution.pdf differ diff --git a/master-pocl-11-llvm/2mm_kernel1.pdf b/master-pocl-11-llvm/2mm_kernel1.pdf new file mode 100644 index 0000000..85f286b Binary files /dev/null and b/master-pocl-11-llvm/2mm_kernel1.pdf differ diff --git a/master-pocl-11-llvm/2mm_kernel2.pdf b/master-pocl-11-llvm/2mm_kernel2.pdf new file mode 100644 index 0000000..1b0c2d8 Binary files /dev/null and b/master-pocl-11-llvm/2mm_kernel2.pdf differ diff --git a/master-pocl-11-llvm/3DConvolution.pdf b/master-pocl-11-llvm/3DConvolution.pdf new file mode 100644 index 0000000..c0337af Binary files /dev/null and b/master-pocl-11-llvm/3DConvolution.pdf differ diff --git a/master-pocl-11-llvm/3mm_kernel1.pdf b/master-pocl-11-llvm/3mm_kernel1.pdf new file mode 100644 index 0000000..90ba00f Binary files /dev/null and b/master-pocl-11-llvm/3mm_kernel1.pdf differ diff --git a/master-pocl-11-llvm/3mm_kernel2.pdf b/master-pocl-11-llvm/3mm_kernel2.pdf new file mode 100644 index 0000000..db0b367 Binary files /dev/null and b/master-pocl-11-llvm/3mm_kernel2.pdf differ diff --git a/master-pocl-11-llvm/3mm_kernel3.pdf b/master-pocl-11-llvm/3mm_kernel3.pdf new file mode 100644 index 0000000..47db5bb Binary files /dev/null and b/master-pocl-11-llvm/3mm_kernel3.pdf differ diff --git a/master-pocl-11-llvm/adi_kernel1.pdf b/master-pocl-11-llvm/adi_kernel1.pdf new file mode 100644 index 0000000..db7ba22 Binary files /dev/null and b/master-pocl-11-llvm/adi_kernel1.pdf differ diff --git a/master-pocl-11-llvm/adi_kernel2.pdf b/master-pocl-11-llvm/adi_kernel2.pdf new file mode 100644 index 0000000..7e22d0b Binary files /dev/null and b/master-pocl-11-llvm/adi_kernel2.pdf differ diff --git a/master-pocl-11-llvm/adi_kernel3.pdf b/master-pocl-11-llvm/adi_kernel3.pdf new file mode 100644 index 0000000..68b05ef Binary files /dev/null and b/master-pocl-11-llvm/adi_kernel3.pdf differ diff --git a/master-pocl-11-llvm/adi_kernel4.pdf b/master-pocl-11-llvm/adi_kernel4.pdf new file mode 100644 index 0000000..36b529e Binary files /dev/null and b/master-pocl-11-llvm/adi_kernel4.pdf differ diff --git a/master-pocl-11-llvm/adi_kernel5.pdf b/master-pocl-11-llvm/adi_kernel5.pdf new file mode 100644 index 0000000..60c73b7 Binary files /dev/null and b/master-pocl-11-llvm/adi_kernel5.pdf differ diff --git a/master-pocl-11-llvm/adi_kernel6.pdf b/master-pocl-11-llvm/adi_kernel6.pdf new file mode 100644 index 0000000..84a6942 Binary files /dev/null and b/master-pocl-11-llvm/adi_kernel6.pdf differ diff --git a/master-pocl-11-llvm/atax_kernel1.pdf b/master-pocl-11-llvm/atax_kernel1.pdf new file mode 100644 index 0000000..376339b Binary files /dev/null and b/master-pocl-11-llvm/atax_kernel1.pdf differ diff --git a/master-pocl-11-llvm/atax_kernel2.pdf b/master-pocl-11-llvm/atax_kernel2.pdf new file mode 100644 index 0000000..8065da4 Binary files /dev/null and b/master-pocl-11-llvm/atax_kernel2.pdf differ diff --git a/master-pocl-11-llvm/bicg_kernel1.pdf b/master-pocl-11-llvm/bicg_kernel1.pdf new file mode 100644 index 0000000..ea89093 Binary files /dev/null and b/master-pocl-11-llvm/bicg_kernel1.pdf differ diff --git a/master-pocl-11-llvm/bicg_kernel2.pdf b/master-pocl-11-llvm/bicg_kernel2.pdf new file mode 100644 index 0000000..6834355 Binary files /dev/null and b/master-pocl-11-llvm/bicg_kernel2.pdf differ diff --git a/master-pocl-11-llvm/correlation_corr.pdf b/master-pocl-11-llvm/correlation_corr.pdf new file mode 100644 index 0000000..aa4885a Binary files /dev/null and b/master-pocl-11-llvm/correlation_corr.pdf differ diff --git a/master-pocl-11-llvm/correlation_mean.pdf b/master-pocl-11-llvm/correlation_mean.pdf new file mode 100644 index 0000000..9aa08d5 Binary files /dev/null and b/master-pocl-11-llvm/correlation_mean.pdf differ diff --git a/master-pocl-11-llvm/correlation_reduce.pdf b/master-pocl-11-llvm/correlation_reduce.pdf new file mode 100644 index 0000000..10c3277 Binary files /dev/null and b/master-pocl-11-llvm/correlation_reduce.pdf differ diff --git a/master-pocl-11-llvm/correlation_std.pdf b/master-pocl-11-llvm/correlation_std.pdf new file mode 100644 index 0000000..93c49b4 Binary files /dev/null and b/master-pocl-11-llvm/correlation_std.pdf differ diff --git a/master-pocl-11-llvm/covariance_covar.pdf b/master-pocl-11-llvm/covariance_covar.pdf new file mode 100644 index 0000000..1219a4d Binary files /dev/null and b/master-pocl-11-llvm/covariance_covar.pdf differ diff --git a/master-pocl-11-llvm/covariance_mean.pdf b/master-pocl-11-llvm/covariance_mean.pdf new file mode 100644 index 0000000..9767b3a Binary files /dev/null and b/master-pocl-11-llvm/covariance_mean.pdf differ diff --git a/master-pocl-11-llvm/covariance_reduce.pdf b/master-pocl-11-llvm/covariance_reduce.pdf new file mode 100644 index 0000000..e512b07 Binary files /dev/null and b/master-pocl-11-llvm/covariance_reduce.pdf differ diff --git a/master-pocl-11-llvm/doitgen_kernel1.pdf b/master-pocl-11-llvm/doitgen_kernel1.pdf new file mode 100644 index 0000000..c13bb78 Binary files /dev/null and b/master-pocl-11-llvm/doitgen_kernel1.pdf differ diff --git a/master-pocl-11-llvm/doitgen_kernel2.pdf b/master-pocl-11-llvm/doitgen_kernel2.pdf new file mode 100644 index 0000000..7585ee3 Binary files /dev/null and b/master-pocl-11-llvm/doitgen_kernel2.pdf differ diff --git a/master-pocl-11-llvm/fdtd2d_kernel1.pdf b/master-pocl-11-llvm/fdtd2d_kernel1.pdf new file mode 100644 index 0000000..5c0fed1 Binary files /dev/null and b/master-pocl-11-llvm/fdtd2d_kernel1.pdf differ diff --git a/master-pocl-11-llvm/fdtd2d_kernel2.pdf b/master-pocl-11-llvm/fdtd2d_kernel2.pdf new file mode 100644 index 0000000..482762e Binary files /dev/null and b/master-pocl-11-llvm/fdtd2d_kernel2.pdf differ diff --git a/master-pocl-11-llvm/fdtd2d_kernel3.pdf b/master-pocl-11-llvm/fdtd2d_kernel3.pdf new file mode 100644 index 0000000..32dae91 Binary files /dev/null and b/master-pocl-11-llvm/fdtd2d_kernel3.pdf differ diff --git a/master-pocl-11-llvm/gemm.pdf b/master-pocl-11-llvm/gemm.pdf new file mode 100644 index 0000000..e8cc3e8 Binary files /dev/null and b/master-pocl-11-llvm/gemm.pdf differ diff --git a/master-pocl-11-llvm/gemver_kernel1.pdf b/master-pocl-11-llvm/gemver_kernel1.pdf new file mode 100644 index 0000000..3ac2a00 Binary files /dev/null and b/master-pocl-11-llvm/gemver_kernel1.pdf differ diff --git a/master-pocl-11-llvm/gemver_kernel2.pdf b/master-pocl-11-llvm/gemver_kernel2.pdf new file mode 100644 index 0000000..edbb612 Binary files /dev/null and b/master-pocl-11-llvm/gemver_kernel2.pdf differ diff --git a/master-pocl-11-llvm/gemver_kernel3.pdf b/master-pocl-11-llvm/gemver_kernel3.pdf new file mode 100644 index 0000000..848ffe4 Binary files /dev/null and b/master-pocl-11-llvm/gemver_kernel3.pdf differ diff --git a/master-pocl-11-llvm/gesummv.pdf b/master-pocl-11-llvm/gesummv.pdf new file mode 100644 index 0000000..8e0cb9a Binary files /dev/null and b/master-pocl-11-llvm/gesummv.pdf differ diff --git a/master-pocl-11-llvm/gramschmidt_kernel1.pdf b/master-pocl-11-llvm/gramschmidt_kernel1.pdf new file mode 100644 index 0000000..065ec8b Binary files /dev/null and b/master-pocl-11-llvm/gramschmidt_kernel1.pdf differ diff --git a/master-pocl-11-llvm/gramschmidt_kernel2.pdf b/master-pocl-11-llvm/gramschmidt_kernel2.pdf new file mode 100644 index 0000000..0113112 Binary files /dev/null and b/master-pocl-11-llvm/gramschmidt_kernel2.pdf differ diff --git a/master-pocl-11-llvm/gramschmidt_kernel3.pdf b/master-pocl-11-llvm/gramschmidt_kernel3.pdf new file mode 100644 index 0000000..955e5a1 Binary files /dev/null and b/master-pocl-11-llvm/gramschmidt_kernel3.pdf differ diff --git a/master-pocl-11-llvm/jacobi1D_kernel1.pdf b/master-pocl-11-llvm/jacobi1D_kernel1.pdf new file mode 100644 index 0000000..d6ea2b0 Binary files /dev/null and b/master-pocl-11-llvm/jacobi1D_kernel1.pdf differ diff --git a/master-pocl-11-llvm/jacobi1D_kernel2.pdf b/master-pocl-11-llvm/jacobi1D_kernel2.pdf new file mode 100644 index 0000000..e578cfe Binary files /dev/null and b/master-pocl-11-llvm/jacobi1D_kernel2.pdf differ diff --git a/master-pocl-11-llvm/jacobi2D_kernel1.pdf b/master-pocl-11-llvm/jacobi2D_kernel1.pdf new file mode 100644 index 0000000..5e8e0e1 Binary files /dev/null and b/master-pocl-11-llvm/jacobi2D_kernel1.pdf differ diff --git a/master-pocl-11-llvm/jacobi2D_kernel2.pdf b/master-pocl-11-llvm/jacobi2D_kernel2.pdf new file mode 100644 index 0000000..717488b Binary files /dev/null and b/master-pocl-11-llvm/jacobi2D_kernel2.pdf differ diff --git a/master-pocl-11-llvm/lu_kernel1.pdf b/master-pocl-11-llvm/lu_kernel1.pdf new file mode 100644 index 0000000..3145d15 Binary files /dev/null and b/master-pocl-11-llvm/lu_kernel1.pdf differ diff --git a/master-pocl-11-llvm/lu_kernel2.pdf b/master-pocl-11-llvm/lu_kernel2.pdf new file mode 100644 index 0000000..18286df Binary files /dev/null and b/master-pocl-11-llvm/lu_kernel2.pdf differ diff --git a/master-pocl-11-llvm/mvt_kernel1.pdf b/master-pocl-11-llvm/mvt_kernel1.pdf new file mode 100644 index 0000000..9f2da0a Binary files /dev/null and b/master-pocl-11-llvm/mvt_kernel1.pdf differ diff --git a/master-pocl-11-llvm/mvt_kernel2.pdf b/master-pocl-11-llvm/mvt_kernel2.pdf new file mode 100644 index 0000000..3e99de1 Binary files /dev/null and b/master-pocl-11-llvm/mvt_kernel2.pdf differ diff --git a/master-pocl-11-llvm/syr2k.pdf b/master-pocl-11-llvm/syr2k.pdf new file mode 100644 index 0000000..f62414b Binary files /dev/null and b/master-pocl-11-llvm/syr2k.pdf differ diff --git a/master-pocl-11-llvm/syrk.pdf b/master-pocl-11-llvm/syrk.pdf new file mode 100644 index 0000000..0c9dd08 Binary files /dev/null and b/master-pocl-11-llvm/syrk.pdf differ diff --git a/master-pocl-master-llvm/2DConvolution.pdf b/master-pocl-master-llvm/2DConvolution.pdf new file mode 100644 index 0000000..bc67952 Binary files /dev/null and b/master-pocl-master-llvm/2DConvolution.pdf differ diff --git a/master-pocl-master-llvm/2mm_kernel1.pdf b/master-pocl-master-llvm/2mm_kernel1.pdf new file mode 100644 index 0000000..afd23b7 Binary files /dev/null and b/master-pocl-master-llvm/2mm_kernel1.pdf differ diff --git a/master-pocl-master-llvm/2mm_kernel2.pdf b/master-pocl-master-llvm/2mm_kernel2.pdf new file mode 100644 index 0000000..50aca8e Binary files /dev/null and b/master-pocl-master-llvm/2mm_kernel2.pdf differ diff --git a/master-pocl-master-llvm/3DConvolution.pdf b/master-pocl-master-llvm/3DConvolution.pdf new file mode 100644 index 0000000..94d2e9c Binary files /dev/null and b/master-pocl-master-llvm/3DConvolution.pdf differ diff --git a/master-pocl-master-llvm/3mm_kernel1.pdf b/master-pocl-master-llvm/3mm_kernel1.pdf new file mode 100644 index 0000000..c6d4a6b Binary files /dev/null and b/master-pocl-master-llvm/3mm_kernel1.pdf differ diff --git a/master-pocl-master-llvm/3mm_kernel2.pdf b/master-pocl-master-llvm/3mm_kernel2.pdf new file mode 100644 index 0000000..2dc2f21 Binary files /dev/null and b/master-pocl-master-llvm/3mm_kernel2.pdf differ diff --git a/master-pocl-master-llvm/3mm_kernel3.pdf b/master-pocl-master-llvm/3mm_kernel3.pdf new file mode 100644 index 0000000..51aa149 Binary files /dev/null and b/master-pocl-master-llvm/3mm_kernel3.pdf differ diff --git a/master-pocl-master-llvm/adi_kernel1.pdf b/master-pocl-master-llvm/adi_kernel1.pdf new file mode 100644 index 0000000..375fb23 Binary files /dev/null and b/master-pocl-master-llvm/adi_kernel1.pdf differ diff --git a/master-pocl-master-llvm/adi_kernel2.pdf b/master-pocl-master-llvm/adi_kernel2.pdf new file mode 100644 index 0000000..ca82bc3 Binary files /dev/null and b/master-pocl-master-llvm/adi_kernel2.pdf differ diff --git a/master-pocl-master-llvm/adi_kernel3.pdf b/master-pocl-master-llvm/adi_kernel3.pdf new file mode 100644 index 0000000..67529e7 Binary files /dev/null and b/master-pocl-master-llvm/adi_kernel3.pdf differ diff --git a/master-pocl-master-llvm/adi_kernel4.pdf b/master-pocl-master-llvm/adi_kernel4.pdf new file mode 100644 index 0000000..843248f Binary files /dev/null and b/master-pocl-master-llvm/adi_kernel4.pdf differ diff --git a/master-pocl-master-llvm/adi_kernel5.pdf b/master-pocl-master-llvm/adi_kernel5.pdf new file mode 100644 index 0000000..85b3c1f Binary files /dev/null and b/master-pocl-master-llvm/adi_kernel5.pdf differ diff --git a/master-pocl-master-llvm/adi_kernel6.pdf b/master-pocl-master-llvm/adi_kernel6.pdf new file mode 100644 index 0000000..e2262b3 Binary files /dev/null and b/master-pocl-master-llvm/adi_kernel6.pdf differ diff --git a/master-pocl-master-llvm/atax_kernel1.pdf b/master-pocl-master-llvm/atax_kernel1.pdf new file mode 100644 index 0000000..56bbb6e Binary files /dev/null and b/master-pocl-master-llvm/atax_kernel1.pdf differ diff --git a/master-pocl-master-llvm/atax_kernel2.pdf b/master-pocl-master-llvm/atax_kernel2.pdf new file mode 100644 index 0000000..a890b97 Binary files /dev/null and b/master-pocl-master-llvm/atax_kernel2.pdf differ diff --git a/master-pocl-master-llvm/bicg_kernel1.pdf b/master-pocl-master-llvm/bicg_kernel1.pdf new file mode 100644 index 0000000..3a2f5b2 Binary files /dev/null and b/master-pocl-master-llvm/bicg_kernel1.pdf differ diff --git a/master-pocl-master-llvm/bicg_kernel2.pdf b/master-pocl-master-llvm/bicg_kernel2.pdf new file mode 100644 index 0000000..18e778c Binary files /dev/null and b/master-pocl-master-llvm/bicg_kernel2.pdf differ diff --git a/master-pocl-master-llvm/correlation_corr.pdf b/master-pocl-master-llvm/correlation_corr.pdf new file mode 100644 index 0000000..e8aab27 Binary files /dev/null and b/master-pocl-master-llvm/correlation_corr.pdf differ diff --git a/master-pocl-master-llvm/correlation_mean.pdf b/master-pocl-master-llvm/correlation_mean.pdf new file mode 100644 index 0000000..16356e9 Binary files /dev/null and b/master-pocl-master-llvm/correlation_mean.pdf differ diff --git a/master-pocl-master-llvm/correlation_reduce.pdf b/master-pocl-master-llvm/correlation_reduce.pdf new file mode 100644 index 0000000..0b78eac Binary files /dev/null and b/master-pocl-master-llvm/correlation_reduce.pdf differ diff --git a/master-pocl-master-llvm/correlation_std.pdf b/master-pocl-master-llvm/correlation_std.pdf new file mode 100644 index 0000000..f55bd26 Binary files /dev/null and b/master-pocl-master-llvm/correlation_std.pdf differ diff --git a/master-pocl-master-llvm/covariance_covar.pdf b/master-pocl-master-llvm/covariance_covar.pdf new file mode 100644 index 0000000..59a6c38 Binary files /dev/null and b/master-pocl-master-llvm/covariance_covar.pdf differ diff --git a/master-pocl-master-llvm/covariance_mean.pdf b/master-pocl-master-llvm/covariance_mean.pdf new file mode 100644 index 0000000..d898fc9 Binary files /dev/null and b/master-pocl-master-llvm/covariance_mean.pdf differ diff --git a/master-pocl-master-llvm/covariance_reduce.pdf b/master-pocl-master-llvm/covariance_reduce.pdf new file mode 100644 index 0000000..b6a4488 Binary files /dev/null and b/master-pocl-master-llvm/covariance_reduce.pdf differ diff --git a/master-pocl-master-llvm/doitgen_kernel1.pdf b/master-pocl-master-llvm/doitgen_kernel1.pdf new file mode 100644 index 0000000..58a915d Binary files /dev/null and b/master-pocl-master-llvm/doitgen_kernel1.pdf differ diff --git a/master-pocl-master-llvm/doitgen_kernel2.pdf b/master-pocl-master-llvm/doitgen_kernel2.pdf new file mode 100644 index 0000000..0c1e142 Binary files /dev/null and b/master-pocl-master-llvm/doitgen_kernel2.pdf differ diff --git a/master-pocl-master-llvm/fdtd2d_kernel1.pdf b/master-pocl-master-llvm/fdtd2d_kernel1.pdf new file mode 100644 index 0000000..a9f96c6 Binary files /dev/null and b/master-pocl-master-llvm/fdtd2d_kernel1.pdf differ diff --git a/master-pocl-master-llvm/fdtd2d_kernel2.pdf b/master-pocl-master-llvm/fdtd2d_kernel2.pdf new file mode 100644 index 0000000..2b34203 Binary files /dev/null and b/master-pocl-master-llvm/fdtd2d_kernel2.pdf differ diff --git a/master-pocl-master-llvm/fdtd2d_kernel3.pdf b/master-pocl-master-llvm/fdtd2d_kernel3.pdf new file mode 100644 index 0000000..6679ec1 Binary files /dev/null and b/master-pocl-master-llvm/fdtd2d_kernel3.pdf differ diff --git a/master-pocl-master-llvm/gemm.pdf b/master-pocl-master-llvm/gemm.pdf new file mode 100644 index 0000000..007ee1e Binary files /dev/null and b/master-pocl-master-llvm/gemm.pdf differ diff --git a/master-pocl-master-llvm/gemver_kernel1.pdf b/master-pocl-master-llvm/gemver_kernel1.pdf new file mode 100644 index 0000000..5b761dc Binary files /dev/null and b/master-pocl-master-llvm/gemver_kernel1.pdf differ diff --git a/master-pocl-master-llvm/gemver_kernel2.pdf b/master-pocl-master-llvm/gemver_kernel2.pdf new file mode 100644 index 0000000..22afe28 Binary files /dev/null and b/master-pocl-master-llvm/gemver_kernel2.pdf differ diff --git a/master-pocl-master-llvm/gemver_kernel3.pdf b/master-pocl-master-llvm/gemver_kernel3.pdf new file mode 100644 index 0000000..190b3d5 Binary files /dev/null and b/master-pocl-master-llvm/gemver_kernel3.pdf differ diff --git a/master-pocl-master-llvm/gesummv.pdf b/master-pocl-master-llvm/gesummv.pdf new file mode 100644 index 0000000..62b1d8e Binary files /dev/null and b/master-pocl-master-llvm/gesummv.pdf differ diff --git a/master-pocl-master-llvm/gramschmidt_kernel1.pdf b/master-pocl-master-llvm/gramschmidt_kernel1.pdf new file mode 100644 index 0000000..32fec42 Binary files /dev/null and b/master-pocl-master-llvm/gramschmidt_kernel1.pdf differ diff --git a/master-pocl-master-llvm/gramschmidt_kernel2.pdf b/master-pocl-master-llvm/gramschmidt_kernel2.pdf new file mode 100644 index 0000000..b6fa2b3 Binary files /dev/null and b/master-pocl-master-llvm/gramschmidt_kernel2.pdf differ diff --git a/master-pocl-master-llvm/gramschmidt_kernel3.pdf b/master-pocl-master-llvm/gramschmidt_kernel3.pdf new file mode 100644 index 0000000..fa8a23f Binary files /dev/null and b/master-pocl-master-llvm/gramschmidt_kernel3.pdf differ diff --git a/master-pocl-master-llvm/jacobi1D_kernel1.pdf b/master-pocl-master-llvm/jacobi1D_kernel1.pdf new file mode 100644 index 0000000..545ee78 Binary files /dev/null and b/master-pocl-master-llvm/jacobi1D_kernel1.pdf differ diff --git a/master-pocl-master-llvm/jacobi1D_kernel2.pdf b/master-pocl-master-llvm/jacobi1D_kernel2.pdf new file mode 100644 index 0000000..e515f03 Binary files /dev/null and b/master-pocl-master-llvm/jacobi1D_kernel2.pdf differ diff --git a/master-pocl-master-llvm/jacobi2D_kernel1.pdf b/master-pocl-master-llvm/jacobi2D_kernel1.pdf new file mode 100644 index 0000000..83597a1 Binary files /dev/null and b/master-pocl-master-llvm/jacobi2D_kernel1.pdf differ diff --git a/master-pocl-master-llvm/jacobi2D_kernel2.pdf b/master-pocl-master-llvm/jacobi2D_kernel2.pdf new file mode 100644 index 0000000..11bdb53 Binary files /dev/null and b/master-pocl-master-llvm/jacobi2D_kernel2.pdf differ diff --git a/master-pocl-master-llvm/lu_kernel1.pdf b/master-pocl-master-llvm/lu_kernel1.pdf new file mode 100644 index 0000000..1096086 Binary files /dev/null and b/master-pocl-master-llvm/lu_kernel1.pdf differ diff --git a/master-pocl-master-llvm/lu_kernel2.pdf b/master-pocl-master-llvm/lu_kernel2.pdf new file mode 100644 index 0000000..5af44c9 Binary files /dev/null and b/master-pocl-master-llvm/lu_kernel2.pdf differ diff --git a/master-pocl-master-llvm/mvt_kernel1.pdf b/master-pocl-master-llvm/mvt_kernel1.pdf new file mode 100644 index 0000000..e5c86f7 Binary files /dev/null and b/master-pocl-master-llvm/mvt_kernel1.pdf differ diff --git a/master-pocl-master-llvm/mvt_kernel2.pdf b/master-pocl-master-llvm/mvt_kernel2.pdf new file mode 100644 index 0000000..0aaa10b Binary files /dev/null and b/master-pocl-master-llvm/mvt_kernel2.pdf differ diff --git a/master-pocl-master-llvm/syr2k.pdf b/master-pocl-master-llvm/syr2k.pdf new file mode 100644 index 0000000..995b47c Binary files /dev/null and b/master-pocl-master-llvm/syr2k.pdf differ diff --git a/master-pocl-master-llvm/syrk.pdf b/master-pocl-master-llvm/syrk.pdf new file mode 100644 index 0000000..32a6c75 Binary files /dev/null and b/master-pocl-master-llvm/syrk.pdf differ diff --git a/no-unroll-cfgs/2DConvolution.pdf b/no-unroll-cfgs/2DConvolution.pdf deleted file mode 100644 index 474378e..0000000 Binary files a/no-unroll-cfgs/2DConvolution.pdf and /dev/null differ diff --git a/no-unroll-cfgs/2mm_kernel1.pdf b/no-unroll-cfgs/2mm_kernel1.pdf deleted file mode 100644 index ff9c150..0000000 Binary files a/no-unroll-cfgs/2mm_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/2mm_kernel2.pdf b/no-unroll-cfgs/2mm_kernel2.pdf deleted file mode 100644 index 43a787d..0000000 Binary files a/no-unroll-cfgs/2mm_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/3DConvolution.pdf b/no-unroll-cfgs/3DConvolution.pdf deleted file mode 100644 index 0d29dd6..0000000 Binary files a/no-unroll-cfgs/3DConvolution.pdf and /dev/null differ diff --git a/no-unroll-cfgs/3mm_kernel1.pdf b/no-unroll-cfgs/3mm_kernel1.pdf deleted file mode 100644 index 706fce7..0000000 Binary files a/no-unroll-cfgs/3mm_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/3mm_kernel2.pdf b/no-unroll-cfgs/3mm_kernel2.pdf deleted file mode 100644 index b34329a..0000000 Binary files a/no-unroll-cfgs/3mm_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/3mm_kernel3.pdf b/no-unroll-cfgs/3mm_kernel3.pdf deleted file mode 100644 index 6aec29f..0000000 Binary files a/no-unroll-cfgs/3mm_kernel3.pdf and /dev/null differ diff --git a/no-unroll-cfgs/adi_kernel1.pdf b/no-unroll-cfgs/adi_kernel1.pdf deleted file mode 100644 index f4418da..0000000 Binary files a/no-unroll-cfgs/adi_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/adi_kernel2.pdf b/no-unroll-cfgs/adi_kernel2.pdf deleted file mode 100644 index f1f9d8d..0000000 Binary files a/no-unroll-cfgs/adi_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/adi_kernel3.pdf b/no-unroll-cfgs/adi_kernel3.pdf deleted file mode 100644 index 02ccf92..0000000 Binary files a/no-unroll-cfgs/adi_kernel3.pdf and /dev/null differ diff --git a/no-unroll-cfgs/adi_kernel4.pdf b/no-unroll-cfgs/adi_kernel4.pdf deleted file mode 100644 index dc264ae..0000000 Binary files a/no-unroll-cfgs/adi_kernel4.pdf and /dev/null differ diff --git a/no-unroll-cfgs/adi_kernel5.pdf b/no-unroll-cfgs/adi_kernel5.pdf deleted file mode 100644 index 5c8c014..0000000 Binary files a/no-unroll-cfgs/adi_kernel5.pdf and /dev/null differ diff --git a/no-unroll-cfgs/adi_kernel6.pdf b/no-unroll-cfgs/adi_kernel6.pdf deleted file mode 100644 index 49405ce..0000000 Binary files a/no-unroll-cfgs/adi_kernel6.pdf and /dev/null differ diff --git a/no-unroll-cfgs/atax_kernel1.pdf b/no-unroll-cfgs/atax_kernel1.pdf deleted file mode 100644 index 7d274ae..0000000 Binary files a/no-unroll-cfgs/atax_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/atax_kernel2.pdf b/no-unroll-cfgs/atax_kernel2.pdf deleted file mode 100644 index 75ba1b9..0000000 Binary files a/no-unroll-cfgs/atax_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/correlation_corr.pdf b/no-unroll-cfgs/correlation_corr.pdf deleted file mode 100644 index ed7e862..0000000 Binary files a/no-unroll-cfgs/correlation_corr.pdf and /dev/null differ diff --git a/no-unroll-cfgs/correlation_mean.pdf b/no-unroll-cfgs/correlation_mean.pdf deleted file mode 100644 index 7d93f55..0000000 Binary files a/no-unroll-cfgs/correlation_mean.pdf and /dev/null differ diff --git a/no-unroll-cfgs/correlation_reduce.pdf b/no-unroll-cfgs/correlation_reduce.pdf deleted file mode 100644 index ce1ef3f..0000000 Binary files a/no-unroll-cfgs/correlation_reduce.pdf and /dev/null differ diff --git a/no-unroll-cfgs/correlation_std.pdf b/no-unroll-cfgs/correlation_std.pdf deleted file mode 100644 index 1848a64..0000000 Binary files a/no-unroll-cfgs/correlation_std.pdf and /dev/null differ diff --git a/no-unroll-cfgs/covariance_covar.pdf b/no-unroll-cfgs/covariance_covar.pdf deleted file mode 100644 index ceec4c7..0000000 Binary files a/no-unroll-cfgs/covariance_covar.pdf and /dev/null differ diff --git a/no-unroll-cfgs/covariance_mean.pdf b/no-unroll-cfgs/covariance_mean.pdf deleted file mode 100644 index e970217..0000000 Binary files a/no-unroll-cfgs/covariance_mean.pdf and /dev/null differ diff --git a/no-unroll-cfgs/covariance_reduce.pdf b/no-unroll-cfgs/covariance_reduce.pdf deleted file mode 100644 index f90cacc..0000000 Binary files a/no-unroll-cfgs/covariance_reduce.pdf and /dev/null differ diff --git a/no-unroll-cfgs/doitgen_kernel1.pdf b/no-unroll-cfgs/doitgen_kernel1.pdf deleted file mode 100644 index d8bdbfa..0000000 Binary files a/no-unroll-cfgs/doitgen_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/doitgen_kernel2.pdf b/no-unroll-cfgs/doitgen_kernel2.pdf deleted file mode 100644 index 26c991d..0000000 Binary files a/no-unroll-cfgs/doitgen_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/fdtd2d_kernel1.pdf b/no-unroll-cfgs/fdtd2d_kernel1.pdf deleted file mode 100644 index 3a03145..0000000 Binary files a/no-unroll-cfgs/fdtd2d_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/fdtd2d_kernel2.pdf b/no-unroll-cfgs/fdtd2d_kernel2.pdf deleted file mode 100644 index 9457a95..0000000 Binary files a/no-unroll-cfgs/fdtd2d_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/fdtd2d_kernel3.pdf b/no-unroll-cfgs/fdtd2d_kernel3.pdf deleted file mode 100644 index a6feebc..0000000 Binary files a/no-unroll-cfgs/fdtd2d_kernel3.pdf and /dev/null differ diff --git a/no-unroll-cfgs/gemm.pdf b/no-unroll-cfgs/gemm.pdf deleted file mode 100644 index 0f7de68..0000000 Binary files a/no-unroll-cfgs/gemm.pdf and /dev/null differ diff --git a/no-unroll-cfgs/gemver_kernel1.pdf b/no-unroll-cfgs/gemver_kernel1.pdf deleted file mode 100644 index 8c99b5f..0000000 Binary files a/no-unroll-cfgs/gemver_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/gemver_kernel2.pdf b/no-unroll-cfgs/gemver_kernel2.pdf deleted file mode 100644 index b67e4f2..0000000 Binary files a/no-unroll-cfgs/gemver_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/gemver_kernel3.pdf b/no-unroll-cfgs/gemver_kernel3.pdf deleted file mode 100644 index 73f76d7..0000000 Binary files a/no-unroll-cfgs/gemver_kernel3.pdf and /dev/null differ diff --git a/no-unroll-cfgs/gesummv.pdf b/no-unroll-cfgs/gesummv.pdf deleted file mode 100644 index a3ef490..0000000 Binary files a/no-unroll-cfgs/gesummv.pdf and /dev/null differ diff --git a/no-unroll-cfgs/gramschmidt_kernel1.pdf b/no-unroll-cfgs/gramschmidt_kernel1.pdf deleted file mode 100644 index b183dd4..0000000 Binary files a/no-unroll-cfgs/gramschmidt_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/gramschmidt_kernel2.pdf b/no-unroll-cfgs/gramschmidt_kernel2.pdf deleted file mode 100644 index b2ef927..0000000 Binary files a/no-unroll-cfgs/gramschmidt_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/gramschmidt_kernel3.pdf b/no-unroll-cfgs/gramschmidt_kernel3.pdf deleted file mode 100644 index ea90e52..0000000 Binary files a/no-unroll-cfgs/gramschmidt_kernel3.pdf and /dev/null differ diff --git a/no-unroll-cfgs/jacobi1D_kernel1.pdf b/no-unroll-cfgs/jacobi1D_kernel1.pdf deleted file mode 100644 index 7366c2f..0000000 Binary files a/no-unroll-cfgs/jacobi1D_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/jacobi1D_kernel2.pdf b/no-unroll-cfgs/jacobi1D_kernel2.pdf deleted file mode 100644 index bf9d611..0000000 Binary files a/no-unroll-cfgs/jacobi1D_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/jacobi2D_kernel1.pdf b/no-unroll-cfgs/jacobi2D_kernel1.pdf deleted file mode 100644 index f717adf..0000000 Binary files a/no-unroll-cfgs/jacobi2D_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/jacobi2D_kernel2.pdf b/no-unroll-cfgs/jacobi2D_kernel2.pdf deleted file mode 100644 index f2bb547..0000000 Binary files a/no-unroll-cfgs/jacobi2D_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/lu_kernel1.pdf b/no-unroll-cfgs/lu_kernel1.pdf deleted file mode 100644 index 27708a4..0000000 Binary files a/no-unroll-cfgs/lu_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/lu_kernel2.pdf b/no-unroll-cfgs/lu_kernel2.pdf deleted file mode 100644 index d265422..0000000 Binary files a/no-unroll-cfgs/lu_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/mvt_kernel1.pdf b/no-unroll-cfgs/mvt_kernel1.pdf deleted file mode 100644 index 600a1c3..0000000 Binary files a/no-unroll-cfgs/mvt_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/mvt_kernel2.pdf b/no-unroll-cfgs/mvt_kernel2.pdf deleted file mode 100644 index 38e396a..0000000 Binary files a/no-unroll-cfgs/mvt_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/syr2k.pdf b/no-unroll-cfgs/syr2k.pdf deleted file mode 100644 index 0390458..0000000 Binary files a/no-unroll-cfgs/syr2k.pdf and /dev/null differ diff --git a/no-unroll-cfgs/syr2k_kernel1.pdf b/no-unroll-cfgs/syr2k_kernel1.pdf deleted file mode 100644 index 54a0d54..0000000 Binary files a/no-unroll-cfgs/syr2k_kernel1.pdf and /dev/null differ diff --git a/no-unroll-cfgs/syr2k_kernel2.pdf b/no-unroll-cfgs/syr2k_kernel2.pdf deleted file mode 100644 index 307f362..0000000 Binary files a/no-unroll-cfgs/syr2k_kernel2.pdf and /dev/null differ diff --git a/no-unroll-cfgs/syrk.pdf b/no-unroll-cfgs/syrk.pdf deleted file mode 100644 index f762598..0000000 Binary files a/no-unroll-cfgs/syrk.pdf and /dev/null differ diff --git a/pocl_irs/2DConvolution.ll b/pocl_irs/2DConvolution.ll deleted file mode 100644 index 4175ea9..0000000 --- a/pocl_irs/2DConvolution.ll +++ /dev/null @@ -1,378 +0,0 @@ -; ModuleID = './EO/HGGEHAFHLIKHOAJGBNGCHLPIOLBMKCAFLHJGK/Convolution2D_kernel/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_Convolution2D_kernel(float* nocapture readonly %0, float* nocapture %1, i32 %2, i32 %3, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %4, i64 %5, i64 %6, i64 %7) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %5, 5 - %mul3.i.i = shl i64 %6, 3 - %sub.i = add nsw i32 %2, -1 - %sub4.i = add nsw i32 %3, -1 - br label %pregion_for_entry.pregion_for_init.i - -pregion_for_entry.pregion_for_init.i: ; preds = %pregion_for_end.i, %8 - %_local_id_y.0 = phi i64 [ 0, %8 ], [ %27, %pregion_for_end.i ] - %add6.i.i = add nuw nsw i64 %_local_id_y.0, %mul3.i.i - %conv2.i = trunc i64 %add6.i.i to i32 - %cmp.i = icmp sgt i32 %sub.i, %conv2.i - %cmp8.i = icmp sgt i32 %conv2.i, 0 - %sub13.i = add nsw i32 %conv2.i, -1 - %mul.i = mul nsw i32 %sub13.i, %3 - %mul31.i = mul nsw i32 %conv2.i, %3 - %add51.i = add nuw nsw i32 %conv2.i, 1 - %mul52.i = mul nsw i32 %add51.i, %3 - br i1 %cmp.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_end.i - -pregion_for_entry.entry.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.us: ; preds = %if.end.i.us, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %26, %if.end.i.us ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp5.i.us = icmp sgt i32 %sub4.i, %conv.i.us - %or.cond.i.us = and i1 %cmp8.i, %cmp5.i.us - %cmp11.i.us = icmp sgt i32 %conv.i.us, 0 - %or.cond76.i.us = and i1 %cmp11.i.us, %or.cond.i.us - br i1 %or.cond76.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %sub14.i.us = add nsw i32 %conv.i.us, -1 - %add.i.us = add nsw i32 %sub14.i.us, %mul.i - %idxprom.i.us = sext i32 %add.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us - %9 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %add19.i.us = add nsw i32 %mul.i, %conv.i.us - %idxprom20.i.us = sext i32 %add19.i.us to i64 - %arrayidx21.i.us = getelementptr inbounds float, float* %0, i64 %idxprom20.i.us - %10 = load float, float* %arrayidx21.i.us, align 4, !tbaa !12 - %mul22.i.us = fmul float %10, 5.000000e-01 - %11 = tail call float @llvm.fmuladd.f32(float %9, float 0x3FC99999A0000000, float %mul22.i.us) #3 - %add25.i.us = add nuw nsw i32 %conv.i.us, 1 - %add26.i.us = add nsw i32 %add25.i.us, %mul.i - %idxprom27.i.us = sext i32 %add26.i.us to i64 - %arrayidx28.i.us = getelementptr inbounds float, float* %0, i64 %idxprom27.i.us - %12 = load float, float* %arrayidx28.i.us, align 4, !tbaa !12 - %13 = tail call float @llvm.fmuladd.f32(float %12, float 0xBFE99999A0000000, float %11) #3 - %add33.i.us = add nsw i32 %sub14.i.us, %mul31.i - %idxprom34.i.us = sext i32 %add33.i.us to i64 - %arrayidx35.i.us = getelementptr inbounds float, float* %0, i64 %idxprom34.i.us - %14 = load float, float* %arrayidx35.i.us, align 4, !tbaa !12 - %15 = tail call float @llvm.fmuladd.f32(float %14, float 0xBFD3333340000000, float %13) #3 - %add40.i.us = add nsw i32 %mul31.i, %conv.i.us - %idxprom41.i.us = sext i32 %add40.i.us to i64 - %arrayidx42.i.us = getelementptr inbounds float, float* %0, i64 %idxprom41.i.us - %16 = load float, float* %arrayidx42.i.us, align 4, !tbaa !12 - %17 = tail call float @llvm.fmuladd.f32(float %16, float 0x3FE3333340000000, float %15) #3 - %add47.i.us = add nsw i32 %add25.i.us, %mul31.i - %idxprom48.i.us = sext i32 %add47.i.us to i64 - %arrayidx49.i.us = getelementptr inbounds float, float* %0, i64 %idxprom48.i.us - %18 = load float, float* %arrayidx49.i.us, align 4, !tbaa !12 - %19 = tail call float @llvm.fmuladd.f32(float %18, float 0xBFECCCCCC0000000, float %17) #3 - %add54.i.us = add nsw i32 %sub14.i.us, %mul52.i - %idxprom55.i.us = sext i32 %add54.i.us to i64 - %arrayidx56.i.us = getelementptr inbounds float, float* %0, i64 %idxprom55.i.us - %20 = load float, float* %arrayidx56.i.us, align 4, !tbaa !12 - %21 = tail call float @llvm.fmuladd.f32(float %20, float 0x3FD99999A0000000, float %19) #3 - %add61.i.us = add nsw i32 %mul52.i, %conv.i.us - %idxprom62.i.us = sext i32 %add61.i.us to i64 - %arrayidx63.i.us = getelementptr inbounds float, float* %0, i64 %idxprom62.i.us - %22 = load float, float* %arrayidx63.i.us, align 4, !tbaa !12 - %23 = tail call float @llvm.fmuladd.f32(float %22, float 0x3FE6666660000000, float %21) #3 - %add68.i.us = add nsw i32 %add25.i.us, %mul52.i - %idxprom69.i.us = sext i32 %add68.i.us to i64 - %arrayidx70.i.us = getelementptr inbounds float, float* %0, i64 %idxprom69.i.us - %24 = load float, float* %arrayidx70.i.us, align 4, !tbaa !12 - %25 = tail call float @llvm.fmuladd.f32(float %24, float 0x3FB99999A0000000, float %23) #3 - %arrayidx75.i.us = getelementptr inbounds float, float* %1, i64 %idxprom41.i.us - store float %25, float* %arrayidx75.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us - -if.end.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us - %26 = add nuw nsw i64 %_local_id_x.0.us, 1 - %exitcond.not = icmp eq i64 %26, 32 - br i1 %exitcond.not, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !19 - -pregion_for_end.i.loopexit: ; preds = %if.end.i.us - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %pregion_for_entry.pregion_for_init.i - %27 = add nuw nsw i64 %_local_id_y.0, 1 - %exitcond2.not = icmp eq i64 %27, 8 - br i1 %exitcond2.not, label %Convolution2D_kernel.exit, label %pregion_for_entry.pregion_for_init.i, !llvm.loop !21 - -Convolution2D_kernel.exit: ; preds = %pregion_for_end.i - ret void -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_Convolution2D_kernel_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %sub.i.i = add nsw i32 %16, -1 - %sub4.i.i = add nsw i32 %20, -1 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %39, %pregion_for_end.i.i ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %sub.i.i, %conv2.i.i - %cmp8.i.i = icmp sgt i32 %conv2.i.i, 0 - %sub13.i.i = add nsw i32 %conv2.i.i, -1 - %mul.i.i = mul nsw i32 %sub13.i.i, %20 - %mul31.i.i = mul nsw i32 %20, %conv2.i.i - %add51.i.i = add nuw nsw i32 %conv2.i.i, 1 - %mul52.i.i = mul nsw i32 %add51.i.i, %20 - br i1 %cmp.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %38, %if.end.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp5.i.i.us = icmp sgt i32 %sub4.i.i, %conv.i.i.us - %or.cond.i.i.us = and i1 %cmp8.i.i, %cmp5.i.i.us - %cmp11.i.i.us = icmp sgt i32 %conv.i.i.us, 0 - %or.cond76.i.i.us = and i1 %cmp11.i.i.us, %or.cond.i.i.us - br i1 %or.cond76.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sub14.i.i.us = add nsw i32 %conv.i.i.us, -1 - %add.i.i.us = add nsw i32 %sub14.i.i.us, %mul.i.i - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us - %21 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %add19.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom20.i.i.us = sext i32 %add19.i.i.us to i64 - %arrayidx21.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom20.i.i.us - %22 = load float, float* %arrayidx21.i.i.us, align 4, !tbaa !12 - %mul22.i.i.us = fmul float %22, 5.000000e-01 - %23 = tail call float @llvm.fmuladd.f32(float %21, float 0x3FC99999A0000000, float %mul22.i.i.us) #3 - %add25.i.i.us = add nuw nsw i32 %conv.i.i.us, 1 - %add26.i.i.us = add nsw i32 %add25.i.i.us, %mul.i.i - %idxprom27.i.i.us = sext i32 %add26.i.i.us to i64 - %arrayidx28.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom27.i.i.us - %24 = load float, float* %arrayidx28.i.i.us, align 4, !tbaa !12 - %25 = tail call float @llvm.fmuladd.f32(float %24, float 0xBFE99999A0000000, float %23) #3 - %add33.i.i.us = add nsw i32 %sub14.i.i.us, %mul31.i.i - %idxprom34.i.i.us = sext i32 %add33.i.i.us to i64 - %arrayidx35.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom34.i.i.us - %26 = load float, float* %arrayidx35.i.i.us, align 4, !tbaa !12 - %27 = tail call float @llvm.fmuladd.f32(float %26, float 0xBFD3333340000000, float %25) #3 - %add40.i.i.us = add nsw i32 %mul31.i.i, %conv.i.i.us - %idxprom41.i.i.us = sext i32 %add40.i.i.us to i64 - %arrayidx42.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom41.i.i.us - %28 = load float, float* %arrayidx42.i.i.us, align 4, !tbaa !12 - %29 = tail call float @llvm.fmuladd.f32(float %28, float 0x3FE3333340000000, float %27) #3 - %add47.i.i.us = add nsw i32 %add25.i.i.us, %mul31.i.i - %idxprom48.i.i.us = sext i32 %add47.i.i.us to i64 - %arrayidx49.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom48.i.i.us - %30 = load float, float* %arrayidx49.i.i.us, align 4, !tbaa !12 - %31 = tail call float @llvm.fmuladd.f32(float %30, float 0xBFECCCCCC0000000, float %29) #3 - %add54.i.i.us = add nsw i32 %sub14.i.i.us, %mul52.i.i - %idxprom55.i.i.us = sext i32 %add54.i.i.us to i64 - %arrayidx56.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom55.i.i.us - %32 = load float, float* %arrayidx56.i.i.us, align 4, !tbaa !12 - %33 = tail call float @llvm.fmuladd.f32(float %32, float 0x3FD99999A0000000, float %31) #3 - %add61.i.i.us = add nsw i32 %mul52.i.i, %conv.i.i.us - %idxprom62.i.i.us = sext i32 %add61.i.i.us to i64 - %arrayidx63.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom62.i.i.us - %34 = load float, float* %arrayidx63.i.i.us, align 4, !tbaa !12 - %35 = tail call float @llvm.fmuladd.f32(float %34, float 0x3FE6666660000000, float %33) #3 - %add68.i.i.us = add nsw i32 %add25.i.i.us, %mul52.i.i - %idxprom69.i.i.us = sext i32 %add68.i.i.us to i64 - %arrayidx70.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom69.i.i.us - %36 = load float, float* %arrayidx70.i.i.us, align 4, !tbaa !12 - %37 = tail call float @llvm.fmuladd.f32(float %36, float 0x3FB99999A0000000, float %35) #3 - %arrayidx75.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom41.i.i.us - store float %37, float* %arrayidx75.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %38 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %38, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !19 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %pregion_for_entry.pregion_for_init.i.i - %39 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond2.not = icmp eq i64 %39, 8 - br i1 %exitcond2.not, label %_pocl_kernel_Convolution2D_kernel.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_Convolution2D_kernel.exit: ; preds = %pregion_for_end.i.i - ret void -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_Convolution2D_kernel_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to i32** - %13 = load i32*, i32** %12, align 8 - %14 = load i32, i32* %13, align 4 - %15 = getelementptr i8*, i8** %0, i64 3 - %16 = bitcast i8** %15 to i32** - %17 = load i32*, i32** %16, align 8 - %18 = load i32, i32* %17, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %sub.i.i = add nsw i32 %14, -1 - %sub4.i.i = add nsw i32 %18, -1 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %37, %pregion_for_end.i.i ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %sub.i.i, %conv2.i.i - %cmp8.i.i = icmp sgt i32 %conv2.i.i, 0 - %sub13.i.i = add nsw i32 %conv2.i.i, -1 - %mul.i.i = mul nsw i32 %sub13.i.i, %18 - %mul31.i.i = mul nsw i32 %18, %conv2.i.i - %add51.i.i = add nuw nsw i32 %conv2.i.i, 1 - %mul52.i.i = mul nsw i32 %add51.i.i, %18 - br i1 %cmp.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %36, %if.end.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp5.i.i.us = icmp sgt i32 %sub4.i.i, %conv.i.i.us - %or.cond.i.i.us = and i1 %cmp8.i.i, %cmp5.i.i.us - %cmp11.i.i.us = icmp sgt i32 %conv.i.i.us, 0 - %or.cond76.i.i.us = and i1 %cmp11.i.i.us, %or.cond.i.i.us - br i1 %or.cond76.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sub14.i.i.us = add nsw i32 %conv.i.i.us, -1 - %add.i.i.us = add nsw i32 %sub14.i.i.us, %mul.i.i - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us - %19 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %add19.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom20.i.i.us = sext i32 %add19.i.i.us to i64 - %arrayidx21.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom20.i.i.us - %20 = load float, float* %arrayidx21.i.i.us, align 4, !tbaa !12 - %mul22.i.i.us = fmul float %20, 5.000000e-01 - %21 = tail call float @llvm.fmuladd.f32(float %19, float 0x3FC99999A0000000, float %mul22.i.i.us) #3 - %add25.i.i.us = add nuw nsw i32 %conv.i.i.us, 1 - %add26.i.i.us = add nsw i32 %add25.i.i.us, %mul.i.i - %idxprom27.i.i.us = sext i32 %add26.i.i.us to i64 - %arrayidx28.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom27.i.i.us - %22 = load float, float* %arrayidx28.i.i.us, align 4, !tbaa !12 - %23 = tail call float @llvm.fmuladd.f32(float %22, float 0xBFE99999A0000000, float %21) #3 - %add33.i.i.us = add nsw i32 %sub14.i.i.us, %mul31.i.i - %idxprom34.i.i.us = sext i32 %add33.i.i.us to i64 - %arrayidx35.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom34.i.i.us - %24 = load float, float* %arrayidx35.i.i.us, align 4, !tbaa !12 - %25 = tail call float @llvm.fmuladd.f32(float %24, float 0xBFD3333340000000, float %23) #3 - %add40.i.i.us = add nsw i32 %mul31.i.i, %conv.i.i.us - %idxprom41.i.i.us = sext i32 %add40.i.i.us to i64 - %arrayidx42.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom41.i.i.us - %26 = load float, float* %arrayidx42.i.i.us, align 4, !tbaa !12 - %27 = tail call float @llvm.fmuladd.f32(float %26, float 0x3FE3333340000000, float %25) #3 - %add47.i.i.us = add nsw i32 %add25.i.i.us, %mul31.i.i - %idxprom48.i.i.us = sext i32 %add47.i.i.us to i64 - %arrayidx49.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom48.i.i.us - %28 = load float, float* %arrayidx49.i.i.us, align 4, !tbaa !12 - %29 = tail call float @llvm.fmuladd.f32(float %28, float 0xBFECCCCCC0000000, float %27) #3 - %add54.i.i.us = add nsw i32 %sub14.i.i.us, %mul52.i.i - %idxprom55.i.i.us = sext i32 %add54.i.i.us to i64 - %arrayidx56.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom55.i.i.us - %30 = load float, float* %arrayidx56.i.i.us, align 4, !tbaa !12 - %31 = tail call float @llvm.fmuladd.f32(float %30, float 0x3FD99999A0000000, float %29) #3 - %add61.i.i.us = add nsw i32 %mul52.i.i, %conv.i.i.us - %idxprom62.i.i.us = sext i32 %add61.i.i.us to i64 - %arrayidx63.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom62.i.i.us - %32 = load float, float* %arrayidx63.i.i.us, align 4, !tbaa !12 - %33 = tail call float @llvm.fmuladd.f32(float %32, float 0x3FE6666660000000, float %31) #3 - %add68.i.i.us = add nsw i32 %add25.i.i.us, %mul52.i.i - %idxprom69.i.i.us = sext i32 %add68.i.i.us to i64 - %arrayidx70.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom69.i.i.us - %34 = load float, float* %arrayidx70.i.i.us, align 4, !tbaa !12 - %35 = tail call float @llvm.fmuladd.f32(float %34, float 0x3FB99999A0000000, float %33) #3 - %arrayidx75.i.i.us = getelementptr inbounds float, float* %10, i64 %idxprom41.i.i.us - store float %35, float* %arrayidx75.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %36 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %36, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !19 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %pregion_for_entry.pregion_for_init.i.i - %37 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond2.not = icmp eq i64 %37, 8 - br i1 %exitcond2.not, label %_pocl_kernel_Convolution2D_kernel.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_Convolution2D_kernel.exit: ; preds = %pregion_for_end.i.i - ret void -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nofree nounwind } -attributes #3 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"float*", !"int", !"int"} -!9 = !{!"", !"", !"", !""} -!10 = !{!"A", !"B", !"ni", !"nj"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17, !18} -!17 = distinct !{} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.parallel_accesses", !17} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !18} diff --git a/pocl_irs/2mm_kernel1.ll b/pocl_irs/2mm_kernel1.ll deleted file mode 100644 index b66f230..0000000 --- a/pocl_irs/2mm_kernel1.ll +++ /dev/null @@ -1,3639 +0,0 @@ -; ModuleID = './AE/OAMONJBAJKCKPFEIFGGGJLBIMMFAJGMJFHHDO/mm2_kernel1/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_mm2_kernel1(float* nocapture %0, float* nocapture readonly %1, float* nocapture readonly %2, i32 %3, i32 %4, i32 %5, i32 %6, float %7, float %8, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %9, i64 %10, i64 %11, i64 %12) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %10, 5 - %mul3.i.i = shl i64 %11, 3 - %cmp639.i = icmp sgt i32 %5, 0 - %14 = sext i32 %4 to i64 - %wide.trip.count.i = zext i32 %5 to i64 - br i1 %cmp639.i, label %pregion_for_entry.pregion_for_init.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.preheader - -pregion_for_entry.pregion_for_init.i.preheader: ; preds = %13 - %conv.i.us = trunc i64 %mul.i.i to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %4 - %15 = trunc i64 %mul.i.i to i32 - %conv.i.us.1 = or i32 %15, 1 - %cmp4.i.us.1 = icmp slt i32 %conv.i.us.1, %4 - %16 = trunc i64 %mul.i.i to i32 - %conv.i.us.2 = or i32 %16, 2 - %cmp4.i.us.2 = icmp slt i32 %conv.i.us.2, %4 - %17 = trunc i64 %mul.i.i to i32 - %conv.i.us.3 = or i32 %17, 3 - %cmp4.i.us.3 = icmp slt i32 %conv.i.us.3, %4 - %18 = trunc i64 %mul.i.i to i32 - %conv.i.us.4 = or i32 %18, 4 - %cmp4.i.us.4 = icmp slt i32 %conv.i.us.4, %4 - %19 = trunc i64 %mul.i.i to i32 - %conv.i.us.5 = or i32 %19, 5 - %cmp4.i.us.5 = icmp slt i32 %conv.i.us.5, %4 - %20 = trunc i64 %mul.i.i to i32 - %conv.i.us.6 = or i32 %20, 6 - %cmp4.i.us.6 = icmp slt i32 %conv.i.us.6, %4 - %21 = trunc i64 %mul.i.i to i32 - %conv.i.us.7 = or i32 %21, 7 - %cmp4.i.us.7 = icmp slt i32 %conv.i.us.7, %4 - %22 = trunc i64 %mul.i.i to i32 - %conv.i.us.8 = or i32 %22, 8 - %cmp4.i.us.8 = icmp slt i32 %conv.i.us.8, %4 - %23 = trunc i64 %mul.i.i to i32 - %conv.i.us.9 = or i32 %23, 9 - %cmp4.i.us.9 = icmp slt i32 %conv.i.us.9, %4 - %24 = trunc i64 %mul.i.i to i32 - %conv.i.us.10 = or i32 %24, 10 - %cmp4.i.us.10 = icmp slt i32 %conv.i.us.10, %4 - %25 = trunc i64 %mul.i.i to i32 - %conv.i.us.11 = or i32 %25, 11 - %cmp4.i.us.11 = icmp slt i32 %conv.i.us.11, %4 - %26 = trunc i64 %mul.i.i to i32 - %conv.i.us.12 = or i32 %26, 12 - %cmp4.i.us.12 = icmp slt i32 %conv.i.us.12, %4 - %27 = trunc i64 %mul.i.i to i32 - %conv.i.us.13 = or i32 %27, 13 - %cmp4.i.us.13 = icmp slt i32 %conv.i.us.13, %4 - %28 = trunc i64 %mul.i.i to i32 - %conv.i.us.14 = or i32 %28, 14 - %cmp4.i.us.14 = icmp slt i32 %conv.i.us.14, %4 - %29 = trunc i64 %mul.i.i to i32 - %conv.i.us.15 = or i32 %29, 15 - %cmp4.i.us.15 = icmp slt i32 %conv.i.us.15, %4 - %30 = trunc i64 %mul.i.i to i32 - %conv.i.us.16 = or i32 %30, 16 - %cmp4.i.us.16 = icmp slt i32 %conv.i.us.16, %4 - %31 = trunc i64 %mul.i.i to i32 - %conv.i.us.17 = or i32 %31, 17 - %cmp4.i.us.17 = icmp slt i32 %conv.i.us.17, %4 - %32 = trunc i64 %mul.i.i to i32 - %conv.i.us.18 = or i32 %32, 18 - %cmp4.i.us.18 = icmp slt i32 %conv.i.us.18, %4 - %33 = trunc i64 %mul.i.i to i32 - %conv.i.us.19 = or i32 %33, 19 - %cmp4.i.us.19 = icmp slt i32 %conv.i.us.19, %4 - %34 = trunc i64 %mul.i.i to i32 - %conv.i.us.20 = or i32 %34, 20 - %cmp4.i.us.20 = icmp slt i32 %conv.i.us.20, %4 - %35 = trunc i64 %mul.i.i to i32 - %conv.i.us.21 = or i32 %35, 21 - %cmp4.i.us.21 = icmp slt i32 %conv.i.us.21, %4 - %36 = trunc i64 %mul.i.i to i32 - %conv.i.us.22 = or i32 %36, 22 - %cmp4.i.us.22 = icmp slt i32 %conv.i.us.22, %4 - %37 = trunc i64 %mul.i.i to i32 - %conv.i.us.23 = or i32 %37, 23 - %cmp4.i.us.23 = icmp slt i32 %conv.i.us.23, %4 - %38 = trunc i64 %mul.i.i to i32 - %conv.i.us.24 = or i32 %38, 24 - %cmp4.i.us.24 = icmp slt i32 %conv.i.us.24, %4 - %39 = trunc i64 %mul.i.i to i32 - %conv.i.us.25 = or i32 %39, 25 - %cmp4.i.us.25 = icmp slt i32 %conv.i.us.25, %4 - %40 = trunc i64 %mul.i.i to i32 - %conv.i.us.26 = or i32 %40, 26 - %cmp4.i.us.26 = icmp slt i32 %conv.i.us.26, %4 - %41 = trunc i64 %mul.i.i to i32 - %conv.i.us.27 = or i32 %41, 27 - %cmp4.i.us.27 = icmp slt i32 %conv.i.us.27, %4 - %42 = trunc i64 %mul.i.i to i32 - %conv.i.us.28 = or i32 %42, 28 - %cmp4.i.us.28 = icmp slt i32 %conv.i.us.28, %4 - %43 = trunc i64 %mul.i.i to i32 - %conv.i.us.29 = or i32 %43, 29 - %cmp4.i.us.29 = icmp slt i32 %conv.i.us.29, %4 - %44 = trunc i64 %mul.i.i to i32 - %conv.i.us.30 = or i32 %44, 30 - %cmp4.i.us.30 = icmp slt i32 %conv.i.us.30, %4 - %45 = trunc i64 %mul.i.i to i32 - %conv.i.us.31 = or i32 %45, 31 - %cmp4.i.us.31 = icmp slt i32 %conv.i.us.31, %4 - br label %pregion_for_entry.pregion_for_init.i - -pregion_for_entry.pregion_for_init.i.us.preheader: ; preds = %13 - %conv2.i.us = trunc i64 %mul3.i.i to i32 - %cmp.i.us = icmp slt i32 %conv2.i.us, %3 - %mul.i.us = mul nsw i32 %conv2.i.us, %4 - %mul8.i.us = mul nsw i32 %conv2.i.us, %5 - %46 = sext i32 %mul8.i.us to i64 - br i1 %cmp.i.us, label %pregion_for_entry.entry.i.us.us.preheader, label %pregion_for_end.i.us - -pregion_for_entry.entry.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.us.preheader - br label %pregion_for_entry.entry.i.us.us - -pregion_for_end.i.us.loopexit: ; preds = %if.end.i.us.us.155 - br label %pregion_for_end.i.us - -pregion_for_end.i.us: ; preds = %pregion_for_end.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.us.preheader - %47 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.1 = or i32 %47, 1 - %cmp.i.us.1 = icmp slt i32 %conv2.i.us.1, %3 - %mul.i.us.1 = mul nsw i32 %conv2.i.us.1, %4 - %mul8.i.us.1 = mul nsw i32 %conv2.i.us.1, %5 - %48 = sext i32 %mul8.i.us.1 to i64 - br i1 %cmp.i.us.1, label %pregion_for_entry.entry.i.us.us.1.preheader, label %pregion_for_end.i.us.1 - -pregion_for_entry.entry.i.us.us.1.preheader: ; preds = %pregion_for_end.i.us - br label %pregion_for_entry.entry.i.us.us.1 - -pregion_for_entry.entry.i.us.us: ; preds = %if.end.i.us.us.155, %pregion_for_entry.entry.i.us.us.preheader - %_local_id_x.0.us.us = phi i64 [ %205, %if.end.i.us.us.155 ], [ 0, %pregion_for_entry.entry.i.us.us.preheader ] - %add1.i.i.us.us = add nuw nsw i64 %_local_id_x.0.us.us, %mul.i.i - %conv.i.us.us = trunc i64 %add1.i.i.us.us to i32 - %cmp4.i.us.us = icmp slt i32 %conv.i.us.us, %4 - br i1 %cmp4.i.us.us, label %if.then.i.us.us, label %if.end.i.us.us - -if.then.i.us.us: ; preds = %pregion_for_entry.entry.i.us.us - %add.i.us.us = add nsw i32 %mul.i.us, %conv.i.us.us - %idxprom.i.us.us = sext i32 %add.i.us.us to i64 - %arrayidx.i.us.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us - store float 0.000000e+00, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us = shl i64 %add1.i.i.us.us, 32 - %49 = ashr exact i64 %sext.i.us.us, 32 - br label %for.body.i.us.us - -if.end.i.us.us.loopexit: ; preds = %for.body.i.us.us - br label %if.end.i.us.us - -if.end.i.us.us: ; preds = %if.end.i.us.us.loopexit, %pregion_for_entry.entry.i.us.us - %50 = or i64 %_local_id_x.0.us.us, 1 - %add1.i.i.us.us.139 = add nuw nsw i64 %50, %mul.i.i - %conv.i.us.us.140 = trunc i64 %add1.i.i.us.us.139 to i32 - %cmp4.i.us.us.141 = icmp slt i32 %conv.i.us.us.140, %4 - br i1 %cmp4.i.us.us.141, label %if.then.i.us.us.147, label %if.end.i.us.us.155 - -for.body.i.us.us: ; preds = %for.body.i.us.us, %if.then.i.us.us - %indvars.iv.next.i3.us.us = phi i64 [ %indvars.iv.next.i.us.us, %for.body.i.us.us ], [ 0, %if.then.i.us.us ] - %51 = phi float [ %57, %for.body.i.us.us ], [ 0.000000e+00, %if.then.i.us.us ] - %52 = add nsw i64 %indvars.iv.next.i3.us.us, %46 - %arrayidx11.i.us.us = getelementptr inbounds float, float* %1, i64 %52 - %53 = load float, float* %arrayidx11.i.us.us, align 4, !tbaa !12 - %mul12.i.us.us = fmul float %53, %7 - %54 = mul nsw i64 %indvars.iv.next.i3.us.us, %14 - %55 = add nsw i64 %54, %49 - %arrayidx16.i.us.us = getelementptr inbounds float, float* %2, i64 %55 - %56 = load float, float* %arrayidx16.i.us.us, align 4, !tbaa !12 - %57 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us, float %56, float %51) #2 - store float %57, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us = add nuw nsw i64 %indvars.iv.next.i3.us.us, 1 - %exitcond.not.i.us.us = icmp eq i64 %indvars.iv.next.i.us.us, %wide.trip.count.i - br i1 %exitcond.not.i.us.us, label %if.end.i.us.us.loopexit, label %for.body.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i: ; preds = %pregion_for_end.i, %pregion_for_entry.pregion_for_init.i.preheader - %_local_id_y.0 = phi i64 [ %58, %pregion_for_end.i ], [ 0, %pregion_for_entry.pregion_for_init.i.preheader ] - %add6.i.i = add nuw nsw i64 %_local_id_y.0, %mul3.i.i - %conv2.i = trunc i64 %add6.i.i to i32 - %cmp.i = icmp slt i32 %conv2.i, %3 - %mul.i = mul nsw i32 %conv2.i, %4 - br i1 %cmp.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_end.i - -pregion_for_entry.entry.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i - br i1 %cmp4.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us.preheader - %add.i.us = add nsw i32 %mul.i, %conv.i.us - %idxprom.i.us = sext i32 %add.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us - store float 0.000000e+00, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us - -if.end.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us.preheader - br i1 %cmp4.i.us.1, label %if.then.i.us.1, label %if.end.i.us.1 - -pregion_for_end.i: ; preds = %if.then.i.us.31, %if.end.i.us.30, %pregion_for_entry.pregion_for_init.i - %58 = add nuw nsw i64 %_local_id_y.0, 1 - %exitcond34.not = icmp eq i64 %58, 8 - br i1 %exitcond34.not, label %mm2_kernel1.exit.loopexit56, label %pregion_for_entry.pregion_for_init.i, !llvm.loop !21 - -mm2_kernel1.exit.loopexit: ; preds = %if.end.i.us.us.7.1 - br label %mm2_kernel1.exit - -mm2_kernel1.exit.loopexit56: ; preds = %pregion_for_end.i - br label %mm2_kernel1.exit - -mm2_kernel1.exit: ; preds = %pregion_for_end.i.us.6, %mm2_kernel1.exit.loopexit56, %mm2_kernel1.exit.loopexit - ret void - -pregion_for_entry.entry.i.us.us.1: ; preds = %if.end.i.us.us.1.1, %pregion_for_entry.entry.i.us.us.1.preheader - %_local_id_x.0.us.us.1 = phi i64 [ %196, %if.end.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.us.us.1.preheader ] - %add1.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.0.us.us.1, %mul.i.i - %conv.i.us.us.1 = trunc i64 %add1.i.i.us.us.1 to i32 - %cmp4.i.us.us.1 = icmp slt i32 %conv.i.us.us.1, %4 - br i1 %cmp4.i.us.us.1, label %if.then.i.us.us.1, label %if.end.i.us.us.1 - -if.then.i.us.us.1: ; preds = %pregion_for_entry.entry.i.us.us.1 - %add.i.us.us.1 = add nsw i32 %mul.i.us.1, %conv.i.us.us.1 - %idxprom.i.us.us.1 = sext i32 %add.i.us.us.1 to i64 - %arrayidx.i.us.us.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.1 = shl i64 %add1.i.i.us.us.1, 32 - %59 = ashr exact i64 %sext.i.us.us.1, 32 - br label %for.body.i.us.us.1 - -for.body.i.us.us.1: ; preds = %for.body.i.us.us.1, %if.then.i.us.us.1 - %indvars.iv.next.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.us.us.1, %for.body.i.us.us.1 ], [ 0, %if.then.i.us.us.1 ] - %60 = phi float [ %66, %for.body.i.us.us.1 ], [ 0.000000e+00, %if.then.i.us.us.1 ] - %61 = add nsw i64 %indvars.iv.next.i3.us.us.1, %48 - %arrayidx11.i.us.us.1 = getelementptr inbounds float, float* %1, i64 %61 - %62 = load float, float* %arrayidx11.i.us.us.1, align 4, !tbaa !12 - %mul12.i.us.us.1 = fmul float %62, %7 - %63 = mul nsw i64 %indvars.iv.next.i3.us.us.1, %14 - %64 = add nsw i64 %63, %59 - %arrayidx16.i.us.us.1 = getelementptr inbounds float, float* %2, i64 %64 - %65 = load float, float* %arrayidx16.i.us.us.1, align 4, !tbaa !12 - %66 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.1, float %65, float %60) #2 - store float %66, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1, 1 - %exitcond.not.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.us.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.1, label %if.end.i.us.us.1.loopexit, label %for.body.i.us.us.1, !llvm.loop !19 - -if.end.i.us.us.1.loopexit: ; preds = %for.body.i.us.us.1 - br label %if.end.i.us.us.1 - -if.end.i.us.us.1: ; preds = %if.end.i.us.us.1.loopexit, %pregion_for_entry.entry.i.us.us.1 - %67 = or i64 %_local_id_x.0.us.us.1, 1 - %add1.i.i.us.us.1.1 = add nuw nsw i64 %67, %mul.i.i - %conv.i.us.us.1.1 = trunc i64 %add1.i.i.us.us.1.1 to i32 - %cmp4.i.us.us.1.1 = icmp slt i32 %conv.i.us.us.1.1, %4 - br i1 %cmp4.i.us.us.1.1, label %if.then.i.us.us.1.1, label %if.end.i.us.us.1.1 - -pregion_for_end.i.us.1.loopexit: ; preds = %if.end.i.us.us.1.1 - br label %pregion_for_end.i.us.1 - -pregion_for_end.i.us.1: ; preds = %pregion_for_end.i.us.1.loopexit, %pregion_for_end.i.us - %68 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.2 = or i32 %68, 2 - %cmp.i.us.2 = icmp slt i32 %conv2.i.us.2, %3 - %mul.i.us.2 = mul nsw i32 %conv2.i.us.2, %4 - %mul8.i.us.2 = mul nsw i32 %conv2.i.us.2, %5 - %69 = sext i32 %mul8.i.us.2 to i64 - br i1 %cmp.i.us.2, label %pregion_for_entry.entry.i.us.us.2.preheader, label %pregion_for_end.i.us.2 - -pregion_for_entry.entry.i.us.us.2.preheader: ; preds = %pregion_for_end.i.us.1 - br label %pregion_for_entry.entry.i.us.us.2 - -pregion_for_entry.entry.i.us.us.2: ; preds = %if.end.i.us.us.2.1, %pregion_for_entry.entry.i.us.us.2.preheader - %_local_id_x.0.us.us.2 = phi i64 [ %187, %if.end.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.us.us.2.preheader ] - %add1.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.0.us.us.2, %mul.i.i - %conv.i.us.us.2 = trunc i64 %add1.i.i.us.us.2 to i32 - %cmp4.i.us.us.2 = icmp slt i32 %conv.i.us.us.2, %4 - br i1 %cmp4.i.us.us.2, label %if.then.i.us.us.2, label %if.end.i.us.us.2 - -if.then.i.us.us.2: ; preds = %pregion_for_entry.entry.i.us.us.2 - %add.i.us.us.2 = add nsw i32 %mul.i.us.2, %conv.i.us.us.2 - %idxprom.i.us.us.2 = sext i32 %add.i.us.us.2 to i64 - %arrayidx.i.us.us.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.2 = shl i64 %add1.i.i.us.us.2, 32 - %70 = ashr exact i64 %sext.i.us.us.2, 32 - br label %for.body.i.us.us.2 - -for.body.i.us.us.2: ; preds = %for.body.i.us.us.2, %if.then.i.us.us.2 - %indvars.iv.next.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.us.us.2, %for.body.i.us.us.2 ], [ 0, %if.then.i.us.us.2 ] - %71 = phi float [ %77, %for.body.i.us.us.2 ], [ 0.000000e+00, %if.then.i.us.us.2 ] - %72 = add nsw i64 %indvars.iv.next.i3.us.us.2, %69 - %arrayidx11.i.us.us.2 = getelementptr inbounds float, float* %1, i64 %72 - %73 = load float, float* %arrayidx11.i.us.us.2, align 4, !tbaa !12 - %mul12.i.us.us.2 = fmul float %73, %7 - %74 = mul nsw i64 %indvars.iv.next.i3.us.us.2, %14 - %75 = add nsw i64 %74, %70 - %arrayidx16.i.us.us.2 = getelementptr inbounds float, float* %2, i64 %75 - %76 = load float, float* %arrayidx16.i.us.us.2, align 4, !tbaa !12 - %77 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.2, float %76, float %71) #2 - store float %77, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2, 1 - %exitcond.not.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.us.us.2, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.2, label %if.end.i.us.us.2.loopexit, label %for.body.i.us.us.2, !llvm.loop !19 - -if.end.i.us.us.2.loopexit: ; preds = %for.body.i.us.us.2 - br label %if.end.i.us.us.2 - -if.end.i.us.us.2: ; preds = %if.end.i.us.us.2.loopexit, %pregion_for_entry.entry.i.us.us.2 - %78 = or i64 %_local_id_x.0.us.us.2, 1 - %add1.i.i.us.us.2.1 = add nuw nsw i64 %78, %mul.i.i - %conv.i.us.us.2.1 = trunc i64 %add1.i.i.us.us.2.1 to i32 - %cmp4.i.us.us.2.1 = icmp slt i32 %conv.i.us.us.2.1, %4 - br i1 %cmp4.i.us.us.2.1, label %if.then.i.us.us.2.1, label %if.end.i.us.us.2.1 - -pregion_for_end.i.us.2.loopexit: ; preds = %if.end.i.us.us.2.1 - br label %pregion_for_end.i.us.2 - -pregion_for_end.i.us.2: ; preds = %pregion_for_end.i.us.2.loopexit, %pregion_for_end.i.us.1 - %79 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.3 = or i32 %79, 3 - %cmp.i.us.3 = icmp slt i32 %conv2.i.us.3, %3 - %mul.i.us.3 = mul nsw i32 %conv2.i.us.3, %4 - %mul8.i.us.3 = mul nsw i32 %conv2.i.us.3, %5 - %80 = sext i32 %mul8.i.us.3 to i64 - br i1 %cmp.i.us.3, label %pregion_for_entry.entry.i.us.us.3.preheader, label %pregion_for_end.i.us.3 - -pregion_for_entry.entry.i.us.us.3.preheader: ; preds = %pregion_for_end.i.us.2 - br label %pregion_for_entry.entry.i.us.us.3 - -pregion_for_entry.entry.i.us.us.3: ; preds = %if.end.i.us.us.3.1, %pregion_for_entry.entry.i.us.us.3.preheader - %_local_id_x.0.us.us.3 = phi i64 [ %178, %if.end.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.us.us.3.preheader ] - %add1.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.0.us.us.3, %mul.i.i - %conv.i.us.us.3 = trunc i64 %add1.i.i.us.us.3 to i32 - %cmp4.i.us.us.3 = icmp slt i32 %conv.i.us.us.3, %4 - br i1 %cmp4.i.us.us.3, label %if.then.i.us.us.3, label %if.end.i.us.us.3 - -if.then.i.us.us.3: ; preds = %pregion_for_entry.entry.i.us.us.3 - %add.i.us.us.3 = add nsw i32 %mul.i.us.3, %conv.i.us.us.3 - %idxprom.i.us.us.3 = sext i32 %add.i.us.us.3 to i64 - %arrayidx.i.us.us.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.3 = shl i64 %add1.i.i.us.us.3, 32 - %81 = ashr exact i64 %sext.i.us.us.3, 32 - br label %for.body.i.us.us.3 - -for.body.i.us.us.3: ; preds = %for.body.i.us.us.3, %if.then.i.us.us.3 - %indvars.iv.next.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.us.us.3, %for.body.i.us.us.3 ], [ 0, %if.then.i.us.us.3 ] - %82 = phi float [ %88, %for.body.i.us.us.3 ], [ 0.000000e+00, %if.then.i.us.us.3 ] - %83 = add nsw i64 %indvars.iv.next.i3.us.us.3, %80 - %arrayidx11.i.us.us.3 = getelementptr inbounds float, float* %1, i64 %83 - %84 = load float, float* %arrayidx11.i.us.us.3, align 4, !tbaa !12 - %mul12.i.us.us.3 = fmul float %84, %7 - %85 = mul nsw i64 %indvars.iv.next.i3.us.us.3, %14 - %86 = add nsw i64 %85, %81 - %arrayidx16.i.us.us.3 = getelementptr inbounds float, float* %2, i64 %86 - %87 = load float, float* %arrayidx16.i.us.us.3, align 4, !tbaa !12 - %88 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.3, float %87, float %82) #2 - store float %88, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3, 1 - %exitcond.not.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.us.us.3, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.3, label %if.end.i.us.us.3.loopexit, label %for.body.i.us.us.3, !llvm.loop !19 - -if.end.i.us.us.3.loopexit: ; preds = %for.body.i.us.us.3 - br label %if.end.i.us.us.3 - -if.end.i.us.us.3: ; preds = %if.end.i.us.us.3.loopexit, %pregion_for_entry.entry.i.us.us.3 - %89 = or i64 %_local_id_x.0.us.us.3, 1 - %add1.i.i.us.us.3.1 = add nuw nsw i64 %89, %mul.i.i - %conv.i.us.us.3.1 = trunc i64 %add1.i.i.us.us.3.1 to i32 - %cmp4.i.us.us.3.1 = icmp slt i32 %conv.i.us.us.3.1, %4 - br i1 %cmp4.i.us.us.3.1, label %if.then.i.us.us.3.1, label %if.end.i.us.us.3.1 - -pregion_for_end.i.us.3.loopexit: ; preds = %if.end.i.us.us.3.1 - br label %pregion_for_end.i.us.3 - -pregion_for_end.i.us.3: ; preds = %pregion_for_end.i.us.3.loopexit, %pregion_for_end.i.us.2 - %90 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.4 = or i32 %90, 4 - %cmp.i.us.4 = icmp slt i32 %conv2.i.us.4, %3 - %mul.i.us.4 = mul nsw i32 %conv2.i.us.4, %4 - %mul8.i.us.4 = mul nsw i32 %conv2.i.us.4, %5 - %91 = sext i32 %mul8.i.us.4 to i64 - br i1 %cmp.i.us.4, label %pregion_for_entry.entry.i.us.us.4.preheader, label %pregion_for_end.i.us.4 - -pregion_for_entry.entry.i.us.us.4.preheader: ; preds = %pregion_for_end.i.us.3 - br label %pregion_for_entry.entry.i.us.us.4 - -pregion_for_entry.entry.i.us.us.4: ; preds = %if.end.i.us.us.4.1, %pregion_for_entry.entry.i.us.us.4.preheader - %_local_id_x.0.us.us.4 = phi i64 [ %169, %if.end.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.us.us.4.preheader ] - %add1.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.0.us.us.4, %mul.i.i - %conv.i.us.us.4 = trunc i64 %add1.i.i.us.us.4 to i32 - %cmp4.i.us.us.4 = icmp slt i32 %conv.i.us.us.4, %4 - br i1 %cmp4.i.us.us.4, label %if.then.i.us.us.4, label %if.end.i.us.us.4 - -if.then.i.us.us.4: ; preds = %pregion_for_entry.entry.i.us.us.4 - %add.i.us.us.4 = add nsw i32 %mul.i.us.4, %conv.i.us.us.4 - %idxprom.i.us.us.4 = sext i32 %add.i.us.us.4 to i64 - %arrayidx.i.us.us.4 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.4 = shl i64 %add1.i.i.us.us.4, 32 - %92 = ashr exact i64 %sext.i.us.us.4, 32 - br label %for.body.i.us.us.4 - -for.body.i.us.us.4: ; preds = %for.body.i.us.us.4, %if.then.i.us.us.4 - %indvars.iv.next.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.us.us.4, %for.body.i.us.us.4 ], [ 0, %if.then.i.us.us.4 ] - %93 = phi float [ %99, %for.body.i.us.us.4 ], [ 0.000000e+00, %if.then.i.us.us.4 ] - %94 = add nsw i64 %indvars.iv.next.i3.us.us.4, %91 - %arrayidx11.i.us.us.4 = getelementptr inbounds float, float* %1, i64 %94 - %95 = load float, float* %arrayidx11.i.us.us.4, align 4, !tbaa !12 - %mul12.i.us.us.4 = fmul float %95, %7 - %96 = mul nsw i64 %indvars.iv.next.i3.us.us.4, %14 - %97 = add nsw i64 %96, %92 - %arrayidx16.i.us.us.4 = getelementptr inbounds float, float* %2, i64 %97 - %98 = load float, float* %arrayidx16.i.us.us.4, align 4, !tbaa !12 - %99 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.4, float %98, float %93) #2 - store float %99, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4, 1 - %exitcond.not.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.us.us.4, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.4, label %if.end.i.us.us.4.loopexit, label %for.body.i.us.us.4, !llvm.loop !19 - -if.end.i.us.us.4.loopexit: ; preds = %for.body.i.us.us.4 - br label %if.end.i.us.us.4 - -if.end.i.us.us.4: ; preds = %if.end.i.us.us.4.loopexit, %pregion_for_entry.entry.i.us.us.4 - %100 = or i64 %_local_id_x.0.us.us.4, 1 - %add1.i.i.us.us.4.1 = add nuw nsw i64 %100, %mul.i.i - %conv.i.us.us.4.1 = trunc i64 %add1.i.i.us.us.4.1 to i32 - %cmp4.i.us.us.4.1 = icmp slt i32 %conv.i.us.us.4.1, %4 - br i1 %cmp4.i.us.us.4.1, label %if.then.i.us.us.4.1, label %if.end.i.us.us.4.1 - -pregion_for_end.i.us.4.loopexit: ; preds = %if.end.i.us.us.4.1 - br label %pregion_for_end.i.us.4 - -pregion_for_end.i.us.4: ; preds = %pregion_for_end.i.us.4.loopexit, %pregion_for_end.i.us.3 - %101 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.5 = or i32 %101, 5 - %cmp.i.us.5 = icmp slt i32 %conv2.i.us.5, %3 - %mul.i.us.5 = mul nsw i32 %conv2.i.us.5, %4 - %mul8.i.us.5 = mul nsw i32 %conv2.i.us.5, %5 - %102 = sext i32 %mul8.i.us.5 to i64 - br i1 %cmp.i.us.5, label %pregion_for_entry.entry.i.us.us.5.preheader, label %pregion_for_end.i.us.5 - -pregion_for_entry.entry.i.us.us.5.preheader: ; preds = %pregion_for_end.i.us.4 - br label %pregion_for_entry.entry.i.us.us.5 - -pregion_for_entry.entry.i.us.us.5: ; preds = %if.end.i.us.us.5.1, %pregion_for_entry.entry.i.us.us.5.preheader - %_local_id_x.0.us.us.5 = phi i64 [ %160, %if.end.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.us.us.5.preheader ] - %add1.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.0.us.us.5, %mul.i.i - %conv.i.us.us.5 = trunc i64 %add1.i.i.us.us.5 to i32 - %cmp4.i.us.us.5 = icmp slt i32 %conv.i.us.us.5, %4 - br i1 %cmp4.i.us.us.5, label %if.then.i.us.us.5, label %if.end.i.us.us.5 - -if.then.i.us.us.5: ; preds = %pregion_for_entry.entry.i.us.us.5 - %add.i.us.us.5 = add nsw i32 %mul.i.us.5, %conv.i.us.us.5 - %idxprom.i.us.us.5 = sext i32 %add.i.us.us.5 to i64 - %arrayidx.i.us.us.5 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.5 = shl i64 %add1.i.i.us.us.5, 32 - %103 = ashr exact i64 %sext.i.us.us.5, 32 - br label %for.body.i.us.us.5 - -for.body.i.us.us.5: ; preds = %for.body.i.us.us.5, %if.then.i.us.us.5 - %indvars.iv.next.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.us.us.5, %for.body.i.us.us.5 ], [ 0, %if.then.i.us.us.5 ] - %104 = phi float [ %110, %for.body.i.us.us.5 ], [ 0.000000e+00, %if.then.i.us.us.5 ] - %105 = add nsw i64 %indvars.iv.next.i3.us.us.5, %102 - %arrayidx11.i.us.us.5 = getelementptr inbounds float, float* %1, i64 %105 - %106 = load float, float* %arrayidx11.i.us.us.5, align 4, !tbaa !12 - %mul12.i.us.us.5 = fmul float %106, %7 - %107 = mul nsw i64 %indvars.iv.next.i3.us.us.5, %14 - %108 = add nsw i64 %107, %103 - %arrayidx16.i.us.us.5 = getelementptr inbounds float, float* %2, i64 %108 - %109 = load float, float* %arrayidx16.i.us.us.5, align 4, !tbaa !12 - %110 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.5, float %109, float %104) #2 - store float %110, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5, 1 - %exitcond.not.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.us.us.5, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.5, label %if.end.i.us.us.5.loopexit, label %for.body.i.us.us.5, !llvm.loop !19 - -if.end.i.us.us.5.loopexit: ; preds = %for.body.i.us.us.5 - br label %if.end.i.us.us.5 - -if.end.i.us.us.5: ; preds = %if.end.i.us.us.5.loopexit, %pregion_for_entry.entry.i.us.us.5 - %111 = or i64 %_local_id_x.0.us.us.5, 1 - %add1.i.i.us.us.5.1 = add nuw nsw i64 %111, %mul.i.i - %conv.i.us.us.5.1 = trunc i64 %add1.i.i.us.us.5.1 to i32 - %cmp4.i.us.us.5.1 = icmp slt i32 %conv.i.us.us.5.1, %4 - br i1 %cmp4.i.us.us.5.1, label %if.then.i.us.us.5.1, label %if.end.i.us.us.5.1 - -pregion_for_end.i.us.5.loopexit: ; preds = %if.end.i.us.us.5.1 - br label %pregion_for_end.i.us.5 - -pregion_for_end.i.us.5: ; preds = %pregion_for_end.i.us.5.loopexit, %pregion_for_end.i.us.4 - %112 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.6 = or i32 %112, 6 - %cmp.i.us.6 = icmp slt i32 %conv2.i.us.6, %3 - %mul.i.us.6 = mul nsw i32 %conv2.i.us.6, %4 - %mul8.i.us.6 = mul nsw i32 %conv2.i.us.6, %5 - %113 = sext i32 %mul8.i.us.6 to i64 - br i1 %cmp.i.us.6, label %pregion_for_entry.entry.i.us.us.6.preheader, label %pregion_for_end.i.us.6 - -pregion_for_entry.entry.i.us.us.6.preheader: ; preds = %pregion_for_end.i.us.5 - br label %pregion_for_entry.entry.i.us.us.6 - -pregion_for_entry.entry.i.us.us.6: ; preds = %if.end.i.us.us.6.1, %pregion_for_entry.entry.i.us.us.6.preheader - %_local_id_x.0.us.us.6 = phi i64 [ %151, %if.end.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.us.us.6.preheader ] - %add1.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.0.us.us.6, %mul.i.i - %conv.i.us.us.6 = trunc i64 %add1.i.i.us.us.6 to i32 - %cmp4.i.us.us.6 = icmp slt i32 %conv.i.us.us.6, %4 - br i1 %cmp4.i.us.us.6, label %if.then.i.us.us.6, label %if.end.i.us.us.6 - -if.then.i.us.us.6: ; preds = %pregion_for_entry.entry.i.us.us.6 - %add.i.us.us.6 = add nsw i32 %mul.i.us.6, %conv.i.us.us.6 - %idxprom.i.us.us.6 = sext i32 %add.i.us.us.6 to i64 - %arrayidx.i.us.us.6 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.6 = shl i64 %add1.i.i.us.us.6, 32 - %114 = ashr exact i64 %sext.i.us.us.6, 32 - br label %for.body.i.us.us.6 - -for.body.i.us.us.6: ; preds = %for.body.i.us.us.6, %if.then.i.us.us.6 - %indvars.iv.next.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.us.us.6, %for.body.i.us.us.6 ], [ 0, %if.then.i.us.us.6 ] - %115 = phi float [ %121, %for.body.i.us.us.6 ], [ 0.000000e+00, %if.then.i.us.us.6 ] - %116 = add nsw i64 %indvars.iv.next.i3.us.us.6, %113 - %arrayidx11.i.us.us.6 = getelementptr inbounds float, float* %1, i64 %116 - %117 = load float, float* %arrayidx11.i.us.us.6, align 4, !tbaa !12 - %mul12.i.us.us.6 = fmul float %117, %7 - %118 = mul nsw i64 %indvars.iv.next.i3.us.us.6, %14 - %119 = add nsw i64 %118, %114 - %arrayidx16.i.us.us.6 = getelementptr inbounds float, float* %2, i64 %119 - %120 = load float, float* %arrayidx16.i.us.us.6, align 4, !tbaa !12 - %121 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.6, float %120, float %115) #2 - store float %121, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6, 1 - %exitcond.not.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.us.us.6, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.6, label %if.end.i.us.us.6.loopexit, label %for.body.i.us.us.6, !llvm.loop !19 - -if.end.i.us.us.6.loopexit: ; preds = %for.body.i.us.us.6 - br label %if.end.i.us.us.6 - -if.end.i.us.us.6: ; preds = %if.end.i.us.us.6.loopexit, %pregion_for_entry.entry.i.us.us.6 - %122 = or i64 %_local_id_x.0.us.us.6, 1 - %add1.i.i.us.us.6.1 = add nuw nsw i64 %122, %mul.i.i - %conv.i.us.us.6.1 = trunc i64 %add1.i.i.us.us.6.1 to i32 - %cmp4.i.us.us.6.1 = icmp slt i32 %conv.i.us.us.6.1, %4 - br i1 %cmp4.i.us.us.6.1, label %if.then.i.us.us.6.1, label %if.end.i.us.us.6.1 - -pregion_for_end.i.us.6.loopexit: ; preds = %if.end.i.us.us.6.1 - br label %pregion_for_end.i.us.6 - -pregion_for_end.i.us.6: ; preds = %pregion_for_end.i.us.6.loopexit, %pregion_for_end.i.us.5 - %123 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.7 = or i32 %123, 7 - %cmp.i.us.7 = icmp slt i32 %conv2.i.us.7, %3 - %mul.i.us.7 = mul nsw i32 %conv2.i.us.7, %4 - %mul8.i.us.7 = mul nsw i32 %conv2.i.us.7, %5 - %124 = sext i32 %mul8.i.us.7 to i64 - br i1 %cmp.i.us.7, label %pregion_for_entry.entry.i.us.us.7.preheader, label %mm2_kernel1.exit - -pregion_for_entry.entry.i.us.us.7.preheader: ; preds = %pregion_for_end.i.us.6 - br label %pregion_for_entry.entry.i.us.us.7 - -pregion_for_entry.entry.i.us.us.7: ; preds = %if.end.i.us.us.7.1, %pregion_for_entry.entry.i.us.us.7.preheader - %_local_id_x.0.us.us.7 = phi i64 [ %142, %if.end.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.us.us.7.preheader ] - %add1.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.0.us.us.7, %mul.i.i - %conv.i.us.us.7 = trunc i64 %add1.i.i.us.us.7 to i32 - %cmp4.i.us.us.7 = icmp slt i32 %conv.i.us.us.7, %4 - br i1 %cmp4.i.us.us.7, label %if.then.i.us.us.7, label %if.end.i.us.us.7 - -if.then.i.us.us.7: ; preds = %pregion_for_entry.entry.i.us.us.7 - %add.i.us.us.7 = add nsw i32 %mul.i.us.7, %conv.i.us.us.7 - %idxprom.i.us.us.7 = sext i32 %add.i.us.us.7 to i64 - %arrayidx.i.us.us.7 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.7 = shl i64 %add1.i.i.us.us.7, 32 - %125 = ashr exact i64 %sext.i.us.us.7, 32 - br label %for.body.i.us.us.7 - -for.body.i.us.us.7: ; preds = %for.body.i.us.us.7, %if.then.i.us.us.7 - %indvars.iv.next.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.us.us.7, %for.body.i.us.us.7 ], [ 0, %if.then.i.us.us.7 ] - %126 = phi float [ %132, %for.body.i.us.us.7 ], [ 0.000000e+00, %if.then.i.us.us.7 ] - %127 = add nsw i64 %indvars.iv.next.i3.us.us.7, %124 - %arrayidx11.i.us.us.7 = getelementptr inbounds float, float* %1, i64 %127 - %128 = load float, float* %arrayidx11.i.us.us.7, align 4, !tbaa !12 - %mul12.i.us.us.7 = fmul float %128, %7 - %129 = mul nsw i64 %indvars.iv.next.i3.us.us.7, %14 - %130 = add nsw i64 %129, %125 - %arrayidx16.i.us.us.7 = getelementptr inbounds float, float* %2, i64 %130 - %131 = load float, float* %arrayidx16.i.us.us.7, align 4, !tbaa !12 - %132 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.7, float %131, float %126) #2 - store float %132, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7, 1 - %exitcond.not.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.us.us.7, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.7, label %if.end.i.us.us.7.loopexit, label %for.body.i.us.us.7, !llvm.loop !19 - -if.end.i.us.us.7.loopexit: ; preds = %for.body.i.us.us.7 - br label %if.end.i.us.us.7 - -if.end.i.us.us.7: ; preds = %if.end.i.us.us.7.loopexit, %pregion_for_entry.entry.i.us.us.7 - %133 = or i64 %_local_id_x.0.us.us.7, 1 - %add1.i.i.us.us.7.1 = add nuw nsw i64 %133, %mul.i.i - %conv.i.us.us.7.1 = trunc i64 %add1.i.i.us.us.7.1 to i32 - %cmp4.i.us.us.7.1 = icmp slt i32 %conv.i.us.us.7.1, %4 - br i1 %cmp4.i.us.us.7.1, label %if.then.i.us.us.7.1, label %if.end.i.us.us.7.1 - -if.then.i.us.1: ; preds = %if.end.i.us - %add.i.us.1 = add nsw i32 %mul.i, %conv.i.us.1 - %idxprom.i.us.1 = sext i32 %add.i.us.1 to i64 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1 - -if.end.i.us.1: ; preds = %if.then.i.us.1, %if.end.i.us - br i1 %cmp4.i.us.2, label %if.then.i.us.2, label %if.end.i.us.2 - -if.then.i.us.2: ; preds = %if.end.i.us.1 - %add.i.us.2 = add nsw i32 %mul.i, %conv.i.us.2 - %idxprom.i.us.2 = sext i32 %add.i.us.2 to i64 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2 - -if.end.i.us.2: ; preds = %if.then.i.us.2, %if.end.i.us.1 - br i1 %cmp4.i.us.3, label %if.then.i.us.3, label %if.end.i.us.3 - -if.then.i.us.3: ; preds = %if.end.i.us.2 - %add.i.us.3 = add nsw i32 %mul.i, %conv.i.us.3 - %idxprom.i.us.3 = sext i32 %add.i.us.3 to i64 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3 - -if.end.i.us.3: ; preds = %if.then.i.us.3, %if.end.i.us.2 - br i1 %cmp4.i.us.4, label %if.then.i.us.4, label %if.end.i.us.4 - -if.then.i.us.4: ; preds = %if.end.i.us.3 - %add.i.us.4 = add nsw i32 %mul.i, %conv.i.us.4 - %idxprom.i.us.4 = sext i32 %add.i.us.4 to i64 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4 - -if.end.i.us.4: ; preds = %if.then.i.us.4, %if.end.i.us.3 - br i1 %cmp4.i.us.5, label %if.then.i.us.5, label %if.end.i.us.5 - -if.then.i.us.5: ; preds = %if.end.i.us.4 - %add.i.us.5 = add nsw i32 %mul.i, %conv.i.us.5 - %idxprom.i.us.5 = sext i32 %add.i.us.5 to i64 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5 - -if.end.i.us.5: ; preds = %if.then.i.us.5, %if.end.i.us.4 - br i1 %cmp4.i.us.6, label %if.then.i.us.6, label %if.end.i.us.6 - -if.then.i.us.6: ; preds = %if.end.i.us.5 - %add.i.us.6 = add nsw i32 %mul.i, %conv.i.us.6 - %idxprom.i.us.6 = sext i32 %add.i.us.6 to i64 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6 - -if.end.i.us.6: ; preds = %if.then.i.us.6, %if.end.i.us.5 - br i1 %cmp4.i.us.7, label %if.then.i.us.7, label %if.end.i.us.7 - -if.then.i.us.7: ; preds = %if.end.i.us.6 - %add.i.us.7 = add nsw i32 %mul.i, %conv.i.us.7 - %idxprom.i.us.7 = sext i32 %add.i.us.7 to i64 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7 - -if.end.i.us.7: ; preds = %if.then.i.us.7, %if.end.i.us.6 - br i1 %cmp4.i.us.8, label %if.then.i.us.8, label %if.end.i.us.8 - -if.then.i.us.8: ; preds = %if.end.i.us.7 - %add.i.us.8 = add nsw i32 %mul.i, %conv.i.us.8 - %idxprom.i.us.8 = sext i32 %add.i.us.8 to i64 - %arrayidx.i.us.8 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.8 - -if.end.i.us.8: ; preds = %if.then.i.us.8, %if.end.i.us.7 - br i1 %cmp4.i.us.9, label %if.then.i.us.9, label %if.end.i.us.9 - -if.then.i.us.9: ; preds = %if.end.i.us.8 - %add.i.us.9 = add nsw i32 %mul.i, %conv.i.us.9 - %idxprom.i.us.9 = sext i32 %add.i.us.9 to i64 - %arrayidx.i.us.9 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.9 - -if.end.i.us.9: ; preds = %if.then.i.us.9, %if.end.i.us.8 - br i1 %cmp4.i.us.10, label %if.then.i.us.10, label %if.end.i.us.10 - -if.then.i.us.10: ; preds = %if.end.i.us.9 - %add.i.us.10 = add nsw i32 %mul.i, %conv.i.us.10 - %idxprom.i.us.10 = sext i32 %add.i.us.10 to i64 - %arrayidx.i.us.10 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.10 - -if.end.i.us.10: ; preds = %if.then.i.us.10, %if.end.i.us.9 - br i1 %cmp4.i.us.11, label %if.then.i.us.11, label %if.end.i.us.11 - -if.then.i.us.11: ; preds = %if.end.i.us.10 - %add.i.us.11 = add nsw i32 %mul.i, %conv.i.us.11 - %idxprom.i.us.11 = sext i32 %add.i.us.11 to i64 - %arrayidx.i.us.11 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.11 - -if.end.i.us.11: ; preds = %if.then.i.us.11, %if.end.i.us.10 - br i1 %cmp4.i.us.12, label %if.then.i.us.12, label %if.end.i.us.12 - -if.then.i.us.12: ; preds = %if.end.i.us.11 - %add.i.us.12 = add nsw i32 %mul.i, %conv.i.us.12 - %idxprom.i.us.12 = sext i32 %add.i.us.12 to i64 - %arrayidx.i.us.12 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.12 - -if.end.i.us.12: ; preds = %if.then.i.us.12, %if.end.i.us.11 - br i1 %cmp4.i.us.13, label %if.then.i.us.13, label %if.end.i.us.13 - -if.then.i.us.13: ; preds = %if.end.i.us.12 - %add.i.us.13 = add nsw i32 %mul.i, %conv.i.us.13 - %idxprom.i.us.13 = sext i32 %add.i.us.13 to i64 - %arrayidx.i.us.13 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.13 - -if.end.i.us.13: ; preds = %if.then.i.us.13, %if.end.i.us.12 - br i1 %cmp4.i.us.14, label %if.then.i.us.14, label %if.end.i.us.14 - -if.then.i.us.14: ; preds = %if.end.i.us.13 - %add.i.us.14 = add nsw i32 %mul.i, %conv.i.us.14 - %idxprom.i.us.14 = sext i32 %add.i.us.14 to i64 - %arrayidx.i.us.14 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.14 - -if.end.i.us.14: ; preds = %if.then.i.us.14, %if.end.i.us.13 - br i1 %cmp4.i.us.15, label %if.then.i.us.15, label %if.end.i.us.15 - -if.then.i.us.15: ; preds = %if.end.i.us.14 - %add.i.us.15 = add nsw i32 %mul.i, %conv.i.us.15 - %idxprom.i.us.15 = sext i32 %add.i.us.15 to i64 - %arrayidx.i.us.15 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.15 - -if.end.i.us.15: ; preds = %if.then.i.us.15, %if.end.i.us.14 - br i1 %cmp4.i.us.16, label %if.then.i.us.16, label %if.end.i.us.16 - -if.then.i.us.16: ; preds = %if.end.i.us.15 - %add.i.us.16 = add nsw i32 %mul.i, %conv.i.us.16 - %idxprom.i.us.16 = sext i32 %add.i.us.16 to i64 - %arrayidx.i.us.16 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.16 - -if.end.i.us.16: ; preds = %if.then.i.us.16, %if.end.i.us.15 - br i1 %cmp4.i.us.17, label %if.then.i.us.17, label %if.end.i.us.17 - -if.then.i.us.17: ; preds = %if.end.i.us.16 - %add.i.us.17 = add nsw i32 %mul.i, %conv.i.us.17 - %idxprom.i.us.17 = sext i32 %add.i.us.17 to i64 - %arrayidx.i.us.17 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.17 - -if.end.i.us.17: ; preds = %if.then.i.us.17, %if.end.i.us.16 - br i1 %cmp4.i.us.18, label %if.then.i.us.18, label %if.end.i.us.18 - -if.then.i.us.18: ; preds = %if.end.i.us.17 - %add.i.us.18 = add nsw i32 %mul.i, %conv.i.us.18 - %idxprom.i.us.18 = sext i32 %add.i.us.18 to i64 - %arrayidx.i.us.18 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.18 - -if.end.i.us.18: ; preds = %if.then.i.us.18, %if.end.i.us.17 - br i1 %cmp4.i.us.19, label %if.then.i.us.19, label %if.end.i.us.19 - -if.then.i.us.19: ; preds = %if.end.i.us.18 - %add.i.us.19 = add nsw i32 %mul.i, %conv.i.us.19 - %idxprom.i.us.19 = sext i32 %add.i.us.19 to i64 - %arrayidx.i.us.19 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.19 - -if.end.i.us.19: ; preds = %if.then.i.us.19, %if.end.i.us.18 - br i1 %cmp4.i.us.20, label %if.then.i.us.20, label %if.end.i.us.20 - -if.then.i.us.20: ; preds = %if.end.i.us.19 - %add.i.us.20 = add nsw i32 %mul.i, %conv.i.us.20 - %idxprom.i.us.20 = sext i32 %add.i.us.20 to i64 - %arrayidx.i.us.20 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.20 - -if.end.i.us.20: ; preds = %if.then.i.us.20, %if.end.i.us.19 - br i1 %cmp4.i.us.21, label %if.then.i.us.21, label %if.end.i.us.21 - -if.then.i.us.21: ; preds = %if.end.i.us.20 - %add.i.us.21 = add nsw i32 %mul.i, %conv.i.us.21 - %idxprom.i.us.21 = sext i32 %add.i.us.21 to i64 - %arrayidx.i.us.21 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.21 - -if.end.i.us.21: ; preds = %if.then.i.us.21, %if.end.i.us.20 - br i1 %cmp4.i.us.22, label %if.then.i.us.22, label %if.end.i.us.22 - -if.then.i.us.22: ; preds = %if.end.i.us.21 - %add.i.us.22 = add nsw i32 %mul.i, %conv.i.us.22 - %idxprom.i.us.22 = sext i32 %add.i.us.22 to i64 - %arrayidx.i.us.22 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.22 - -if.end.i.us.22: ; preds = %if.then.i.us.22, %if.end.i.us.21 - br i1 %cmp4.i.us.23, label %if.then.i.us.23, label %if.end.i.us.23 - -if.then.i.us.23: ; preds = %if.end.i.us.22 - %add.i.us.23 = add nsw i32 %mul.i, %conv.i.us.23 - %idxprom.i.us.23 = sext i32 %add.i.us.23 to i64 - %arrayidx.i.us.23 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.23 - -if.end.i.us.23: ; preds = %if.then.i.us.23, %if.end.i.us.22 - br i1 %cmp4.i.us.24, label %if.then.i.us.24, label %if.end.i.us.24 - -if.then.i.us.24: ; preds = %if.end.i.us.23 - %add.i.us.24 = add nsw i32 %mul.i, %conv.i.us.24 - %idxprom.i.us.24 = sext i32 %add.i.us.24 to i64 - %arrayidx.i.us.24 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.24 - -if.end.i.us.24: ; preds = %if.then.i.us.24, %if.end.i.us.23 - br i1 %cmp4.i.us.25, label %if.then.i.us.25, label %if.end.i.us.25 - -if.then.i.us.25: ; preds = %if.end.i.us.24 - %add.i.us.25 = add nsw i32 %mul.i, %conv.i.us.25 - %idxprom.i.us.25 = sext i32 %add.i.us.25 to i64 - %arrayidx.i.us.25 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.25 - -if.end.i.us.25: ; preds = %if.then.i.us.25, %if.end.i.us.24 - br i1 %cmp4.i.us.26, label %if.then.i.us.26, label %if.end.i.us.26 - -if.then.i.us.26: ; preds = %if.end.i.us.25 - %add.i.us.26 = add nsw i32 %mul.i, %conv.i.us.26 - %idxprom.i.us.26 = sext i32 %add.i.us.26 to i64 - %arrayidx.i.us.26 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.26 - -if.end.i.us.26: ; preds = %if.then.i.us.26, %if.end.i.us.25 - br i1 %cmp4.i.us.27, label %if.then.i.us.27, label %if.end.i.us.27 - -if.then.i.us.27: ; preds = %if.end.i.us.26 - %add.i.us.27 = add nsw i32 %mul.i, %conv.i.us.27 - %idxprom.i.us.27 = sext i32 %add.i.us.27 to i64 - %arrayidx.i.us.27 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.27 - -if.end.i.us.27: ; preds = %if.then.i.us.27, %if.end.i.us.26 - br i1 %cmp4.i.us.28, label %if.then.i.us.28, label %if.end.i.us.28 - -if.then.i.us.28: ; preds = %if.end.i.us.27 - %add.i.us.28 = add nsw i32 %mul.i, %conv.i.us.28 - %idxprom.i.us.28 = sext i32 %add.i.us.28 to i64 - %arrayidx.i.us.28 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.28 - -if.end.i.us.28: ; preds = %if.then.i.us.28, %if.end.i.us.27 - br i1 %cmp4.i.us.29, label %if.then.i.us.29, label %if.end.i.us.29 - -if.then.i.us.29: ; preds = %if.end.i.us.28 - %add.i.us.29 = add nsw i32 %mul.i, %conv.i.us.29 - %idxprom.i.us.29 = sext i32 %add.i.us.29 to i64 - %arrayidx.i.us.29 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.29 - -if.end.i.us.29: ; preds = %if.then.i.us.29, %if.end.i.us.28 - br i1 %cmp4.i.us.30, label %if.then.i.us.30, label %if.end.i.us.30 - -if.then.i.us.30: ; preds = %if.end.i.us.29 - %add.i.us.30 = add nsw i32 %mul.i, %conv.i.us.30 - %idxprom.i.us.30 = sext i32 %add.i.us.30 to i64 - %arrayidx.i.us.30 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.30 - -if.end.i.us.30: ; preds = %if.then.i.us.30, %if.end.i.us.29 - br i1 %cmp4.i.us.31, label %if.then.i.us.31, label %pregion_for_end.i - -if.then.i.us.31: ; preds = %if.end.i.us.30 - %add.i.us.31 = add nsw i32 %mul.i, %conv.i.us.31 - %idxprom.i.us.31 = sext i32 %add.i.us.31 to i64 - %arrayidx.i.us.31 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i - -if.then.i.us.us.7.1: ; preds = %if.end.i.us.us.7 - %add.i.us.us.7.1 = add nsw i32 %mul.i.us.7, %conv.i.us.us.7.1 - %idxprom.i.us.us.7.1 = sext i32 %add.i.us.us.7.1 to i64 - %arrayidx.i.us.us.7.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.7.1 = shl i64 %add1.i.i.us.us.7.1, 32 - %134 = ashr exact i64 %sext.i.us.us.7.1, 32 - br label %for.body.i.us.us.7.1 - -for.body.i.us.us.7.1: ; preds = %for.body.i.us.us.7.1, %if.then.i.us.us.7.1 - %indvars.iv.next.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.us.us.7.1, %for.body.i.us.us.7.1 ], [ 0, %if.then.i.us.us.7.1 ] - %135 = phi float [ %141, %for.body.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.us.us.7.1 ] - %136 = add nsw i64 %indvars.iv.next.i3.us.us.7.1, %124 - %arrayidx11.i.us.us.7.1 = getelementptr inbounds float, float* %1, i64 %136 - %137 = load float, float* %arrayidx11.i.us.us.7.1, align 4, !tbaa !12 - %mul12.i.us.us.7.1 = fmul float %137, %7 - %138 = mul nsw i64 %indvars.iv.next.i3.us.us.7.1, %14 - %139 = add nsw i64 %138, %134 - %arrayidx16.i.us.us.7.1 = getelementptr inbounds float, float* %2, i64 %139 - %140 = load float, float* %arrayidx16.i.us.us.7.1, align 4, !tbaa !12 - %141 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.7.1, float %140, float %135) #2 - store float %141, float* %arrayidx.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7.1, 1 - %exitcond.not.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.us.us.7.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.7.1, label %if.end.i.us.us.7.1.loopexit, label %for.body.i.us.us.7.1, !llvm.loop !19 - -if.end.i.us.us.7.1.loopexit: ; preds = %for.body.i.us.us.7.1 - br label %if.end.i.us.us.7.1 - -if.end.i.us.us.7.1: ; preds = %if.end.i.us.us.7.1.loopexit, %if.end.i.us.us.7 - %142 = add nuw nsw i64 %_local_id_x.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %142, 32 - br i1 %exitcond.7.not.1, label %mm2_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.us.us.7, !llvm.loop !23 - -if.then.i.us.us.6.1: ; preds = %if.end.i.us.us.6 - %add.i.us.us.6.1 = add nsw i32 %mul.i.us.6, %conv.i.us.us.6.1 - %idxprom.i.us.us.6.1 = sext i32 %add.i.us.us.6.1 to i64 - %arrayidx.i.us.us.6.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.6.1 = shl i64 %add1.i.i.us.us.6.1, 32 - %143 = ashr exact i64 %sext.i.us.us.6.1, 32 - br label %for.body.i.us.us.6.1 - -for.body.i.us.us.6.1: ; preds = %for.body.i.us.us.6.1, %if.then.i.us.us.6.1 - %indvars.iv.next.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.us.us.6.1, %for.body.i.us.us.6.1 ], [ 0, %if.then.i.us.us.6.1 ] - %144 = phi float [ %150, %for.body.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.us.us.6.1 ] - %145 = add nsw i64 %indvars.iv.next.i3.us.us.6.1, %113 - %arrayidx11.i.us.us.6.1 = getelementptr inbounds float, float* %1, i64 %145 - %146 = load float, float* %arrayidx11.i.us.us.6.1, align 4, !tbaa !12 - %mul12.i.us.us.6.1 = fmul float %146, %7 - %147 = mul nsw i64 %indvars.iv.next.i3.us.us.6.1, %14 - %148 = add nsw i64 %147, %143 - %arrayidx16.i.us.us.6.1 = getelementptr inbounds float, float* %2, i64 %148 - %149 = load float, float* %arrayidx16.i.us.us.6.1, align 4, !tbaa !12 - %150 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.6.1, float %149, float %144) #2 - store float %150, float* %arrayidx.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6.1, 1 - %exitcond.not.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.us.us.6.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.6.1, label %if.end.i.us.us.6.1.loopexit, label %for.body.i.us.us.6.1, !llvm.loop !19 - -if.end.i.us.us.6.1.loopexit: ; preds = %for.body.i.us.us.6.1 - br label %if.end.i.us.us.6.1 - -if.end.i.us.us.6.1: ; preds = %if.end.i.us.us.6.1.loopexit, %if.end.i.us.us.6 - %151 = add nuw nsw i64 %_local_id_x.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %151, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.us.6.loopexit, label %pregion_for_entry.entry.i.us.us.6, !llvm.loop !23 - -if.then.i.us.us.5.1: ; preds = %if.end.i.us.us.5 - %add.i.us.us.5.1 = add nsw i32 %mul.i.us.5, %conv.i.us.us.5.1 - %idxprom.i.us.us.5.1 = sext i32 %add.i.us.us.5.1 to i64 - %arrayidx.i.us.us.5.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.5.1 = shl i64 %add1.i.i.us.us.5.1, 32 - %152 = ashr exact i64 %sext.i.us.us.5.1, 32 - br label %for.body.i.us.us.5.1 - -for.body.i.us.us.5.1: ; preds = %for.body.i.us.us.5.1, %if.then.i.us.us.5.1 - %indvars.iv.next.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.us.us.5.1, %for.body.i.us.us.5.1 ], [ 0, %if.then.i.us.us.5.1 ] - %153 = phi float [ %159, %for.body.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.us.us.5.1 ] - %154 = add nsw i64 %indvars.iv.next.i3.us.us.5.1, %102 - %arrayidx11.i.us.us.5.1 = getelementptr inbounds float, float* %1, i64 %154 - %155 = load float, float* %arrayidx11.i.us.us.5.1, align 4, !tbaa !12 - %mul12.i.us.us.5.1 = fmul float %155, %7 - %156 = mul nsw i64 %indvars.iv.next.i3.us.us.5.1, %14 - %157 = add nsw i64 %156, %152 - %arrayidx16.i.us.us.5.1 = getelementptr inbounds float, float* %2, i64 %157 - %158 = load float, float* %arrayidx16.i.us.us.5.1, align 4, !tbaa !12 - %159 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.5.1, float %158, float %153) #2 - store float %159, float* %arrayidx.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5.1, 1 - %exitcond.not.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.us.us.5.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.5.1, label %if.end.i.us.us.5.1.loopexit, label %for.body.i.us.us.5.1, !llvm.loop !19 - -if.end.i.us.us.5.1.loopexit: ; preds = %for.body.i.us.us.5.1 - br label %if.end.i.us.us.5.1 - -if.end.i.us.us.5.1: ; preds = %if.end.i.us.us.5.1.loopexit, %if.end.i.us.us.5 - %160 = add nuw nsw i64 %_local_id_x.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %160, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.us.5.loopexit, label %pregion_for_entry.entry.i.us.us.5, !llvm.loop !23 - -if.then.i.us.us.4.1: ; preds = %if.end.i.us.us.4 - %add.i.us.us.4.1 = add nsw i32 %mul.i.us.4, %conv.i.us.us.4.1 - %idxprom.i.us.us.4.1 = sext i32 %add.i.us.us.4.1 to i64 - %arrayidx.i.us.us.4.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.4.1 = shl i64 %add1.i.i.us.us.4.1, 32 - %161 = ashr exact i64 %sext.i.us.us.4.1, 32 - br label %for.body.i.us.us.4.1 - -for.body.i.us.us.4.1: ; preds = %for.body.i.us.us.4.1, %if.then.i.us.us.4.1 - %indvars.iv.next.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.us.us.4.1, %for.body.i.us.us.4.1 ], [ 0, %if.then.i.us.us.4.1 ] - %162 = phi float [ %168, %for.body.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.us.us.4.1 ] - %163 = add nsw i64 %indvars.iv.next.i3.us.us.4.1, %91 - %arrayidx11.i.us.us.4.1 = getelementptr inbounds float, float* %1, i64 %163 - %164 = load float, float* %arrayidx11.i.us.us.4.1, align 4, !tbaa !12 - %mul12.i.us.us.4.1 = fmul float %164, %7 - %165 = mul nsw i64 %indvars.iv.next.i3.us.us.4.1, %14 - %166 = add nsw i64 %165, %161 - %arrayidx16.i.us.us.4.1 = getelementptr inbounds float, float* %2, i64 %166 - %167 = load float, float* %arrayidx16.i.us.us.4.1, align 4, !tbaa !12 - %168 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.4.1, float %167, float %162) #2 - store float %168, float* %arrayidx.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4.1, 1 - %exitcond.not.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.us.us.4.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.4.1, label %if.end.i.us.us.4.1.loopexit, label %for.body.i.us.us.4.1, !llvm.loop !19 - -if.end.i.us.us.4.1.loopexit: ; preds = %for.body.i.us.us.4.1 - br label %if.end.i.us.us.4.1 - -if.end.i.us.us.4.1: ; preds = %if.end.i.us.us.4.1.loopexit, %if.end.i.us.us.4 - %169 = add nuw nsw i64 %_local_id_x.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %169, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.us.4.loopexit, label %pregion_for_entry.entry.i.us.us.4, !llvm.loop !23 - -if.then.i.us.us.3.1: ; preds = %if.end.i.us.us.3 - %add.i.us.us.3.1 = add nsw i32 %mul.i.us.3, %conv.i.us.us.3.1 - %idxprom.i.us.us.3.1 = sext i32 %add.i.us.us.3.1 to i64 - %arrayidx.i.us.us.3.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.3.1 = shl i64 %add1.i.i.us.us.3.1, 32 - %170 = ashr exact i64 %sext.i.us.us.3.1, 32 - br label %for.body.i.us.us.3.1 - -for.body.i.us.us.3.1: ; preds = %for.body.i.us.us.3.1, %if.then.i.us.us.3.1 - %indvars.iv.next.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.us.us.3.1, %for.body.i.us.us.3.1 ], [ 0, %if.then.i.us.us.3.1 ] - %171 = phi float [ %177, %for.body.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.us.us.3.1 ] - %172 = add nsw i64 %indvars.iv.next.i3.us.us.3.1, %80 - %arrayidx11.i.us.us.3.1 = getelementptr inbounds float, float* %1, i64 %172 - %173 = load float, float* %arrayidx11.i.us.us.3.1, align 4, !tbaa !12 - %mul12.i.us.us.3.1 = fmul float %173, %7 - %174 = mul nsw i64 %indvars.iv.next.i3.us.us.3.1, %14 - %175 = add nsw i64 %174, %170 - %arrayidx16.i.us.us.3.1 = getelementptr inbounds float, float* %2, i64 %175 - %176 = load float, float* %arrayidx16.i.us.us.3.1, align 4, !tbaa !12 - %177 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.3.1, float %176, float %171) #2 - store float %177, float* %arrayidx.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3.1, 1 - %exitcond.not.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.us.us.3.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.3.1, label %if.end.i.us.us.3.1.loopexit, label %for.body.i.us.us.3.1, !llvm.loop !19 - -if.end.i.us.us.3.1.loopexit: ; preds = %for.body.i.us.us.3.1 - br label %if.end.i.us.us.3.1 - -if.end.i.us.us.3.1: ; preds = %if.end.i.us.us.3.1.loopexit, %if.end.i.us.us.3 - %178 = add nuw nsw i64 %_local_id_x.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %178, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.us.3.loopexit, label %pregion_for_entry.entry.i.us.us.3, !llvm.loop !23 - -if.then.i.us.us.2.1: ; preds = %if.end.i.us.us.2 - %add.i.us.us.2.1 = add nsw i32 %mul.i.us.2, %conv.i.us.us.2.1 - %idxprom.i.us.us.2.1 = sext i32 %add.i.us.us.2.1 to i64 - %arrayidx.i.us.us.2.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.2.1 = shl i64 %add1.i.i.us.us.2.1, 32 - %179 = ashr exact i64 %sext.i.us.us.2.1, 32 - br label %for.body.i.us.us.2.1 - -for.body.i.us.us.2.1: ; preds = %for.body.i.us.us.2.1, %if.then.i.us.us.2.1 - %indvars.iv.next.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.us.us.2.1, %for.body.i.us.us.2.1 ], [ 0, %if.then.i.us.us.2.1 ] - %180 = phi float [ %186, %for.body.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.us.us.2.1 ] - %181 = add nsw i64 %indvars.iv.next.i3.us.us.2.1, %69 - %arrayidx11.i.us.us.2.1 = getelementptr inbounds float, float* %1, i64 %181 - %182 = load float, float* %arrayidx11.i.us.us.2.1, align 4, !tbaa !12 - %mul12.i.us.us.2.1 = fmul float %182, %7 - %183 = mul nsw i64 %indvars.iv.next.i3.us.us.2.1, %14 - %184 = add nsw i64 %183, %179 - %arrayidx16.i.us.us.2.1 = getelementptr inbounds float, float* %2, i64 %184 - %185 = load float, float* %arrayidx16.i.us.us.2.1, align 4, !tbaa !12 - %186 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.2.1, float %185, float %180) #2 - store float %186, float* %arrayidx.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2.1, 1 - %exitcond.not.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.us.us.2.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.2.1, label %if.end.i.us.us.2.1.loopexit, label %for.body.i.us.us.2.1, !llvm.loop !19 - -if.end.i.us.us.2.1.loopexit: ; preds = %for.body.i.us.us.2.1 - br label %if.end.i.us.us.2.1 - -if.end.i.us.us.2.1: ; preds = %if.end.i.us.us.2.1.loopexit, %if.end.i.us.us.2 - %187 = add nuw nsw i64 %_local_id_x.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %187, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.us.2.loopexit, label %pregion_for_entry.entry.i.us.us.2, !llvm.loop !23 - -if.then.i.us.us.1.1: ; preds = %if.end.i.us.us.1 - %add.i.us.us.1.1 = add nsw i32 %mul.i.us.1, %conv.i.us.us.1.1 - %idxprom.i.us.us.1.1 = sext i32 %add.i.us.us.1.1 to i64 - %arrayidx.i.us.us.1.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.1.1 = shl i64 %add1.i.i.us.us.1.1, 32 - %188 = ashr exact i64 %sext.i.us.us.1.1, 32 - br label %for.body.i.us.us.1.1 - -for.body.i.us.us.1.1: ; preds = %for.body.i.us.us.1.1, %if.then.i.us.us.1.1 - %indvars.iv.next.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.us.us.1.1, %for.body.i.us.us.1.1 ], [ 0, %if.then.i.us.us.1.1 ] - %189 = phi float [ %195, %for.body.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.us.us.1.1 ] - %190 = add nsw i64 %indvars.iv.next.i3.us.us.1.1, %48 - %arrayidx11.i.us.us.1.1 = getelementptr inbounds float, float* %1, i64 %190 - %191 = load float, float* %arrayidx11.i.us.us.1.1, align 4, !tbaa !12 - %mul12.i.us.us.1.1 = fmul float %191, %7 - %192 = mul nsw i64 %indvars.iv.next.i3.us.us.1.1, %14 - %193 = add nsw i64 %192, %188 - %arrayidx16.i.us.us.1.1 = getelementptr inbounds float, float* %2, i64 %193 - %194 = load float, float* %arrayidx16.i.us.us.1.1, align 4, !tbaa !12 - %195 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.1.1, float %194, float %189) #2 - store float %195, float* %arrayidx.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1.1, 1 - %exitcond.not.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.us.us.1.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.1.1, label %if.end.i.us.us.1.1.loopexit, label %for.body.i.us.us.1.1, !llvm.loop !19 - -if.end.i.us.us.1.1.loopexit: ; preds = %for.body.i.us.us.1.1 - br label %if.end.i.us.us.1.1 - -if.end.i.us.us.1.1: ; preds = %if.end.i.us.us.1.1.loopexit, %if.end.i.us.us.1 - %196 = add nuw nsw i64 %_local_id_x.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %196, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.us.1.loopexit, label %pregion_for_entry.entry.i.us.us.1, !llvm.loop !23 - -if.then.i.us.us.147: ; preds = %if.end.i.us.us - %add.i.us.us.143 = add nsw i32 %mul.i.us, %conv.i.us.us.140 - %idxprom.i.us.us.144 = sext i32 %add.i.us.us.143 to i64 - %arrayidx.i.us.us.145 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.144 - store float 0.000000e+00, float* %arrayidx.i.us.us.145, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.146 = shl i64 %add1.i.i.us.us.139, 32 - %197 = ashr exact i64 %sext.i.us.us.146, 32 - br label %for.body.i.us.us.154 - -for.body.i.us.us.154: ; preds = %for.body.i.us.us.154, %if.then.i.us.us.147 - %indvars.iv.next.i3.us.us.148 = phi i64 [ %indvars.iv.next.i.us.us.152, %for.body.i.us.us.154 ], [ 0, %if.then.i.us.us.147 ] - %198 = phi float [ %204, %for.body.i.us.us.154 ], [ 0.000000e+00, %if.then.i.us.us.147 ] - %199 = add nsw i64 %indvars.iv.next.i3.us.us.148, %46 - %arrayidx11.i.us.us.149 = getelementptr inbounds float, float* %1, i64 %199 - %200 = load float, float* %arrayidx11.i.us.us.149, align 4, !tbaa !12 - %mul12.i.us.us.150 = fmul float %200, %7 - %201 = mul nsw i64 %indvars.iv.next.i3.us.us.148, %14 - %202 = add nsw i64 %201, %197 - %arrayidx16.i.us.us.151 = getelementptr inbounds float, float* %2, i64 %202 - %203 = load float, float* %arrayidx16.i.us.us.151, align 4, !tbaa !12 - %204 = tail call float @llvm.fmuladd.f32(float %mul12.i.us.us.150, float %203, float %198) #2 - store float %204, float* %arrayidx.i.us.us.145, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.152 = add nuw nsw i64 %indvars.iv.next.i3.us.us.148, 1 - %exitcond.not.i.us.us.153 = icmp eq i64 %indvars.iv.next.i.us.us.152, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.153, label %if.end.i.us.us.155.loopexit, label %for.body.i.us.us.154, !llvm.loop !19 - -if.end.i.us.us.155.loopexit: ; preds = %for.body.i.us.us.154 - br label %if.end.i.us.us.155 - -if.end.i.us.us.155: ; preds = %if.end.i.us.us.155.loopexit, %if.end.i.us.us - %205 = add nuw nsw i64 %_local_id_x.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %205, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.us.loopexit, label %pregion_for_entry.entry.i.us.us, !llvm.loop !23 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mm2_kernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 5 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %29 = getelementptr i8*, i8** %0, i64 7 - %30 = bitcast i8** %29 to float** - %31 = load float*, float** %30, align 8 - %32 = load float, float* %31, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp639.i.i = icmp sgt i32 %28, 0 - %33 = sext i32 %24 to i64 - %wide.trip.count.i.i = zext i32 %28 to i64 - br i1 %cmp639.i.i, label %pregion_for_entry.pregion_for_init.i.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - %conv.i.i.us = trunc i64 %mul.i.i.i to i32 - %cmp4.i.i.us = icmp sgt i32 %24, %conv.i.i.us - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.1 = or i32 %34, 1 - %cmp4.i.i.us.1 = icmp sgt i32 %24, %conv.i.i.us.1 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.2 = or i32 %35, 2 - %cmp4.i.i.us.2 = icmp sgt i32 %24, %conv.i.i.us.2 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.3 = or i32 %36, 3 - %cmp4.i.i.us.3 = icmp sgt i32 %24, %conv.i.i.us.3 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.4 = or i32 %37, 4 - %cmp4.i.i.us.4 = icmp sgt i32 %24, %conv.i.i.us.4 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.5 = or i32 %38, 5 - %cmp4.i.i.us.5 = icmp sgt i32 %24, %conv.i.i.us.5 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.6 = or i32 %39, 6 - %cmp4.i.i.us.6 = icmp sgt i32 %24, %conv.i.i.us.6 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.7 = or i32 %40, 7 - %cmp4.i.i.us.7 = icmp sgt i32 %24, %conv.i.i.us.7 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.8 = or i32 %41, 8 - %cmp4.i.i.us.8 = icmp sgt i32 %24, %conv.i.i.us.8 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.9 = or i32 %42, 9 - %cmp4.i.i.us.9 = icmp sgt i32 %24, %conv.i.i.us.9 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.10 = or i32 %43, 10 - %cmp4.i.i.us.10 = icmp sgt i32 %24, %conv.i.i.us.10 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.11 = or i32 %44, 11 - %cmp4.i.i.us.11 = icmp sgt i32 %24, %conv.i.i.us.11 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.12 = or i32 %45, 12 - %cmp4.i.i.us.12 = icmp sgt i32 %24, %conv.i.i.us.12 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.13 = or i32 %46, 13 - %cmp4.i.i.us.13 = icmp sgt i32 %24, %conv.i.i.us.13 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.14 = or i32 %47, 14 - %cmp4.i.i.us.14 = icmp sgt i32 %24, %conv.i.i.us.14 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.15 = or i32 %48, 15 - %cmp4.i.i.us.15 = icmp sgt i32 %24, %conv.i.i.us.15 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.16 = or i32 %49, 16 - %cmp4.i.i.us.16 = icmp sgt i32 %24, %conv.i.i.us.16 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.17 = or i32 %50, 17 - %cmp4.i.i.us.17 = icmp sgt i32 %24, %conv.i.i.us.17 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.18 = or i32 %51, 18 - %cmp4.i.i.us.18 = icmp sgt i32 %24, %conv.i.i.us.18 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.19 = or i32 %52, 19 - %cmp4.i.i.us.19 = icmp sgt i32 %24, %conv.i.i.us.19 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.20 = or i32 %53, 20 - %cmp4.i.i.us.20 = icmp sgt i32 %24, %conv.i.i.us.20 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.21 = or i32 %54, 21 - %cmp4.i.i.us.21 = icmp sgt i32 %24, %conv.i.i.us.21 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.22 = or i32 %55, 22 - %cmp4.i.i.us.22 = icmp sgt i32 %24, %conv.i.i.us.22 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.23 = or i32 %56, 23 - %cmp4.i.i.us.23 = icmp sgt i32 %24, %conv.i.i.us.23 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.24 = or i32 %57, 24 - %cmp4.i.i.us.24 = icmp sgt i32 %24, %conv.i.i.us.24 - %58 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.25 = or i32 %58, 25 - %cmp4.i.i.us.25 = icmp sgt i32 %24, %conv.i.i.us.25 - %59 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.26 = or i32 %59, 26 - %cmp4.i.i.us.26 = icmp sgt i32 %24, %conv.i.i.us.26 - %60 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.27 = or i32 %60, 27 - %cmp4.i.i.us.27 = icmp sgt i32 %24, %conv.i.i.us.27 - %61 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.28 = or i32 %61, 28 - %cmp4.i.i.us.28 = icmp sgt i32 %24, %conv.i.i.us.28 - %62 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.29 = or i32 %62, 29 - %cmp4.i.i.us.29 = icmp sgt i32 %24, %conv.i.i.us.29 - %63 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.30 = or i32 %63, 30 - %cmp4.i.i.us.30 = icmp sgt i32 %24, %conv.i.i.us.30 - %64 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.31 = or i32 %64, 31 - %cmp4.i.i.us.31 = icmp sgt i32 %24, %conv.i.i.us.31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i.us.preheader: ; preds = %5 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %20, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %24, %conv2.i.i.us - %mul8.i.i.us = mul nsw i32 %28, %conv2.i.i.us - %65 = sext i32 %mul8.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us.preheader - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us.155 - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us.preheader - %66 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %66, 1 - %cmp.i.i.us.1 = icmp sgt i32 %20, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %24, %conv2.i.i.us.1 - %mul8.i.i.us.1 = mul nsw i32 %28, %conv2.i.i.us.1 - %67 = sext i32 %mul8.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us.155, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %224, %if.end.i.i.us.us.155 ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %24, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us - store float 0.000000e+00, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %68 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %69 = or i64 %_local_id_x.i.0.us.us, 1 - %add1.i.i.i.us.us.139 = add nuw nsw i64 %69, %mul.i.i.i - %conv.i.i.us.us.140 = trunc i64 %add1.i.i.i.us.us.139 to i32 - %cmp4.i.i.us.us.141 = icmp sgt i32 %24, %conv.i.i.us.us.140 - br i1 %cmp4.i.i.us.us.141, label %if.then.i.i.us.us.147, label %if.end.i.i.us.us.155 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %70 = phi float [ %76, %for.body.i.i.us.us ], [ 0.000000e+00, %if.then.i.i.us.us ] - %71 = add nsw i64 %indvars.iv.next.i.i3.us.us, %65 - %arrayidx11.i.i.us.us = getelementptr inbounds float, float* %12, i64 %71 - %72 = load float, float* %arrayidx11.i.i.us.us, align 4, !tbaa !12 - %mul12.i.i.us.us = fmul float %32, %72 - %73 = mul nsw i64 %indvars.iv.next.i.i3.us.us, %33 - %74 = add nsw i64 %73, %68 - %arrayidx16.i.i.us.us = getelementptr inbounds float, float* %16, i64 %74 - %75 = load float, float* %arrayidx16.i.i.us.us, align 4, !tbaa !12 - %76 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us, float %75, float %70) #2 - store float %76, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %pregion_for_entry.pregion_for_init.i.i.preheader - %_local_id_y.i.0 = phi i64 [ %77, %pregion_for_end.i.i ], [ 0, %pregion_for_entry.pregion_for_init.i.i.preheader ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %20, %conv2.i.i - %mul.i.i = mul nsw i32 %24, %conv2.i.i - br i1 %cmp.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us.preheader - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -pregion_for_end.i.i: ; preds = %if.then.i.i.us.31, %if.end.i.i.us.30, %pregion_for_entry.pregion_for_init.i.i - %77 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond34.not = icmp eq i64 %77, 8 - br i1 %exitcond34.not, label %_pocl_kernel_mm2_kernel1.exit.loopexit56, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_mm2_kernel1.exit.loopexit: ; preds = %if.end.i.i.us.us.7.1 - br label %_pocl_kernel_mm2_kernel1.exit - -_pocl_kernel_mm2_kernel1.exit.loopexit56: ; preds = %pregion_for_end.i.i - br label %_pocl_kernel_mm2_kernel1.exit - -_pocl_kernel_mm2_kernel1.exit: ; preds = %pregion_for_end.i.i.us.6, %_pocl_kernel_mm2_kernel1.exit.loopexit56, %_pocl_kernel_mm2_kernel1.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %215, %if.end.i.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %24, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %78 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %79 = phi float [ %85, %for.body.i.i.us.us.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1 ] - %80 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %67 - %arrayidx11.i.i.us.us.1 = getelementptr inbounds float, float* %12, i64 %80 - %81 = load float, float* %arrayidx11.i.i.us.us.1, align 4, !tbaa !12 - %mul12.i.i.us.us.1 = fmul float %32, %81 - %82 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1, %33 - %83 = add nsw i64 %82, %78 - %arrayidx16.i.i.us.us.1 = getelementptr inbounds float, float* %16, i64 %83 - %84 = load float, float* %arrayidx16.i.i.us.us.1, align 4, !tbaa !12 - %85 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.1, float %84, float %79) #2 - store float %85, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !19 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %86 = or i64 %_local_id_x.i.0.us.us.1, 1 - %add1.i.i.i.us.us.1.1 = add nuw nsw i64 %86, %mul.i.i.i - %conv.i.i.us.us.1.1 = trunc i64 %add1.i.i.i.us.us.1.1 to i32 - %cmp4.i.i.us.us.1.1 = icmp sgt i32 %24, %conv.i.i.us.us.1.1 - br i1 %cmp4.i.i.us.us.1.1, label %if.then.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %87 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %87, 2 - %cmp.i.i.us.2 = icmp sgt i32 %20, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %24, %conv2.i.i.us.2 - %mul8.i.i.us.2 = mul nsw i32 %28, %conv2.i.i.us.2 - %88 = sext i32 %mul8.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.1, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %206, %if.end.i.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %24, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %89 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %90 = phi float [ %96, %for.body.i.i.us.us.2 ], [ 0.000000e+00, %if.then.i.i.us.us.2 ] - %91 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %88 - %arrayidx11.i.i.us.us.2 = getelementptr inbounds float, float* %12, i64 %91 - %92 = load float, float* %arrayidx11.i.i.us.us.2, align 4, !tbaa !12 - %mul12.i.i.us.us.2 = fmul float %32, %92 - %93 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2, %33 - %94 = add nsw i64 %93, %89 - %arrayidx16.i.i.us.us.2 = getelementptr inbounds float, float* %16, i64 %94 - %95 = load float, float* %arrayidx16.i.i.us.us.2, align 4, !tbaa !12 - %96 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.2, float %95, float %90) #2 - store float %96, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !19 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %97 = or i64 %_local_id_x.i.0.us.us.2, 1 - %add1.i.i.i.us.us.2.1 = add nuw nsw i64 %97, %mul.i.i.i - %conv.i.i.us.us.2.1 = trunc i64 %add1.i.i.i.us.us.2.1 to i32 - %cmp4.i.i.us.us.2.1 = icmp sgt i32 %24, %conv.i.i.us.us.2.1 - br i1 %cmp4.i.i.us.us.2.1, label %if.then.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2.1 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %98 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %98, 3 - %cmp.i.i.us.3 = icmp sgt i32 %20, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %24, %conv2.i.i.us.3 - %mul8.i.i.us.3 = mul nsw i32 %28, %conv2.i.i.us.3 - %99 = sext i32 %mul8.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.1, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %197, %if.end.i.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %24, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %100 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %101 = phi float [ %107, %for.body.i.i.us.us.3 ], [ 0.000000e+00, %if.then.i.i.us.us.3 ] - %102 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %99 - %arrayidx11.i.i.us.us.3 = getelementptr inbounds float, float* %12, i64 %102 - %103 = load float, float* %arrayidx11.i.i.us.us.3, align 4, !tbaa !12 - %mul12.i.i.us.us.3 = fmul float %32, %103 - %104 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3, %33 - %105 = add nsw i64 %104, %100 - %arrayidx16.i.i.us.us.3 = getelementptr inbounds float, float* %16, i64 %105 - %106 = load float, float* %arrayidx16.i.i.us.us.3, align 4, !tbaa !12 - %107 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.3, float %106, float %101) #2 - store float %107, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !19 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %108 = or i64 %_local_id_x.i.0.us.us.3, 1 - %add1.i.i.i.us.us.3.1 = add nuw nsw i64 %108, %mul.i.i.i - %conv.i.i.us.us.3.1 = trunc i64 %add1.i.i.i.us.us.3.1 to i32 - %cmp4.i.i.us.us.3.1 = icmp sgt i32 %24, %conv.i.i.us.us.3.1 - br i1 %cmp4.i.i.us.us.3.1, label %if.then.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3.1 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %109 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %109, 4 - %cmp.i.i.us.4 = icmp sgt i32 %20, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %24, %conv2.i.i.us.4 - %mul8.i.i.us.4 = mul nsw i32 %28, %conv2.i.i.us.4 - %110 = sext i32 %mul8.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.1, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %188, %if.end.i.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %24, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %111 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %112 = phi float [ %118, %for.body.i.i.us.us.4 ], [ 0.000000e+00, %if.then.i.i.us.us.4 ] - %113 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %110 - %arrayidx11.i.i.us.us.4 = getelementptr inbounds float, float* %12, i64 %113 - %114 = load float, float* %arrayidx11.i.i.us.us.4, align 4, !tbaa !12 - %mul12.i.i.us.us.4 = fmul float %32, %114 - %115 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4, %33 - %116 = add nsw i64 %115, %111 - %arrayidx16.i.i.us.us.4 = getelementptr inbounds float, float* %16, i64 %116 - %117 = load float, float* %arrayidx16.i.i.us.us.4, align 4, !tbaa !12 - %118 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.4, float %117, float %112) #2 - store float %118, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !19 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %119 = or i64 %_local_id_x.i.0.us.us.4, 1 - %add1.i.i.i.us.us.4.1 = add nuw nsw i64 %119, %mul.i.i.i - %conv.i.i.us.us.4.1 = trunc i64 %add1.i.i.i.us.us.4.1 to i32 - %cmp4.i.i.us.us.4.1 = icmp sgt i32 %24, %conv.i.i.us.us.4.1 - br i1 %cmp4.i.i.us.us.4.1, label %if.then.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4.1 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %120 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %120, 5 - %cmp.i.i.us.5 = icmp sgt i32 %20, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %24, %conv2.i.i.us.5 - %mul8.i.i.us.5 = mul nsw i32 %28, %conv2.i.i.us.5 - %121 = sext i32 %mul8.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.1, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %179, %if.end.i.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %24, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %122 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %123 = phi float [ %129, %for.body.i.i.us.us.5 ], [ 0.000000e+00, %if.then.i.i.us.us.5 ] - %124 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %121 - %arrayidx11.i.i.us.us.5 = getelementptr inbounds float, float* %12, i64 %124 - %125 = load float, float* %arrayidx11.i.i.us.us.5, align 4, !tbaa !12 - %mul12.i.i.us.us.5 = fmul float %32, %125 - %126 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5, %33 - %127 = add nsw i64 %126, %122 - %arrayidx16.i.i.us.us.5 = getelementptr inbounds float, float* %16, i64 %127 - %128 = load float, float* %arrayidx16.i.i.us.us.5, align 4, !tbaa !12 - %129 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.5, float %128, float %123) #2 - store float %129, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !19 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %130 = or i64 %_local_id_x.i.0.us.us.5, 1 - %add1.i.i.i.us.us.5.1 = add nuw nsw i64 %130, %mul.i.i.i - %conv.i.i.us.us.5.1 = trunc i64 %add1.i.i.i.us.us.5.1 to i32 - %cmp4.i.i.us.us.5.1 = icmp sgt i32 %24, %conv.i.i.us.us.5.1 - br i1 %cmp4.i.i.us.us.5.1, label %if.then.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5.1 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %131 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %131, 6 - %cmp.i.i.us.6 = icmp sgt i32 %20, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %24, %conv2.i.i.us.6 - %mul8.i.i.us.6 = mul nsw i32 %28, %conv2.i.i.us.6 - %132 = sext i32 %mul8.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.1, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %170, %if.end.i.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %24, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %133 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %134 = phi float [ %140, %for.body.i.i.us.us.6 ], [ 0.000000e+00, %if.then.i.i.us.us.6 ] - %135 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %132 - %arrayidx11.i.i.us.us.6 = getelementptr inbounds float, float* %12, i64 %135 - %136 = load float, float* %arrayidx11.i.i.us.us.6, align 4, !tbaa !12 - %mul12.i.i.us.us.6 = fmul float %32, %136 - %137 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6, %33 - %138 = add nsw i64 %137, %133 - %arrayidx16.i.i.us.us.6 = getelementptr inbounds float, float* %16, i64 %138 - %139 = load float, float* %arrayidx16.i.i.us.us.6, align 4, !tbaa !12 - %140 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.6, float %139, float %134) #2 - store float %140, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !19 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %141 = or i64 %_local_id_x.i.0.us.us.6, 1 - %add1.i.i.i.us.us.6.1 = add nuw nsw i64 %141, %mul.i.i.i - %conv.i.i.us.us.6.1 = trunc i64 %add1.i.i.i.us.us.6.1 to i32 - %cmp4.i.i.us.us.6.1 = icmp sgt i32 %24, %conv.i.i.us.us.6.1 - br i1 %cmp4.i.i.us.us.6.1, label %if.then.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6.1 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %142 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %142, 7 - %cmp.i.i.us.7 = icmp sgt i32 %20, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %24, %conv2.i.i.us.7 - %mul8.i.i.us.7 = mul nsw i32 %28, %conv2.i.i.us.7 - %143 = sext i32 %mul8.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_mm2_kernel1.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.1, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %161, %if.end.i.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %24, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %144 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %145 = phi float [ %151, %for.body.i.i.us.us.7 ], [ 0.000000e+00, %if.then.i.i.us.us.7 ] - %146 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %143 - %arrayidx11.i.i.us.us.7 = getelementptr inbounds float, float* %12, i64 %146 - %147 = load float, float* %arrayidx11.i.i.us.us.7, align 4, !tbaa !12 - %mul12.i.i.us.us.7 = fmul float %32, %147 - %148 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7, %33 - %149 = add nsw i64 %148, %144 - %arrayidx16.i.i.us.us.7 = getelementptr inbounds float, float* %16, i64 %149 - %150 = load float, float* %arrayidx16.i.i.us.us.7, align 4, !tbaa !12 - %151 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.7, float %150, float %145) #2 - store float %151, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !19 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %152 = or i64 %_local_id_x.i.0.us.us.7, 1 - %add1.i.i.i.us.us.7.1 = add nuw nsw i64 %152, %mul.i.i.i - %conv.i.i.us.us.7.1 = trunc i64 %add1.i.i.i.us.us.7.1 to i32 - %cmp4.i.i.us.us.7.1 = icmp sgt i32 %24, %conv.i.i.us.us.7.1 - br i1 %cmp4.i.i.us.us.7.1, label %if.then.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1 - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %add.i.i.us.1 = add nsw i32 %mul.i.i, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %if.end.i.i.us - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %if.end.i.i.us.1 - %add.i.i.us.2 = add nsw i32 %mul.i.i, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %if.end.i.i.us.1 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %if.end.i.i.us.2 - %add.i.i.us.3 = add nsw i32 %mul.i.i, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %if.end.i.i.us.2 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %if.end.i.i.us.3 - %add.i.i.us.4 = add nsw i32 %mul.i.i, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %if.end.i.i.us.3 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %if.end.i.i.us.4 - %add.i.i.us.5 = add nsw i32 %mul.i.i, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %if.end.i.i.us.4 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %if.end.i.i.us.5 - %add.i.i.us.6 = add nsw i32 %mul.i.i, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %if.end.i.i.us.5 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %if.end.i.i.us.6 - %add.i.i.us.7 = add nsw i32 %mul.i.i, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %if.end.i.i.us.6 - br i1 %cmp4.i.i.us.8, label %if.then.i.i.us.8, label %if.end.i.i.us.8 - -if.then.i.i.us.8: ; preds = %if.end.i.i.us.7 - %add.i.i.us.8 = add nsw i32 %mul.i.i, %conv.i.i.us.8 - %idxprom.i.i.us.8 = sext i32 %add.i.i.us.8 to i64 - %arrayidx.i.i.us.8 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.8 - -if.end.i.i.us.8: ; preds = %if.then.i.i.us.8, %if.end.i.i.us.7 - br i1 %cmp4.i.i.us.9, label %if.then.i.i.us.9, label %if.end.i.i.us.9 - -if.then.i.i.us.9: ; preds = %if.end.i.i.us.8 - %add.i.i.us.9 = add nsw i32 %mul.i.i, %conv.i.i.us.9 - %idxprom.i.i.us.9 = sext i32 %add.i.i.us.9 to i64 - %arrayidx.i.i.us.9 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.9 - -if.end.i.i.us.9: ; preds = %if.then.i.i.us.9, %if.end.i.i.us.8 - br i1 %cmp4.i.i.us.10, label %if.then.i.i.us.10, label %if.end.i.i.us.10 - -if.then.i.i.us.10: ; preds = %if.end.i.i.us.9 - %add.i.i.us.10 = add nsw i32 %mul.i.i, %conv.i.i.us.10 - %idxprom.i.i.us.10 = sext i32 %add.i.i.us.10 to i64 - %arrayidx.i.i.us.10 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.10 - -if.end.i.i.us.10: ; preds = %if.then.i.i.us.10, %if.end.i.i.us.9 - br i1 %cmp4.i.i.us.11, label %if.then.i.i.us.11, label %if.end.i.i.us.11 - -if.then.i.i.us.11: ; preds = %if.end.i.i.us.10 - %add.i.i.us.11 = add nsw i32 %mul.i.i, %conv.i.i.us.11 - %idxprom.i.i.us.11 = sext i32 %add.i.i.us.11 to i64 - %arrayidx.i.i.us.11 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.11 - -if.end.i.i.us.11: ; preds = %if.then.i.i.us.11, %if.end.i.i.us.10 - br i1 %cmp4.i.i.us.12, label %if.then.i.i.us.12, label %if.end.i.i.us.12 - -if.then.i.i.us.12: ; preds = %if.end.i.i.us.11 - %add.i.i.us.12 = add nsw i32 %mul.i.i, %conv.i.i.us.12 - %idxprom.i.i.us.12 = sext i32 %add.i.i.us.12 to i64 - %arrayidx.i.i.us.12 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.12 - -if.end.i.i.us.12: ; preds = %if.then.i.i.us.12, %if.end.i.i.us.11 - br i1 %cmp4.i.i.us.13, label %if.then.i.i.us.13, label %if.end.i.i.us.13 - -if.then.i.i.us.13: ; preds = %if.end.i.i.us.12 - %add.i.i.us.13 = add nsw i32 %mul.i.i, %conv.i.i.us.13 - %idxprom.i.i.us.13 = sext i32 %add.i.i.us.13 to i64 - %arrayidx.i.i.us.13 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.13 - -if.end.i.i.us.13: ; preds = %if.then.i.i.us.13, %if.end.i.i.us.12 - br i1 %cmp4.i.i.us.14, label %if.then.i.i.us.14, label %if.end.i.i.us.14 - -if.then.i.i.us.14: ; preds = %if.end.i.i.us.13 - %add.i.i.us.14 = add nsw i32 %mul.i.i, %conv.i.i.us.14 - %idxprom.i.i.us.14 = sext i32 %add.i.i.us.14 to i64 - %arrayidx.i.i.us.14 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.14 - -if.end.i.i.us.14: ; preds = %if.then.i.i.us.14, %if.end.i.i.us.13 - br i1 %cmp4.i.i.us.15, label %if.then.i.i.us.15, label %if.end.i.i.us.15 - -if.then.i.i.us.15: ; preds = %if.end.i.i.us.14 - %add.i.i.us.15 = add nsw i32 %mul.i.i, %conv.i.i.us.15 - %idxprom.i.i.us.15 = sext i32 %add.i.i.us.15 to i64 - %arrayidx.i.i.us.15 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.15 - -if.end.i.i.us.15: ; preds = %if.then.i.i.us.15, %if.end.i.i.us.14 - br i1 %cmp4.i.i.us.16, label %if.then.i.i.us.16, label %if.end.i.i.us.16 - -if.then.i.i.us.16: ; preds = %if.end.i.i.us.15 - %add.i.i.us.16 = add nsw i32 %mul.i.i, %conv.i.i.us.16 - %idxprom.i.i.us.16 = sext i32 %add.i.i.us.16 to i64 - %arrayidx.i.i.us.16 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.16 - -if.end.i.i.us.16: ; preds = %if.then.i.i.us.16, %if.end.i.i.us.15 - br i1 %cmp4.i.i.us.17, label %if.then.i.i.us.17, label %if.end.i.i.us.17 - -if.then.i.i.us.17: ; preds = %if.end.i.i.us.16 - %add.i.i.us.17 = add nsw i32 %mul.i.i, %conv.i.i.us.17 - %idxprom.i.i.us.17 = sext i32 %add.i.i.us.17 to i64 - %arrayidx.i.i.us.17 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.17 - -if.end.i.i.us.17: ; preds = %if.then.i.i.us.17, %if.end.i.i.us.16 - br i1 %cmp4.i.i.us.18, label %if.then.i.i.us.18, label %if.end.i.i.us.18 - -if.then.i.i.us.18: ; preds = %if.end.i.i.us.17 - %add.i.i.us.18 = add nsw i32 %mul.i.i, %conv.i.i.us.18 - %idxprom.i.i.us.18 = sext i32 %add.i.i.us.18 to i64 - %arrayidx.i.i.us.18 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.18 - -if.end.i.i.us.18: ; preds = %if.then.i.i.us.18, %if.end.i.i.us.17 - br i1 %cmp4.i.i.us.19, label %if.then.i.i.us.19, label %if.end.i.i.us.19 - -if.then.i.i.us.19: ; preds = %if.end.i.i.us.18 - %add.i.i.us.19 = add nsw i32 %mul.i.i, %conv.i.i.us.19 - %idxprom.i.i.us.19 = sext i32 %add.i.i.us.19 to i64 - %arrayidx.i.i.us.19 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.19 - -if.end.i.i.us.19: ; preds = %if.then.i.i.us.19, %if.end.i.i.us.18 - br i1 %cmp4.i.i.us.20, label %if.then.i.i.us.20, label %if.end.i.i.us.20 - -if.then.i.i.us.20: ; preds = %if.end.i.i.us.19 - %add.i.i.us.20 = add nsw i32 %mul.i.i, %conv.i.i.us.20 - %idxprom.i.i.us.20 = sext i32 %add.i.i.us.20 to i64 - %arrayidx.i.i.us.20 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.20 - -if.end.i.i.us.20: ; preds = %if.then.i.i.us.20, %if.end.i.i.us.19 - br i1 %cmp4.i.i.us.21, label %if.then.i.i.us.21, label %if.end.i.i.us.21 - -if.then.i.i.us.21: ; preds = %if.end.i.i.us.20 - %add.i.i.us.21 = add nsw i32 %mul.i.i, %conv.i.i.us.21 - %idxprom.i.i.us.21 = sext i32 %add.i.i.us.21 to i64 - %arrayidx.i.i.us.21 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.21 - -if.end.i.i.us.21: ; preds = %if.then.i.i.us.21, %if.end.i.i.us.20 - br i1 %cmp4.i.i.us.22, label %if.then.i.i.us.22, label %if.end.i.i.us.22 - -if.then.i.i.us.22: ; preds = %if.end.i.i.us.21 - %add.i.i.us.22 = add nsw i32 %mul.i.i, %conv.i.i.us.22 - %idxprom.i.i.us.22 = sext i32 %add.i.i.us.22 to i64 - %arrayidx.i.i.us.22 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.22 - -if.end.i.i.us.22: ; preds = %if.then.i.i.us.22, %if.end.i.i.us.21 - br i1 %cmp4.i.i.us.23, label %if.then.i.i.us.23, label %if.end.i.i.us.23 - -if.then.i.i.us.23: ; preds = %if.end.i.i.us.22 - %add.i.i.us.23 = add nsw i32 %mul.i.i, %conv.i.i.us.23 - %idxprom.i.i.us.23 = sext i32 %add.i.i.us.23 to i64 - %arrayidx.i.i.us.23 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.23 - -if.end.i.i.us.23: ; preds = %if.then.i.i.us.23, %if.end.i.i.us.22 - br i1 %cmp4.i.i.us.24, label %if.then.i.i.us.24, label %if.end.i.i.us.24 - -if.then.i.i.us.24: ; preds = %if.end.i.i.us.23 - %add.i.i.us.24 = add nsw i32 %mul.i.i, %conv.i.i.us.24 - %idxprom.i.i.us.24 = sext i32 %add.i.i.us.24 to i64 - %arrayidx.i.i.us.24 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.24 - -if.end.i.i.us.24: ; preds = %if.then.i.i.us.24, %if.end.i.i.us.23 - br i1 %cmp4.i.i.us.25, label %if.then.i.i.us.25, label %if.end.i.i.us.25 - -if.then.i.i.us.25: ; preds = %if.end.i.i.us.24 - %add.i.i.us.25 = add nsw i32 %mul.i.i, %conv.i.i.us.25 - %idxprom.i.i.us.25 = sext i32 %add.i.i.us.25 to i64 - %arrayidx.i.i.us.25 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.25 - -if.end.i.i.us.25: ; preds = %if.then.i.i.us.25, %if.end.i.i.us.24 - br i1 %cmp4.i.i.us.26, label %if.then.i.i.us.26, label %if.end.i.i.us.26 - -if.then.i.i.us.26: ; preds = %if.end.i.i.us.25 - %add.i.i.us.26 = add nsw i32 %mul.i.i, %conv.i.i.us.26 - %idxprom.i.i.us.26 = sext i32 %add.i.i.us.26 to i64 - %arrayidx.i.i.us.26 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.26 - -if.end.i.i.us.26: ; preds = %if.then.i.i.us.26, %if.end.i.i.us.25 - br i1 %cmp4.i.i.us.27, label %if.then.i.i.us.27, label %if.end.i.i.us.27 - -if.then.i.i.us.27: ; preds = %if.end.i.i.us.26 - %add.i.i.us.27 = add nsw i32 %mul.i.i, %conv.i.i.us.27 - %idxprom.i.i.us.27 = sext i32 %add.i.i.us.27 to i64 - %arrayidx.i.i.us.27 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.27 - -if.end.i.i.us.27: ; preds = %if.then.i.i.us.27, %if.end.i.i.us.26 - br i1 %cmp4.i.i.us.28, label %if.then.i.i.us.28, label %if.end.i.i.us.28 - -if.then.i.i.us.28: ; preds = %if.end.i.i.us.27 - %add.i.i.us.28 = add nsw i32 %mul.i.i, %conv.i.i.us.28 - %idxprom.i.i.us.28 = sext i32 %add.i.i.us.28 to i64 - %arrayidx.i.i.us.28 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.28 - -if.end.i.i.us.28: ; preds = %if.then.i.i.us.28, %if.end.i.i.us.27 - br i1 %cmp4.i.i.us.29, label %if.then.i.i.us.29, label %if.end.i.i.us.29 - -if.then.i.i.us.29: ; preds = %if.end.i.i.us.28 - %add.i.i.us.29 = add nsw i32 %mul.i.i, %conv.i.i.us.29 - %idxprom.i.i.us.29 = sext i32 %add.i.i.us.29 to i64 - %arrayidx.i.i.us.29 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.29 - -if.end.i.i.us.29: ; preds = %if.then.i.i.us.29, %if.end.i.i.us.28 - br i1 %cmp4.i.i.us.30, label %if.then.i.i.us.30, label %if.end.i.i.us.30 - -if.then.i.i.us.30: ; preds = %if.end.i.i.us.29 - %add.i.i.us.30 = add nsw i32 %mul.i.i, %conv.i.i.us.30 - %idxprom.i.i.us.30 = sext i32 %add.i.i.us.30 to i64 - %arrayidx.i.i.us.30 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.30 - -if.end.i.i.us.30: ; preds = %if.then.i.i.us.30, %if.end.i.i.us.29 - br i1 %cmp4.i.i.us.31, label %if.then.i.i.us.31, label %pregion_for_end.i.i - -if.then.i.i.us.31: ; preds = %if.end.i.i.us.30 - %add.i.i.us.31 = add nsw i32 %mul.i.i, %conv.i.i.us.31 - %idxprom.i.i.us.31 = sext i32 %add.i.i.us.31 to i64 - %arrayidx.i.i.us.31 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -if.then.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7 - %add.i.i.us.us.7.1 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7.1 - %idxprom.i.i.us.us.7.1 = sext i32 %add.i.i.us.us.7.1 to i64 - %arrayidx.i.i.us.us.7.1 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7.1 = shl i64 %add1.i.i.i.us.us.7.1, 32 - %153 = ashr exact i64 %sext.i.i.us.us.7.1, 32 - br label %for.body.i.i.us.us.7.1 - -for.body.i.i.us.us.7.1: ; preds = %for.body.i.i.us.us.7.1, %if.then.i.i.us.us.7.1 - %indvars.iv.next.i.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.i.us.us.7.1, %for.body.i.i.us.us.7.1 ], [ 0, %if.then.i.i.us.us.7.1 ] - %154 = phi float [ %160, %for.body.i.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.i.us.us.7.1 ] - %155 = add nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %143 - %arrayidx11.i.i.us.us.7.1 = getelementptr inbounds float, float* %12, i64 %155 - %156 = load float, float* %arrayidx11.i.i.us.us.7.1, align 4, !tbaa !12 - %mul12.i.i.us.us.7.1 = fmul float %32, %156 - %157 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %33 - %158 = add nsw i64 %157, %153 - %arrayidx16.i.i.us.us.7.1 = getelementptr inbounds float, float* %16, i64 %158 - %159 = load float, float* %arrayidx16.i.i.us.us.7.1, align 4, !tbaa !12 - %160 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.7.1, float %159, float %154) #2 - store float %160, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7.1, 1 - %exitcond.not.i.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.7.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1.loopexit, label %for.body.i.i.us.us.7.1, !llvm.loop !19 - -if.end.i.i.us.us.7.1.loopexit: ; preds = %for.body.i.i.us.us.7.1 - br label %if.end.i.i.us.us.7.1 - -if.end.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7.1.loopexit, %if.end.i.i.us.us.7 - %161 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %161, 32 - br i1 %exitcond.7.not.1, label %_pocl_kernel_mm2_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !23 - -if.then.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6 - %add.i.i.us.us.6.1 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6.1 - %idxprom.i.i.us.us.6.1 = sext i32 %add.i.i.us.us.6.1 to i64 - %arrayidx.i.i.us.us.6.1 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6.1 = shl i64 %add1.i.i.i.us.us.6.1, 32 - %162 = ashr exact i64 %sext.i.i.us.us.6.1, 32 - br label %for.body.i.i.us.us.6.1 - -for.body.i.i.us.us.6.1: ; preds = %for.body.i.i.us.us.6.1, %if.then.i.i.us.us.6.1 - %indvars.iv.next.i.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.i.us.us.6.1, %for.body.i.i.us.us.6.1 ], [ 0, %if.then.i.i.us.us.6.1 ] - %163 = phi float [ %169, %for.body.i.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.i.us.us.6.1 ] - %164 = add nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %132 - %arrayidx11.i.i.us.us.6.1 = getelementptr inbounds float, float* %12, i64 %164 - %165 = load float, float* %arrayidx11.i.i.us.us.6.1, align 4, !tbaa !12 - %mul12.i.i.us.us.6.1 = fmul float %32, %165 - %166 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %33 - %167 = add nsw i64 %166, %162 - %arrayidx16.i.i.us.us.6.1 = getelementptr inbounds float, float* %16, i64 %167 - %168 = load float, float* %arrayidx16.i.i.us.us.6.1, align 4, !tbaa !12 - %169 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.6.1, float %168, float %163) #2 - store float %169, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6.1, 1 - %exitcond.not.i.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.6.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1.loopexit, label %for.body.i.i.us.us.6.1, !llvm.loop !19 - -if.end.i.i.us.us.6.1.loopexit: ; preds = %for.body.i.i.us.us.6.1 - br label %if.end.i.i.us.us.6.1 - -if.end.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6.1.loopexit, %if.end.i.i.us.us.6 - %170 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %170, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !23 - -if.then.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5 - %add.i.i.us.us.5.1 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5.1 - %idxprom.i.i.us.us.5.1 = sext i32 %add.i.i.us.us.5.1 to i64 - %arrayidx.i.i.us.us.5.1 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5.1 = shl i64 %add1.i.i.i.us.us.5.1, 32 - %171 = ashr exact i64 %sext.i.i.us.us.5.1, 32 - br label %for.body.i.i.us.us.5.1 - -for.body.i.i.us.us.5.1: ; preds = %for.body.i.i.us.us.5.1, %if.then.i.i.us.us.5.1 - %indvars.iv.next.i.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.i.us.us.5.1, %for.body.i.i.us.us.5.1 ], [ 0, %if.then.i.i.us.us.5.1 ] - %172 = phi float [ %178, %for.body.i.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.i.us.us.5.1 ] - %173 = add nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %121 - %arrayidx11.i.i.us.us.5.1 = getelementptr inbounds float, float* %12, i64 %173 - %174 = load float, float* %arrayidx11.i.i.us.us.5.1, align 4, !tbaa !12 - %mul12.i.i.us.us.5.1 = fmul float %32, %174 - %175 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %33 - %176 = add nsw i64 %175, %171 - %arrayidx16.i.i.us.us.5.1 = getelementptr inbounds float, float* %16, i64 %176 - %177 = load float, float* %arrayidx16.i.i.us.us.5.1, align 4, !tbaa !12 - %178 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.5.1, float %177, float %172) #2 - store float %178, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5.1, 1 - %exitcond.not.i.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.5.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1.loopexit, label %for.body.i.i.us.us.5.1, !llvm.loop !19 - -if.end.i.i.us.us.5.1.loopexit: ; preds = %for.body.i.i.us.us.5.1 - br label %if.end.i.i.us.us.5.1 - -if.end.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5.1.loopexit, %if.end.i.i.us.us.5 - %179 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %179, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !23 - -if.then.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4 - %add.i.i.us.us.4.1 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4.1 - %idxprom.i.i.us.us.4.1 = sext i32 %add.i.i.us.us.4.1 to i64 - %arrayidx.i.i.us.us.4.1 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4.1 = shl i64 %add1.i.i.i.us.us.4.1, 32 - %180 = ashr exact i64 %sext.i.i.us.us.4.1, 32 - br label %for.body.i.i.us.us.4.1 - -for.body.i.i.us.us.4.1: ; preds = %for.body.i.i.us.us.4.1, %if.then.i.i.us.us.4.1 - %indvars.iv.next.i.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.i.us.us.4.1, %for.body.i.i.us.us.4.1 ], [ 0, %if.then.i.i.us.us.4.1 ] - %181 = phi float [ %187, %for.body.i.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.i.us.us.4.1 ] - %182 = add nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %110 - %arrayidx11.i.i.us.us.4.1 = getelementptr inbounds float, float* %12, i64 %182 - %183 = load float, float* %arrayidx11.i.i.us.us.4.1, align 4, !tbaa !12 - %mul12.i.i.us.us.4.1 = fmul float %32, %183 - %184 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %33 - %185 = add nsw i64 %184, %180 - %arrayidx16.i.i.us.us.4.1 = getelementptr inbounds float, float* %16, i64 %185 - %186 = load float, float* %arrayidx16.i.i.us.us.4.1, align 4, !tbaa !12 - %187 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.4.1, float %186, float %181) #2 - store float %187, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4.1, 1 - %exitcond.not.i.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.4.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1.loopexit, label %for.body.i.i.us.us.4.1, !llvm.loop !19 - -if.end.i.i.us.us.4.1.loopexit: ; preds = %for.body.i.i.us.us.4.1 - br label %if.end.i.i.us.us.4.1 - -if.end.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4.1.loopexit, %if.end.i.i.us.us.4 - %188 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %188, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !23 - -if.then.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3 - %add.i.i.us.us.3.1 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3.1 - %idxprom.i.i.us.us.3.1 = sext i32 %add.i.i.us.us.3.1 to i64 - %arrayidx.i.i.us.us.3.1 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3.1 = shl i64 %add1.i.i.i.us.us.3.1, 32 - %189 = ashr exact i64 %sext.i.i.us.us.3.1, 32 - br label %for.body.i.i.us.us.3.1 - -for.body.i.i.us.us.3.1: ; preds = %for.body.i.i.us.us.3.1, %if.then.i.i.us.us.3.1 - %indvars.iv.next.i.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.i.us.us.3.1, %for.body.i.i.us.us.3.1 ], [ 0, %if.then.i.i.us.us.3.1 ] - %190 = phi float [ %196, %for.body.i.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.i.us.us.3.1 ] - %191 = add nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %99 - %arrayidx11.i.i.us.us.3.1 = getelementptr inbounds float, float* %12, i64 %191 - %192 = load float, float* %arrayidx11.i.i.us.us.3.1, align 4, !tbaa !12 - %mul12.i.i.us.us.3.1 = fmul float %32, %192 - %193 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %33 - %194 = add nsw i64 %193, %189 - %arrayidx16.i.i.us.us.3.1 = getelementptr inbounds float, float* %16, i64 %194 - %195 = load float, float* %arrayidx16.i.i.us.us.3.1, align 4, !tbaa !12 - %196 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.3.1, float %195, float %190) #2 - store float %196, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3.1, 1 - %exitcond.not.i.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.3.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1.loopexit, label %for.body.i.i.us.us.3.1, !llvm.loop !19 - -if.end.i.i.us.us.3.1.loopexit: ; preds = %for.body.i.i.us.us.3.1 - br label %if.end.i.i.us.us.3.1 - -if.end.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3.1.loopexit, %if.end.i.i.us.us.3 - %197 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %197, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !23 - -if.then.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2 - %add.i.i.us.us.2.1 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2.1 - %idxprom.i.i.us.us.2.1 = sext i32 %add.i.i.us.us.2.1 to i64 - %arrayidx.i.i.us.us.2.1 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2.1 = shl i64 %add1.i.i.i.us.us.2.1, 32 - %198 = ashr exact i64 %sext.i.i.us.us.2.1, 32 - br label %for.body.i.i.us.us.2.1 - -for.body.i.i.us.us.2.1: ; preds = %for.body.i.i.us.us.2.1, %if.then.i.i.us.us.2.1 - %indvars.iv.next.i.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.i.us.us.2.1, %for.body.i.i.us.us.2.1 ], [ 0, %if.then.i.i.us.us.2.1 ] - %199 = phi float [ %205, %for.body.i.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.i.us.us.2.1 ] - %200 = add nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %88 - %arrayidx11.i.i.us.us.2.1 = getelementptr inbounds float, float* %12, i64 %200 - %201 = load float, float* %arrayidx11.i.i.us.us.2.1, align 4, !tbaa !12 - %mul12.i.i.us.us.2.1 = fmul float %32, %201 - %202 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %33 - %203 = add nsw i64 %202, %198 - %arrayidx16.i.i.us.us.2.1 = getelementptr inbounds float, float* %16, i64 %203 - %204 = load float, float* %arrayidx16.i.i.us.us.2.1, align 4, !tbaa !12 - %205 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.2.1, float %204, float %199) #2 - store float %205, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2.1, 1 - %exitcond.not.i.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.2.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1.loopexit, label %for.body.i.i.us.us.2.1, !llvm.loop !19 - -if.end.i.i.us.us.2.1.loopexit: ; preds = %for.body.i.i.us.us.2.1 - br label %if.end.i.i.us.us.2.1 - -if.end.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2.1.loopexit, %if.end.i.i.us.us.2 - %206 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %206, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !23 - -if.then.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1 - %add.i.i.us.us.1.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1.1 - %idxprom.i.i.us.us.1.1 = sext i32 %add.i.i.us.us.1.1 to i64 - %arrayidx.i.i.us.us.1.1 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1.1 = shl i64 %add1.i.i.i.us.us.1.1, 32 - %207 = ashr exact i64 %sext.i.i.us.us.1.1, 32 - br label %for.body.i.i.us.us.1.1 - -for.body.i.i.us.us.1.1: ; preds = %for.body.i.i.us.us.1.1, %if.then.i.i.us.us.1.1 - %indvars.iv.next.i.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1.1, %for.body.i.i.us.us.1.1 ], [ 0, %if.then.i.i.us.us.1.1 ] - %208 = phi float [ %214, %for.body.i.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1.1 ] - %209 = add nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %67 - %arrayidx11.i.i.us.us.1.1 = getelementptr inbounds float, float* %12, i64 %209 - %210 = load float, float* %arrayidx11.i.i.us.us.1.1, align 4, !tbaa !12 - %mul12.i.i.us.us.1.1 = fmul float %32, %210 - %211 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %33 - %212 = add nsw i64 %211, %207 - %arrayidx16.i.i.us.us.1.1 = getelementptr inbounds float, float* %16, i64 %212 - %213 = load float, float* %arrayidx16.i.i.us.us.1.1, align 4, !tbaa !12 - %214 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.1.1, float %213, float %208) #2 - store float %214, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1.1, 1 - %exitcond.not.i.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1.loopexit, label %for.body.i.i.us.us.1.1, !llvm.loop !19 - -if.end.i.i.us.us.1.1.loopexit: ; preds = %for.body.i.i.us.us.1.1 - br label %if.end.i.i.us.us.1.1 - -if.end.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1.1.loopexit, %if.end.i.i.us.us.1 - %215 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %215, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !23 - -if.then.i.i.us.us.147: ; preds = %if.end.i.i.us.us - %add.i.i.us.us.143 = add nsw i32 %mul.i.i.us, %conv.i.i.us.us.140 - %idxprom.i.i.us.us.144 = sext i32 %add.i.i.us.us.143 to i64 - %arrayidx.i.i.us.us.145 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us.144 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.145, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.146 = shl i64 %add1.i.i.i.us.us.139, 32 - %216 = ashr exact i64 %sext.i.i.us.us.146, 32 - br label %for.body.i.i.us.us.154 - -for.body.i.i.us.us.154: ; preds = %for.body.i.i.us.us.154, %if.then.i.i.us.us.147 - %indvars.iv.next.i.i3.us.us.148 = phi i64 [ %indvars.iv.next.i.i.us.us.152, %for.body.i.i.us.us.154 ], [ 0, %if.then.i.i.us.us.147 ] - %217 = phi float [ %223, %for.body.i.i.us.us.154 ], [ 0.000000e+00, %if.then.i.i.us.us.147 ] - %218 = add nsw i64 %indvars.iv.next.i.i3.us.us.148, %65 - %arrayidx11.i.i.us.us.149 = getelementptr inbounds float, float* %12, i64 %218 - %219 = load float, float* %arrayidx11.i.i.us.us.149, align 4, !tbaa !12 - %mul12.i.i.us.us.150 = fmul float %32, %219 - %220 = mul nsw i64 %indvars.iv.next.i.i3.us.us.148, %33 - %221 = add nsw i64 %220, %216 - %arrayidx16.i.i.us.us.151 = getelementptr inbounds float, float* %16, i64 %221 - %222 = load float, float* %arrayidx16.i.i.us.us.151, align 4, !tbaa !12 - %223 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.150, float %222, float %217) #2 - store float %223, float* %arrayidx.i.i.us.us.145, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.152 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.148, 1 - %exitcond.not.i.i.us.us.153 = icmp eq i64 %indvars.iv.next.i.i.us.us.152, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.153, label %if.end.i.i.us.us.155.loopexit, label %for.body.i.i.us.us.154, !llvm.loop !19 - -if.end.i.i.us.us.155.loopexit: ; preds = %for.body.i.i.us.us.154 - br label %if.end.i.i.us.us.155 - -if.end.i.i.us.us.155: ; preds = %if.end.i.i.us.us.155.loopexit, %if.end.i.i.us.us - %224 = add nuw nsw i64 %_local_id_x.i.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %224, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !23 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mm2_kernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %22 = getelementptr i8*, i8** %0, i64 5 - %23 = bitcast i8** %22 to i32** - %24 = load i32*, i32** %23, align 8 - %25 = load i32, i32* %24, align 4 - %26 = getelementptr i8*, i8** %0, i64 7 - %27 = bitcast i8** %26 to float** - %28 = load float*, float** %27, align 8 - %29 = load float, float* %28, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp639.i.i = icmp sgt i32 %25, 0 - %30 = sext i32 %21 to i64 - %wide.trip.count.i.i = zext i32 %25 to i64 - br i1 %cmp639.i.i, label %pregion_for_entry.pregion_for_init.i.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - %conv.i.i.us = trunc i64 %mul.i.i.i to i32 - %cmp4.i.i.us = icmp sgt i32 %21, %conv.i.i.us - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.1 = or i32 %31, 1 - %cmp4.i.i.us.1 = icmp sgt i32 %21, %conv.i.i.us.1 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.2 = or i32 %32, 2 - %cmp4.i.i.us.2 = icmp sgt i32 %21, %conv.i.i.us.2 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.3 = or i32 %33, 3 - %cmp4.i.i.us.3 = icmp sgt i32 %21, %conv.i.i.us.3 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.4 = or i32 %34, 4 - %cmp4.i.i.us.4 = icmp sgt i32 %21, %conv.i.i.us.4 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.5 = or i32 %35, 5 - %cmp4.i.i.us.5 = icmp sgt i32 %21, %conv.i.i.us.5 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.6 = or i32 %36, 6 - %cmp4.i.i.us.6 = icmp sgt i32 %21, %conv.i.i.us.6 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.7 = or i32 %37, 7 - %cmp4.i.i.us.7 = icmp sgt i32 %21, %conv.i.i.us.7 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.8 = or i32 %38, 8 - %cmp4.i.i.us.8 = icmp sgt i32 %21, %conv.i.i.us.8 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.9 = or i32 %39, 9 - %cmp4.i.i.us.9 = icmp sgt i32 %21, %conv.i.i.us.9 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.10 = or i32 %40, 10 - %cmp4.i.i.us.10 = icmp sgt i32 %21, %conv.i.i.us.10 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.11 = or i32 %41, 11 - %cmp4.i.i.us.11 = icmp sgt i32 %21, %conv.i.i.us.11 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.12 = or i32 %42, 12 - %cmp4.i.i.us.12 = icmp sgt i32 %21, %conv.i.i.us.12 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.13 = or i32 %43, 13 - %cmp4.i.i.us.13 = icmp sgt i32 %21, %conv.i.i.us.13 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.14 = or i32 %44, 14 - %cmp4.i.i.us.14 = icmp sgt i32 %21, %conv.i.i.us.14 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.15 = or i32 %45, 15 - %cmp4.i.i.us.15 = icmp sgt i32 %21, %conv.i.i.us.15 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.16 = or i32 %46, 16 - %cmp4.i.i.us.16 = icmp sgt i32 %21, %conv.i.i.us.16 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.17 = or i32 %47, 17 - %cmp4.i.i.us.17 = icmp sgt i32 %21, %conv.i.i.us.17 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.18 = or i32 %48, 18 - %cmp4.i.i.us.18 = icmp sgt i32 %21, %conv.i.i.us.18 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.19 = or i32 %49, 19 - %cmp4.i.i.us.19 = icmp sgt i32 %21, %conv.i.i.us.19 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.20 = or i32 %50, 20 - %cmp4.i.i.us.20 = icmp sgt i32 %21, %conv.i.i.us.20 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.21 = or i32 %51, 21 - %cmp4.i.i.us.21 = icmp sgt i32 %21, %conv.i.i.us.21 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.22 = or i32 %52, 22 - %cmp4.i.i.us.22 = icmp sgt i32 %21, %conv.i.i.us.22 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.23 = or i32 %53, 23 - %cmp4.i.i.us.23 = icmp sgt i32 %21, %conv.i.i.us.23 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.24 = or i32 %54, 24 - %cmp4.i.i.us.24 = icmp sgt i32 %21, %conv.i.i.us.24 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.25 = or i32 %55, 25 - %cmp4.i.i.us.25 = icmp sgt i32 %21, %conv.i.i.us.25 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.26 = or i32 %56, 26 - %cmp4.i.i.us.26 = icmp sgt i32 %21, %conv.i.i.us.26 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.27 = or i32 %57, 27 - %cmp4.i.i.us.27 = icmp sgt i32 %21, %conv.i.i.us.27 - %58 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.28 = or i32 %58, 28 - %cmp4.i.i.us.28 = icmp sgt i32 %21, %conv.i.i.us.28 - %59 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.29 = or i32 %59, 29 - %cmp4.i.i.us.29 = icmp sgt i32 %21, %conv.i.i.us.29 - %60 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.30 = or i32 %60, 30 - %cmp4.i.i.us.30 = icmp sgt i32 %21, %conv.i.i.us.30 - %61 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.31 = or i32 %61, 31 - %cmp4.i.i.us.31 = icmp sgt i32 %21, %conv.i.i.us.31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i.us.preheader: ; preds = %5 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %17, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %21, %conv2.i.i.us - %mul8.i.i.us = mul nsw i32 %25, %conv2.i.i.us - %62 = sext i32 %mul8.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us.preheader - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us.155 - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us.preheader - %63 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %63, 1 - %cmp.i.i.us.1 = icmp sgt i32 %17, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %21, %conv2.i.i.us.1 - %mul8.i.i.us.1 = mul nsw i32 %25, %conv2.i.i.us.1 - %64 = sext i32 %mul8.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us.155, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %221, %if.end.i.i.us.us.155 ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %21, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us - store float 0.000000e+00, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %65 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %66 = or i64 %_local_id_x.i.0.us.us, 1 - %add1.i.i.i.us.us.139 = add nuw nsw i64 %66, %mul.i.i.i - %conv.i.i.us.us.140 = trunc i64 %add1.i.i.i.us.us.139 to i32 - %cmp4.i.i.us.us.141 = icmp sgt i32 %21, %conv.i.i.us.us.140 - br i1 %cmp4.i.i.us.us.141, label %if.then.i.i.us.us.147, label %if.end.i.i.us.us.155 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %67 = phi float [ %73, %for.body.i.i.us.us ], [ 0.000000e+00, %if.then.i.i.us.us ] - %68 = add nsw i64 %indvars.iv.next.i.i3.us.us, %62 - %arrayidx11.i.i.us.us = getelementptr inbounds float, float* %10, i64 %68 - %69 = load float, float* %arrayidx11.i.i.us.us, align 4, !tbaa !12 - %mul12.i.i.us.us = fmul float %29, %69 - %70 = mul nsw i64 %indvars.iv.next.i.i3.us.us, %30 - %71 = add nsw i64 %70, %65 - %arrayidx16.i.i.us.us = getelementptr inbounds float, float* %13, i64 %71 - %72 = load float, float* %arrayidx16.i.i.us.us, align 4, !tbaa !12 - %73 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us, float %72, float %67) #2 - store float %73, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %pregion_for_entry.pregion_for_init.i.i.preheader - %_local_id_y.i.0 = phi i64 [ %74, %pregion_for_end.i.i ], [ 0, %pregion_for_entry.pregion_for_init.i.i.preheader ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %17, %conv2.i.i - %mul.i.i = mul nsw i32 %21, %conv2.i.i - br i1 %cmp.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us.preheader - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -pregion_for_end.i.i: ; preds = %if.then.i.i.us.31, %if.end.i.i.us.30, %pregion_for_entry.pregion_for_init.i.i - %74 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond34.not = icmp eq i64 %74, 8 - br i1 %exitcond34.not, label %_pocl_kernel_mm2_kernel1.exit.loopexit56, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_mm2_kernel1.exit.loopexit: ; preds = %if.end.i.i.us.us.7.1 - br label %_pocl_kernel_mm2_kernel1.exit - -_pocl_kernel_mm2_kernel1.exit.loopexit56: ; preds = %pregion_for_end.i.i - br label %_pocl_kernel_mm2_kernel1.exit - -_pocl_kernel_mm2_kernel1.exit: ; preds = %pregion_for_end.i.i.us.6, %_pocl_kernel_mm2_kernel1.exit.loopexit56, %_pocl_kernel_mm2_kernel1.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %212, %if.end.i.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %21, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %75 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %76 = phi float [ %82, %for.body.i.i.us.us.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1 ] - %77 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %64 - %arrayidx11.i.i.us.us.1 = getelementptr inbounds float, float* %10, i64 %77 - %78 = load float, float* %arrayidx11.i.i.us.us.1, align 4, !tbaa !12 - %mul12.i.i.us.us.1 = fmul float %29, %78 - %79 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1, %30 - %80 = add nsw i64 %79, %75 - %arrayidx16.i.i.us.us.1 = getelementptr inbounds float, float* %13, i64 %80 - %81 = load float, float* %arrayidx16.i.i.us.us.1, align 4, !tbaa !12 - %82 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.1, float %81, float %76) #2 - store float %82, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !19 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %83 = or i64 %_local_id_x.i.0.us.us.1, 1 - %add1.i.i.i.us.us.1.1 = add nuw nsw i64 %83, %mul.i.i.i - %conv.i.i.us.us.1.1 = trunc i64 %add1.i.i.i.us.us.1.1 to i32 - %cmp4.i.i.us.us.1.1 = icmp sgt i32 %21, %conv.i.i.us.us.1.1 - br i1 %cmp4.i.i.us.us.1.1, label %if.then.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %84 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %84, 2 - %cmp.i.i.us.2 = icmp sgt i32 %17, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %21, %conv2.i.i.us.2 - %mul8.i.i.us.2 = mul nsw i32 %25, %conv2.i.i.us.2 - %85 = sext i32 %mul8.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.1, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %203, %if.end.i.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %21, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %86 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %87 = phi float [ %93, %for.body.i.i.us.us.2 ], [ 0.000000e+00, %if.then.i.i.us.us.2 ] - %88 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %85 - %arrayidx11.i.i.us.us.2 = getelementptr inbounds float, float* %10, i64 %88 - %89 = load float, float* %arrayidx11.i.i.us.us.2, align 4, !tbaa !12 - %mul12.i.i.us.us.2 = fmul float %29, %89 - %90 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2, %30 - %91 = add nsw i64 %90, %86 - %arrayidx16.i.i.us.us.2 = getelementptr inbounds float, float* %13, i64 %91 - %92 = load float, float* %arrayidx16.i.i.us.us.2, align 4, !tbaa !12 - %93 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.2, float %92, float %87) #2 - store float %93, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !19 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %94 = or i64 %_local_id_x.i.0.us.us.2, 1 - %add1.i.i.i.us.us.2.1 = add nuw nsw i64 %94, %mul.i.i.i - %conv.i.i.us.us.2.1 = trunc i64 %add1.i.i.i.us.us.2.1 to i32 - %cmp4.i.i.us.us.2.1 = icmp sgt i32 %21, %conv.i.i.us.us.2.1 - br i1 %cmp4.i.i.us.us.2.1, label %if.then.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2.1 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %95 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %95, 3 - %cmp.i.i.us.3 = icmp sgt i32 %17, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %21, %conv2.i.i.us.3 - %mul8.i.i.us.3 = mul nsw i32 %25, %conv2.i.i.us.3 - %96 = sext i32 %mul8.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.1, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %194, %if.end.i.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %21, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %97 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %98 = phi float [ %104, %for.body.i.i.us.us.3 ], [ 0.000000e+00, %if.then.i.i.us.us.3 ] - %99 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %96 - %arrayidx11.i.i.us.us.3 = getelementptr inbounds float, float* %10, i64 %99 - %100 = load float, float* %arrayidx11.i.i.us.us.3, align 4, !tbaa !12 - %mul12.i.i.us.us.3 = fmul float %29, %100 - %101 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3, %30 - %102 = add nsw i64 %101, %97 - %arrayidx16.i.i.us.us.3 = getelementptr inbounds float, float* %13, i64 %102 - %103 = load float, float* %arrayidx16.i.i.us.us.3, align 4, !tbaa !12 - %104 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.3, float %103, float %98) #2 - store float %104, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !19 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %105 = or i64 %_local_id_x.i.0.us.us.3, 1 - %add1.i.i.i.us.us.3.1 = add nuw nsw i64 %105, %mul.i.i.i - %conv.i.i.us.us.3.1 = trunc i64 %add1.i.i.i.us.us.3.1 to i32 - %cmp4.i.i.us.us.3.1 = icmp sgt i32 %21, %conv.i.i.us.us.3.1 - br i1 %cmp4.i.i.us.us.3.1, label %if.then.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3.1 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %106 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %106, 4 - %cmp.i.i.us.4 = icmp sgt i32 %17, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %21, %conv2.i.i.us.4 - %mul8.i.i.us.4 = mul nsw i32 %25, %conv2.i.i.us.4 - %107 = sext i32 %mul8.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.1, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %185, %if.end.i.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %21, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %108 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %109 = phi float [ %115, %for.body.i.i.us.us.4 ], [ 0.000000e+00, %if.then.i.i.us.us.4 ] - %110 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %107 - %arrayidx11.i.i.us.us.4 = getelementptr inbounds float, float* %10, i64 %110 - %111 = load float, float* %arrayidx11.i.i.us.us.4, align 4, !tbaa !12 - %mul12.i.i.us.us.4 = fmul float %29, %111 - %112 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4, %30 - %113 = add nsw i64 %112, %108 - %arrayidx16.i.i.us.us.4 = getelementptr inbounds float, float* %13, i64 %113 - %114 = load float, float* %arrayidx16.i.i.us.us.4, align 4, !tbaa !12 - %115 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.4, float %114, float %109) #2 - store float %115, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !19 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %116 = or i64 %_local_id_x.i.0.us.us.4, 1 - %add1.i.i.i.us.us.4.1 = add nuw nsw i64 %116, %mul.i.i.i - %conv.i.i.us.us.4.1 = trunc i64 %add1.i.i.i.us.us.4.1 to i32 - %cmp4.i.i.us.us.4.1 = icmp sgt i32 %21, %conv.i.i.us.us.4.1 - br i1 %cmp4.i.i.us.us.4.1, label %if.then.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4.1 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %117 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %117, 5 - %cmp.i.i.us.5 = icmp sgt i32 %17, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %21, %conv2.i.i.us.5 - %mul8.i.i.us.5 = mul nsw i32 %25, %conv2.i.i.us.5 - %118 = sext i32 %mul8.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.1, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %176, %if.end.i.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %21, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %119 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %120 = phi float [ %126, %for.body.i.i.us.us.5 ], [ 0.000000e+00, %if.then.i.i.us.us.5 ] - %121 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %118 - %arrayidx11.i.i.us.us.5 = getelementptr inbounds float, float* %10, i64 %121 - %122 = load float, float* %arrayidx11.i.i.us.us.5, align 4, !tbaa !12 - %mul12.i.i.us.us.5 = fmul float %29, %122 - %123 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5, %30 - %124 = add nsw i64 %123, %119 - %arrayidx16.i.i.us.us.5 = getelementptr inbounds float, float* %13, i64 %124 - %125 = load float, float* %arrayidx16.i.i.us.us.5, align 4, !tbaa !12 - %126 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.5, float %125, float %120) #2 - store float %126, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !19 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %127 = or i64 %_local_id_x.i.0.us.us.5, 1 - %add1.i.i.i.us.us.5.1 = add nuw nsw i64 %127, %mul.i.i.i - %conv.i.i.us.us.5.1 = trunc i64 %add1.i.i.i.us.us.5.1 to i32 - %cmp4.i.i.us.us.5.1 = icmp sgt i32 %21, %conv.i.i.us.us.5.1 - br i1 %cmp4.i.i.us.us.5.1, label %if.then.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5.1 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %128 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %128, 6 - %cmp.i.i.us.6 = icmp sgt i32 %17, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %21, %conv2.i.i.us.6 - %mul8.i.i.us.6 = mul nsw i32 %25, %conv2.i.i.us.6 - %129 = sext i32 %mul8.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.1, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %167, %if.end.i.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %21, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %130 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %131 = phi float [ %137, %for.body.i.i.us.us.6 ], [ 0.000000e+00, %if.then.i.i.us.us.6 ] - %132 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %129 - %arrayidx11.i.i.us.us.6 = getelementptr inbounds float, float* %10, i64 %132 - %133 = load float, float* %arrayidx11.i.i.us.us.6, align 4, !tbaa !12 - %mul12.i.i.us.us.6 = fmul float %29, %133 - %134 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6, %30 - %135 = add nsw i64 %134, %130 - %arrayidx16.i.i.us.us.6 = getelementptr inbounds float, float* %13, i64 %135 - %136 = load float, float* %arrayidx16.i.i.us.us.6, align 4, !tbaa !12 - %137 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.6, float %136, float %131) #2 - store float %137, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !19 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %138 = or i64 %_local_id_x.i.0.us.us.6, 1 - %add1.i.i.i.us.us.6.1 = add nuw nsw i64 %138, %mul.i.i.i - %conv.i.i.us.us.6.1 = trunc i64 %add1.i.i.i.us.us.6.1 to i32 - %cmp4.i.i.us.us.6.1 = icmp sgt i32 %21, %conv.i.i.us.us.6.1 - br i1 %cmp4.i.i.us.us.6.1, label %if.then.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6.1 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %139 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %139, 7 - %cmp.i.i.us.7 = icmp sgt i32 %17, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %21, %conv2.i.i.us.7 - %mul8.i.i.us.7 = mul nsw i32 %25, %conv2.i.i.us.7 - %140 = sext i32 %mul8.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_mm2_kernel1.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.1, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %158, %if.end.i.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %21, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %141 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %142 = phi float [ %148, %for.body.i.i.us.us.7 ], [ 0.000000e+00, %if.then.i.i.us.us.7 ] - %143 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %140 - %arrayidx11.i.i.us.us.7 = getelementptr inbounds float, float* %10, i64 %143 - %144 = load float, float* %arrayidx11.i.i.us.us.7, align 4, !tbaa !12 - %mul12.i.i.us.us.7 = fmul float %29, %144 - %145 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7, %30 - %146 = add nsw i64 %145, %141 - %arrayidx16.i.i.us.us.7 = getelementptr inbounds float, float* %13, i64 %146 - %147 = load float, float* %arrayidx16.i.i.us.us.7, align 4, !tbaa !12 - %148 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.7, float %147, float %142) #2 - store float %148, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !19 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %149 = or i64 %_local_id_x.i.0.us.us.7, 1 - %add1.i.i.i.us.us.7.1 = add nuw nsw i64 %149, %mul.i.i.i - %conv.i.i.us.us.7.1 = trunc i64 %add1.i.i.i.us.us.7.1 to i32 - %cmp4.i.i.us.us.7.1 = icmp sgt i32 %21, %conv.i.i.us.us.7.1 - br i1 %cmp4.i.i.us.us.7.1, label %if.then.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1 - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %add.i.i.us.1 = add nsw i32 %mul.i.i, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %if.end.i.i.us - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %if.end.i.i.us.1 - %add.i.i.us.2 = add nsw i32 %mul.i.i, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %if.end.i.i.us.1 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %if.end.i.i.us.2 - %add.i.i.us.3 = add nsw i32 %mul.i.i, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %if.end.i.i.us.2 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %if.end.i.i.us.3 - %add.i.i.us.4 = add nsw i32 %mul.i.i, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %if.end.i.i.us.3 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %if.end.i.i.us.4 - %add.i.i.us.5 = add nsw i32 %mul.i.i, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %if.end.i.i.us.4 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %if.end.i.i.us.5 - %add.i.i.us.6 = add nsw i32 %mul.i.i, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %if.end.i.i.us.5 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %if.end.i.i.us.6 - %add.i.i.us.7 = add nsw i32 %mul.i.i, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %if.end.i.i.us.6 - br i1 %cmp4.i.i.us.8, label %if.then.i.i.us.8, label %if.end.i.i.us.8 - -if.then.i.i.us.8: ; preds = %if.end.i.i.us.7 - %add.i.i.us.8 = add nsw i32 %mul.i.i, %conv.i.i.us.8 - %idxprom.i.i.us.8 = sext i32 %add.i.i.us.8 to i64 - %arrayidx.i.i.us.8 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.8 - -if.end.i.i.us.8: ; preds = %if.then.i.i.us.8, %if.end.i.i.us.7 - br i1 %cmp4.i.i.us.9, label %if.then.i.i.us.9, label %if.end.i.i.us.9 - -if.then.i.i.us.9: ; preds = %if.end.i.i.us.8 - %add.i.i.us.9 = add nsw i32 %mul.i.i, %conv.i.i.us.9 - %idxprom.i.i.us.9 = sext i32 %add.i.i.us.9 to i64 - %arrayidx.i.i.us.9 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.9 - -if.end.i.i.us.9: ; preds = %if.then.i.i.us.9, %if.end.i.i.us.8 - br i1 %cmp4.i.i.us.10, label %if.then.i.i.us.10, label %if.end.i.i.us.10 - -if.then.i.i.us.10: ; preds = %if.end.i.i.us.9 - %add.i.i.us.10 = add nsw i32 %mul.i.i, %conv.i.i.us.10 - %idxprom.i.i.us.10 = sext i32 %add.i.i.us.10 to i64 - %arrayidx.i.i.us.10 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.10 - -if.end.i.i.us.10: ; preds = %if.then.i.i.us.10, %if.end.i.i.us.9 - br i1 %cmp4.i.i.us.11, label %if.then.i.i.us.11, label %if.end.i.i.us.11 - -if.then.i.i.us.11: ; preds = %if.end.i.i.us.10 - %add.i.i.us.11 = add nsw i32 %mul.i.i, %conv.i.i.us.11 - %idxprom.i.i.us.11 = sext i32 %add.i.i.us.11 to i64 - %arrayidx.i.i.us.11 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.11 - -if.end.i.i.us.11: ; preds = %if.then.i.i.us.11, %if.end.i.i.us.10 - br i1 %cmp4.i.i.us.12, label %if.then.i.i.us.12, label %if.end.i.i.us.12 - -if.then.i.i.us.12: ; preds = %if.end.i.i.us.11 - %add.i.i.us.12 = add nsw i32 %mul.i.i, %conv.i.i.us.12 - %idxprom.i.i.us.12 = sext i32 %add.i.i.us.12 to i64 - %arrayidx.i.i.us.12 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.12 - -if.end.i.i.us.12: ; preds = %if.then.i.i.us.12, %if.end.i.i.us.11 - br i1 %cmp4.i.i.us.13, label %if.then.i.i.us.13, label %if.end.i.i.us.13 - -if.then.i.i.us.13: ; preds = %if.end.i.i.us.12 - %add.i.i.us.13 = add nsw i32 %mul.i.i, %conv.i.i.us.13 - %idxprom.i.i.us.13 = sext i32 %add.i.i.us.13 to i64 - %arrayidx.i.i.us.13 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.13 - -if.end.i.i.us.13: ; preds = %if.then.i.i.us.13, %if.end.i.i.us.12 - br i1 %cmp4.i.i.us.14, label %if.then.i.i.us.14, label %if.end.i.i.us.14 - -if.then.i.i.us.14: ; preds = %if.end.i.i.us.13 - %add.i.i.us.14 = add nsw i32 %mul.i.i, %conv.i.i.us.14 - %idxprom.i.i.us.14 = sext i32 %add.i.i.us.14 to i64 - %arrayidx.i.i.us.14 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.14 - -if.end.i.i.us.14: ; preds = %if.then.i.i.us.14, %if.end.i.i.us.13 - br i1 %cmp4.i.i.us.15, label %if.then.i.i.us.15, label %if.end.i.i.us.15 - -if.then.i.i.us.15: ; preds = %if.end.i.i.us.14 - %add.i.i.us.15 = add nsw i32 %mul.i.i, %conv.i.i.us.15 - %idxprom.i.i.us.15 = sext i32 %add.i.i.us.15 to i64 - %arrayidx.i.i.us.15 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.15 - -if.end.i.i.us.15: ; preds = %if.then.i.i.us.15, %if.end.i.i.us.14 - br i1 %cmp4.i.i.us.16, label %if.then.i.i.us.16, label %if.end.i.i.us.16 - -if.then.i.i.us.16: ; preds = %if.end.i.i.us.15 - %add.i.i.us.16 = add nsw i32 %mul.i.i, %conv.i.i.us.16 - %idxprom.i.i.us.16 = sext i32 %add.i.i.us.16 to i64 - %arrayidx.i.i.us.16 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.16 - -if.end.i.i.us.16: ; preds = %if.then.i.i.us.16, %if.end.i.i.us.15 - br i1 %cmp4.i.i.us.17, label %if.then.i.i.us.17, label %if.end.i.i.us.17 - -if.then.i.i.us.17: ; preds = %if.end.i.i.us.16 - %add.i.i.us.17 = add nsw i32 %mul.i.i, %conv.i.i.us.17 - %idxprom.i.i.us.17 = sext i32 %add.i.i.us.17 to i64 - %arrayidx.i.i.us.17 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.17 - -if.end.i.i.us.17: ; preds = %if.then.i.i.us.17, %if.end.i.i.us.16 - br i1 %cmp4.i.i.us.18, label %if.then.i.i.us.18, label %if.end.i.i.us.18 - -if.then.i.i.us.18: ; preds = %if.end.i.i.us.17 - %add.i.i.us.18 = add nsw i32 %mul.i.i, %conv.i.i.us.18 - %idxprom.i.i.us.18 = sext i32 %add.i.i.us.18 to i64 - %arrayidx.i.i.us.18 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.18 - -if.end.i.i.us.18: ; preds = %if.then.i.i.us.18, %if.end.i.i.us.17 - br i1 %cmp4.i.i.us.19, label %if.then.i.i.us.19, label %if.end.i.i.us.19 - -if.then.i.i.us.19: ; preds = %if.end.i.i.us.18 - %add.i.i.us.19 = add nsw i32 %mul.i.i, %conv.i.i.us.19 - %idxprom.i.i.us.19 = sext i32 %add.i.i.us.19 to i64 - %arrayidx.i.i.us.19 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.19 - -if.end.i.i.us.19: ; preds = %if.then.i.i.us.19, %if.end.i.i.us.18 - br i1 %cmp4.i.i.us.20, label %if.then.i.i.us.20, label %if.end.i.i.us.20 - -if.then.i.i.us.20: ; preds = %if.end.i.i.us.19 - %add.i.i.us.20 = add nsw i32 %mul.i.i, %conv.i.i.us.20 - %idxprom.i.i.us.20 = sext i32 %add.i.i.us.20 to i64 - %arrayidx.i.i.us.20 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.20 - -if.end.i.i.us.20: ; preds = %if.then.i.i.us.20, %if.end.i.i.us.19 - br i1 %cmp4.i.i.us.21, label %if.then.i.i.us.21, label %if.end.i.i.us.21 - -if.then.i.i.us.21: ; preds = %if.end.i.i.us.20 - %add.i.i.us.21 = add nsw i32 %mul.i.i, %conv.i.i.us.21 - %idxprom.i.i.us.21 = sext i32 %add.i.i.us.21 to i64 - %arrayidx.i.i.us.21 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.21 - -if.end.i.i.us.21: ; preds = %if.then.i.i.us.21, %if.end.i.i.us.20 - br i1 %cmp4.i.i.us.22, label %if.then.i.i.us.22, label %if.end.i.i.us.22 - -if.then.i.i.us.22: ; preds = %if.end.i.i.us.21 - %add.i.i.us.22 = add nsw i32 %mul.i.i, %conv.i.i.us.22 - %idxprom.i.i.us.22 = sext i32 %add.i.i.us.22 to i64 - %arrayidx.i.i.us.22 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.22 - -if.end.i.i.us.22: ; preds = %if.then.i.i.us.22, %if.end.i.i.us.21 - br i1 %cmp4.i.i.us.23, label %if.then.i.i.us.23, label %if.end.i.i.us.23 - -if.then.i.i.us.23: ; preds = %if.end.i.i.us.22 - %add.i.i.us.23 = add nsw i32 %mul.i.i, %conv.i.i.us.23 - %idxprom.i.i.us.23 = sext i32 %add.i.i.us.23 to i64 - %arrayidx.i.i.us.23 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.23 - -if.end.i.i.us.23: ; preds = %if.then.i.i.us.23, %if.end.i.i.us.22 - br i1 %cmp4.i.i.us.24, label %if.then.i.i.us.24, label %if.end.i.i.us.24 - -if.then.i.i.us.24: ; preds = %if.end.i.i.us.23 - %add.i.i.us.24 = add nsw i32 %mul.i.i, %conv.i.i.us.24 - %idxprom.i.i.us.24 = sext i32 %add.i.i.us.24 to i64 - %arrayidx.i.i.us.24 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.24 - -if.end.i.i.us.24: ; preds = %if.then.i.i.us.24, %if.end.i.i.us.23 - br i1 %cmp4.i.i.us.25, label %if.then.i.i.us.25, label %if.end.i.i.us.25 - -if.then.i.i.us.25: ; preds = %if.end.i.i.us.24 - %add.i.i.us.25 = add nsw i32 %mul.i.i, %conv.i.i.us.25 - %idxprom.i.i.us.25 = sext i32 %add.i.i.us.25 to i64 - %arrayidx.i.i.us.25 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.25 - -if.end.i.i.us.25: ; preds = %if.then.i.i.us.25, %if.end.i.i.us.24 - br i1 %cmp4.i.i.us.26, label %if.then.i.i.us.26, label %if.end.i.i.us.26 - -if.then.i.i.us.26: ; preds = %if.end.i.i.us.25 - %add.i.i.us.26 = add nsw i32 %mul.i.i, %conv.i.i.us.26 - %idxprom.i.i.us.26 = sext i32 %add.i.i.us.26 to i64 - %arrayidx.i.i.us.26 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.26 - -if.end.i.i.us.26: ; preds = %if.then.i.i.us.26, %if.end.i.i.us.25 - br i1 %cmp4.i.i.us.27, label %if.then.i.i.us.27, label %if.end.i.i.us.27 - -if.then.i.i.us.27: ; preds = %if.end.i.i.us.26 - %add.i.i.us.27 = add nsw i32 %mul.i.i, %conv.i.i.us.27 - %idxprom.i.i.us.27 = sext i32 %add.i.i.us.27 to i64 - %arrayidx.i.i.us.27 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.27 - -if.end.i.i.us.27: ; preds = %if.then.i.i.us.27, %if.end.i.i.us.26 - br i1 %cmp4.i.i.us.28, label %if.then.i.i.us.28, label %if.end.i.i.us.28 - -if.then.i.i.us.28: ; preds = %if.end.i.i.us.27 - %add.i.i.us.28 = add nsw i32 %mul.i.i, %conv.i.i.us.28 - %idxprom.i.i.us.28 = sext i32 %add.i.i.us.28 to i64 - %arrayidx.i.i.us.28 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.28 - -if.end.i.i.us.28: ; preds = %if.then.i.i.us.28, %if.end.i.i.us.27 - br i1 %cmp4.i.i.us.29, label %if.then.i.i.us.29, label %if.end.i.i.us.29 - -if.then.i.i.us.29: ; preds = %if.end.i.i.us.28 - %add.i.i.us.29 = add nsw i32 %mul.i.i, %conv.i.i.us.29 - %idxprom.i.i.us.29 = sext i32 %add.i.i.us.29 to i64 - %arrayidx.i.i.us.29 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.29 - -if.end.i.i.us.29: ; preds = %if.then.i.i.us.29, %if.end.i.i.us.28 - br i1 %cmp4.i.i.us.30, label %if.then.i.i.us.30, label %if.end.i.i.us.30 - -if.then.i.i.us.30: ; preds = %if.end.i.i.us.29 - %add.i.i.us.30 = add nsw i32 %mul.i.i, %conv.i.i.us.30 - %idxprom.i.i.us.30 = sext i32 %add.i.i.us.30 to i64 - %arrayidx.i.i.us.30 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.30 - -if.end.i.i.us.30: ; preds = %if.then.i.i.us.30, %if.end.i.i.us.29 - br i1 %cmp4.i.i.us.31, label %if.then.i.i.us.31, label %pregion_for_end.i.i - -if.then.i.i.us.31: ; preds = %if.end.i.i.us.30 - %add.i.i.us.31 = add nsw i32 %mul.i.i, %conv.i.i.us.31 - %idxprom.i.i.us.31 = sext i32 %add.i.i.us.31 to i64 - %arrayidx.i.i.us.31 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -if.then.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7 - %add.i.i.us.us.7.1 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7.1 - %idxprom.i.i.us.us.7.1 = sext i32 %add.i.i.us.us.7.1 to i64 - %arrayidx.i.i.us.us.7.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7.1 = shl i64 %add1.i.i.i.us.us.7.1, 32 - %150 = ashr exact i64 %sext.i.i.us.us.7.1, 32 - br label %for.body.i.i.us.us.7.1 - -for.body.i.i.us.us.7.1: ; preds = %for.body.i.i.us.us.7.1, %if.then.i.i.us.us.7.1 - %indvars.iv.next.i.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.i.us.us.7.1, %for.body.i.i.us.us.7.1 ], [ 0, %if.then.i.i.us.us.7.1 ] - %151 = phi float [ %157, %for.body.i.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.i.us.us.7.1 ] - %152 = add nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %140 - %arrayidx11.i.i.us.us.7.1 = getelementptr inbounds float, float* %10, i64 %152 - %153 = load float, float* %arrayidx11.i.i.us.us.7.1, align 4, !tbaa !12 - %mul12.i.i.us.us.7.1 = fmul float %29, %153 - %154 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %30 - %155 = add nsw i64 %154, %150 - %arrayidx16.i.i.us.us.7.1 = getelementptr inbounds float, float* %13, i64 %155 - %156 = load float, float* %arrayidx16.i.i.us.us.7.1, align 4, !tbaa !12 - %157 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.7.1, float %156, float %151) #2 - store float %157, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7.1, 1 - %exitcond.not.i.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.7.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1.loopexit, label %for.body.i.i.us.us.7.1, !llvm.loop !19 - -if.end.i.i.us.us.7.1.loopexit: ; preds = %for.body.i.i.us.us.7.1 - br label %if.end.i.i.us.us.7.1 - -if.end.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7.1.loopexit, %if.end.i.i.us.us.7 - %158 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %158, 32 - br i1 %exitcond.7.not.1, label %_pocl_kernel_mm2_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !23 - -if.then.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6 - %add.i.i.us.us.6.1 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6.1 - %idxprom.i.i.us.us.6.1 = sext i32 %add.i.i.us.us.6.1 to i64 - %arrayidx.i.i.us.us.6.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6.1 = shl i64 %add1.i.i.i.us.us.6.1, 32 - %159 = ashr exact i64 %sext.i.i.us.us.6.1, 32 - br label %for.body.i.i.us.us.6.1 - -for.body.i.i.us.us.6.1: ; preds = %for.body.i.i.us.us.6.1, %if.then.i.i.us.us.6.1 - %indvars.iv.next.i.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.i.us.us.6.1, %for.body.i.i.us.us.6.1 ], [ 0, %if.then.i.i.us.us.6.1 ] - %160 = phi float [ %166, %for.body.i.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.i.us.us.6.1 ] - %161 = add nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %129 - %arrayidx11.i.i.us.us.6.1 = getelementptr inbounds float, float* %10, i64 %161 - %162 = load float, float* %arrayidx11.i.i.us.us.6.1, align 4, !tbaa !12 - %mul12.i.i.us.us.6.1 = fmul float %29, %162 - %163 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %30 - %164 = add nsw i64 %163, %159 - %arrayidx16.i.i.us.us.6.1 = getelementptr inbounds float, float* %13, i64 %164 - %165 = load float, float* %arrayidx16.i.i.us.us.6.1, align 4, !tbaa !12 - %166 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.6.1, float %165, float %160) #2 - store float %166, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6.1, 1 - %exitcond.not.i.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.6.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1.loopexit, label %for.body.i.i.us.us.6.1, !llvm.loop !19 - -if.end.i.i.us.us.6.1.loopexit: ; preds = %for.body.i.i.us.us.6.1 - br label %if.end.i.i.us.us.6.1 - -if.end.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6.1.loopexit, %if.end.i.i.us.us.6 - %167 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %167, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !23 - -if.then.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5 - %add.i.i.us.us.5.1 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5.1 - %idxprom.i.i.us.us.5.1 = sext i32 %add.i.i.us.us.5.1 to i64 - %arrayidx.i.i.us.us.5.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5.1 = shl i64 %add1.i.i.i.us.us.5.1, 32 - %168 = ashr exact i64 %sext.i.i.us.us.5.1, 32 - br label %for.body.i.i.us.us.5.1 - -for.body.i.i.us.us.5.1: ; preds = %for.body.i.i.us.us.5.1, %if.then.i.i.us.us.5.1 - %indvars.iv.next.i.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.i.us.us.5.1, %for.body.i.i.us.us.5.1 ], [ 0, %if.then.i.i.us.us.5.1 ] - %169 = phi float [ %175, %for.body.i.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.i.us.us.5.1 ] - %170 = add nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %118 - %arrayidx11.i.i.us.us.5.1 = getelementptr inbounds float, float* %10, i64 %170 - %171 = load float, float* %arrayidx11.i.i.us.us.5.1, align 4, !tbaa !12 - %mul12.i.i.us.us.5.1 = fmul float %29, %171 - %172 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %30 - %173 = add nsw i64 %172, %168 - %arrayidx16.i.i.us.us.5.1 = getelementptr inbounds float, float* %13, i64 %173 - %174 = load float, float* %arrayidx16.i.i.us.us.5.1, align 4, !tbaa !12 - %175 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.5.1, float %174, float %169) #2 - store float %175, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5.1, 1 - %exitcond.not.i.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.5.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1.loopexit, label %for.body.i.i.us.us.5.1, !llvm.loop !19 - -if.end.i.i.us.us.5.1.loopexit: ; preds = %for.body.i.i.us.us.5.1 - br label %if.end.i.i.us.us.5.1 - -if.end.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5.1.loopexit, %if.end.i.i.us.us.5 - %176 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %176, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !23 - -if.then.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4 - %add.i.i.us.us.4.1 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4.1 - %idxprom.i.i.us.us.4.1 = sext i32 %add.i.i.us.us.4.1 to i64 - %arrayidx.i.i.us.us.4.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4.1 = shl i64 %add1.i.i.i.us.us.4.1, 32 - %177 = ashr exact i64 %sext.i.i.us.us.4.1, 32 - br label %for.body.i.i.us.us.4.1 - -for.body.i.i.us.us.4.1: ; preds = %for.body.i.i.us.us.4.1, %if.then.i.i.us.us.4.1 - %indvars.iv.next.i.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.i.us.us.4.1, %for.body.i.i.us.us.4.1 ], [ 0, %if.then.i.i.us.us.4.1 ] - %178 = phi float [ %184, %for.body.i.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.i.us.us.4.1 ] - %179 = add nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %107 - %arrayidx11.i.i.us.us.4.1 = getelementptr inbounds float, float* %10, i64 %179 - %180 = load float, float* %arrayidx11.i.i.us.us.4.1, align 4, !tbaa !12 - %mul12.i.i.us.us.4.1 = fmul float %29, %180 - %181 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %30 - %182 = add nsw i64 %181, %177 - %arrayidx16.i.i.us.us.4.1 = getelementptr inbounds float, float* %13, i64 %182 - %183 = load float, float* %arrayidx16.i.i.us.us.4.1, align 4, !tbaa !12 - %184 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.4.1, float %183, float %178) #2 - store float %184, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4.1, 1 - %exitcond.not.i.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.4.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1.loopexit, label %for.body.i.i.us.us.4.1, !llvm.loop !19 - -if.end.i.i.us.us.4.1.loopexit: ; preds = %for.body.i.i.us.us.4.1 - br label %if.end.i.i.us.us.4.1 - -if.end.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4.1.loopexit, %if.end.i.i.us.us.4 - %185 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %185, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !23 - -if.then.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3 - %add.i.i.us.us.3.1 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3.1 - %idxprom.i.i.us.us.3.1 = sext i32 %add.i.i.us.us.3.1 to i64 - %arrayidx.i.i.us.us.3.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3.1 = shl i64 %add1.i.i.i.us.us.3.1, 32 - %186 = ashr exact i64 %sext.i.i.us.us.3.1, 32 - br label %for.body.i.i.us.us.3.1 - -for.body.i.i.us.us.3.1: ; preds = %for.body.i.i.us.us.3.1, %if.then.i.i.us.us.3.1 - %indvars.iv.next.i.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.i.us.us.3.1, %for.body.i.i.us.us.3.1 ], [ 0, %if.then.i.i.us.us.3.1 ] - %187 = phi float [ %193, %for.body.i.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.i.us.us.3.1 ] - %188 = add nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %96 - %arrayidx11.i.i.us.us.3.1 = getelementptr inbounds float, float* %10, i64 %188 - %189 = load float, float* %arrayidx11.i.i.us.us.3.1, align 4, !tbaa !12 - %mul12.i.i.us.us.3.1 = fmul float %29, %189 - %190 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %30 - %191 = add nsw i64 %190, %186 - %arrayidx16.i.i.us.us.3.1 = getelementptr inbounds float, float* %13, i64 %191 - %192 = load float, float* %arrayidx16.i.i.us.us.3.1, align 4, !tbaa !12 - %193 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.3.1, float %192, float %187) #2 - store float %193, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3.1, 1 - %exitcond.not.i.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.3.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1.loopexit, label %for.body.i.i.us.us.3.1, !llvm.loop !19 - -if.end.i.i.us.us.3.1.loopexit: ; preds = %for.body.i.i.us.us.3.1 - br label %if.end.i.i.us.us.3.1 - -if.end.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3.1.loopexit, %if.end.i.i.us.us.3 - %194 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %194, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !23 - -if.then.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2 - %add.i.i.us.us.2.1 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2.1 - %idxprom.i.i.us.us.2.1 = sext i32 %add.i.i.us.us.2.1 to i64 - %arrayidx.i.i.us.us.2.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2.1 = shl i64 %add1.i.i.i.us.us.2.1, 32 - %195 = ashr exact i64 %sext.i.i.us.us.2.1, 32 - br label %for.body.i.i.us.us.2.1 - -for.body.i.i.us.us.2.1: ; preds = %for.body.i.i.us.us.2.1, %if.then.i.i.us.us.2.1 - %indvars.iv.next.i.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.i.us.us.2.1, %for.body.i.i.us.us.2.1 ], [ 0, %if.then.i.i.us.us.2.1 ] - %196 = phi float [ %202, %for.body.i.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.i.us.us.2.1 ] - %197 = add nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %85 - %arrayidx11.i.i.us.us.2.1 = getelementptr inbounds float, float* %10, i64 %197 - %198 = load float, float* %arrayidx11.i.i.us.us.2.1, align 4, !tbaa !12 - %mul12.i.i.us.us.2.1 = fmul float %29, %198 - %199 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %30 - %200 = add nsw i64 %199, %195 - %arrayidx16.i.i.us.us.2.1 = getelementptr inbounds float, float* %13, i64 %200 - %201 = load float, float* %arrayidx16.i.i.us.us.2.1, align 4, !tbaa !12 - %202 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.2.1, float %201, float %196) #2 - store float %202, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2.1, 1 - %exitcond.not.i.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.2.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1.loopexit, label %for.body.i.i.us.us.2.1, !llvm.loop !19 - -if.end.i.i.us.us.2.1.loopexit: ; preds = %for.body.i.i.us.us.2.1 - br label %if.end.i.i.us.us.2.1 - -if.end.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2.1.loopexit, %if.end.i.i.us.us.2 - %203 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %203, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !23 - -if.then.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1 - %add.i.i.us.us.1.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1.1 - %idxprom.i.i.us.us.1.1 = sext i32 %add.i.i.us.us.1.1 to i64 - %arrayidx.i.i.us.us.1.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1.1 = shl i64 %add1.i.i.i.us.us.1.1, 32 - %204 = ashr exact i64 %sext.i.i.us.us.1.1, 32 - br label %for.body.i.i.us.us.1.1 - -for.body.i.i.us.us.1.1: ; preds = %for.body.i.i.us.us.1.1, %if.then.i.i.us.us.1.1 - %indvars.iv.next.i.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1.1, %for.body.i.i.us.us.1.1 ], [ 0, %if.then.i.i.us.us.1.1 ] - %205 = phi float [ %211, %for.body.i.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1.1 ] - %206 = add nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %64 - %arrayidx11.i.i.us.us.1.1 = getelementptr inbounds float, float* %10, i64 %206 - %207 = load float, float* %arrayidx11.i.i.us.us.1.1, align 4, !tbaa !12 - %mul12.i.i.us.us.1.1 = fmul float %29, %207 - %208 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %30 - %209 = add nsw i64 %208, %204 - %arrayidx16.i.i.us.us.1.1 = getelementptr inbounds float, float* %13, i64 %209 - %210 = load float, float* %arrayidx16.i.i.us.us.1.1, align 4, !tbaa !12 - %211 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.1.1, float %210, float %205) #2 - store float %211, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1.1, 1 - %exitcond.not.i.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1.loopexit, label %for.body.i.i.us.us.1.1, !llvm.loop !19 - -if.end.i.i.us.us.1.1.loopexit: ; preds = %for.body.i.i.us.us.1.1 - br label %if.end.i.i.us.us.1.1 - -if.end.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1.1.loopexit, %if.end.i.i.us.us.1 - %212 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %212, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !23 - -if.then.i.i.us.us.147: ; preds = %if.end.i.i.us.us - %add.i.i.us.us.143 = add nsw i32 %mul.i.i.us, %conv.i.i.us.us.140 - %idxprom.i.i.us.us.144 = sext i32 %add.i.i.us.us.143 to i64 - %arrayidx.i.i.us.us.145 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.144 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.145, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.146 = shl i64 %add1.i.i.i.us.us.139, 32 - %213 = ashr exact i64 %sext.i.i.us.us.146, 32 - br label %for.body.i.i.us.us.154 - -for.body.i.i.us.us.154: ; preds = %for.body.i.i.us.us.154, %if.then.i.i.us.us.147 - %indvars.iv.next.i.i3.us.us.148 = phi i64 [ %indvars.iv.next.i.i.us.us.152, %for.body.i.i.us.us.154 ], [ 0, %if.then.i.i.us.us.147 ] - %214 = phi float [ %220, %for.body.i.i.us.us.154 ], [ 0.000000e+00, %if.then.i.i.us.us.147 ] - %215 = add nsw i64 %indvars.iv.next.i.i3.us.us.148, %62 - %arrayidx11.i.i.us.us.149 = getelementptr inbounds float, float* %10, i64 %215 - %216 = load float, float* %arrayidx11.i.i.us.us.149, align 4, !tbaa !12 - %mul12.i.i.us.us.150 = fmul float %29, %216 - %217 = mul nsw i64 %indvars.iv.next.i.i3.us.us.148, %30 - %218 = add nsw i64 %217, %213 - %arrayidx16.i.i.us.us.151 = getelementptr inbounds float, float* %13, i64 %218 - %219 = load float, float* %arrayidx16.i.i.us.us.151, align 4, !tbaa !12 - %220 = tail call float @llvm.fmuladd.f32(float %mul12.i.i.us.us.150, float %219, float %214) #2 - store float %220, float* %arrayidx.i.i.us.us.145, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.152 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.148, 1 - %exitcond.not.i.i.us.us.153 = icmp eq i64 %indvars.iv.next.i.i.us.us.152, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.153, label %if.end.i.i.us.us.155.loopexit, label %for.body.i.i.us.us.154, !llvm.loop !19 - -if.end.i.i.us.us.155.loopexit: ; preds = %for.body.i.i.us.us.154 - br label %if.end.i.i.us.us.155 - -if.end.i.i.us.us.155: ; preds = %if.end.i.i.us.us.155.loopexit, %if.end.i.i.us.us - %221 = add nuw nsw i64 %_local_id_x.i.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %221, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !23 -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int", !"int", !"int", !"DATA_TYPE", !"DATA_TYPE"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int", !"int", !"int", !"float", !"float"} -!9 = !{!"", !"", !"", !"", !"", !"", !"", !"", !""} -!10 = !{!"tmp", !"A", !"B", !"ni", !"nj", !"nk", !"nl", !"alpha", !"beta"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17, !18} -!17 = distinct !{} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.unroll.disable"} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !18} -!23 = distinct !{!23, !24} -!24 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/2mm_kernel2.ll b/pocl_irs/2mm_kernel2.ll deleted file mode 100644 index 90fe668..0000000 --- a/pocl_irs/2mm_kernel2.ll +++ /dev/null @@ -1,5050 +0,0 @@ -; ModuleID = './AE/OAMONJBAJKCKPFEIFGGGJLBIMMFAJGMJFHHDO/mm2_kernel2/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_mm2_kernel2(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, i32 %4, i32 %5, i32 %6, float %7, float %8, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %9, i64 %10, i64 %11, i64 %12) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %10, 5 - %mul3.i.i = shl i64 %11, 3 - %cmp739.i = icmp sgt i32 %4, 0 - %14 = sext i32 %6 to i64 - %wide.trip.count.i = zext i32 %4 to i64 - %conv2.i.us = trunc i64 %mul3.i.i to i32 - %cmp.i.us = icmp slt i32 %conv2.i.us, %3 - %mul.i.us = mul nsw i32 %conv2.i.us, %6 - br i1 %cmp739.i, label %pregion_for_entry.pregion_for_init.i.us, label %pregion_for_entry.pregion_for_init.i.preheader - -pregion_for_entry.pregion_for_init.i.preheader: ; preds = %13 - br i1 %cmp.i.us, label %vector.scevcheck, label %pregion_for_end.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.preheader - %15 = trunc i64 %11 to i32 - %16 = mul i32 %15, %6 - %17 = shl i32 %16, 3 - %18 = trunc i64 %10 to i32 - %19 = shl i32 %18, 5 - %20 = add i32 %17, %19 - %21 = icmp sgt i32 %20, 2147483616 - br i1 %21, label %pregion_for_entry.entry.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.us.preheader: ; preds = %vector.scevcheck - br label %pregion_for_entry.entry.i.us - -vector.ph: ; preds = %vector.scevcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert37 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat38 = shufflevector <8 x i32> %broadcast.splatinsert37, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert39 = insertelement <8 x float> undef, float %8, i32 0 - %broadcast.splat40 = shufflevector <8 x float> %broadcast.splatinsert39, <8 x float> undef, <8 x i32> zeroinitializer - %22 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %23 = or <8 x i32> %22, - %24 = icmp sgt <8 x i32> %broadcast.splat38, %23 - %25 = extractelement <8 x i32> %23, i32 0 - %26 = add nsw i32 %mul.i.us, %25 - %27 = sext i32 %26 to i64 - %28 = getelementptr inbounds float, float* %2, i64 %27 - %29 = bitcast float* %28 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %29, i32 4, <8 x i1> %24, <8 x float> undef), !tbaa !12 - %30 = fmul <8 x float> %wide.masked.load, %broadcast.splat40 - %31 = bitcast float* %28 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %30, <8 x float>* %31, i32 4, <8 x i1> %24), !tbaa !12, !llvm.access.group !16 - %32 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %33 = or <8 x i32> %32, - %34 = icmp sgt <8 x i32> %broadcast.splat38, %33 - %35 = extractelement <8 x i32> %33, i32 0 - %36 = add nsw i32 %mul.i.us, %35 - %37 = sext i32 %36 to i64 - %38 = getelementptr inbounds float, float* %2, i64 %37 - %39 = bitcast float* %38 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %39, i32 4, <8 x i1> %34, <8 x float> undef), !tbaa !12 - %40 = fmul <8 x float> %wide.masked.load.1, %broadcast.splat40 - %41 = bitcast float* %38 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %40, <8 x float>* %41, i32 4, <8 x i1> %34), !tbaa !12, !llvm.access.group !16 - %42 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %43 = or <8 x i32> %42, - %44 = icmp sgt <8 x i32> %broadcast.splat38, %43 - %45 = extractelement <8 x i32> %43, i32 0 - %46 = add nsw i32 %mul.i.us, %45 - %47 = sext i32 %46 to i64 - %48 = getelementptr inbounds float, float* %2, i64 %47 - %49 = bitcast float* %48 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %49, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12 - %50 = fmul <8 x float> %wide.masked.load.2, %broadcast.splat40 - %51 = bitcast float* %48 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %50, <8 x float>* %51, i32 4, <8 x i1> %44), !tbaa !12, !llvm.access.group !16 - %52 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %53 = or <8 x i32> %52, - %54 = icmp sgt <8 x i32> %broadcast.splat38, %53 - %55 = extractelement <8 x i32> %53, i32 0 - %56 = add nsw i32 %mul.i.us, %55 - %57 = sext i32 %56 to i64 - %58 = getelementptr inbounds float, float* %2, i64 %57 - %59 = bitcast float* %58 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %59, i32 4, <8 x i1> %54, <8 x float> undef), !tbaa !12 - %60 = fmul <8 x float> %wide.masked.load.3, %broadcast.splat40 - %61 = bitcast float* %58 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %60, <8 x float>* %61, i32 4, <8 x i1> %54), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i - -pregion_for_entry.pregion_for_init.i.us: ; preds = %13 - %mul9.i.us = mul nsw i32 %conv2.i.us, %4 - %62 = sext i32 %mul9.i.us to i64 - br i1 %cmp.i.us, label %pregion_for_entry.entry.i.us.us.preheader, label %pregion_for_end.i.us - -pregion_for_entry.entry.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.us - br label %pregion_for_entry.entry.i.us.us - -pregion_for_end.i.us.loopexit: ; preds = %if.end.i.us.us - br label %pregion_for_end.i.us - -pregion_for_end.i.us: ; preds = %pregion_for_end.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.us - %63 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.1 = or i32 %63, 1 - %cmp.i.us.1 = icmp slt i32 %conv2.i.us.1, %3 - %mul.i.us.1 = mul nsw i32 %conv2.i.us.1, %6 - %mul9.i.us.1 = mul nsw i32 %conv2.i.us.1, %4 - %64 = sext i32 %mul9.i.us.1 to i64 - br i1 %cmp.i.us.1, label %pregion_for_entry.entry.i.us.us.1.preheader, label %pregion_for_end.i.us.1 - -pregion_for_entry.entry.i.us.us.1.preheader: ; preds = %pregion_for_end.i.us - br label %pregion_for_entry.entry.i.us.us.1 - -pregion_for_entry.entry.i.us.us: ; preds = %if.end.i.us.us, %pregion_for_entry.entry.i.us.us.preheader - %_local_id_x.0.us.us = phi i64 [ %67, %if.end.i.us.us ], [ 0, %pregion_for_entry.entry.i.us.us.preheader ] - %add1.i.i.us.us = add nuw nsw i64 %_local_id_x.0.us.us, %mul.i.i - %conv.i.us.us = trunc i64 %add1.i.i.us.us to i32 - %cmp4.i.us.us = icmp slt i32 %conv.i.us.us, %6 - br i1 %cmp4.i.us.us, label %if.then.i.us.us, label %if.end.i.us.us - -if.then.i.us.us: ; preds = %pregion_for_entry.entry.i.us.us - %add.i.us.us = add nsw i32 %mul.i.us, %conv.i.us.us - %idxprom.i.us.us = sext i32 %add.i.us.us to i64 - %arrayidx.i.us.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us - %65 = load float, float* %arrayidx.i.us.us, align 4, !tbaa !12 - %mul6.i.us.us = fmul float %65, %8 - store float %mul6.i.us.us, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us = shl i64 %add1.i.i.us.us, 32 - %66 = ashr exact i64 %sext.i.us.us, 32 - br label %for.body.i.us.us - -if.end.i.us.us.loopexit: ; preds = %for.body.i.us.us - br label %if.end.i.us.us - -if.end.i.us.us: ; preds = %if.end.i.us.us.loopexit, %pregion_for_entry.entry.i.us.us - %67 = add nuw nsw i64 %_local_id_x.0.us.us, 1 - %exitcond.not = icmp eq i64 %67, 32 - br i1 %exitcond.not, label %pregion_for_end.i.us.loopexit, label %pregion_for_entry.entry.i.us.us, !llvm.loop !19 - -for.body.i.us.us: ; preds = %for.body.i.us.us, %if.then.i.us.us - %indvars.iv.next.i3.us.us = phi i64 [ %indvars.iv.next.i.us.us, %for.body.i.us.us ], [ 0, %if.then.i.us.us ] - %68 = phi float [ %74, %for.body.i.us.us ], [ %mul6.i.us.us, %if.then.i.us.us ] - %69 = add nsw i64 %indvars.iv.next.i3.us.us, %62 - %arrayidx12.i.us.us = getelementptr inbounds float, float* %0, i64 %69 - %70 = load float, float* %arrayidx12.i.us.us, align 4, !tbaa !12 - %71 = mul nsw i64 %indvars.iv.next.i3.us.us, %14 - %72 = add nsw i64 %71, %66 - %arrayidx16.i.us.us = getelementptr inbounds float, float* %1, i64 %72 - %73 = load float, float* %arrayidx16.i.us.us, align 4, !tbaa !12 - %74 = tail call float @llvm.fmuladd.f32(float %70, float %73, float %68) #2 - store float %74, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us = add nuw nsw i64 %indvars.iv.next.i3.us.us, 1 - %exitcond.not.i.us.us = icmp eq i64 %indvars.iv.next.i.us.us, %wide.trip.count.i - br i1 %exitcond.not.i.us.us, label %if.end.i.us.us.loopexit, label %for.body.i.us.us, !llvm.loop !21 - -pregion_for_entry.entry.i.us: ; preds = %if.end.i.us.3236, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %542, %if.end.i.us.3236 ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %6 - br i1 %cmp4.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %add.i.us = add nsw i32 %mul.i.us, %conv.i.us - %idxprom.i.us = sext i32 %add.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us - %75 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %mul6.i.us = fmul float %75, %8 - store float %mul6.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us - -if.end.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us - %76 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1205 = add nuw nsw i64 %76, %mul.i.i - %conv.i.us.1206 = trunc i64 %add1.i.i.us.1205 to i32 - %cmp4.i.us.1207 = icmp slt i32 %conv.i.us.1206, %6 - br i1 %cmp4.i.us.1207, label %if.then.i.us.1213, label %if.end.i.us.1214 - -pregion_for_end.i.loopexit: ; preds = %if.end.i.us.3236 - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.preheader - %77 = trunc i64 %mul3.i.i to i32 - %conv2.i.1 = or i32 %77, 1 - %cmp.i.1 = icmp slt i32 %conv2.i.1, %3 - %mul.i.1 = mul nsw i32 %conv2.i.1, %6 - br i1 %cmp.i.1, label %vector.scevcheck48, label %pregion_for_end.i.1 - -vector.scevcheck48: ; preds = %pregion_for_end.i - %78 = mul i32 %conv2.i.1, %6 - %79 = trunc i64 %10 to i32 - %80 = shl i32 %79, 5 - %81 = add i32 %78, %80 - %82 = icmp sgt i32 %81, 2147483616 - br i1 %82, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.ph49 - -pregion_for_entry.entry.i.us.1.preheader: ; preds = %vector.scevcheck48 - br label %pregion_for_entry.entry.i.us.1 - -vector.ph49: ; preds = %vector.scevcheck48 - %broadcast.splatinsert56 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat57 = shufflevector <8 x i64> %broadcast.splatinsert56, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert58 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat59 = shufflevector <8 x i32> %broadcast.splatinsert58, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert61 = insertelement <8 x float> undef, float %8, i32 0 - %broadcast.splat62 = shufflevector <8 x float> %broadcast.splatinsert61, <8 x float> undef, <8 x i32> zeroinitializer - %83 = trunc <8 x i64> %broadcast.splat57 to <8 x i32> - %84 = or <8 x i32> %83, - %85 = icmp sgt <8 x i32> %broadcast.splat59, %84 - %86 = extractelement <8 x i32> %84, i32 0 - %87 = add nsw i32 %mul.i.1, %86 - %88 = sext i32 %87 to i64 - %89 = getelementptr inbounds float, float* %2, i64 %88 - %90 = bitcast float* %89 to <8 x float>* - %wide.masked.load60 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %90, i32 4, <8 x i1> %85, <8 x float> undef), !tbaa !12 - %91 = fmul <8 x float> %wide.masked.load60, %broadcast.splat62 - %92 = bitcast float* %89 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %91, <8 x float>* %92, i32 4, <8 x i1> %85), !tbaa !12, !llvm.access.group !16 - %93 = trunc <8 x i64> %broadcast.splat57 to <8 x i32> - %94 = or <8 x i32> %93, - %95 = icmp sgt <8 x i32> %broadcast.splat59, %94 - %96 = extractelement <8 x i32> %94, i32 0 - %97 = add nsw i32 %mul.i.1, %96 - %98 = sext i32 %97 to i64 - %99 = getelementptr inbounds float, float* %2, i64 %98 - %100 = bitcast float* %99 to <8 x float>* - %wide.masked.load60.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %100, i32 4, <8 x i1> %95, <8 x float> undef), !tbaa !12 - %101 = fmul <8 x float> %wide.masked.load60.1, %broadcast.splat62 - %102 = bitcast float* %99 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %101, <8 x float>* %102, i32 4, <8 x i1> %95), !tbaa !12, !llvm.access.group !16 - %103 = trunc <8 x i64> %broadcast.splat57 to <8 x i32> - %104 = or <8 x i32> %103, - %105 = icmp sgt <8 x i32> %broadcast.splat59, %104 - %106 = extractelement <8 x i32> %104, i32 0 - %107 = add nsw i32 %mul.i.1, %106 - %108 = sext i32 %107 to i64 - %109 = getelementptr inbounds float, float* %2, i64 %108 - %110 = bitcast float* %109 to <8 x float>* - %wide.masked.load60.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %110, i32 4, <8 x i1> %105, <8 x float> undef), !tbaa !12 - %111 = fmul <8 x float> %wide.masked.load60.2, %broadcast.splat62 - %112 = bitcast float* %109 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %111, <8 x float>* %112, i32 4, <8 x i1> %105), !tbaa !12, !llvm.access.group !16 - %113 = trunc <8 x i64> %broadcast.splat57 to <8 x i32> - %114 = or <8 x i32> %113, - %115 = icmp sgt <8 x i32> %broadcast.splat59, %114 - %116 = extractelement <8 x i32> %114, i32 0 - %117 = add nsw i32 %mul.i.1, %116 - %118 = sext i32 %117 to i64 - %119 = getelementptr inbounds float, float* %2, i64 %118 - %120 = bitcast float* %119 to <8 x float>* - %wide.masked.load60.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %120, i32 4, <8 x i1> %115, <8 x float> undef), !tbaa !12 - %121 = fmul <8 x float> %wide.masked.load60.3, %broadcast.splat62 - %122 = bitcast float* %119 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %121, <8 x float>* %122, i32 4, <8 x i1> %115), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.1 - -mm2_kernel2.exit.loopexit: ; preds = %if.end.i.us.us.7 - br label %mm2_kernel2.exit - -mm2_kernel2.exit.loopexit237: ; preds = %if.end.i.us.7.3 - br label %mm2_kernel2.exit - -mm2_kernel2.exit: ; preds = %pregion_for_end.i.us.6, %vector.ph181, %pregion_for_end.i.6, %mm2_kernel2.exit.loopexit237, %mm2_kernel2.exit.loopexit - ret void - -pregion_for_entry.entry.i.us.1: ; preds = %if.end.i.us.1.3, %pregion_for_entry.entry.i.us.1.preheader - %_local_id_x.0.us.1 = phi i64 [ %536, %if.end.i.us.1.3 ], [ 0, %pregion_for_entry.entry.i.us.1.preheader ] - %add1.i.i.us.1 = add nuw nsw i64 %_local_id_x.0.us.1, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp4.i.us.1 = icmp slt i32 %conv.i.us.1, %6 - br i1 %cmp4.i.us.1, label %if.then.i.us.1, label %if.end.i.us.1 - -if.then.i.us.1: ; preds = %pregion_for_entry.entry.i.us.1 - %add.i.us.1 = add nsw i32 %mul.i.1, %conv.i.us.1 - %idxprom.i.us.1 = sext i32 %add.i.us.1 to i64 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1 - %123 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %mul6.i.us.1 = fmul float %123, %8 - store float %mul6.i.us.1, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1 - -if.end.i.us.1: ; preds = %if.then.i.us.1, %pregion_for_entry.entry.i.us.1 - %124 = or i64 %_local_id_x.0.us.1, 1 - %add1.i.i.us.1.1 = add nuw nsw i64 %124, %mul.i.i - %conv.i.us.1.1 = trunc i64 %add1.i.i.us.1.1 to i32 - %cmp4.i.us.1.1 = icmp slt i32 %conv.i.us.1.1, %6 - br i1 %cmp4.i.us.1.1, label %if.then.i.us.1.1, label %if.end.i.us.1.1 - -pregion_for_end.i.1.loopexit: ; preds = %if.end.i.us.1.3 - br label %pregion_for_end.i.1 - -pregion_for_end.i.1: ; preds = %pregion_for_end.i.1.loopexit, %vector.ph49, %pregion_for_end.i - %125 = trunc i64 %mul3.i.i to i32 - %conv2.i.2 = or i32 %125, 2 - %cmp.i.2 = icmp slt i32 %conv2.i.2, %3 - %mul.i.2 = mul nsw i32 %conv2.i.2, %6 - br i1 %cmp.i.2, label %vector.scevcheck70, label %pregion_for_end.i.2 - -vector.scevcheck70: ; preds = %pregion_for_end.i.1 - %126 = mul i32 %conv2.i.2, %6 - %127 = trunc i64 %10 to i32 - %128 = shl i32 %127, 5 - %129 = add i32 %126, %128 - %130 = icmp sgt i32 %129, 2147483616 - br i1 %130, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.ph71 - -pregion_for_entry.entry.i.us.2.preheader: ; preds = %vector.scevcheck70 - br label %pregion_for_entry.entry.i.us.2 - -vector.ph71: ; preds = %vector.scevcheck70 - %broadcast.splatinsert78 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat79 = shufflevector <8 x i64> %broadcast.splatinsert78, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert80 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat81 = shufflevector <8 x i32> %broadcast.splatinsert80, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert83 = insertelement <8 x float> undef, float %8, i32 0 - %broadcast.splat84 = shufflevector <8 x float> %broadcast.splatinsert83, <8 x float> undef, <8 x i32> zeroinitializer - %131 = trunc <8 x i64> %broadcast.splat79 to <8 x i32> - %132 = or <8 x i32> %131, - %133 = icmp sgt <8 x i32> %broadcast.splat81, %132 - %134 = extractelement <8 x i32> %132, i32 0 - %135 = add nsw i32 %mul.i.2, %134 - %136 = sext i32 %135 to i64 - %137 = getelementptr inbounds float, float* %2, i64 %136 - %138 = bitcast float* %137 to <8 x float>* - %wide.masked.load82 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %138, i32 4, <8 x i1> %133, <8 x float> undef), !tbaa !12 - %139 = fmul <8 x float> %wide.masked.load82, %broadcast.splat84 - %140 = bitcast float* %137 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %139, <8 x float>* %140, i32 4, <8 x i1> %133), !tbaa !12, !llvm.access.group !16 - %141 = trunc <8 x i64> %broadcast.splat79 to <8 x i32> - %142 = or <8 x i32> %141, - %143 = icmp sgt <8 x i32> %broadcast.splat81, %142 - %144 = extractelement <8 x i32> %142, i32 0 - %145 = add nsw i32 %mul.i.2, %144 - %146 = sext i32 %145 to i64 - %147 = getelementptr inbounds float, float* %2, i64 %146 - %148 = bitcast float* %147 to <8 x float>* - %wide.masked.load82.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %148, i32 4, <8 x i1> %143, <8 x float> undef), !tbaa !12 - %149 = fmul <8 x float> %wide.masked.load82.1, %broadcast.splat84 - %150 = bitcast float* %147 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %149, <8 x float>* %150, i32 4, <8 x i1> %143), !tbaa !12, !llvm.access.group !16 - %151 = trunc <8 x i64> %broadcast.splat79 to <8 x i32> - %152 = or <8 x i32> %151, - %153 = icmp sgt <8 x i32> %broadcast.splat81, %152 - %154 = extractelement <8 x i32> %152, i32 0 - %155 = add nsw i32 %mul.i.2, %154 - %156 = sext i32 %155 to i64 - %157 = getelementptr inbounds float, float* %2, i64 %156 - %158 = bitcast float* %157 to <8 x float>* - %wide.masked.load82.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %158, i32 4, <8 x i1> %153, <8 x float> undef), !tbaa !12 - %159 = fmul <8 x float> %wide.masked.load82.2, %broadcast.splat84 - %160 = bitcast float* %157 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %159, <8 x float>* %160, i32 4, <8 x i1> %153), !tbaa !12, !llvm.access.group !16 - %161 = trunc <8 x i64> %broadcast.splat79 to <8 x i32> - %162 = or <8 x i32> %161, - %163 = icmp sgt <8 x i32> %broadcast.splat81, %162 - %164 = extractelement <8 x i32> %162, i32 0 - %165 = add nsw i32 %mul.i.2, %164 - %166 = sext i32 %165 to i64 - %167 = getelementptr inbounds float, float* %2, i64 %166 - %168 = bitcast float* %167 to <8 x float>* - %wide.masked.load82.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %168, i32 4, <8 x i1> %163, <8 x float> undef), !tbaa !12 - %169 = fmul <8 x float> %wide.masked.load82.3, %broadcast.splat84 - %170 = bitcast float* %167 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %169, <8 x float>* %170, i32 4, <8 x i1> %163), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.2 - -pregion_for_entry.entry.i.us.2: ; preds = %if.end.i.us.2.3, %pregion_for_entry.entry.i.us.2.preheader - %_local_id_x.0.us.2 = phi i64 [ %530, %if.end.i.us.2.3 ], [ 0, %pregion_for_entry.entry.i.us.2.preheader ] - %add1.i.i.us.2 = add nuw nsw i64 %_local_id_x.0.us.2, %mul.i.i - %conv.i.us.2 = trunc i64 %add1.i.i.us.2 to i32 - %cmp4.i.us.2 = icmp slt i32 %conv.i.us.2, %6 - br i1 %cmp4.i.us.2, label %if.then.i.us.2, label %if.end.i.us.2 - -if.then.i.us.2: ; preds = %pregion_for_entry.entry.i.us.2 - %add.i.us.2 = add nsw i32 %mul.i.2, %conv.i.us.2 - %idxprom.i.us.2 = sext i32 %add.i.us.2 to i64 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2 - %171 = load float, float* %arrayidx.i.us.2, align 4, !tbaa !12 - %mul6.i.us.2 = fmul float %171, %8 - store float %mul6.i.us.2, float* %arrayidx.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2 - -if.end.i.us.2: ; preds = %if.then.i.us.2, %pregion_for_entry.entry.i.us.2 - %172 = or i64 %_local_id_x.0.us.2, 1 - %add1.i.i.us.2.1 = add nuw nsw i64 %172, %mul.i.i - %conv.i.us.2.1 = trunc i64 %add1.i.i.us.2.1 to i32 - %cmp4.i.us.2.1 = icmp slt i32 %conv.i.us.2.1, %6 - br i1 %cmp4.i.us.2.1, label %if.then.i.us.2.1, label %if.end.i.us.2.1 - -pregion_for_end.i.2.loopexit: ; preds = %if.end.i.us.2.3 - br label %pregion_for_end.i.2 - -pregion_for_end.i.2: ; preds = %pregion_for_end.i.2.loopexit, %vector.ph71, %pregion_for_end.i.1 - %173 = trunc i64 %mul3.i.i to i32 - %conv2.i.3 = or i32 %173, 3 - %cmp.i.3 = icmp slt i32 %conv2.i.3, %3 - %mul.i.3 = mul nsw i32 %conv2.i.3, %6 - br i1 %cmp.i.3, label %vector.scevcheck92, label %pregion_for_end.i.3 - -vector.scevcheck92: ; preds = %pregion_for_end.i.2 - %174 = mul i32 %conv2.i.3, %6 - %175 = trunc i64 %10 to i32 - %176 = shl i32 %175, 5 - %177 = add i32 %174, %176 - %178 = icmp sgt i32 %177, 2147483616 - br i1 %178, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.ph93 - -pregion_for_entry.entry.i.us.3.preheader: ; preds = %vector.scevcheck92 - br label %pregion_for_entry.entry.i.us.3 - -vector.ph93: ; preds = %vector.scevcheck92 - %broadcast.splatinsert100 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat101 = shufflevector <8 x i64> %broadcast.splatinsert100, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert102 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat103 = shufflevector <8 x i32> %broadcast.splatinsert102, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert105 = insertelement <8 x float> undef, float %8, i32 0 - %broadcast.splat106 = shufflevector <8 x float> %broadcast.splatinsert105, <8 x float> undef, <8 x i32> zeroinitializer - %179 = trunc <8 x i64> %broadcast.splat101 to <8 x i32> - %180 = or <8 x i32> %179, - %181 = icmp sgt <8 x i32> %broadcast.splat103, %180 - %182 = extractelement <8 x i32> %180, i32 0 - %183 = add nsw i32 %mul.i.3, %182 - %184 = sext i32 %183 to i64 - %185 = getelementptr inbounds float, float* %2, i64 %184 - %186 = bitcast float* %185 to <8 x float>* - %wide.masked.load104 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %186, i32 4, <8 x i1> %181, <8 x float> undef), !tbaa !12 - %187 = fmul <8 x float> %wide.masked.load104, %broadcast.splat106 - %188 = bitcast float* %185 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %187, <8 x float>* %188, i32 4, <8 x i1> %181), !tbaa !12, !llvm.access.group !16 - %189 = trunc <8 x i64> %broadcast.splat101 to <8 x i32> - %190 = or <8 x i32> %189, - %191 = icmp sgt <8 x i32> %broadcast.splat103, %190 - %192 = extractelement <8 x i32> %190, i32 0 - %193 = add nsw i32 %mul.i.3, %192 - %194 = sext i32 %193 to i64 - %195 = getelementptr inbounds float, float* %2, i64 %194 - %196 = bitcast float* %195 to <8 x float>* - %wide.masked.load104.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %196, i32 4, <8 x i1> %191, <8 x float> undef), !tbaa !12 - %197 = fmul <8 x float> %wide.masked.load104.1, %broadcast.splat106 - %198 = bitcast float* %195 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %197, <8 x float>* %198, i32 4, <8 x i1> %191), !tbaa !12, !llvm.access.group !16 - %199 = trunc <8 x i64> %broadcast.splat101 to <8 x i32> - %200 = or <8 x i32> %199, - %201 = icmp sgt <8 x i32> %broadcast.splat103, %200 - %202 = extractelement <8 x i32> %200, i32 0 - %203 = add nsw i32 %mul.i.3, %202 - %204 = sext i32 %203 to i64 - %205 = getelementptr inbounds float, float* %2, i64 %204 - %206 = bitcast float* %205 to <8 x float>* - %wide.masked.load104.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %206, i32 4, <8 x i1> %201, <8 x float> undef), !tbaa !12 - %207 = fmul <8 x float> %wide.masked.load104.2, %broadcast.splat106 - %208 = bitcast float* %205 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %207, <8 x float>* %208, i32 4, <8 x i1> %201), !tbaa !12, !llvm.access.group !16 - %209 = trunc <8 x i64> %broadcast.splat101 to <8 x i32> - %210 = or <8 x i32> %209, - %211 = icmp sgt <8 x i32> %broadcast.splat103, %210 - %212 = extractelement <8 x i32> %210, i32 0 - %213 = add nsw i32 %mul.i.3, %212 - %214 = sext i32 %213 to i64 - %215 = getelementptr inbounds float, float* %2, i64 %214 - %216 = bitcast float* %215 to <8 x float>* - %wide.masked.load104.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %216, i32 4, <8 x i1> %211, <8 x float> undef), !tbaa !12 - %217 = fmul <8 x float> %wide.masked.load104.3, %broadcast.splat106 - %218 = bitcast float* %215 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %217, <8 x float>* %218, i32 4, <8 x i1> %211), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.3 - -pregion_for_entry.entry.i.us.3: ; preds = %if.end.i.us.3.3, %pregion_for_entry.entry.i.us.3.preheader - %_local_id_x.0.us.3 = phi i64 [ %524, %if.end.i.us.3.3 ], [ 0, %pregion_for_entry.entry.i.us.3.preheader ] - %add1.i.i.us.3 = add nuw nsw i64 %_local_id_x.0.us.3, %mul.i.i - %conv.i.us.3 = trunc i64 %add1.i.i.us.3 to i32 - %cmp4.i.us.3 = icmp slt i32 %conv.i.us.3, %6 - br i1 %cmp4.i.us.3, label %if.then.i.us.3, label %if.end.i.us.3 - -if.then.i.us.3: ; preds = %pregion_for_entry.entry.i.us.3 - %add.i.us.3 = add nsw i32 %mul.i.3, %conv.i.us.3 - %idxprom.i.us.3 = sext i32 %add.i.us.3 to i64 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3 - %219 = load float, float* %arrayidx.i.us.3, align 4, !tbaa !12 - %mul6.i.us.3 = fmul float %219, %8 - store float %mul6.i.us.3, float* %arrayidx.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3 - -if.end.i.us.3: ; preds = %if.then.i.us.3, %pregion_for_entry.entry.i.us.3 - %220 = or i64 %_local_id_x.0.us.3, 1 - %add1.i.i.us.3.1 = add nuw nsw i64 %220, %mul.i.i - %conv.i.us.3.1 = trunc i64 %add1.i.i.us.3.1 to i32 - %cmp4.i.us.3.1 = icmp slt i32 %conv.i.us.3.1, %6 - br i1 %cmp4.i.us.3.1, label %if.then.i.us.3.1, label %if.end.i.us.3.1 - -pregion_for_end.i.3.loopexit: ; preds = %if.end.i.us.3.3 - br label %pregion_for_end.i.3 - -pregion_for_end.i.3: ; preds = %pregion_for_end.i.3.loopexit, %vector.ph93, %pregion_for_end.i.2 - %221 = trunc i64 %mul3.i.i to i32 - %conv2.i.4 = or i32 %221, 4 - %cmp.i.4 = icmp slt i32 %conv2.i.4, %3 - %mul.i.4 = mul nsw i32 %conv2.i.4, %6 - br i1 %cmp.i.4, label %vector.scevcheck114, label %pregion_for_end.i.4 - -vector.scevcheck114: ; preds = %pregion_for_end.i.3 - %222 = mul i32 %conv2.i.4, %6 - %223 = trunc i64 %10 to i32 - %224 = shl i32 %223, 5 - %225 = add i32 %222, %224 - %226 = icmp sgt i32 %225, 2147483616 - br i1 %226, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.ph115 - -pregion_for_entry.entry.i.us.4.preheader: ; preds = %vector.scevcheck114 - br label %pregion_for_entry.entry.i.us.4 - -vector.ph115: ; preds = %vector.scevcheck114 - %broadcast.splatinsert122 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat123 = shufflevector <8 x i64> %broadcast.splatinsert122, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert124 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat125 = shufflevector <8 x i32> %broadcast.splatinsert124, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert127 = insertelement <8 x float> undef, float %8, i32 0 - %broadcast.splat128 = shufflevector <8 x float> %broadcast.splatinsert127, <8 x float> undef, <8 x i32> zeroinitializer - %227 = trunc <8 x i64> %broadcast.splat123 to <8 x i32> - %228 = or <8 x i32> %227, - %229 = icmp sgt <8 x i32> %broadcast.splat125, %228 - %230 = extractelement <8 x i32> %228, i32 0 - %231 = add nsw i32 %mul.i.4, %230 - %232 = sext i32 %231 to i64 - %233 = getelementptr inbounds float, float* %2, i64 %232 - %234 = bitcast float* %233 to <8 x float>* - %wide.masked.load126 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %234, i32 4, <8 x i1> %229, <8 x float> undef), !tbaa !12 - %235 = fmul <8 x float> %wide.masked.load126, %broadcast.splat128 - %236 = bitcast float* %233 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %235, <8 x float>* %236, i32 4, <8 x i1> %229), !tbaa !12, !llvm.access.group !16 - %237 = trunc <8 x i64> %broadcast.splat123 to <8 x i32> - %238 = or <8 x i32> %237, - %239 = icmp sgt <8 x i32> %broadcast.splat125, %238 - %240 = extractelement <8 x i32> %238, i32 0 - %241 = add nsw i32 %mul.i.4, %240 - %242 = sext i32 %241 to i64 - %243 = getelementptr inbounds float, float* %2, i64 %242 - %244 = bitcast float* %243 to <8 x float>* - %wide.masked.load126.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %244, i32 4, <8 x i1> %239, <8 x float> undef), !tbaa !12 - %245 = fmul <8 x float> %wide.masked.load126.1, %broadcast.splat128 - %246 = bitcast float* %243 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %245, <8 x float>* %246, i32 4, <8 x i1> %239), !tbaa !12, !llvm.access.group !16 - %247 = trunc <8 x i64> %broadcast.splat123 to <8 x i32> - %248 = or <8 x i32> %247, - %249 = icmp sgt <8 x i32> %broadcast.splat125, %248 - %250 = extractelement <8 x i32> %248, i32 0 - %251 = add nsw i32 %mul.i.4, %250 - %252 = sext i32 %251 to i64 - %253 = getelementptr inbounds float, float* %2, i64 %252 - %254 = bitcast float* %253 to <8 x float>* - %wide.masked.load126.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %254, i32 4, <8 x i1> %249, <8 x float> undef), !tbaa !12 - %255 = fmul <8 x float> %wide.masked.load126.2, %broadcast.splat128 - %256 = bitcast float* %253 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %255, <8 x float>* %256, i32 4, <8 x i1> %249), !tbaa !12, !llvm.access.group !16 - %257 = trunc <8 x i64> %broadcast.splat123 to <8 x i32> - %258 = or <8 x i32> %257, - %259 = icmp sgt <8 x i32> %broadcast.splat125, %258 - %260 = extractelement <8 x i32> %258, i32 0 - %261 = add nsw i32 %mul.i.4, %260 - %262 = sext i32 %261 to i64 - %263 = getelementptr inbounds float, float* %2, i64 %262 - %264 = bitcast float* %263 to <8 x float>* - %wide.masked.load126.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %264, i32 4, <8 x i1> %259, <8 x float> undef), !tbaa !12 - %265 = fmul <8 x float> %wide.masked.load126.3, %broadcast.splat128 - %266 = bitcast float* %263 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %265, <8 x float>* %266, i32 4, <8 x i1> %259), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.4 - -pregion_for_entry.entry.i.us.4: ; preds = %if.end.i.us.4.3, %pregion_for_entry.entry.i.us.4.preheader - %_local_id_x.0.us.4 = phi i64 [ %518, %if.end.i.us.4.3 ], [ 0, %pregion_for_entry.entry.i.us.4.preheader ] - %add1.i.i.us.4 = add nuw nsw i64 %_local_id_x.0.us.4, %mul.i.i - %conv.i.us.4 = trunc i64 %add1.i.i.us.4 to i32 - %cmp4.i.us.4 = icmp slt i32 %conv.i.us.4, %6 - br i1 %cmp4.i.us.4, label %if.then.i.us.4, label %if.end.i.us.4 - -if.then.i.us.4: ; preds = %pregion_for_entry.entry.i.us.4 - %add.i.us.4 = add nsw i32 %mul.i.4, %conv.i.us.4 - %idxprom.i.us.4 = sext i32 %add.i.us.4 to i64 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4 - %267 = load float, float* %arrayidx.i.us.4, align 4, !tbaa !12 - %mul6.i.us.4 = fmul float %267, %8 - store float %mul6.i.us.4, float* %arrayidx.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4 - -if.end.i.us.4: ; preds = %if.then.i.us.4, %pregion_for_entry.entry.i.us.4 - %268 = or i64 %_local_id_x.0.us.4, 1 - %add1.i.i.us.4.1 = add nuw nsw i64 %268, %mul.i.i - %conv.i.us.4.1 = trunc i64 %add1.i.i.us.4.1 to i32 - %cmp4.i.us.4.1 = icmp slt i32 %conv.i.us.4.1, %6 - br i1 %cmp4.i.us.4.1, label %if.then.i.us.4.1, label %if.end.i.us.4.1 - -pregion_for_end.i.4.loopexit: ; preds = %if.end.i.us.4.3 - br label %pregion_for_end.i.4 - -pregion_for_end.i.4: ; preds = %pregion_for_end.i.4.loopexit, %vector.ph115, %pregion_for_end.i.3 - %269 = trunc i64 %mul3.i.i to i32 - %conv2.i.5 = or i32 %269, 5 - %cmp.i.5 = icmp slt i32 %conv2.i.5, %3 - %mul.i.5 = mul nsw i32 %conv2.i.5, %6 - br i1 %cmp.i.5, label %vector.scevcheck136, label %pregion_for_end.i.5 - -vector.scevcheck136: ; preds = %pregion_for_end.i.4 - %270 = mul i32 %conv2.i.5, %6 - %271 = trunc i64 %10 to i32 - %272 = shl i32 %271, 5 - %273 = add i32 %270, %272 - %274 = icmp sgt i32 %273, 2147483616 - br i1 %274, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.ph137 - -pregion_for_entry.entry.i.us.5.preheader: ; preds = %vector.scevcheck136 - br label %pregion_for_entry.entry.i.us.5 - -vector.ph137: ; preds = %vector.scevcheck136 - %broadcast.splatinsert144 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat145 = shufflevector <8 x i64> %broadcast.splatinsert144, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert146 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat147 = shufflevector <8 x i32> %broadcast.splatinsert146, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert149 = insertelement <8 x float> undef, float %8, i32 0 - %broadcast.splat150 = shufflevector <8 x float> %broadcast.splatinsert149, <8 x float> undef, <8 x i32> zeroinitializer - %275 = trunc <8 x i64> %broadcast.splat145 to <8 x i32> - %276 = or <8 x i32> %275, - %277 = icmp sgt <8 x i32> %broadcast.splat147, %276 - %278 = extractelement <8 x i32> %276, i32 0 - %279 = add nsw i32 %mul.i.5, %278 - %280 = sext i32 %279 to i64 - %281 = getelementptr inbounds float, float* %2, i64 %280 - %282 = bitcast float* %281 to <8 x float>* - %wide.masked.load148 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %282, i32 4, <8 x i1> %277, <8 x float> undef), !tbaa !12 - %283 = fmul <8 x float> %wide.masked.load148, %broadcast.splat150 - %284 = bitcast float* %281 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %283, <8 x float>* %284, i32 4, <8 x i1> %277), !tbaa !12, !llvm.access.group !16 - %285 = trunc <8 x i64> %broadcast.splat145 to <8 x i32> - %286 = or <8 x i32> %285, - %287 = icmp sgt <8 x i32> %broadcast.splat147, %286 - %288 = extractelement <8 x i32> %286, i32 0 - %289 = add nsw i32 %mul.i.5, %288 - %290 = sext i32 %289 to i64 - %291 = getelementptr inbounds float, float* %2, i64 %290 - %292 = bitcast float* %291 to <8 x float>* - %wide.masked.load148.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %292, i32 4, <8 x i1> %287, <8 x float> undef), !tbaa !12 - %293 = fmul <8 x float> %wide.masked.load148.1, %broadcast.splat150 - %294 = bitcast float* %291 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %293, <8 x float>* %294, i32 4, <8 x i1> %287), !tbaa !12, !llvm.access.group !16 - %295 = trunc <8 x i64> %broadcast.splat145 to <8 x i32> - %296 = or <8 x i32> %295, - %297 = icmp sgt <8 x i32> %broadcast.splat147, %296 - %298 = extractelement <8 x i32> %296, i32 0 - %299 = add nsw i32 %mul.i.5, %298 - %300 = sext i32 %299 to i64 - %301 = getelementptr inbounds float, float* %2, i64 %300 - %302 = bitcast float* %301 to <8 x float>* - %wide.masked.load148.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %302, i32 4, <8 x i1> %297, <8 x float> undef), !tbaa !12 - %303 = fmul <8 x float> %wide.masked.load148.2, %broadcast.splat150 - %304 = bitcast float* %301 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %303, <8 x float>* %304, i32 4, <8 x i1> %297), !tbaa !12, !llvm.access.group !16 - %305 = trunc <8 x i64> %broadcast.splat145 to <8 x i32> - %306 = or <8 x i32> %305, - %307 = icmp sgt <8 x i32> %broadcast.splat147, %306 - %308 = extractelement <8 x i32> %306, i32 0 - %309 = add nsw i32 %mul.i.5, %308 - %310 = sext i32 %309 to i64 - %311 = getelementptr inbounds float, float* %2, i64 %310 - %312 = bitcast float* %311 to <8 x float>* - %wide.masked.load148.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %312, i32 4, <8 x i1> %307, <8 x float> undef), !tbaa !12 - %313 = fmul <8 x float> %wide.masked.load148.3, %broadcast.splat150 - %314 = bitcast float* %311 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %313, <8 x float>* %314, i32 4, <8 x i1> %307), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.5 - -pregion_for_entry.entry.i.us.5: ; preds = %if.end.i.us.5.3, %pregion_for_entry.entry.i.us.5.preheader - %_local_id_x.0.us.5 = phi i64 [ %512, %if.end.i.us.5.3 ], [ 0, %pregion_for_entry.entry.i.us.5.preheader ] - %add1.i.i.us.5 = add nuw nsw i64 %_local_id_x.0.us.5, %mul.i.i - %conv.i.us.5 = trunc i64 %add1.i.i.us.5 to i32 - %cmp4.i.us.5 = icmp slt i32 %conv.i.us.5, %6 - br i1 %cmp4.i.us.5, label %if.then.i.us.5, label %if.end.i.us.5 - -if.then.i.us.5: ; preds = %pregion_for_entry.entry.i.us.5 - %add.i.us.5 = add nsw i32 %mul.i.5, %conv.i.us.5 - %idxprom.i.us.5 = sext i32 %add.i.us.5 to i64 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5 - %315 = load float, float* %arrayidx.i.us.5, align 4, !tbaa !12 - %mul6.i.us.5 = fmul float %315, %8 - store float %mul6.i.us.5, float* %arrayidx.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5 - -if.end.i.us.5: ; preds = %if.then.i.us.5, %pregion_for_entry.entry.i.us.5 - %316 = or i64 %_local_id_x.0.us.5, 1 - %add1.i.i.us.5.1 = add nuw nsw i64 %316, %mul.i.i - %conv.i.us.5.1 = trunc i64 %add1.i.i.us.5.1 to i32 - %cmp4.i.us.5.1 = icmp slt i32 %conv.i.us.5.1, %6 - br i1 %cmp4.i.us.5.1, label %if.then.i.us.5.1, label %if.end.i.us.5.1 - -pregion_for_end.i.5.loopexit: ; preds = %if.end.i.us.5.3 - br label %pregion_for_end.i.5 - -pregion_for_end.i.5: ; preds = %pregion_for_end.i.5.loopexit, %vector.ph137, %pregion_for_end.i.4 - %317 = trunc i64 %mul3.i.i to i32 - %conv2.i.6 = or i32 %317, 6 - %cmp.i.6 = icmp slt i32 %conv2.i.6, %3 - %mul.i.6 = mul nsw i32 %conv2.i.6, %6 - br i1 %cmp.i.6, label %vector.scevcheck158, label %pregion_for_end.i.6 - -vector.scevcheck158: ; preds = %pregion_for_end.i.5 - %318 = mul i32 %conv2.i.6, %6 - %319 = trunc i64 %10 to i32 - %320 = shl i32 %319, 5 - %321 = add i32 %318, %320 - %322 = icmp sgt i32 %321, 2147483616 - br i1 %322, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.ph159 - -pregion_for_entry.entry.i.us.6.preheader: ; preds = %vector.scevcheck158 - br label %pregion_for_entry.entry.i.us.6 - -vector.ph159: ; preds = %vector.scevcheck158 - %broadcast.splatinsert166 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat167 = shufflevector <8 x i64> %broadcast.splatinsert166, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert168 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat169 = shufflevector <8 x i32> %broadcast.splatinsert168, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert171 = insertelement <8 x float> undef, float %8, i32 0 - %broadcast.splat172 = shufflevector <8 x float> %broadcast.splatinsert171, <8 x float> undef, <8 x i32> zeroinitializer - %323 = trunc <8 x i64> %broadcast.splat167 to <8 x i32> - %324 = or <8 x i32> %323, - %325 = icmp sgt <8 x i32> %broadcast.splat169, %324 - %326 = extractelement <8 x i32> %324, i32 0 - %327 = add nsw i32 %mul.i.6, %326 - %328 = sext i32 %327 to i64 - %329 = getelementptr inbounds float, float* %2, i64 %328 - %330 = bitcast float* %329 to <8 x float>* - %wide.masked.load170 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %330, i32 4, <8 x i1> %325, <8 x float> undef), !tbaa !12 - %331 = fmul <8 x float> %wide.masked.load170, %broadcast.splat172 - %332 = bitcast float* %329 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %331, <8 x float>* %332, i32 4, <8 x i1> %325), !tbaa !12, !llvm.access.group !16 - %333 = trunc <8 x i64> %broadcast.splat167 to <8 x i32> - %334 = or <8 x i32> %333, - %335 = icmp sgt <8 x i32> %broadcast.splat169, %334 - %336 = extractelement <8 x i32> %334, i32 0 - %337 = add nsw i32 %mul.i.6, %336 - %338 = sext i32 %337 to i64 - %339 = getelementptr inbounds float, float* %2, i64 %338 - %340 = bitcast float* %339 to <8 x float>* - %wide.masked.load170.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %340, i32 4, <8 x i1> %335, <8 x float> undef), !tbaa !12 - %341 = fmul <8 x float> %wide.masked.load170.1, %broadcast.splat172 - %342 = bitcast float* %339 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %341, <8 x float>* %342, i32 4, <8 x i1> %335), !tbaa !12, !llvm.access.group !16 - %343 = trunc <8 x i64> %broadcast.splat167 to <8 x i32> - %344 = or <8 x i32> %343, - %345 = icmp sgt <8 x i32> %broadcast.splat169, %344 - %346 = extractelement <8 x i32> %344, i32 0 - %347 = add nsw i32 %mul.i.6, %346 - %348 = sext i32 %347 to i64 - %349 = getelementptr inbounds float, float* %2, i64 %348 - %350 = bitcast float* %349 to <8 x float>* - %wide.masked.load170.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %350, i32 4, <8 x i1> %345, <8 x float> undef), !tbaa !12 - %351 = fmul <8 x float> %wide.masked.load170.2, %broadcast.splat172 - %352 = bitcast float* %349 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %351, <8 x float>* %352, i32 4, <8 x i1> %345), !tbaa !12, !llvm.access.group !16 - %353 = trunc <8 x i64> %broadcast.splat167 to <8 x i32> - %354 = or <8 x i32> %353, - %355 = icmp sgt <8 x i32> %broadcast.splat169, %354 - %356 = extractelement <8 x i32> %354, i32 0 - %357 = add nsw i32 %mul.i.6, %356 - %358 = sext i32 %357 to i64 - %359 = getelementptr inbounds float, float* %2, i64 %358 - %360 = bitcast float* %359 to <8 x float>* - %wide.masked.load170.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %360, i32 4, <8 x i1> %355, <8 x float> undef), !tbaa !12 - %361 = fmul <8 x float> %wide.masked.load170.3, %broadcast.splat172 - %362 = bitcast float* %359 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %361, <8 x float>* %362, i32 4, <8 x i1> %355), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.6 - -pregion_for_entry.entry.i.us.6: ; preds = %if.end.i.us.6.3, %pregion_for_entry.entry.i.us.6.preheader - %_local_id_x.0.us.6 = phi i64 [ %506, %if.end.i.us.6.3 ], [ 0, %pregion_for_entry.entry.i.us.6.preheader ] - %add1.i.i.us.6 = add nuw nsw i64 %_local_id_x.0.us.6, %mul.i.i - %conv.i.us.6 = trunc i64 %add1.i.i.us.6 to i32 - %cmp4.i.us.6 = icmp slt i32 %conv.i.us.6, %6 - br i1 %cmp4.i.us.6, label %if.then.i.us.6, label %if.end.i.us.6 - -if.then.i.us.6: ; preds = %pregion_for_entry.entry.i.us.6 - %add.i.us.6 = add nsw i32 %mul.i.6, %conv.i.us.6 - %idxprom.i.us.6 = sext i32 %add.i.us.6 to i64 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6 - %363 = load float, float* %arrayidx.i.us.6, align 4, !tbaa !12 - %mul6.i.us.6 = fmul float %363, %8 - store float %mul6.i.us.6, float* %arrayidx.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6 - -if.end.i.us.6: ; preds = %if.then.i.us.6, %pregion_for_entry.entry.i.us.6 - %364 = or i64 %_local_id_x.0.us.6, 1 - %add1.i.i.us.6.1 = add nuw nsw i64 %364, %mul.i.i - %conv.i.us.6.1 = trunc i64 %add1.i.i.us.6.1 to i32 - %cmp4.i.us.6.1 = icmp slt i32 %conv.i.us.6.1, %6 - br i1 %cmp4.i.us.6.1, label %if.then.i.us.6.1, label %if.end.i.us.6.1 - -pregion_for_end.i.6.loopexit: ; preds = %if.end.i.us.6.3 - br label %pregion_for_end.i.6 - -pregion_for_end.i.6: ; preds = %pregion_for_end.i.6.loopexit, %vector.ph159, %pregion_for_end.i.5 - %365 = trunc i64 %mul3.i.i to i32 - %conv2.i.7 = or i32 %365, 7 - %cmp.i.7 = icmp slt i32 %conv2.i.7, %3 - %mul.i.7 = mul nsw i32 %conv2.i.7, %6 - br i1 %cmp.i.7, label %vector.scevcheck180, label %mm2_kernel2.exit - -vector.scevcheck180: ; preds = %pregion_for_end.i.6 - %366 = mul i32 %conv2.i.7, %6 - %367 = trunc i64 %10 to i32 - %368 = shl i32 %367, 5 - %369 = add i32 %366, %368 - %370 = icmp sgt i32 %369, 2147483616 - br i1 %370, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.ph181 - -pregion_for_entry.entry.i.us.7.preheader: ; preds = %vector.scevcheck180 - br label %pregion_for_entry.entry.i.us.7 - -vector.ph181: ; preds = %vector.scevcheck180 - %broadcast.splatinsert188 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat189 = shufflevector <8 x i64> %broadcast.splatinsert188, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert190 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat191 = shufflevector <8 x i32> %broadcast.splatinsert190, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert193 = insertelement <8 x float> undef, float %8, i32 0 - %broadcast.splat194 = shufflevector <8 x float> %broadcast.splatinsert193, <8 x float> undef, <8 x i32> zeroinitializer - %371 = trunc <8 x i64> %broadcast.splat189 to <8 x i32> - %372 = or <8 x i32> %371, - %373 = icmp sgt <8 x i32> %broadcast.splat191, %372 - %374 = extractelement <8 x i32> %372, i32 0 - %375 = add nsw i32 %mul.i.7, %374 - %376 = sext i32 %375 to i64 - %377 = getelementptr inbounds float, float* %2, i64 %376 - %378 = bitcast float* %377 to <8 x float>* - %wide.masked.load192 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %378, i32 4, <8 x i1> %373, <8 x float> undef), !tbaa !12 - %379 = fmul <8 x float> %wide.masked.load192, %broadcast.splat194 - %380 = bitcast float* %377 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %379, <8 x float>* %380, i32 4, <8 x i1> %373), !tbaa !12, !llvm.access.group !16 - %381 = trunc <8 x i64> %broadcast.splat189 to <8 x i32> - %382 = or <8 x i32> %381, - %383 = icmp sgt <8 x i32> %broadcast.splat191, %382 - %384 = extractelement <8 x i32> %382, i32 0 - %385 = add nsw i32 %mul.i.7, %384 - %386 = sext i32 %385 to i64 - %387 = getelementptr inbounds float, float* %2, i64 %386 - %388 = bitcast float* %387 to <8 x float>* - %wide.masked.load192.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %388, i32 4, <8 x i1> %383, <8 x float> undef), !tbaa !12 - %389 = fmul <8 x float> %wide.masked.load192.1, %broadcast.splat194 - %390 = bitcast float* %387 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %389, <8 x float>* %390, i32 4, <8 x i1> %383), !tbaa !12, !llvm.access.group !16 - %391 = trunc <8 x i64> %broadcast.splat189 to <8 x i32> - %392 = or <8 x i32> %391, - %393 = icmp sgt <8 x i32> %broadcast.splat191, %392 - %394 = extractelement <8 x i32> %392, i32 0 - %395 = add nsw i32 %mul.i.7, %394 - %396 = sext i32 %395 to i64 - %397 = getelementptr inbounds float, float* %2, i64 %396 - %398 = bitcast float* %397 to <8 x float>* - %wide.masked.load192.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %398, i32 4, <8 x i1> %393, <8 x float> undef), !tbaa !12 - %399 = fmul <8 x float> %wide.masked.load192.2, %broadcast.splat194 - %400 = bitcast float* %397 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %399, <8 x float>* %400, i32 4, <8 x i1> %393), !tbaa !12, !llvm.access.group !16 - %401 = trunc <8 x i64> %broadcast.splat189 to <8 x i32> - %402 = or <8 x i32> %401, - %403 = icmp sgt <8 x i32> %broadcast.splat191, %402 - %404 = extractelement <8 x i32> %402, i32 0 - %405 = add nsw i32 %mul.i.7, %404 - %406 = sext i32 %405 to i64 - %407 = getelementptr inbounds float, float* %2, i64 %406 - %408 = bitcast float* %407 to <8 x float>* - %wide.masked.load192.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %408, i32 4, <8 x i1> %403, <8 x float> undef), !tbaa !12 - %409 = fmul <8 x float> %wide.masked.load192.3, %broadcast.splat194 - %410 = bitcast float* %407 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %409, <8 x float>* %410, i32 4, <8 x i1> %403), !tbaa !12, !llvm.access.group !16 - br label %mm2_kernel2.exit - -pregion_for_entry.entry.i.us.7: ; preds = %if.end.i.us.7.3, %pregion_for_entry.entry.i.us.7.preheader - %_local_id_x.0.us.7 = phi i64 [ %500, %if.end.i.us.7.3 ], [ 0, %pregion_for_entry.entry.i.us.7.preheader ] - %add1.i.i.us.7 = add nuw nsw i64 %_local_id_x.0.us.7, %mul.i.i - %conv.i.us.7 = trunc i64 %add1.i.i.us.7 to i32 - %cmp4.i.us.7 = icmp slt i32 %conv.i.us.7, %6 - br i1 %cmp4.i.us.7, label %if.then.i.us.7, label %if.end.i.us.7 - -if.then.i.us.7: ; preds = %pregion_for_entry.entry.i.us.7 - %add.i.us.7 = add nsw i32 %mul.i.7, %conv.i.us.7 - %idxprom.i.us.7 = sext i32 %add.i.us.7 to i64 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7 - %411 = load float, float* %arrayidx.i.us.7, align 4, !tbaa !12 - %mul6.i.us.7 = fmul float %411, %8 - store float %mul6.i.us.7, float* %arrayidx.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7 - -if.end.i.us.7: ; preds = %if.then.i.us.7, %pregion_for_entry.entry.i.us.7 - %412 = or i64 %_local_id_x.0.us.7, 1 - %add1.i.i.us.7.1 = add nuw nsw i64 %412, %mul.i.i - %conv.i.us.7.1 = trunc i64 %add1.i.i.us.7.1 to i32 - %cmp4.i.us.7.1 = icmp slt i32 %conv.i.us.7.1, %6 - br i1 %cmp4.i.us.7.1, label %if.then.i.us.7.1, label %if.end.i.us.7.1 - -pregion_for_entry.entry.i.us.us.1: ; preds = %if.end.i.us.us.1, %pregion_for_entry.entry.i.us.us.1.preheader - %_local_id_x.0.us.us.1 = phi i64 [ %422, %if.end.i.us.us.1 ], [ 0, %pregion_for_entry.entry.i.us.us.1.preheader ] - %add1.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.0.us.us.1, %mul.i.i - %conv.i.us.us.1 = trunc i64 %add1.i.i.us.us.1 to i32 - %cmp4.i.us.us.1 = icmp slt i32 %conv.i.us.us.1, %6 - br i1 %cmp4.i.us.us.1, label %if.then.i.us.us.1, label %if.end.i.us.us.1 - -if.then.i.us.us.1: ; preds = %pregion_for_entry.entry.i.us.us.1 - %add.i.us.us.1 = add nsw i32 %mul.i.us.1, %conv.i.us.us.1 - %idxprom.i.us.us.1 = sext i32 %add.i.us.us.1 to i64 - %arrayidx.i.us.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.1 - %413 = load float, float* %arrayidx.i.us.us.1, align 4, !tbaa !12 - %mul6.i.us.us.1 = fmul float %413, %8 - store float %mul6.i.us.us.1, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.1 = shl i64 %add1.i.i.us.us.1, 32 - %414 = ashr exact i64 %sext.i.us.us.1, 32 - br label %for.body.i.us.us.1 - -for.body.i.us.us.1: ; preds = %for.body.i.us.us.1, %if.then.i.us.us.1 - %indvars.iv.next.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.us.us.1, %for.body.i.us.us.1 ], [ 0, %if.then.i.us.us.1 ] - %415 = phi float [ %421, %for.body.i.us.us.1 ], [ %mul6.i.us.us.1, %if.then.i.us.us.1 ] - %416 = add nsw i64 %indvars.iv.next.i3.us.us.1, %64 - %arrayidx12.i.us.us.1 = getelementptr inbounds float, float* %0, i64 %416 - %417 = load float, float* %arrayidx12.i.us.us.1, align 4, !tbaa !12 - %418 = mul nsw i64 %indvars.iv.next.i3.us.us.1, %14 - %419 = add nsw i64 %418, %414 - %arrayidx16.i.us.us.1 = getelementptr inbounds float, float* %1, i64 %419 - %420 = load float, float* %arrayidx16.i.us.us.1, align 4, !tbaa !12 - %421 = tail call float @llvm.fmuladd.f32(float %417, float %420, float %415) #2 - store float %421, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1, 1 - %exitcond.not.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.us.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.1, label %if.end.i.us.us.1.loopexit, label %for.body.i.us.us.1, !llvm.loop !21 - -if.end.i.us.us.1.loopexit: ; preds = %for.body.i.us.us.1 - br label %if.end.i.us.us.1 - -if.end.i.us.us.1: ; preds = %if.end.i.us.us.1.loopexit, %pregion_for_entry.entry.i.us.us.1 - %422 = add nuw nsw i64 %_local_id_x.0.us.us.1, 1 - %exitcond.not.1 = icmp eq i64 %422, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.us.1.loopexit, label %pregion_for_entry.entry.i.us.us.1, !llvm.loop !19 - -pregion_for_end.i.us.1.loopexit: ; preds = %if.end.i.us.us.1 - br label %pregion_for_end.i.us.1 - -pregion_for_end.i.us.1: ; preds = %pregion_for_end.i.us.1.loopexit, %pregion_for_end.i.us - %423 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.2 = or i32 %423, 2 - %cmp.i.us.2 = icmp slt i32 %conv2.i.us.2, %3 - %mul.i.us.2 = mul nsw i32 %conv2.i.us.2, %6 - %mul9.i.us.2 = mul nsw i32 %conv2.i.us.2, %4 - %424 = sext i32 %mul9.i.us.2 to i64 - br i1 %cmp.i.us.2, label %pregion_for_entry.entry.i.us.us.2.preheader, label %pregion_for_end.i.us.2 - -pregion_for_entry.entry.i.us.us.2.preheader: ; preds = %pregion_for_end.i.us.1 - br label %pregion_for_entry.entry.i.us.us.2 - -pregion_for_entry.entry.i.us.us.2: ; preds = %if.end.i.us.us.2, %pregion_for_entry.entry.i.us.us.2.preheader - %_local_id_x.0.us.us.2 = phi i64 [ %434, %if.end.i.us.us.2 ], [ 0, %pregion_for_entry.entry.i.us.us.2.preheader ] - %add1.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.0.us.us.2, %mul.i.i - %conv.i.us.us.2 = trunc i64 %add1.i.i.us.us.2 to i32 - %cmp4.i.us.us.2 = icmp slt i32 %conv.i.us.us.2, %6 - br i1 %cmp4.i.us.us.2, label %if.then.i.us.us.2, label %if.end.i.us.us.2 - -if.then.i.us.us.2: ; preds = %pregion_for_entry.entry.i.us.us.2 - %add.i.us.us.2 = add nsw i32 %mul.i.us.2, %conv.i.us.us.2 - %idxprom.i.us.us.2 = sext i32 %add.i.us.us.2 to i64 - %arrayidx.i.us.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.2 - %425 = load float, float* %arrayidx.i.us.us.2, align 4, !tbaa !12 - %mul6.i.us.us.2 = fmul float %425, %8 - store float %mul6.i.us.us.2, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.2 = shl i64 %add1.i.i.us.us.2, 32 - %426 = ashr exact i64 %sext.i.us.us.2, 32 - br label %for.body.i.us.us.2 - -for.body.i.us.us.2: ; preds = %for.body.i.us.us.2, %if.then.i.us.us.2 - %indvars.iv.next.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.us.us.2, %for.body.i.us.us.2 ], [ 0, %if.then.i.us.us.2 ] - %427 = phi float [ %433, %for.body.i.us.us.2 ], [ %mul6.i.us.us.2, %if.then.i.us.us.2 ] - %428 = add nsw i64 %indvars.iv.next.i3.us.us.2, %424 - %arrayidx12.i.us.us.2 = getelementptr inbounds float, float* %0, i64 %428 - %429 = load float, float* %arrayidx12.i.us.us.2, align 4, !tbaa !12 - %430 = mul nsw i64 %indvars.iv.next.i3.us.us.2, %14 - %431 = add nsw i64 %430, %426 - %arrayidx16.i.us.us.2 = getelementptr inbounds float, float* %1, i64 %431 - %432 = load float, float* %arrayidx16.i.us.us.2, align 4, !tbaa !12 - %433 = tail call float @llvm.fmuladd.f32(float %429, float %432, float %427) #2 - store float %433, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2, 1 - %exitcond.not.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.us.us.2, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.2, label %if.end.i.us.us.2.loopexit, label %for.body.i.us.us.2, !llvm.loop !21 - -if.end.i.us.us.2.loopexit: ; preds = %for.body.i.us.us.2 - br label %if.end.i.us.us.2 - -if.end.i.us.us.2: ; preds = %if.end.i.us.us.2.loopexit, %pregion_for_entry.entry.i.us.us.2 - %434 = add nuw nsw i64 %_local_id_x.0.us.us.2, 1 - %exitcond.not.2 = icmp eq i64 %434, 32 - br i1 %exitcond.not.2, label %pregion_for_end.i.us.2.loopexit, label %pregion_for_entry.entry.i.us.us.2, !llvm.loop !19 - -pregion_for_end.i.us.2.loopexit: ; preds = %if.end.i.us.us.2 - br label %pregion_for_end.i.us.2 - -pregion_for_end.i.us.2: ; preds = %pregion_for_end.i.us.2.loopexit, %pregion_for_end.i.us.1 - %435 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.3 = or i32 %435, 3 - %cmp.i.us.3 = icmp slt i32 %conv2.i.us.3, %3 - %mul.i.us.3 = mul nsw i32 %conv2.i.us.3, %6 - %mul9.i.us.3 = mul nsw i32 %conv2.i.us.3, %4 - %436 = sext i32 %mul9.i.us.3 to i64 - br i1 %cmp.i.us.3, label %pregion_for_entry.entry.i.us.us.3.preheader, label %pregion_for_end.i.us.3 - -pregion_for_entry.entry.i.us.us.3.preheader: ; preds = %pregion_for_end.i.us.2 - br label %pregion_for_entry.entry.i.us.us.3 - -pregion_for_entry.entry.i.us.us.3: ; preds = %if.end.i.us.us.3, %pregion_for_entry.entry.i.us.us.3.preheader - %_local_id_x.0.us.us.3 = phi i64 [ %446, %if.end.i.us.us.3 ], [ 0, %pregion_for_entry.entry.i.us.us.3.preheader ] - %add1.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.0.us.us.3, %mul.i.i - %conv.i.us.us.3 = trunc i64 %add1.i.i.us.us.3 to i32 - %cmp4.i.us.us.3 = icmp slt i32 %conv.i.us.us.3, %6 - br i1 %cmp4.i.us.us.3, label %if.then.i.us.us.3, label %if.end.i.us.us.3 - -if.then.i.us.us.3: ; preds = %pregion_for_entry.entry.i.us.us.3 - %add.i.us.us.3 = add nsw i32 %mul.i.us.3, %conv.i.us.us.3 - %idxprom.i.us.us.3 = sext i32 %add.i.us.us.3 to i64 - %arrayidx.i.us.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.3 - %437 = load float, float* %arrayidx.i.us.us.3, align 4, !tbaa !12 - %mul6.i.us.us.3 = fmul float %437, %8 - store float %mul6.i.us.us.3, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.3 = shl i64 %add1.i.i.us.us.3, 32 - %438 = ashr exact i64 %sext.i.us.us.3, 32 - br label %for.body.i.us.us.3 - -for.body.i.us.us.3: ; preds = %for.body.i.us.us.3, %if.then.i.us.us.3 - %indvars.iv.next.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.us.us.3, %for.body.i.us.us.3 ], [ 0, %if.then.i.us.us.3 ] - %439 = phi float [ %445, %for.body.i.us.us.3 ], [ %mul6.i.us.us.3, %if.then.i.us.us.3 ] - %440 = add nsw i64 %indvars.iv.next.i3.us.us.3, %436 - %arrayidx12.i.us.us.3 = getelementptr inbounds float, float* %0, i64 %440 - %441 = load float, float* %arrayidx12.i.us.us.3, align 4, !tbaa !12 - %442 = mul nsw i64 %indvars.iv.next.i3.us.us.3, %14 - %443 = add nsw i64 %442, %438 - %arrayidx16.i.us.us.3 = getelementptr inbounds float, float* %1, i64 %443 - %444 = load float, float* %arrayidx16.i.us.us.3, align 4, !tbaa !12 - %445 = tail call float @llvm.fmuladd.f32(float %441, float %444, float %439) #2 - store float %445, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3, 1 - %exitcond.not.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.us.us.3, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.3, label %if.end.i.us.us.3.loopexit, label %for.body.i.us.us.3, !llvm.loop !21 - -if.end.i.us.us.3.loopexit: ; preds = %for.body.i.us.us.3 - br label %if.end.i.us.us.3 - -if.end.i.us.us.3: ; preds = %if.end.i.us.us.3.loopexit, %pregion_for_entry.entry.i.us.us.3 - %446 = add nuw nsw i64 %_local_id_x.0.us.us.3, 1 - %exitcond.not.3 = icmp eq i64 %446, 32 - br i1 %exitcond.not.3, label %pregion_for_end.i.us.3.loopexit, label %pregion_for_entry.entry.i.us.us.3, !llvm.loop !19 - -pregion_for_end.i.us.3.loopexit: ; preds = %if.end.i.us.us.3 - br label %pregion_for_end.i.us.3 - -pregion_for_end.i.us.3: ; preds = %pregion_for_end.i.us.3.loopexit, %pregion_for_end.i.us.2 - %447 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.4 = or i32 %447, 4 - %cmp.i.us.4 = icmp slt i32 %conv2.i.us.4, %3 - %mul.i.us.4 = mul nsw i32 %conv2.i.us.4, %6 - %mul9.i.us.4 = mul nsw i32 %conv2.i.us.4, %4 - %448 = sext i32 %mul9.i.us.4 to i64 - br i1 %cmp.i.us.4, label %pregion_for_entry.entry.i.us.us.4.preheader, label %pregion_for_end.i.us.4 - -pregion_for_entry.entry.i.us.us.4.preheader: ; preds = %pregion_for_end.i.us.3 - br label %pregion_for_entry.entry.i.us.us.4 - -pregion_for_entry.entry.i.us.us.4: ; preds = %if.end.i.us.us.4, %pregion_for_entry.entry.i.us.us.4.preheader - %_local_id_x.0.us.us.4 = phi i64 [ %458, %if.end.i.us.us.4 ], [ 0, %pregion_for_entry.entry.i.us.us.4.preheader ] - %add1.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.0.us.us.4, %mul.i.i - %conv.i.us.us.4 = trunc i64 %add1.i.i.us.us.4 to i32 - %cmp4.i.us.us.4 = icmp slt i32 %conv.i.us.us.4, %6 - br i1 %cmp4.i.us.us.4, label %if.then.i.us.us.4, label %if.end.i.us.us.4 - -if.then.i.us.us.4: ; preds = %pregion_for_entry.entry.i.us.us.4 - %add.i.us.us.4 = add nsw i32 %mul.i.us.4, %conv.i.us.us.4 - %idxprom.i.us.us.4 = sext i32 %add.i.us.us.4 to i64 - %arrayidx.i.us.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.4 - %449 = load float, float* %arrayidx.i.us.us.4, align 4, !tbaa !12 - %mul6.i.us.us.4 = fmul float %449, %8 - store float %mul6.i.us.us.4, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.4 = shl i64 %add1.i.i.us.us.4, 32 - %450 = ashr exact i64 %sext.i.us.us.4, 32 - br label %for.body.i.us.us.4 - -for.body.i.us.us.4: ; preds = %for.body.i.us.us.4, %if.then.i.us.us.4 - %indvars.iv.next.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.us.us.4, %for.body.i.us.us.4 ], [ 0, %if.then.i.us.us.4 ] - %451 = phi float [ %457, %for.body.i.us.us.4 ], [ %mul6.i.us.us.4, %if.then.i.us.us.4 ] - %452 = add nsw i64 %indvars.iv.next.i3.us.us.4, %448 - %arrayidx12.i.us.us.4 = getelementptr inbounds float, float* %0, i64 %452 - %453 = load float, float* %arrayidx12.i.us.us.4, align 4, !tbaa !12 - %454 = mul nsw i64 %indvars.iv.next.i3.us.us.4, %14 - %455 = add nsw i64 %454, %450 - %arrayidx16.i.us.us.4 = getelementptr inbounds float, float* %1, i64 %455 - %456 = load float, float* %arrayidx16.i.us.us.4, align 4, !tbaa !12 - %457 = tail call float @llvm.fmuladd.f32(float %453, float %456, float %451) #2 - store float %457, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4, 1 - %exitcond.not.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.us.us.4, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.4, label %if.end.i.us.us.4.loopexit, label %for.body.i.us.us.4, !llvm.loop !21 - -if.end.i.us.us.4.loopexit: ; preds = %for.body.i.us.us.4 - br label %if.end.i.us.us.4 - -if.end.i.us.us.4: ; preds = %if.end.i.us.us.4.loopexit, %pregion_for_entry.entry.i.us.us.4 - %458 = add nuw nsw i64 %_local_id_x.0.us.us.4, 1 - %exitcond.not.4 = icmp eq i64 %458, 32 - br i1 %exitcond.not.4, label %pregion_for_end.i.us.4.loopexit, label %pregion_for_entry.entry.i.us.us.4, !llvm.loop !19 - -pregion_for_end.i.us.4.loopexit: ; preds = %if.end.i.us.us.4 - br label %pregion_for_end.i.us.4 - -pregion_for_end.i.us.4: ; preds = %pregion_for_end.i.us.4.loopexit, %pregion_for_end.i.us.3 - %459 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.5 = or i32 %459, 5 - %cmp.i.us.5 = icmp slt i32 %conv2.i.us.5, %3 - %mul.i.us.5 = mul nsw i32 %conv2.i.us.5, %6 - %mul9.i.us.5 = mul nsw i32 %conv2.i.us.5, %4 - %460 = sext i32 %mul9.i.us.5 to i64 - br i1 %cmp.i.us.5, label %pregion_for_entry.entry.i.us.us.5.preheader, label %pregion_for_end.i.us.5 - -pregion_for_entry.entry.i.us.us.5.preheader: ; preds = %pregion_for_end.i.us.4 - br label %pregion_for_entry.entry.i.us.us.5 - -pregion_for_entry.entry.i.us.us.5: ; preds = %if.end.i.us.us.5, %pregion_for_entry.entry.i.us.us.5.preheader - %_local_id_x.0.us.us.5 = phi i64 [ %470, %if.end.i.us.us.5 ], [ 0, %pregion_for_entry.entry.i.us.us.5.preheader ] - %add1.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.0.us.us.5, %mul.i.i - %conv.i.us.us.5 = trunc i64 %add1.i.i.us.us.5 to i32 - %cmp4.i.us.us.5 = icmp slt i32 %conv.i.us.us.5, %6 - br i1 %cmp4.i.us.us.5, label %if.then.i.us.us.5, label %if.end.i.us.us.5 - -if.then.i.us.us.5: ; preds = %pregion_for_entry.entry.i.us.us.5 - %add.i.us.us.5 = add nsw i32 %mul.i.us.5, %conv.i.us.us.5 - %idxprom.i.us.us.5 = sext i32 %add.i.us.us.5 to i64 - %arrayidx.i.us.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.5 - %461 = load float, float* %arrayidx.i.us.us.5, align 4, !tbaa !12 - %mul6.i.us.us.5 = fmul float %461, %8 - store float %mul6.i.us.us.5, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.5 = shl i64 %add1.i.i.us.us.5, 32 - %462 = ashr exact i64 %sext.i.us.us.5, 32 - br label %for.body.i.us.us.5 - -for.body.i.us.us.5: ; preds = %for.body.i.us.us.5, %if.then.i.us.us.5 - %indvars.iv.next.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.us.us.5, %for.body.i.us.us.5 ], [ 0, %if.then.i.us.us.5 ] - %463 = phi float [ %469, %for.body.i.us.us.5 ], [ %mul6.i.us.us.5, %if.then.i.us.us.5 ] - %464 = add nsw i64 %indvars.iv.next.i3.us.us.5, %460 - %arrayidx12.i.us.us.5 = getelementptr inbounds float, float* %0, i64 %464 - %465 = load float, float* %arrayidx12.i.us.us.5, align 4, !tbaa !12 - %466 = mul nsw i64 %indvars.iv.next.i3.us.us.5, %14 - %467 = add nsw i64 %466, %462 - %arrayidx16.i.us.us.5 = getelementptr inbounds float, float* %1, i64 %467 - %468 = load float, float* %arrayidx16.i.us.us.5, align 4, !tbaa !12 - %469 = tail call float @llvm.fmuladd.f32(float %465, float %468, float %463) #2 - store float %469, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5, 1 - %exitcond.not.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.us.us.5, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.5, label %if.end.i.us.us.5.loopexit, label %for.body.i.us.us.5, !llvm.loop !21 - -if.end.i.us.us.5.loopexit: ; preds = %for.body.i.us.us.5 - br label %if.end.i.us.us.5 - -if.end.i.us.us.5: ; preds = %if.end.i.us.us.5.loopexit, %pregion_for_entry.entry.i.us.us.5 - %470 = add nuw nsw i64 %_local_id_x.0.us.us.5, 1 - %exitcond.not.5 = icmp eq i64 %470, 32 - br i1 %exitcond.not.5, label %pregion_for_end.i.us.5.loopexit, label %pregion_for_entry.entry.i.us.us.5, !llvm.loop !19 - -pregion_for_end.i.us.5.loopexit: ; preds = %if.end.i.us.us.5 - br label %pregion_for_end.i.us.5 - -pregion_for_end.i.us.5: ; preds = %pregion_for_end.i.us.5.loopexit, %pregion_for_end.i.us.4 - %471 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.6 = or i32 %471, 6 - %cmp.i.us.6 = icmp slt i32 %conv2.i.us.6, %3 - %mul.i.us.6 = mul nsw i32 %conv2.i.us.6, %6 - %mul9.i.us.6 = mul nsw i32 %conv2.i.us.6, %4 - %472 = sext i32 %mul9.i.us.6 to i64 - br i1 %cmp.i.us.6, label %pregion_for_entry.entry.i.us.us.6.preheader, label %pregion_for_end.i.us.6 - -pregion_for_entry.entry.i.us.us.6.preheader: ; preds = %pregion_for_end.i.us.5 - br label %pregion_for_entry.entry.i.us.us.6 - -pregion_for_entry.entry.i.us.us.6: ; preds = %if.end.i.us.us.6, %pregion_for_entry.entry.i.us.us.6.preheader - %_local_id_x.0.us.us.6 = phi i64 [ %482, %if.end.i.us.us.6 ], [ 0, %pregion_for_entry.entry.i.us.us.6.preheader ] - %add1.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.0.us.us.6, %mul.i.i - %conv.i.us.us.6 = trunc i64 %add1.i.i.us.us.6 to i32 - %cmp4.i.us.us.6 = icmp slt i32 %conv.i.us.us.6, %6 - br i1 %cmp4.i.us.us.6, label %if.then.i.us.us.6, label %if.end.i.us.us.6 - -if.then.i.us.us.6: ; preds = %pregion_for_entry.entry.i.us.us.6 - %add.i.us.us.6 = add nsw i32 %mul.i.us.6, %conv.i.us.us.6 - %idxprom.i.us.us.6 = sext i32 %add.i.us.us.6 to i64 - %arrayidx.i.us.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.6 - %473 = load float, float* %arrayidx.i.us.us.6, align 4, !tbaa !12 - %mul6.i.us.us.6 = fmul float %473, %8 - store float %mul6.i.us.us.6, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.6 = shl i64 %add1.i.i.us.us.6, 32 - %474 = ashr exact i64 %sext.i.us.us.6, 32 - br label %for.body.i.us.us.6 - -for.body.i.us.us.6: ; preds = %for.body.i.us.us.6, %if.then.i.us.us.6 - %indvars.iv.next.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.us.us.6, %for.body.i.us.us.6 ], [ 0, %if.then.i.us.us.6 ] - %475 = phi float [ %481, %for.body.i.us.us.6 ], [ %mul6.i.us.us.6, %if.then.i.us.us.6 ] - %476 = add nsw i64 %indvars.iv.next.i3.us.us.6, %472 - %arrayidx12.i.us.us.6 = getelementptr inbounds float, float* %0, i64 %476 - %477 = load float, float* %arrayidx12.i.us.us.6, align 4, !tbaa !12 - %478 = mul nsw i64 %indvars.iv.next.i3.us.us.6, %14 - %479 = add nsw i64 %478, %474 - %arrayidx16.i.us.us.6 = getelementptr inbounds float, float* %1, i64 %479 - %480 = load float, float* %arrayidx16.i.us.us.6, align 4, !tbaa !12 - %481 = tail call float @llvm.fmuladd.f32(float %477, float %480, float %475) #2 - store float %481, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6, 1 - %exitcond.not.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.us.us.6, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.6, label %if.end.i.us.us.6.loopexit, label %for.body.i.us.us.6, !llvm.loop !21 - -if.end.i.us.us.6.loopexit: ; preds = %for.body.i.us.us.6 - br label %if.end.i.us.us.6 - -if.end.i.us.us.6: ; preds = %if.end.i.us.us.6.loopexit, %pregion_for_entry.entry.i.us.us.6 - %482 = add nuw nsw i64 %_local_id_x.0.us.us.6, 1 - %exitcond.not.6 = icmp eq i64 %482, 32 - br i1 %exitcond.not.6, label %pregion_for_end.i.us.6.loopexit, label %pregion_for_entry.entry.i.us.us.6, !llvm.loop !19 - -pregion_for_end.i.us.6.loopexit: ; preds = %if.end.i.us.us.6 - br label %pregion_for_end.i.us.6 - -pregion_for_end.i.us.6: ; preds = %pregion_for_end.i.us.6.loopexit, %pregion_for_end.i.us.5 - %483 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.7 = or i32 %483, 7 - %cmp.i.us.7 = icmp slt i32 %conv2.i.us.7, %3 - %mul.i.us.7 = mul nsw i32 %conv2.i.us.7, %6 - %mul9.i.us.7 = mul nsw i32 %conv2.i.us.7, %4 - %484 = sext i32 %mul9.i.us.7 to i64 - br i1 %cmp.i.us.7, label %pregion_for_entry.entry.i.us.us.7.preheader, label %mm2_kernel2.exit - -pregion_for_entry.entry.i.us.us.7.preheader: ; preds = %pregion_for_end.i.us.6 - br label %pregion_for_entry.entry.i.us.us.7 - -pregion_for_entry.entry.i.us.us.7: ; preds = %if.end.i.us.us.7, %pregion_for_entry.entry.i.us.us.7.preheader - %_local_id_x.0.us.us.7 = phi i64 [ %494, %if.end.i.us.us.7 ], [ 0, %pregion_for_entry.entry.i.us.us.7.preheader ] - %add1.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.0.us.us.7, %mul.i.i - %conv.i.us.us.7 = trunc i64 %add1.i.i.us.us.7 to i32 - %cmp4.i.us.us.7 = icmp slt i32 %conv.i.us.us.7, %6 - br i1 %cmp4.i.us.us.7, label %if.then.i.us.us.7, label %if.end.i.us.us.7 - -if.then.i.us.us.7: ; preds = %pregion_for_entry.entry.i.us.us.7 - %add.i.us.us.7 = add nsw i32 %mul.i.us.7, %conv.i.us.us.7 - %idxprom.i.us.us.7 = sext i32 %add.i.us.us.7 to i64 - %arrayidx.i.us.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.7 - %485 = load float, float* %arrayidx.i.us.us.7, align 4, !tbaa !12 - %mul6.i.us.us.7 = fmul float %485, %8 - store float %mul6.i.us.us.7, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.7 = shl i64 %add1.i.i.us.us.7, 32 - %486 = ashr exact i64 %sext.i.us.us.7, 32 - br label %for.body.i.us.us.7 - -for.body.i.us.us.7: ; preds = %for.body.i.us.us.7, %if.then.i.us.us.7 - %indvars.iv.next.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.us.us.7, %for.body.i.us.us.7 ], [ 0, %if.then.i.us.us.7 ] - %487 = phi float [ %493, %for.body.i.us.us.7 ], [ %mul6.i.us.us.7, %if.then.i.us.us.7 ] - %488 = add nsw i64 %indvars.iv.next.i3.us.us.7, %484 - %arrayidx12.i.us.us.7 = getelementptr inbounds float, float* %0, i64 %488 - %489 = load float, float* %arrayidx12.i.us.us.7, align 4, !tbaa !12 - %490 = mul nsw i64 %indvars.iv.next.i3.us.us.7, %14 - %491 = add nsw i64 %490, %486 - %arrayidx16.i.us.us.7 = getelementptr inbounds float, float* %1, i64 %491 - %492 = load float, float* %arrayidx16.i.us.us.7, align 4, !tbaa !12 - %493 = tail call float @llvm.fmuladd.f32(float %489, float %492, float %487) #2 - store float %493, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7, 1 - %exitcond.not.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.us.us.7, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.7, label %if.end.i.us.us.7.loopexit, label %for.body.i.us.us.7, !llvm.loop !21 - -if.end.i.us.us.7.loopexit: ; preds = %for.body.i.us.us.7 - br label %if.end.i.us.us.7 - -if.end.i.us.us.7: ; preds = %if.end.i.us.us.7.loopexit, %pregion_for_entry.entry.i.us.us.7 - %494 = add nuw nsw i64 %_local_id_x.0.us.us.7, 1 - %exitcond.not.7 = icmp eq i64 %494, 32 - br i1 %exitcond.not.7, label %mm2_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.us.us.7, !llvm.loop !19 - -if.then.i.us.7.1: ; preds = %if.end.i.us.7 - %add.i.us.7.1 = add nsw i32 %mul.i.7, %conv.i.us.7.1 - %idxprom.i.us.7.1 = sext i32 %add.i.us.7.1 to i64 - %arrayidx.i.us.7.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7.1 - %495 = load float, float* %arrayidx.i.us.7.1, align 4, !tbaa !12 - %mul6.i.us.7.1 = fmul float %495, %8 - store float %mul6.i.us.7.1, float* %arrayidx.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7.1 - -if.end.i.us.7.1: ; preds = %if.then.i.us.7.1, %if.end.i.us.7 - %496 = or i64 %_local_id_x.0.us.7, 2 - %add1.i.i.us.7.2 = add nuw nsw i64 %496, %mul.i.i - %conv.i.us.7.2 = trunc i64 %add1.i.i.us.7.2 to i32 - %cmp4.i.us.7.2 = icmp slt i32 %conv.i.us.7.2, %6 - br i1 %cmp4.i.us.7.2, label %if.then.i.us.7.2, label %if.end.i.us.7.2 - -if.then.i.us.7.2: ; preds = %if.end.i.us.7.1 - %add.i.us.7.2 = add nsw i32 %mul.i.7, %conv.i.us.7.2 - %idxprom.i.us.7.2 = sext i32 %add.i.us.7.2 to i64 - %arrayidx.i.us.7.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7.2 - %497 = load float, float* %arrayidx.i.us.7.2, align 4, !tbaa !12 - %mul6.i.us.7.2 = fmul float %497, %8 - store float %mul6.i.us.7.2, float* %arrayidx.i.us.7.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7.2 - -if.end.i.us.7.2: ; preds = %if.then.i.us.7.2, %if.end.i.us.7.1 - %498 = or i64 %_local_id_x.0.us.7, 3 - %add1.i.i.us.7.3 = add nuw nsw i64 %498, %mul.i.i - %conv.i.us.7.3 = trunc i64 %add1.i.i.us.7.3 to i32 - %cmp4.i.us.7.3 = icmp slt i32 %conv.i.us.7.3, %6 - br i1 %cmp4.i.us.7.3, label %if.then.i.us.7.3, label %if.end.i.us.7.3 - -if.then.i.us.7.3: ; preds = %if.end.i.us.7.2 - %add.i.us.7.3 = add nsw i32 %mul.i.7, %conv.i.us.7.3 - %idxprom.i.us.7.3 = sext i32 %add.i.us.7.3 to i64 - %arrayidx.i.us.7.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7.3 - %499 = load float, float* %arrayidx.i.us.7.3, align 4, !tbaa !12 - %mul6.i.us.7.3 = fmul float %499, %8 - store float %mul6.i.us.7.3, float* %arrayidx.i.us.7.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7.3 - -if.end.i.us.7.3: ; preds = %if.then.i.us.7.3, %if.end.i.us.7.2 - %500 = add nuw nsw i64 %_local_id_x.0.us.7, 4 - %exitcond33.7.not.3 = icmp eq i64 %500, 32 - br i1 %exitcond33.7.not.3, label %mm2_kernel2.exit.loopexit237, label %pregion_for_entry.entry.i.us.7, !llvm.loop !23 - -if.then.i.us.6.1: ; preds = %if.end.i.us.6 - %add.i.us.6.1 = add nsw i32 %mul.i.6, %conv.i.us.6.1 - %idxprom.i.us.6.1 = sext i32 %add.i.us.6.1 to i64 - %arrayidx.i.us.6.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6.1 - %501 = load float, float* %arrayidx.i.us.6.1, align 4, !tbaa !12 - %mul6.i.us.6.1 = fmul float %501, %8 - store float %mul6.i.us.6.1, float* %arrayidx.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6.1 - -if.end.i.us.6.1: ; preds = %if.then.i.us.6.1, %if.end.i.us.6 - %502 = or i64 %_local_id_x.0.us.6, 2 - %add1.i.i.us.6.2 = add nuw nsw i64 %502, %mul.i.i - %conv.i.us.6.2 = trunc i64 %add1.i.i.us.6.2 to i32 - %cmp4.i.us.6.2 = icmp slt i32 %conv.i.us.6.2, %6 - br i1 %cmp4.i.us.6.2, label %if.then.i.us.6.2, label %if.end.i.us.6.2 - -if.then.i.us.6.2: ; preds = %if.end.i.us.6.1 - %add.i.us.6.2 = add nsw i32 %mul.i.6, %conv.i.us.6.2 - %idxprom.i.us.6.2 = sext i32 %add.i.us.6.2 to i64 - %arrayidx.i.us.6.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6.2 - %503 = load float, float* %arrayidx.i.us.6.2, align 4, !tbaa !12 - %mul6.i.us.6.2 = fmul float %503, %8 - store float %mul6.i.us.6.2, float* %arrayidx.i.us.6.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6.2 - -if.end.i.us.6.2: ; preds = %if.then.i.us.6.2, %if.end.i.us.6.1 - %504 = or i64 %_local_id_x.0.us.6, 3 - %add1.i.i.us.6.3 = add nuw nsw i64 %504, %mul.i.i - %conv.i.us.6.3 = trunc i64 %add1.i.i.us.6.3 to i32 - %cmp4.i.us.6.3 = icmp slt i32 %conv.i.us.6.3, %6 - br i1 %cmp4.i.us.6.3, label %if.then.i.us.6.3, label %if.end.i.us.6.3 - -if.then.i.us.6.3: ; preds = %if.end.i.us.6.2 - %add.i.us.6.3 = add nsw i32 %mul.i.6, %conv.i.us.6.3 - %idxprom.i.us.6.3 = sext i32 %add.i.us.6.3 to i64 - %arrayidx.i.us.6.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6.3 - %505 = load float, float* %arrayidx.i.us.6.3, align 4, !tbaa !12 - %mul6.i.us.6.3 = fmul float %505, %8 - store float %mul6.i.us.6.3, float* %arrayidx.i.us.6.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6.3 - -if.end.i.us.6.3: ; preds = %if.then.i.us.6.3, %if.end.i.us.6.2 - %506 = add nuw nsw i64 %_local_id_x.0.us.6, 4 - %exitcond33.6.not.3 = icmp eq i64 %506, 32 - br i1 %exitcond33.6.not.3, label %pregion_for_end.i.6.loopexit, label %pregion_for_entry.entry.i.us.6, !llvm.loop !25 - -if.then.i.us.5.1: ; preds = %if.end.i.us.5 - %add.i.us.5.1 = add nsw i32 %mul.i.5, %conv.i.us.5.1 - %idxprom.i.us.5.1 = sext i32 %add.i.us.5.1 to i64 - %arrayidx.i.us.5.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5.1 - %507 = load float, float* %arrayidx.i.us.5.1, align 4, !tbaa !12 - %mul6.i.us.5.1 = fmul float %507, %8 - store float %mul6.i.us.5.1, float* %arrayidx.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5.1 - -if.end.i.us.5.1: ; preds = %if.then.i.us.5.1, %if.end.i.us.5 - %508 = or i64 %_local_id_x.0.us.5, 2 - %add1.i.i.us.5.2 = add nuw nsw i64 %508, %mul.i.i - %conv.i.us.5.2 = trunc i64 %add1.i.i.us.5.2 to i32 - %cmp4.i.us.5.2 = icmp slt i32 %conv.i.us.5.2, %6 - br i1 %cmp4.i.us.5.2, label %if.then.i.us.5.2, label %if.end.i.us.5.2 - -if.then.i.us.5.2: ; preds = %if.end.i.us.5.1 - %add.i.us.5.2 = add nsw i32 %mul.i.5, %conv.i.us.5.2 - %idxprom.i.us.5.2 = sext i32 %add.i.us.5.2 to i64 - %arrayidx.i.us.5.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5.2 - %509 = load float, float* %arrayidx.i.us.5.2, align 4, !tbaa !12 - %mul6.i.us.5.2 = fmul float %509, %8 - store float %mul6.i.us.5.2, float* %arrayidx.i.us.5.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5.2 - -if.end.i.us.5.2: ; preds = %if.then.i.us.5.2, %if.end.i.us.5.1 - %510 = or i64 %_local_id_x.0.us.5, 3 - %add1.i.i.us.5.3 = add nuw nsw i64 %510, %mul.i.i - %conv.i.us.5.3 = trunc i64 %add1.i.i.us.5.3 to i32 - %cmp4.i.us.5.3 = icmp slt i32 %conv.i.us.5.3, %6 - br i1 %cmp4.i.us.5.3, label %if.then.i.us.5.3, label %if.end.i.us.5.3 - -if.then.i.us.5.3: ; preds = %if.end.i.us.5.2 - %add.i.us.5.3 = add nsw i32 %mul.i.5, %conv.i.us.5.3 - %idxprom.i.us.5.3 = sext i32 %add.i.us.5.3 to i64 - %arrayidx.i.us.5.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5.3 - %511 = load float, float* %arrayidx.i.us.5.3, align 4, !tbaa !12 - %mul6.i.us.5.3 = fmul float %511, %8 - store float %mul6.i.us.5.3, float* %arrayidx.i.us.5.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5.3 - -if.end.i.us.5.3: ; preds = %if.then.i.us.5.3, %if.end.i.us.5.2 - %512 = add nuw nsw i64 %_local_id_x.0.us.5, 4 - %exitcond33.5.not.3 = icmp eq i64 %512, 32 - br i1 %exitcond33.5.not.3, label %pregion_for_end.i.5.loopexit, label %pregion_for_entry.entry.i.us.5, !llvm.loop !26 - -if.then.i.us.4.1: ; preds = %if.end.i.us.4 - %add.i.us.4.1 = add nsw i32 %mul.i.4, %conv.i.us.4.1 - %idxprom.i.us.4.1 = sext i32 %add.i.us.4.1 to i64 - %arrayidx.i.us.4.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4.1 - %513 = load float, float* %arrayidx.i.us.4.1, align 4, !tbaa !12 - %mul6.i.us.4.1 = fmul float %513, %8 - store float %mul6.i.us.4.1, float* %arrayidx.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4.1 - -if.end.i.us.4.1: ; preds = %if.then.i.us.4.1, %if.end.i.us.4 - %514 = or i64 %_local_id_x.0.us.4, 2 - %add1.i.i.us.4.2 = add nuw nsw i64 %514, %mul.i.i - %conv.i.us.4.2 = trunc i64 %add1.i.i.us.4.2 to i32 - %cmp4.i.us.4.2 = icmp slt i32 %conv.i.us.4.2, %6 - br i1 %cmp4.i.us.4.2, label %if.then.i.us.4.2, label %if.end.i.us.4.2 - -if.then.i.us.4.2: ; preds = %if.end.i.us.4.1 - %add.i.us.4.2 = add nsw i32 %mul.i.4, %conv.i.us.4.2 - %idxprom.i.us.4.2 = sext i32 %add.i.us.4.2 to i64 - %arrayidx.i.us.4.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4.2 - %515 = load float, float* %arrayidx.i.us.4.2, align 4, !tbaa !12 - %mul6.i.us.4.2 = fmul float %515, %8 - store float %mul6.i.us.4.2, float* %arrayidx.i.us.4.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4.2 - -if.end.i.us.4.2: ; preds = %if.then.i.us.4.2, %if.end.i.us.4.1 - %516 = or i64 %_local_id_x.0.us.4, 3 - %add1.i.i.us.4.3 = add nuw nsw i64 %516, %mul.i.i - %conv.i.us.4.3 = trunc i64 %add1.i.i.us.4.3 to i32 - %cmp4.i.us.4.3 = icmp slt i32 %conv.i.us.4.3, %6 - br i1 %cmp4.i.us.4.3, label %if.then.i.us.4.3, label %if.end.i.us.4.3 - -if.then.i.us.4.3: ; preds = %if.end.i.us.4.2 - %add.i.us.4.3 = add nsw i32 %mul.i.4, %conv.i.us.4.3 - %idxprom.i.us.4.3 = sext i32 %add.i.us.4.3 to i64 - %arrayidx.i.us.4.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4.3 - %517 = load float, float* %arrayidx.i.us.4.3, align 4, !tbaa !12 - %mul6.i.us.4.3 = fmul float %517, %8 - store float %mul6.i.us.4.3, float* %arrayidx.i.us.4.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4.3 - -if.end.i.us.4.3: ; preds = %if.then.i.us.4.3, %if.end.i.us.4.2 - %518 = add nuw nsw i64 %_local_id_x.0.us.4, 4 - %exitcond33.4.not.3 = icmp eq i64 %518, 32 - br i1 %exitcond33.4.not.3, label %pregion_for_end.i.4.loopexit, label %pregion_for_entry.entry.i.us.4, !llvm.loop !27 - -if.then.i.us.3.1: ; preds = %if.end.i.us.3 - %add.i.us.3.1 = add nsw i32 %mul.i.3, %conv.i.us.3.1 - %idxprom.i.us.3.1 = sext i32 %add.i.us.3.1 to i64 - %arrayidx.i.us.3.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3.1 - %519 = load float, float* %arrayidx.i.us.3.1, align 4, !tbaa !12 - %mul6.i.us.3.1 = fmul float %519, %8 - store float %mul6.i.us.3.1, float* %arrayidx.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3.1 - -if.end.i.us.3.1: ; preds = %if.then.i.us.3.1, %if.end.i.us.3 - %520 = or i64 %_local_id_x.0.us.3, 2 - %add1.i.i.us.3.2 = add nuw nsw i64 %520, %mul.i.i - %conv.i.us.3.2 = trunc i64 %add1.i.i.us.3.2 to i32 - %cmp4.i.us.3.2 = icmp slt i32 %conv.i.us.3.2, %6 - br i1 %cmp4.i.us.3.2, label %if.then.i.us.3.2, label %if.end.i.us.3.2 - -if.then.i.us.3.2: ; preds = %if.end.i.us.3.1 - %add.i.us.3.2 = add nsw i32 %mul.i.3, %conv.i.us.3.2 - %idxprom.i.us.3.2 = sext i32 %add.i.us.3.2 to i64 - %arrayidx.i.us.3.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3.2 - %521 = load float, float* %arrayidx.i.us.3.2, align 4, !tbaa !12 - %mul6.i.us.3.2 = fmul float %521, %8 - store float %mul6.i.us.3.2, float* %arrayidx.i.us.3.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3.2 - -if.end.i.us.3.2: ; preds = %if.then.i.us.3.2, %if.end.i.us.3.1 - %522 = or i64 %_local_id_x.0.us.3, 3 - %add1.i.i.us.3.3 = add nuw nsw i64 %522, %mul.i.i - %conv.i.us.3.3 = trunc i64 %add1.i.i.us.3.3 to i32 - %cmp4.i.us.3.3 = icmp slt i32 %conv.i.us.3.3, %6 - br i1 %cmp4.i.us.3.3, label %if.then.i.us.3.3, label %if.end.i.us.3.3 - -if.then.i.us.3.3: ; preds = %if.end.i.us.3.2 - %add.i.us.3.3 = add nsw i32 %mul.i.3, %conv.i.us.3.3 - %idxprom.i.us.3.3 = sext i32 %add.i.us.3.3 to i64 - %arrayidx.i.us.3.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3.3 - %523 = load float, float* %arrayidx.i.us.3.3, align 4, !tbaa !12 - %mul6.i.us.3.3 = fmul float %523, %8 - store float %mul6.i.us.3.3, float* %arrayidx.i.us.3.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3.3 - -if.end.i.us.3.3: ; preds = %if.then.i.us.3.3, %if.end.i.us.3.2 - %524 = add nuw nsw i64 %_local_id_x.0.us.3, 4 - %exitcond33.3.not.3 = icmp eq i64 %524, 32 - br i1 %exitcond33.3.not.3, label %pregion_for_end.i.3.loopexit, label %pregion_for_entry.entry.i.us.3, !llvm.loop !28 - -if.then.i.us.2.1: ; preds = %if.end.i.us.2 - %add.i.us.2.1 = add nsw i32 %mul.i.2, %conv.i.us.2.1 - %idxprom.i.us.2.1 = sext i32 %add.i.us.2.1 to i64 - %arrayidx.i.us.2.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2.1 - %525 = load float, float* %arrayidx.i.us.2.1, align 4, !tbaa !12 - %mul6.i.us.2.1 = fmul float %525, %8 - store float %mul6.i.us.2.1, float* %arrayidx.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2.1 - -if.end.i.us.2.1: ; preds = %if.then.i.us.2.1, %if.end.i.us.2 - %526 = or i64 %_local_id_x.0.us.2, 2 - %add1.i.i.us.2.2 = add nuw nsw i64 %526, %mul.i.i - %conv.i.us.2.2 = trunc i64 %add1.i.i.us.2.2 to i32 - %cmp4.i.us.2.2 = icmp slt i32 %conv.i.us.2.2, %6 - br i1 %cmp4.i.us.2.2, label %if.then.i.us.2.2, label %if.end.i.us.2.2 - -if.then.i.us.2.2: ; preds = %if.end.i.us.2.1 - %add.i.us.2.2 = add nsw i32 %mul.i.2, %conv.i.us.2.2 - %idxprom.i.us.2.2 = sext i32 %add.i.us.2.2 to i64 - %arrayidx.i.us.2.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2.2 - %527 = load float, float* %arrayidx.i.us.2.2, align 4, !tbaa !12 - %mul6.i.us.2.2 = fmul float %527, %8 - store float %mul6.i.us.2.2, float* %arrayidx.i.us.2.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2.2 - -if.end.i.us.2.2: ; preds = %if.then.i.us.2.2, %if.end.i.us.2.1 - %528 = or i64 %_local_id_x.0.us.2, 3 - %add1.i.i.us.2.3 = add nuw nsw i64 %528, %mul.i.i - %conv.i.us.2.3 = trunc i64 %add1.i.i.us.2.3 to i32 - %cmp4.i.us.2.3 = icmp slt i32 %conv.i.us.2.3, %6 - br i1 %cmp4.i.us.2.3, label %if.then.i.us.2.3, label %if.end.i.us.2.3 - -if.then.i.us.2.3: ; preds = %if.end.i.us.2.2 - %add.i.us.2.3 = add nsw i32 %mul.i.2, %conv.i.us.2.3 - %idxprom.i.us.2.3 = sext i32 %add.i.us.2.3 to i64 - %arrayidx.i.us.2.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2.3 - %529 = load float, float* %arrayidx.i.us.2.3, align 4, !tbaa !12 - %mul6.i.us.2.3 = fmul float %529, %8 - store float %mul6.i.us.2.3, float* %arrayidx.i.us.2.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2.3 - -if.end.i.us.2.3: ; preds = %if.then.i.us.2.3, %if.end.i.us.2.2 - %530 = add nuw nsw i64 %_local_id_x.0.us.2, 4 - %exitcond33.2.not.3 = icmp eq i64 %530, 32 - br i1 %exitcond33.2.not.3, label %pregion_for_end.i.2.loopexit, label %pregion_for_entry.entry.i.us.2, !llvm.loop !29 - -if.then.i.us.1.1: ; preds = %if.end.i.us.1 - %add.i.us.1.1 = add nsw i32 %mul.i.1, %conv.i.us.1.1 - %idxprom.i.us.1.1 = sext i32 %add.i.us.1.1 to i64 - %arrayidx.i.us.1.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1.1 - %531 = load float, float* %arrayidx.i.us.1.1, align 4, !tbaa !12 - %mul6.i.us.1.1 = fmul float %531, %8 - store float %mul6.i.us.1.1, float* %arrayidx.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1.1 - -if.end.i.us.1.1: ; preds = %if.then.i.us.1.1, %if.end.i.us.1 - %532 = or i64 %_local_id_x.0.us.1, 2 - %add1.i.i.us.1.2 = add nuw nsw i64 %532, %mul.i.i - %conv.i.us.1.2 = trunc i64 %add1.i.i.us.1.2 to i32 - %cmp4.i.us.1.2 = icmp slt i32 %conv.i.us.1.2, %6 - br i1 %cmp4.i.us.1.2, label %if.then.i.us.1.2, label %if.end.i.us.1.2 - -if.then.i.us.1.2: ; preds = %if.end.i.us.1.1 - %add.i.us.1.2 = add nsw i32 %mul.i.1, %conv.i.us.1.2 - %idxprom.i.us.1.2 = sext i32 %add.i.us.1.2 to i64 - %arrayidx.i.us.1.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1.2 - %533 = load float, float* %arrayidx.i.us.1.2, align 4, !tbaa !12 - %mul6.i.us.1.2 = fmul float %533, %8 - store float %mul6.i.us.1.2, float* %arrayidx.i.us.1.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1.2 - -if.end.i.us.1.2: ; preds = %if.then.i.us.1.2, %if.end.i.us.1.1 - %534 = or i64 %_local_id_x.0.us.1, 3 - %add1.i.i.us.1.3 = add nuw nsw i64 %534, %mul.i.i - %conv.i.us.1.3 = trunc i64 %add1.i.i.us.1.3 to i32 - %cmp4.i.us.1.3 = icmp slt i32 %conv.i.us.1.3, %6 - br i1 %cmp4.i.us.1.3, label %if.then.i.us.1.3, label %if.end.i.us.1.3 - -if.then.i.us.1.3: ; preds = %if.end.i.us.1.2 - %add.i.us.1.3 = add nsw i32 %mul.i.1, %conv.i.us.1.3 - %idxprom.i.us.1.3 = sext i32 %add.i.us.1.3 to i64 - %arrayidx.i.us.1.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1.3 - %535 = load float, float* %arrayidx.i.us.1.3, align 4, !tbaa !12 - %mul6.i.us.1.3 = fmul float %535, %8 - store float %mul6.i.us.1.3, float* %arrayidx.i.us.1.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1.3 - -if.end.i.us.1.3: ; preds = %if.then.i.us.1.3, %if.end.i.us.1.2 - %536 = add nuw nsw i64 %_local_id_x.0.us.1, 4 - %exitcond33.1.not.3 = icmp eq i64 %536, 32 - br i1 %exitcond33.1.not.3, label %pregion_for_end.i.1.loopexit, label %pregion_for_entry.entry.i.us.1, !llvm.loop !30 - -if.then.i.us.1213: ; preds = %if.end.i.us - %add.i.us.1209 = add nsw i32 %mul.i.us, %conv.i.us.1206 - %idxprom.i.us.1210 = sext i32 %add.i.us.1209 to i64 - %arrayidx.i.us.1211 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1210 - %537 = load float, float* %arrayidx.i.us.1211, align 4, !tbaa !12 - %mul6.i.us.1212 = fmul float %537, %8 - store float %mul6.i.us.1212, float* %arrayidx.i.us.1211, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1214 - -if.end.i.us.1214: ; preds = %if.then.i.us.1213, %if.end.i.us - %538 = or i64 %_local_id_x.0.us, 2 - %add1.i.i.us.2216 = add nuw nsw i64 %538, %mul.i.i - %conv.i.us.2217 = trunc i64 %add1.i.i.us.2216 to i32 - %cmp4.i.us.2218 = icmp slt i32 %conv.i.us.2217, %6 - br i1 %cmp4.i.us.2218, label %if.then.i.us.2224, label %if.end.i.us.2225 - -if.then.i.us.2224: ; preds = %if.end.i.us.1214 - %add.i.us.2220 = add nsw i32 %mul.i.us, %conv.i.us.2217 - %idxprom.i.us.2221 = sext i32 %add.i.us.2220 to i64 - %arrayidx.i.us.2222 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2221 - %539 = load float, float* %arrayidx.i.us.2222, align 4, !tbaa !12 - %mul6.i.us.2223 = fmul float %539, %8 - store float %mul6.i.us.2223, float* %arrayidx.i.us.2222, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2225 - -if.end.i.us.2225: ; preds = %if.then.i.us.2224, %if.end.i.us.1214 - %540 = or i64 %_local_id_x.0.us, 3 - %add1.i.i.us.3227 = add nuw nsw i64 %540, %mul.i.i - %conv.i.us.3228 = trunc i64 %add1.i.i.us.3227 to i32 - %cmp4.i.us.3229 = icmp slt i32 %conv.i.us.3228, %6 - br i1 %cmp4.i.us.3229, label %if.then.i.us.3235, label %if.end.i.us.3236 - -if.then.i.us.3235: ; preds = %if.end.i.us.2225 - %add.i.us.3231 = add nsw i32 %mul.i.us, %conv.i.us.3228 - %idxprom.i.us.3232 = sext i32 %add.i.us.3231 to i64 - %arrayidx.i.us.3233 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3232 - %541 = load float, float* %arrayidx.i.us.3233, align 4, !tbaa !12 - %mul6.i.us.3234 = fmul float %541, %8 - store float %mul6.i.us.3234, float* %arrayidx.i.us.3233, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3236 - -if.end.i.us.3236: ; preds = %if.then.i.us.3235, %if.end.i.us.2225 - %542 = add nuw nsw i64 %_local_id_x.0.us, 4 - %exitcond33.not.3 = icmp eq i64 %542, 32 - br i1 %exitcond33.not.3, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !31 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mm2_kernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 6 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %29 = getelementptr i8*, i8** %0, i64 8 - %30 = bitcast i8** %29 to float** - %31 = load float*, float** %30, align 8 - %32 = load float, float* %31, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp739.i.i = icmp sgt i32 %24, 0 - %33 = sext i32 %28 to i64 - %wide.trip.count.i.i = zext i32 %24 to i64 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %20, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %28, %conv2.i.i.us - br i1 %cmp739.i.i, label %pregion_for_entry.pregion_for_init.i.i.us, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - br i1 %cmp.i.i.us, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i.preheader - %34 = trunc i64 %3 to i32 - %35 = mul i32 %28, %34 - %36 = shl i32 %35, 3 - %37 = trunc i64 %2 to i32 - %38 = shl i32 %37, 5 - %39 = add i32 %36, %38 - %40 = icmp sgt i32 %39, 2147483616 - br i1 %40, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.ph: ; preds = %vector.scevcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert37 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat38 = shufflevector <8 x i32> %broadcast.splatinsert37, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert39 = insertelement <8 x float> undef, float %32, i32 0 - %broadcast.splat40 = shufflevector <8 x float> %broadcast.splatinsert39, <8 x float> undef, <8 x i32> zeroinitializer - %41 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %42 = or <8 x i32> %41, - %43 = icmp sgt <8 x i32> %broadcast.splat38, %42 - %44 = extractelement <8 x i32> %42, i32 0 - %45 = add nsw i32 %mul.i.i.us, %44 - %46 = sext i32 %45 to i64 - %47 = getelementptr inbounds float, float* %16, i64 %46 - %48 = bitcast float* %47 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %48, i32 4, <8 x i1> %43, <8 x float> undef), !tbaa !12 - %49 = fmul <8 x float> %broadcast.splat40, %wide.masked.load - %50 = bitcast float* %47 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %49, <8 x float>* %50, i32 4, <8 x i1> %43), !tbaa !12, !llvm.access.group !16 - %51 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %52 = or <8 x i32> %51, - %53 = icmp sgt <8 x i32> %broadcast.splat38, %52 - %54 = extractelement <8 x i32> %52, i32 0 - %55 = add nsw i32 %mul.i.i.us, %54 - %56 = sext i32 %55 to i64 - %57 = getelementptr inbounds float, float* %16, i64 %56 - %58 = bitcast float* %57 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %58, i32 4, <8 x i1> %53, <8 x float> undef), !tbaa !12 - %59 = fmul <8 x float> %broadcast.splat40, %wide.masked.load.1 - %60 = bitcast float* %57 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %59, <8 x float>* %60, i32 4, <8 x i1> %53), !tbaa !12, !llvm.access.group !16 - %61 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %62 = or <8 x i32> %61, - %63 = icmp sgt <8 x i32> %broadcast.splat38, %62 - %64 = extractelement <8 x i32> %62, i32 0 - %65 = add nsw i32 %mul.i.i.us, %64 - %66 = sext i32 %65 to i64 - %67 = getelementptr inbounds float, float* %16, i64 %66 - %68 = bitcast float* %67 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %68, i32 4, <8 x i1> %63, <8 x float> undef), !tbaa !12 - %69 = fmul <8 x float> %broadcast.splat40, %wide.masked.load.2 - %70 = bitcast float* %67 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %69, <8 x float>* %70, i32 4, <8 x i1> %63), !tbaa !12, !llvm.access.group !16 - %71 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %72 = or <8 x i32> %71, - %73 = icmp sgt <8 x i32> %broadcast.splat38, %72 - %74 = extractelement <8 x i32> %72, i32 0 - %75 = add nsw i32 %mul.i.i.us, %74 - %76 = sext i32 %75 to i64 - %77 = getelementptr inbounds float, float* %16, i64 %76 - %78 = bitcast float* %77 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %78, i32 4, <8 x i1> %73, <8 x float> undef), !tbaa !12 - %79 = fmul <8 x float> %broadcast.splat40, %wide.masked.load.3 - %80 = bitcast float* %77 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %79, <8 x float>* %80, i32 4, <8 x i1> %73), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -pregion_for_entry.pregion_for_init.i.i.us: ; preds = %5 - %mul9.i.i.us = mul nsw i32 %24, %conv2.i.i.us - %81 = sext i32 %mul9.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us - %82 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %82, 1 - %cmp.i.i.us.1 = icmp sgt i32 %20, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %28, %conv2.i.i.us.1 - %mul9.i.i.us.1 = mul nsw i32 %24, %conv2.i.i.us.1 - %83 = sext i32 %mul9.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %86, %if.end.i.i.us.us ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %28, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us - %84 = load float, float* %arrayidx.i.i.us.us, align 4, !tbaa !12 - %mul6.i.i.us.us = fmul float %32, %84 - store float %mul6.i.i.us.us, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %85 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %86 = add nuw nsw i64 %_local_id_x.i.0.us.us, 1 - %exitcond.not = icmp eq i64 %86, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !19 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %87 = phi float [ %93, %for.body.i.i.us.us ], [ %mul6.i.i.us.us, %if.then.i.i.us.us ] - %88 = add nsw i64 %indvars.iv.next.i.i3.us.us, %81 - %arrayidx12.i.i.us.us = getelementptr inbounds float, float* %8, i64 %88 - %89 = load float, float* %arrayidx12.i.i.us.us, align 4, !tbaa !12 - %90 = mul nsw i64 %indvars.iv.next.i.i3.us.us, %33 - %91 = add nsw i64 %90, %85 - %arrayidx16.i.i.us.us = getelementptr inbounds float, float* %12, i64 %91 - %92 = load float, float* %arrayidx16.i.i.us.us, align 4, !tbaa !12 - %93 = tail call float @llvm.fmuladd.f32(float %89, float %92, float %87) #2 - store float %93, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !21 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us.3236, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %561, %if.end.i.i.us.3236 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %28, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add.i.i.us = add nsw i32 %mul.i.i.us, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us - %94 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %mul6.i.i.us = fmul float %32, %94 - store float %mul6.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %95 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1205 = add nuw nsw i64 %95, %mul.i.i.i - %conv.i.i.us.1206 = trunc i64 %add1.i.i.i.us.1205 to i32 - %cmp4.i.i.us.1207 = icmp sgt i32 %28, %conv.i.i.us.1206 - br i1 %cmp4.i.i.us.1207, label %if.then.i.i.us.1213, label %if.end.i.i.us.1214 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us.3236 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i.preheader - %96 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.1 = or i32 %96, 1 - %cmp.i.i.1 = icmp sgt i32 %20, %conv2.i.i.1 - %mul.i.i.1 = mul nsw i32 %28, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck48, label %pregion_for_end.i.i.1 - -vector.scevcheck48: ; preds = %pregion_for_end.i.i - %97 = mul i32 %28, %conv2.i.i.1 - %98 = trunc i64 %2 to i32 - %99 = shl i32 %98, 5 - %100 = add i32 %97, %99 - %101 = icmp sgt i32 %100, 2147483616 - br i1 %101, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph49 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.scevcheck48 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.ph49: ; preds = %vector.scevcheck48 - %broadcast.splatinsert56 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat57 = shufflevector <8 x i64> %broadcast.splatinsert56, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert58 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat59 = shufflevector <8 x i32> %broadcast.splatinsert58, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert61 = insertelement <8 x float> undef, float %32, i32 0 - %broadcast.splat62 = shufflevector <8 x float> %broadcast.splatinsert61, <8 x float> undef, <8 x i32> zeroinitializer - %102 = trunc <8 x i64> %broadcast.splat57 to <8 x i32> - %103 = or <8 x i32> %102, - %104 = icmp sgt <8 x i32> %broadcast.splat59, %103 - %105 = extractelement <8 x i32> %103, i32 0 - %106 = add nsw i32 %mul.i.i.1, %105 - %107 = sext i32 %106 to i64 - %108 = getelementptr inbounds float, float* %16, i64 %107 - %109 = bitcast float* %108 to <8 x float>* - %wide.masked.load60 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %109, i32 4, <8 x i1> %104, <8 x float> undef), !tbaa !12 - %110 = fmul <8 x float> %broadcast.splat62, %wide.masked.load60 - %111 = bitcast float* %108 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %110, <8 x float>* %111, i32 4, <8 x i1> %104), !tbaa !12, !llvm.access.group !16 - %112 = trunc <8 x i64> %broadcast.splat57 to <8 x i32> - %113 = or <8 x i32> %112, - %114 = icmp sgt <8 x i32> %broadcast.splat59, %113 - %115 = extractelement <8 x i32> %113, i32 0 - %116 = add nsw i32 %mul.i.i.1, %115 - %117 = sext i32 %116 to i64 - %118 = getelementptr inbounds float, float* %16, i64 %117 - %119 = bitcast float* %118 to <8 x float>* - %wide.masked.load60.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %119, i32 4, <8 x i1> %114, <8 x float> undef), !tbaa !12 - %120 = fmul <8 x float> %broadcast.splat62, %wide.masked.load60.1 - %121 = bitcast float* %118 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %120, <8 x float>* %121, i32 4, <8 x i1> %114), !tbaa !12, !llvm.access.group !16 - %122 = trunc <8 x i64> %broadcast.splat57 to <8 x i32> - %123 = or <8 x i32> %122, - %124 = icmp sgt <8 x i32> %broadcast.splat59, %123 - %125 = extractelement <8 x i32> %123, i32 0 - %126 = add nsw i32 %mul.i.i.1, %125 - %127 = sext i32 %126 to i64 - %128 = getelementptr inbounds float, float* %16, i64 %127 - %129 = bitcast float* %128 to <8 x float>* - %wide.masked.load60.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %129, i32 4, <8 x i1> %124, <8 x float> undef), !tbaa !12 - %130 = fmul <8 x float> %broadcast.splat62, %wide.masked.load60.2 - %131 = bitcast float* %128 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %130, <8 x float>* %131, i32 4, <8 x i1> %124), !tbaa !12, !llvm.access.group !16 - %132 = trunc <8 x i64> %broadcast.splat57 to <8 x i32> - %133 = or <8 x i32> %132, - %134 = icmp sgt <8 x i32> %broadcast.splat59, %133 - %135 = extractelement <8 x i32> %133, i32 0 - %136 = add nsw i32 %mul.i.i.1, %135 - %137 = sext i32 %136 to i64 - %138 = getelementptr inbounds float, float* %16, i64 %137 - %139 = bitcast float* %138 to <8 x float>* - %wide.masked.load60.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %139, i32 4, <8 x i1> %134, <8 x float> undef), !tbaa !12 - %140 = fmul <8 x float> %broadcast.splat62, %wide.masked.load60.3 - %141 = bitcast float* %138 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %140, <8 x float>* %141, i32 4, <8 x i1> %134), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.1 - -_pocl_kernel_mm2_kernel2.exit.loopexit: ; preds = %if.end.i.i.us.us.7 - br label %_pocl_kernel_mm2_kernel2.exit - -_pocl_kernel_mm2_kernel2.exit.loopexit237: ; preds = %if.end.i.i.us.7.3 - br label %_pocl_kernel_mm2_kernel2.exit - -_pocl_kernel_mm2_kernel2.exit: ; preds = %pregion_for_end.i.i.us.6, %vector.ph181, %pregion_for_end.i.i.6, %_pocl_kernel_mm2_kernel2.exit.loopexit237, %_pocl_kernel_mm2_kernel2.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.i.i.us.1.3, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ %555, %if.end.i.i.us.1.3 ], [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %28, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %add.i.i.us.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1 - %142 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %mul6.i.i.us.1 = fmul float %32, %142 - store float %mul6.i.i.us.1, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %143 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %143, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %28, %conv.i.i.us.1.1 - br i1 %cmp4.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.i.i.us.1.3 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph49, %pregion_for_end.i.i - %144 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.2 = or i32 %144, 2 - %cmp.i.i.2 = icmp sgt i32 %20, %conv2.i.i.2 - %mul.i.i.2 = mul nsw i32 %28, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck70, label %pregion_for_end.i.i.2 - -vector.scevcheck70: ; preds = %pregion_for_end.i.i.1 - %145 = mul i32 %28, %conv2.i.i.2 - %146 = trunc i64 %2 to i32 - %147 = shl i32 %146, 5 - %148 = add i32 %145, %147 - %149 = icmp sgt i32 %148, 2147483616 - br i1 %149, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph71 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.scevcheck70 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.ph71: ; preds = %vector.scevcheck70 - %broadcast.splatinsert78 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat79 = shufflevector <8 x i64> %broadcast.splatinsert78, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert80 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat81 = shufflevector <8 x i32> %broadcast.splatinsert80, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert83 = insertelement <8 x float> undef, float %32, i32 0 - %broadcast.splat84 = shufflevector <8 x float> %broadcast.splatinsert83, <8 x float> undef, <8 x i32> zeroinitializer - %150 = trunc <8 x i64> %broadcast.splat79 to <8 x i32> - %151 = or <8 x i32> %150, - %152 = icmp sgt <8 x i32> %broadcast.splat81, %151 - %153 = extractelement <8 x i32> %151, i32 0 - %154 = add nsw i32 %mul.i.i.2, %153 - %155 = sext i32 %154 to i64 - %156 = getelementptr inbounds float, float* %16, i64 %155 - %157 = bitcast float* %156 to <8 x float>* - %wide.masked.load82 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %157, i32 4, <8 x i1> %152, <8 x float> undef), !tbaa !12 - %158 = fmul <8 x float> %broadcast.splat84, %wide.masked.load82 - %159 = bitcast float* %156 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %158, <8 x float>* %159, i32 4, <8 x i1> %152), !tbaa !12, !llvm.access.group !16 - %160 = trunc <8 x i64> %broadcast.splat79 to <8 x i32> - %161 = or <8 x i32> %160, - %162 = icmp sgt <8 x i32> %broadcast.splat81, %161 - %163 = extractelement <8 x i32> %161, i32 0 - %164 = add nsw i32 %mul.i.i.2, %163 - %165 = sext i32 %164 to i64 - %166 = getelementptr inbounds float, float* %16, i64 %165 - %167 = bitcast float* %166 to <8 x float>* - %wide.masked.load82.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %167, i32 4, <8 x i1> %162, <8 x float> undef), !tbaa !12 - %168 = fmul <8 x float> %broadcast.splat84, %wide.masked.load82.1 - %169 = bitcast float* %166 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %168, <8 x float>* %169, i32 4, <8 x i1> %162), !tbaa !12, !llvm.access.group !16 - %170 = trunc <8 x i64> %broadcast.splat79 to <8 x i32> - %171 = or <8 x i32> %170, - %172 = icmp sgt <8 x i32> %broadcast.splat81, %171 - %173 = extractelement <8 x i32> %171, i32 0 - %174 = add nsw i32 %mul.i.i.2, %173 - %175 = sext i32 %174 to i64 - %176 = getelementptr inbounds float, float* %16, i64 %175 - %177 = bitcast float* %176 to <8 x float>* - %wide.masked.load82.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %177, i32 4, <8 x i1> %172, <8 x float> undef), !tbaa !12 - %178 = fmul <8 x float> %broadcast.splat84, %wide.masked.load82.2 - %179 = bitcast float* %176 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %178, <8 x float>* %179, i32 4, <8 x i1> %172), !tbaa !12, !llvm.access.group !16 - %180 = trunc <8 x i64> %broadcast.splat79 to <8 x i32> - %181 = or <8 x i32> %180, - %182 = icmp sgt <8 x i32> %broadcast.splat81, %181 - %183 = extractelement <8 x i32> %181, i32 0 - %184 = add nsw i32 %mul.i.i.2, %183 - %185 = sext i32 %184 to i64 - %186 = getelementptr inbounds float, float* %16, i64 %185 - %187 = bitcast float* %186 to <8 x float>* - %wide.masked.load82.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %187, i32 4, <8 x i1> %182, <8 x float> undef), !tbaa !12 - %188 = fmul <8 x float> %broadcast.splat84, %wide.masked.load82.3 - %189 = bitcast float* %186 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %188, <8 x float>* %189, i32 4, <8 x i1> %182), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.i.i.us.2.3, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ %549, %if.end.i.i.us.2.3 ], [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %28, %conv.i.i.us.2 - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %add.i.i.us.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2 - %190 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %mul6.i.i.us.2 = fmul float %32, %190 - store float %mul6.i.i.us.2, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %191 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %191, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %28, %conv.i.i.us.2.1 - br i1 %cmp4.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.i.i.us.2.3 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph71, %pregion_for_end.i.i.1 - %192 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.3 = or i32 %192, 3 - %cmp.i.i.3 = icmp sgt i32 %20, %conv2.i.i.3 - %mul.i.i.3 = mul nsw i32 %28, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck92, label %pregion_for_end.i.i.3 - -vector.scevcheck92: ; preds = %pregion_for_end.i.i.2 - %193 = mul i32 %28, %conv2.i.i.3 - %194 = trunc i64 %2 to i32 - %195 = shl i32 %194, 5 - %196 = add i32 %193, %195 - %197 = icmp sgt i32 %196, 2147483616 - br i1 %197, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph93 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.scevcheck92 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.ph93: ; preds = %vector.scevcheck92 - %broadcast.splatinsert100 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat101 = shufflevector <8 x i64> %broadcast.splatinsert100, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert102 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat103 = shufflevector <8 x i32> %broadcast.splatinsert102, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert105 = insertelement <8 x float> undef, float %32, i32 0 - %broadcast.splat106 = shufflevector <8 x float> %broadcast.splatinsert105, <8 x float> undef, <8 x i32> zeroinitializer - %198 = trunc <8 x i64> %broadcast.splat101 to <8 x i32> - %199 = or <8 x i32> %198, - %200 = icmp sgt <8 x i32> %broadcast.splat103, %199 - %201 = extractelement <8 x i32> %199, i32 0 - %202 = add nsw i32 %mul.i.i.3, %201 - %203 = sext i32 %202 to i64 - %204 = getelementptr inbounds float, float* %16, i64 %203 - %205 = bitcast float* %204 to <8 x float>* - %wide.masked.load104 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %205, i32 4, <8 x i1> %200, <8 x float> undef), !tbaa !12 - %206 = fmul <8 x float> %broadcast.splat106, %wide.masked.load104 - %207 = bitcast float* %204 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %206, <8 x float>* %207, i32 4, <8 x i1> %200), !tbaa !12, !llvm.access.group !16 - %208 = trunc <8 x i64> %broadcast.splat101 to <8 x i32> - %209 = or <8 x i32> %208, - %210 = icmp sgt <8 x i32> %broadcast.splat103, %209 - %211 = extractelement <8 x i32> %209, i32 0 - %212 = add nsw i32 %mul.i.i.3, %211 - %213 = sext i32 %212 to i64 - %214 = getelementptr inbounds float, float* %16, i64 %213 - %215 = bitcast float* %214 to <8 x float>* - %wide.masked.load104.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %215, i32 4, <8 x i1> %210, <8 x float> undef), !tbaa !12 - %216 = fmul <8 x float> %broadcast.splat106, %wide.masked.load104.1 - %217 = bitcast float* %214 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %216, <8 x float>* %217, i32 4, <8 x i1> %210), !tbaa !12, !llvm.access.group !16 - %218 = trunc <8 x i64> %broadcast.splat101 to <8 x i32> - %219 = or <8 x i32> %218, - %220 = icmp sgt <8 x i32> %broadcast.splat103, %219 - %221 = extractelement <8 x i32> %219, i32 0 - %222 = add nsw i32 %mul.i.i.3, %221 - %223 = sext i32 %222 to i64 - %224 = getelementptr inbounds float, float* %16, i64 %223 - %225 = bitcast float* %224 to <8 x float>* - %wide.masked.load104.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %225, i32 4, <8 x i1> %220, <8 x float> undef), !tbaa !12 - %226 = fmul <8 x float> %broadcast.splat106, %wide.masked.load104.2 - %227 = bitcast float* %224 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %226, <8 x float>* %227, i32 4, <8 x i1> %220), !tbaa !12, !llvm.access.group !16 - %228 = trunc <8 x i64> %broadcast.splat101 to <8 x i32> - %229 = or <8 x i32> %228, - %230 = icmp sgt <8 x i32> %broadcast.splat103, %229 - %231 = extractelement <8 x i32> %229, i32 0 - %232 = add nsw i32 %mul.i.i.3, %231 - %233 = sext i32 %232 to i64 - %234 = getelementptr inbounds float, float* %16, i64 %233 - %235 = bitcast float* %234 to <8 x float>* - %wide.masked.load104.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %235, i32 4, <8 x i1> %230, <8 x float> undef), !tbaa !12 - %236 = fmul <8 x float> %broadcast.splat106, %wide.masked.load104.3 - %237 = bitcast float* %234 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %236, <8 x float>* %237, i32 4, <8 x i1> %230), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.i.i.us.3.3, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ %543, %if.end.i.i.us.3.3 ], [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %28, %conv.i.i.us.3 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %add.i.i.us.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3 - %238 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %mul6.i.i.us.3 = fmul float %32, %238 - store float %mul6.i.i.us.3, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %239 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %239, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %28, %conv.i.i.us.3.1 - br i1 %cmp4.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.i.i.us.3.3 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph93, %pregion_for_end.i.i.2 - %240 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.4 = or i32 %240, 4 - %cmp.i.i.4 = icmp sgt i32 %20, %conv2.i.i.4 - %mul.i.i.4 = mul nsw i32 %28, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck114, label %pregion_for_end.i.i.4 - -vector.scevcheck114: ; preds = %pregion_for_end.i.i.3 - %241 = mul i32 %28, %conv2.i.i.4 - %242 = trunc i64 %2 to i32 - %243 = shl i32 %242, 5 - %244 = add i32 %241, %243 - %245 = icmp sgt i32 %244, 2147483616 - br i1 %245, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph115 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.scevcheck114 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.ph115: ; preds = %vector.scevcheck114 - %broadcast.splatinsert122 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat123 = shufflevector <8 x i64> %broadcast.splatinsert122, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert124 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat125 = shufflevector <8 x i32> %broadcast.splatinsert124, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert127 = insertelement <8 x float> undef, float %32, i32 0 - %broadcast.splat128 = shufflevector <8 x float> %broadcast.splatinsert127, <8 x float> undef, <8 x i32> zeroinitializer - %246 = trunc <8 x i64> %broadcast.splat123 to <8 x i32> - %247 = or <8 x i32> %246, - %248 = icmp sgt <8 x i32> %broadcast.splat125, %247 - %249 = extractelement <8 x i32> %247, i32 0 - %250 = add nsw i32 %mul.i.i.4, %249 - %251 = sext i32 %250 to i64 - %252 = getelementptr inbounds float, float* %16, i64 %251 - %253 = bitcast float* %252 to <8 x float>* - %wide.masked.load126 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %253, i32 4, <8 x i1> %248, <8 x float> undef), !tbaa !12 - %254 = fmul <8 x float> %broadcast.splat128, %wide.masked.load126 - %255 = bitcast float* %252 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %254, <8 x float>* %255, i32 4, <8 x i1> %248), !tbaa !12, !llvm.access.group !16 - %256 = trunc <8 x i64> %broadcast.splat123 to <8 x i32> - %257 = or <8 x i32> %256, - %258 = icmp sgt <8 x i32> %broadcast.splat125, %257 - %259 = extractelement <8 x i32> %257, i32 0 - %260 = add nsw i32 %mul.i.i.4, %259 - %261 = sext i32 %260 to i64 - %262 = getelementptr inbounds float, float* %16, i64 %261 - %263 = bitcast float* %262 to <8 x float>* - %wide.masked.load126.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %263, i32 4, <8 x i1> %258, <8 x float> undef), !tbaa !12 - %264 = fmul <8 x float> %broadcast.splat128, %wide.masked.load126.1 - %265 = bitcast float* %262 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %264, <8 x float>* %265, i32 4, <8 x i1> %258), !tbaa !12, !llvm.access.group !16 - %266 = trunc <8 x i64> %broadcast.splat123 to <8 x i32> - %267 = or <8 x i32> %266, - %268 = icmp sgt <8 x i32> %broadcast.splat125, %267 - %269 = extractelement <8 x i32> %267, i32 0 - %270 = add nsw i32 %mul.i.i.4, %269 - %271 = sext i32 %270 to i64 - %272 = getelementptr inbounds float, float* %16, i64 %271 - %273 = bitcast float* %272 to <8 x float>* - %wide.masked.load126.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %273, i32 4, <8 x i1> %268, <8 x float> undef), !tbaa !12 - %274 = fmul <8 x float> %broadcast.splat128, %wide.masked.load126.2 - %275 = bitcast float* %272 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %274, <8 x float>* %275, i32 4, <8 x i1> %268), !tbaa !12, !llvm.access.group !16 - %276 = trunc <8 x i64> %broadcast.splat123 to <8 x i32> - %277 = or <8 x i32> %276, - %278 = icmp sgt <8 x i32> %broadcast.splat125, %277 - %279 = extractelement <8 x i32> %277, i32 0 - %280 = add nsw i32 %mul.i.i.4, %279 - %281 = sext i32 %280 to i64 - %282 = getelementptr inbounds float, float* %16, i64 %281 - %283 = bitcast float* %282 to <8 x float>* - %wide.masked.load126.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %283, i32 4, <8 x i1> %278, <8 x float> undef), !tbaa !12 - %284 = fmul <8 x float> %broadcast.splat128, %wide.masked.load126.3 - %285 = bitcast float* %282 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %284, <8 x float>* %285, i32 4, <8 x i1> %278), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.i.i.us.4.3, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ %537, %if.end.i.i.us.4.3 ], [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %28, %conv.i.i.us.4 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %add.i.i.us.4 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.4 - %286 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %mul6.i.i.us.4 = fmul float %32, %286 - store float %mul6.i.i.us.4, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %287 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %287, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %28, %conv.i.i.us.4.1 - br i1 %cmp4.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.i.i.us.4.3 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph115, %pregion_for_end.i.i.3 - %288 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.5 = or i32 %288, 5 - %cmp.i.i.5 = icmp sgt i32 %20, %conv2.i.i.5 - %mul.i.i.5 = mul nsw i32 %28, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck136, label %pregion_for_end.i.i.5 - -vector.scevcheck136: ; preds = %pregion_for_end.i.i.4 - %289 = mul i32 %28, %conv2.i.i.5 - %290 = trunc i64 %2 to i32 - %291 = shl i32 %290, 5 - %292 = add i32 %289, %291 - %293 = icmp sgt i32 %292, 2147483616 - br i1 %293, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph137 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.scevcheck136 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.ph137: ; preds = %vector.scevcheck136 - %broadcast.splatinsert144 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat145 = shufflevector <8 x i64> %broadcast.splatinsert144, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert146 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat147 = shufflevector <8 x i32> %broadcast.splatinsert146, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert149 = insertelement <8 x float> undef, float %32, i32 0 - %broadcast.splat150 = shufflevector <8 x float> %broadcast.splatinsert149, <8 x float> undef, <8 x i32> zeroinitializer - %294 = trunc <8 x i64> %broadcast.splat145 to <8 x i32> - %295 = or <8 x i32> %294, - %296 = icmp sgt <8 x i32> %broadcast.splat147, %295 - %297 = extractelement <8 x i32> %295, i32 0 - %298 = add nsw i32 %mul.i.i.5, %297 - %299 = sext i32 %298 to i64 - %300 = getelementptr inbounds float, float* %16, i64 %299 - %301 = bitcast float* %300 to <8 x float>* - %wide.masked.load148 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %301, i32 4, <8 x i1> %296, <8 x float> undef), !tbaa !12 - %302 = fmul <8 x float> %broadcast.splat150, %wide.masked.load148 - %303 = bitcast float* %300 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %302, <8 x float>* %303, i32 4, <8 x i1> %296), !tbaa !12, !llvm.access.group !16 - %304 = trunc <8 x i64> %broadcast.splat145 to <8 x i32> - %305 = or <8 x i32> %304, - %306 = icmp sgt <8 x i32> %broadcast.splat147, %305 - %307 = extractelement <8 x i32> %305, i32 0 - %308 = add nsw i32 %mul.i.i.5, %307 - %309 = sext i32 %308 to i64 - %310 = getelementptr inbounds float, float* %16, i64 %309 - %311 = bitcast float* %310 to <8 x float>* - %wide.masked.load148.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %311, i32 4, <8 x i1> %306, <8 x float> undef), !tbaa !12 - %312 = fmul <8 x float> %broadcast.splat150, %wide.masked.load148.1 - %313 = bitcast float* %310 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %312, <8 x float>* %313, i32 4, <8 x i1> %306), !tbaa !12, !llvm.access.group !16 - %314 = trunc <8 x i64> %broadcast.splat145 to <8 x i32> - %315 = or <8 x i32> %314, - %316 = icmp sgt <8 x i32> %broadcast.splat147, %315 - %317 = extractelement <8 x i32> %315, i32 0 - %318 = add nsw i32 %mul.i.i.5, %317 - %319 = sext i32 %318 to i64 - %320 = getelementptr inbounds float, float* %16, i64 %319 - %321 = bitcast float* %320 to <8 x float>* - %wide.masked.load148.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %321, i32 4, <8 x i1> %316, <8 x float> undef), !tbaa !12 - %322 = fmul <8 x float> %broadcast.splat150, %wide.masked.load148.2 - %323 = bitcast float* %320 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %322, <8 x float>* %323, i32 4, <8 x i1> %316), !tbaa !12, !llvm.access.group !16 - %324 = trunc <8 x i64> %broadcast.splat145 to <8 x i32> - %325 = or <8 x i32> %324, - %326 = icmp sgt <8 x i32> %broadcast.splat147, %325 - %327 = extractelement <8 x i32> %325, i32 0 - %328 = add nsw i32 %mul.i.i.5, %327 - %329 = sext i32 %328 to i64 - %330 = getelementptr inbounds float, float* %16, i64 %329 - %331 = bitcast float* %330 to <8 x float>* - %wide.masked.load148.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %331, i32 4, <8 x i1> %326, <8 x float> undef), !tbaa !12 - %332 = fmul <8 x float> %broadcast.splat150, %wide.masked.load148.3 - %333 = bitcast float* %330 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %332, <8 x float>* %333, i32 4, <8 x i1> %326), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.i.i.us.5.3, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ %531, %if.end.i.i.us.5.3 ], [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %28, %conv.i.i.us.5 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %add.i.i.us.5 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.5 - %334 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %mul6.i.i.us.5 = fmul float %32, %334 - store float %mul6.i.i.us.5, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %335 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %335, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %28, %conv.i.i.us.5.1 - br i1 %cmp4.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.i.i.us.5.3 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph137, %pregion_for_end.i.i.4 - %336 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.6 = or i32 %336, 6 - %cmp.i.i.6 = icmp sgt i32 %20, %conv2.i.i.6 - %mul.i.i.6 = mul nsw i32 %28, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck158, label %pregion_for_end.i.i.6 - -vector.scevcheck158: ; preds = %pregion_for_end.i.i.5 - %337 = mul i32 %28, %conv2.i.i.6 - %338 = trunc i64 %2 to i32 - %339 = shl i32 %338, 5 - %340 = add i32 %337, %339 - %341 = icmp sgt i32 %340, 2147483616 - br i1 %341, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph159 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.scevcheck158 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.ph159: ; preds = %vector.scevcheck158 - %broadcast.splatinsert166 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat167 = shufflevector <8 x i64> %broadcast.splatinsert166, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert168 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat169 = shufflevector <8 x i32> %broadcast.splatinsert168, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert171 = insertelement <8 x float> undef, float %32, i32 0 - %broadcast.splat172 = shufflevector <8 x float> %broadcast.splatinsert171, <8 x float> undef, <8 x i32> zeroinitializer - %342 = trunc <8 x i64> %broadcast.splat167 to <8 x i32> - %343 = or <8 x i32> %342, - %344 = icmp sgt <8 x i32> %broadcast.splat169, %343 - %345 = extractelement <8 x i32> %343, i32 0 - %346 = add nsw i32 %mul.i.i.6, %345 - %347 = sext i32 %346 to i64 - %348 = getelementptr inbounds float, float* %16, i64 %347 - %349 = bitcast float* %348 to <8 x float>* - %wide.masked.load170 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %349, i32 4, <8 x i1> %344, <8 x float> undef), !tbaa !12 - %350 = fmul <8 x float> %broadcast.splat172, %wide.masked.load170 - %351 = bitcast float* %348 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %350, <8 x float>* %351, i32 4, <8 x i1> %344), !tbaa !12, !llvm.access.group !16 - %352 = trunc <8 x i64> %broadcast.splat167 to <8 x i32> - %353 = or <8 x i32> %352, - %354 = icmp sgt <8 x i32> %broadcast.splat169, %353 - %355 = extractelement <8 x i32> %353, i32 0 - %356 = add nsw i32 %mul.i.i.6, %355 - %357 = sext i32 %356 to i64 - %358 = getelementptr inbounds float, float* %16, i64 %357 - %359 = bitcast float* %358 to <8 x float>* - %wide.masked.load170.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %359, i32 4, <8 x i1> %354, <8 x float> undef), !tbaa !12 - %360 = fmul <8 x float> %broadcast.splat172, %wide.masked.load170.1 - %361 = bitcast float* %358 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %360, <8 x float>* %361, i32 4, <8 x i1> %354), !tbaa !12, !llvm.access.group !16 - %362 = trunc <8 x i64> %broadcast.splat167 to <8 x i32> - %363 = or <8 x i32> %362, - %364 = icmp sgt <8 x i32> %broadcast.splat169, %363 - %365 = extractelement <8 x i32> %363, i32 0 - %366 = add nsw i32 %mul.i.i.6, %365 - %367 = sext i32 %366 to i64 - %368 = getelementptr inbounds float, float* %16, i64 %367 - %369 = bitcast float* %368 to <8 x float>* - %wide.masked.load170.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %369, i32 4, <8 x i1> %364, <8 x float> undef), !tbaa !12 - %370 = fmul <8 x float> %broadcast.splat172, %wide.masked.load170.2 - %371 = bitcast float* %368 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %370, <8 x float>* %371, i32 4, <8 x i1> %364), !tbaa !12, !llvm.access.group !16 - %372 = trunc <8 x i64> %broadcast.splat167 to <8 x i32> - %373 = or <8 x i32> %372, - %374 = icmp sgt <8 x i32> %broadcast.splat169, %373 - %375 = extractelement <8 x i32> %373, i32 0 - %376 = add nsw i32 %mul.i.i.6, %375 - %377 = sext i32 %376 to i64 - %378 = getelementptr inbounds float, float* %16, i64 %377 - %379 = bitcast float* %378 to <8 x float>* - %wide.masked.load170.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %379, i32 4, <8 x i1> %374, <8 x float> undef), !tbaa !12 - %380 = fmul <8 x float> %broadcast.splat172, %wide.masked.load170.3 - %381 = bitcast float* %378 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %380, <8 x float>* %381, i32 4, <8 x i1> %374), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.i.i.us.6.3, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ %525, %if.end.i.i.us.6.3 ], [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %28, %conv.i.i.us.6 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %add.i.i.us.6 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.6 - %382 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %mul6.i.i.us.6 = fmul float %32, %382 - store float %mul6.i.i.us.6, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %383 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %383, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %28, %conv.i.i.us.6.1 - br i1 %cmp4.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.i.i.us.6.3 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph159, %pregion_for_end.i.i.5 - %384 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.7 = or i32 %384, 7 - %cmp.i.i.7 = icmp sgt i32 %20, %conv2.i.i.7 - %mul.i.i.7 = mul nsw i32 %28, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck180, label %_pocl_kernel_mm2_kernel2.exit - -vector.scevcheck180: ; preds = %pregion_for_end.i.i.6 - %385 = mul i32 %28, %conv2.i.i.7 - %386 = trunc i64 %2 to i32 - %387 = shl i32 %386, 5 - %388 = add i32 %385, %387 - %389 = icmp sgt i32 %388, 2147483616 - br i1 %389, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph181 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.scevcheck180 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.ph181: ; preds = %vector.scevcheck180 - %broadcast.splatinsert188 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat189 = shufflevector <8 x i64> %broadcast.splatinsert188, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert190 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat191 = shufflevector <8 x i32> %broadcast.splatinsert190, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert193 = insertelement <8 x float> undef, float %32, i32 0 - %broadcast.splat194 = shufflevector <8 x float> %broadcast.splatinsert193, <8 x float> undef, <8 x i32> zeroinitializer - %390 = trunc <8 x i64> %broadcast.splat189 to <8 x i32> - %391 = or <8 x i32> %390, - %392 = icmp sgt <8 x i32> %broadcast.splat191, %391 - %393 = extractelement <8 x i32> %391, i32 0 - %394 = add nsw i32 %mul.i.i.7, %393 - %395 = sext i32 %394 to i64 - %396 = getelementptr inbounds float, float* %16, i64 %395 - %397 = bitcast float* %396 to <8 x float>* - %wide.masked.load192 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %397, i32 4, <8 x i1> %392, <8 x float> undef), !tbaa !12 - %398 = fmul <8 x float> %broadcast.splat194, %wide.masked.load192 - %399 = bitcast float* %396 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %398, <8 x float>* %399, i32 4, <8 x i1> %392), !tbaa !12, !llvm.access.group !16 - %400 = trunc <8 x i64> %broadcast.splat189 to <8 x i32> - %401 = or <8 x i32> %400, - %402 = icmp sgt <8 x i32> %broadcast.splat191, %401 - %403 = extractelement <8 x i32> %401, i32 0 - %404 = add nsw i32 %mul.i.i.7, %403 - %405 = sext i32 %404 to i64 - %406 = getelementptr inbounds float, float* %16, i64 %405 - %407 = bitcast float* %406 to <8 x float>* - %wide.masked.load192.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %407, i32 4, <8 x i1> %402, <8 x float> undef), !tbaa !12 - %408 = fmul <8 x float> %broadcast.splat194, %wide.masked.load192.1 - %409 = bitcast float* %406 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %408, <8 x float>* %409, i32 4, <8 x i1> %402), !tbaa !12, !llvm.access.group !16 - %410 = trunc <8 x i64> %broadcast.splat189 to <8 x i32> - %411 = or <8 x i32> %410, - %412 = icmp sgt <8 x i32> %broadcast.splat191, %411 - %413 = extractelement <8 x i32> %411, i32 0 - %414 = add nsw i32 %mul.i.i.7, %413 - %415 = sext i32 %414 to i64 - %416 = getelementptr inbounds float, float* %16, i64 %415 - %417 = bitcast float* %416 to <8 x float>* - %wide.masked.load192.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %417, i32 4, <8 x i1> %412, <8 x float> undef), !tbaa !12 - %418 = fmul <8 x float> %broadcast.splat194, %wide.masked.load192.2 - %419 = bitcast float* %416 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %418, <8 x float>* %419, i32 4, <8 x i1> %412), !tbaa !12, !llvm.access.group !16 - %420 = trunc <8 x i64> %broadcast.splat189 to <8 x i32> - %421 = or <8 x i32> %420, - %422 = icmp sgt <8 x i32> %broadcast.splat191, %421 - %423 = extractelement <8 x i32> %421, i32 0 - %424 = add nsw i32 %mul.i.i.7, %423 - %425 = sext i32 %424 to i64 - %426 = getelementptr inbounds float, float* %16, i64 %425 - %427 = bitcast float* %426 to <8 x float>* - %wide.masked.load192.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %427, i32 4, <8 x i1> %422, <8 x float> undef), !tbaa !12 - %428 = fmul <8 x float> %broadcast.splat194, %wide.masked.load192.3 - %429 = bitcast float* %426 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %428, <8 x float>* %429, i32 4, <8 x i1> %422), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_mm2_kernel2.exit - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.i.i.us.7.3, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ %519, %if.end.i.i.us.7.3 ], [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %28, %conv.i.i.us.7 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %add.i.i.us.7 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.7 - %430 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %mul6.i.i.us.7 = fmul float %32, %430 - store float %mul6.i.i.us.7, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %431 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %431, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %28, %conv.i.i.us.7.1 - br i1 %cmp4.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.i.i.us.7.1 - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %441, %if.end.i.i.us.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %28, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.1 - %432 = load float, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12 - %mul6.i.i.us.us.1 = fmul float %32, %432 - store float %mul6.i.i.us.us.1, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %433 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %434 = phi float [ %440, %for.body.i.i.us.us.1 ], [ %mul6.i.i.us.us.1, %if.then.i.i.us.us.1 ] - %435 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %83 - %arrayidx12.i.i.us.us.1 = getelementptr inbounds float, float* %8, i64 %435 - %436 = load float, float* %arrayidx12.i.i.us.us.1, align 4, !tbaa !12 - %437 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1, %33 - %438 = add nsw i64 %437, %433 - %arrayidx16.i.i.us.us.1 = getelementptr inbounds float, float* %12, i64 %438 - %439 = load float, float* %arrayidx16.i.i.us.us.1, align 4, !tbaa !12 - %440 = tail call float @llvm.fmuladd.f32(float %436, float %439, float %434) #2 - store float %440, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !21 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %441 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 1 - %exitcond.not.1 = icmp eq i64 %441, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !19 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %442 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %442, 2 - %cmp.i.i.us.2 = icmp sgt i32 %20, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %28, %conv2.i.i.us.2 - %mul9.i.i.us.2 = mul nsw i32 %24, %conv2.i.i.us.2 - %443 = sext i32 %mul9.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %453, %if.end.i.i.us.us.2 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %28, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.2 - %444 = load float, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12 - %mul6.i.i.us.us.2 = fmul float %32, %444 - store float %mul6.i.i.us.us.2, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %445 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %446 = phi float [ %452, %for.body.i.i.us.us.2 ], [ %mul6.i.i.us.us.2, %if.then.i.i.us.us.2 ] - %447 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %443 - %arrayidx12.i.i.us.us.2 = getelementptr inbounds float, float* %8, i64 %447 - %448 = load float, float* %arrayidx12.i.i.us.us.2, align 4, !tbaa !12 - %449 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2, %33 - %450 = add nsw i64 %449, %445 - %arrayidx16.i.i.us.us.2 = getelementptr inbounds float, float* %12, i64 %450 - %451 = load float, float* %arrayidx16.i.i.us.us.2, align 4, !tbaa !12 - %452 = tail call float @llvm.fmuladd.f32(float %448, float %451, float %446) #2 - store float %452, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !21 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %453 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 1 - %exitcond.not.2 = icmp eq i64 %453, 32 - br i1 %exitcond.not.2, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !19 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %454 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %454, 3 - %cmp.i.i.us.3 = icmp sgt i32 %20, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %28, %conv2.i.i.us.3 - %mul9.i.i.us.3 = mul nsw i32 %24, %conv2.i.i.us.3 - %455 = sext i32 %mul9.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %465, %if.end.i.i.us.us.3 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %28, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.3 - %456 = load float, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12 - %mul6.i.i.us.us.3 = fmul float %32, %456 - store float %mul6.i.i.us.us.3, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %457 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %458 = phi float [ %464, %for.body.i.i.us.us.3 ], [ %mul6.i.i.us.us.3, %if.then.i.i.us.us.3 ] - %459 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %455 - %arrayidx12.i.i.us.us.3 = getelementptr inbounds float, float* %8, i64 %459 - %460 = load float, float* %arrayidx12.i.i.us.us.3, align 4, !tbaa !12 - %461 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3, %33 - %462 = add nsw i64 %461, %457 - %arrayidx16.i.i.us.us.3 = getelementptr inbounds float, float* %12, i64 %462 - %463 = load float, float* %arrayidx16.i.i.us.us.3, align 4, !tbaa !12 - %464 = tail call float @llvm.fmuladd.f32(float %460, float %463, float %458) #2 - store float %464, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !21 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %465 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 1 - %exitcond.not.3 = icmp eq i64 %465, 32 - br i1 %exitcond.not.3, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !19 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %466 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %466, 4 - %cmp.i.i.us.4 = icmp sgt i32 %20, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %28, %conv2.i.i.us.4 - %mul9.i.i.us.4 = mul nsw i32 %24, %conv2.i.i.us.4 - %467 = sext i32 %mul9.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %477, %if.end.i.i.us.us.4 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %28, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.4 - %468 = load float, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12 - %mul6.i.i.us.us.4 = fmul float %32, %468 - store float %mul6.i.i.us.us.4, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %469 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %470 = phi float [ %476, %for.body.i.i.us.us.4 ], [ %mul6.i.i.us.us.4, %if.then.i.i.us.us.4 ] - %471 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %467 - %arrayidx12.i.i.us.us.4 = getelementptr inbounds float, float* %8, i64 %471 - %472 = load float, float* %arrayidx12.i.i.us.us.4, align 4, !tbaa !12 - %473 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4, %33 - %474 = add nsw i64 %473, %469 - %arrayidx16.i.i.us.us.4 = getelementptr inbounds float, float* %12, i64 %474 - %475 = load float, float* %arrayidx16.i.i.us.us.4, align 4, !tbaa !12 - %476 = tail call float @llvm.fmuladd.f32(float %472, float %475, float %470) #2 - store float %476, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !21 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %477 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 1 - %exitcond.not.4 = icmp eq i64 %477, 32 - br i1 %exitcond.not.4, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !19 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %478 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %478, 5 - %cmp.i.i.us.5 = icmp sgt i32 %20, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %28, %conv2.i.i.us.5 - %mul9.i.i.us.5 = mul nsw i32 %24, %conv2.i.i.us.5 - %479 = sext i32 %mul9.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %489, %if.end.i.i.us.us.5 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %28, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.5 - %480 = load float, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12 - %mul6.i.i.us.us.5 = fmul float %32, %480 - store float %mul6.i.i.us.us.5, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %481 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %482 = phi float [ %488, %for.body.i.i.us.us.5 ], [ %mul6.i.i.us.us.5, %if.then.i.i.us.us.5 ] - %483 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %479 - %arrayidx12.i.i.us.us.5 = getelementptr inbounds float, float* %8, i64 %483 - %484 = load float, float* %arrayidx12.i.i.us.us.5, align 4, !tbaa !12 - %485 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5, %33 - %486 = add nsw i64 %485, %481 - %arrayidx16.i.i.us.us.5 = getelementptr inbounds float, float* %12, i64 %486 - %487 = load float, float* %arrayidx16.i.i.us.us.5, align 4, !tbaa !12 - %488 = tail call float @llvm.fmuladd.f32(float %484, float %487, float %482) #2 - store float %488, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !21 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %489 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 1 - %exitcond.not.5 = icmp eq i64 %489, 32 - br i1 %exitcond.not.5, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !19 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %490 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %490, 6 - %cmp.i.i.us.6 = icmp sgt i32 %20, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %28, %conv2.i.i.us.6 - %mul9.i.i.us.6 = mul nsw i32 %24, %conv2.i.i.us.6 - %491 = sext i32 %mul9.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %501, %if.end.i.i.us.us.6 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %28, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.6 - %492 = load float, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12 - %mul6.i.i.us.us.6 = fmul float %32, %492 - store float %mul6.i.i.us.us.6, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %493 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %494 = phi float [ %500, %for.body.i.i.us.us.6 ], [ %mul6.i.i.us.us.6, %if.then.i.i.us.us.6 ] - %495 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %491 - %arrayidx12.i.i.us.us.6 = getelementptr inbounds float, float* %8, i64 %495 - %496 = load float, float* %arrayidx12.i.i.us.us.6, align 4, !tbaa !12 - %497 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6, %33 - %498 = add nsw i64 %497, %493 - %arrayidx16.i.i.us.us.6 = getelementptr inbounds float, float* %12, i64 %498 - %499 = load float, float* %arrayidx16.i.i.us.us.6, align 4, !tbaa !12 - %500 = tail call float @llvm.fmuladd.f32(float %496, float %499, float %494) #2 - store float %500, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !21 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %501 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 1 - %exitcond.not.6 = icmp eq i64 %501, 32 - br i1 %exitcond.not.6, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !19 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %502 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %502, 7 - %cmp.i.i.us.7 = icmp sgt i32 %20, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %28, %conv2.i.i.us.7 - %mul9.i.i.us.7 = mul nsw i32 %24, %conv2.i.i.us.7 - %503 = sext i32 %mul9.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_mm2_kernel2.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %513, %if.end.i.i.us.us.7 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %28, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.7 - %504 = load float, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12 - %mul6.i.i.us.us.7 = fmul float %32, %504 - store float %mul6.i.i.us.us.7, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %505 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %506 = phi float [ %512, %for.body.i.i.us.us.7 ], [ %mul6.i.i.us.us.7, %if.then.i.i.us.us.7 ] - %507 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %503 - %arrayidx12.i.i.us.us.7 = getelementptr inbounds float, float* %8, i64 %507 - %508 = load float, float* %arrayidx12.i.i.us.us.7, align 4, !tbaa !12 - %509 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7, %33 - %510 = add nsw i64 %509, %505 - %arrayidx16.i.i.us.us.7 = getelementptr inbounds float, float* %12, i64 %510 - %511 = load float, float* %arrayidx16.i.i.us.us.7, align 4, !tbaa !12 - %512 = tail call float @llvm.fmuladd.f32(float %508, float %511, float %506) #2 - store float %512, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !21 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %513 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 1 - %exitcond.not.7 = icmp eq i64 %513, 32 - br i1 %exitcond.not.7, label %_pocl_kernel_mm2_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !19 - -if.then.i.i.us.7.1: ; preds = %if.end.i.i.us.7 - %add.i.i.us.7.1 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.7.1 - %514 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %mul6.i.i.us.7.1 = fmul float %32, %514 - store float %mul6.i.i.us.7.1, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.1 - -if.end.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.i.i.us.7 - %515 = or i64 %_local_id_x.i.0.us.7, 2 - %add1.i.i.i.us.7.2 = add nuw nsw i64 %515, %mul.i.i.i - %conv.i.i.us.7.2 = trunc i64 %add1.i.i.i.us.7.2 to i32 - %cmp4.i.i.us.7.2 = icmp sgt i32 %28, %conv.i.i.us.7.2 - br i1 %cmp4.i.i.us.7.2, label %if.then.i.i.us.7.2, label %if.end.i.i.us.7.2 - -if.then.i.i.us.7.2: ; preds = %if.end.i.i.us.7.1 - %add.i.i.us.7.2 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.2 - %idxprom.i.i.us.7.2 = sext i32 %add.i.i.us.7.2 to i64 - %arrayidx.i.i.us.7.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.7.2 - %516 = load float, float* %arrayidx.i.i.us.7.2, align 4, !tbaa !12 - %mul6.i.i.us.7.2 = fmul float %32, %516 - store float %mul6.i.i.us.7.2, float* %arrayidx.i.i.us.7.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.2 - -if.end.i.i.us.7.2: ; preds = %if.then.i.i.us.7.2, %if.end.i.i.us.7.1 - %517 = or i64 %_local_id_x.i.0.us.7, 3 - %add1.i.i.i.us.7.3 = add nuw nsw i64 %517, %mul.i.i.i - %conv.i.i.us.7.3 = trunc i64 %add1.i.i.i.us.7.3 to i32 - %cmp4.i.i.us.7.3 = icmp sgt i32 %28, %conv.i.i.us.7.3 - br i1 %cmp4.i.i.us.7.3, label %if.then.i.i.us.7.3, label %if.end.i.i.us.7.3 - -if.then.i.i.us.7.3: ; preds = %if.end.i.i.us.7.2 - %add.i.i.us.7.3 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.3 - %idxprom.i.i.us.7.3 = sext i32 %add.i.i.us.7.3 to i64 - %arrayidx.i.i.us.7.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.7.3 - %518 = load float, float* %arrayidx.i.i.us.7.3, align 4, !tbaa !12 - %mul6.i.i.us.7.3 = fmul float %32, %518 - store float %mul6.i.i.us.7.3, float* %arrayidx.i.i.us.7.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.3 - -if.end.i.i.us.7.3: ; preds = %if.then.i.i.us.7.3, %if.end.i.i.us.7.2 - %519 = add nuw nsw i64 %_local_id_x.i.0.us.7, 4 - %exitcond33.7.not.3 = icmp eq i64 %519, 32 - br i1 %exitcond33.7.not.3, label %_pocl_kernel_mm2_kernel2.exit.loopexit237, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !32 - -if.then.i.i.us.6.1: ; preds = %if.end.i.i.us.6 - %add.i.i.us.6.1 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.6.1 - %520 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %mul6.i.i.us.6.1 = fmul float %32, %520 - store float %mul6.i.i.us.6.1, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.1 - -if.end.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.i.i.us.6 - %521 = or i64 %_local_id_x.i.0.us.6, 2 - %add1.i.i.i.us.6.2 = add nuw nsw i64 %521, %mul.i.i.i - %conv.i.i.us.6.2 = trunc i64 %add1.i.i.i.us.6.2 to i32 - %cmp4.i.i.us.6.2 = icmp sgt i32 %28, %conv.i.i.us.6.2 - br i1 %cmp4.i.i.us.6.2, label %if.then.i.i.us.6.2, label %if.end.i.i.us.6.2 - -if.then.i.i.us.6.2: ; preds = %if.end.i.i.us.6.1 - %add.i.i.us.6.2 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.2 - %idxprom.i.i.us.6.2 = sext i32 %add.i.i.us.6.2 to i64 - %arrayidx.i.i.us.6.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.6.2 - %522 = load float, float* %arrayidx.i.i.us.6.2, align 4, !tbaa !12 - %mul6.i.i.us.6.2 = fmul float %32, %522 - store float %mul6.i.i.us.6.2, float* %arrayidx.i.i.us.6.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.2 - -if.end.i.i.us.6.2: ; preds = %if.then.i.i.us.6.2, %if.end.i.i.us.6.1 - %523 = or i64 %_local_id_x.i.0.us.6, 3 - %add1.i.i.i.us.6.3 = add nuw nsw i64 %523, %mul.i.i.i - %conv.i.i.us.6.3 = trunc i64 %add1.i.i.i.us.6.3 to i32 - %cmp4.i.i.us.6.3 = icmp sgt i32 %28, %conv.i.i.us.6.3 - br i1 %cmp4.i.i.us.6.3, label %if.then.i.i.us.6.3, label %if.end.i.i.us.6.3 - -if.then.i.i.us.6.3: ; preds = %if.end.i.i.us.6.2 - %add.i.i.us.6.3 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.3 - %idxprom.i.i.us.6.3 = sext i32 %add.i.i.us.6.3 to i64 - %arrayidx.i.i.us.6.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.6.3 - %524 = load float, float* %arrayidx.i.i.us.6.3, align 4, !tbaa !12 - %mul6.i.i.us.6.3 = fmul float %32, %524 - store float %mul6.i.i.us.6.3, float* %arrayidx.i.i.us.6.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.3 - -if.end.i.i.us.6.3: ; preds = %if.then.i.i.us.6.3, %if.end.i.i.us.6.2 - %525 = add nuw nsw i64 %_local_id_x.i.0.us.6, 4 - %exitcond33.6.not.3 = icmp eq i64 %525, 32 - br i1 %exitcond33.6.not.3, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !33 - -if.then.i.i.us.5.1: ; preds = %if.end.i.i.us.5 - %add.i.i.us.5.1 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.5.1 - %526 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %mul6.i.i.us.5.1 = fmul float %32, %526 - store float %mul6.i.i.us.5.1, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.1 - -if.end.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.i.i.us.5 - %527 = or i64 %_local_id_x.i.0.us.5, 2 - %add1.i.i.i.us.5.2 = add nuw nsw i64 %527, %mul.i.i.i - %conv.i.i.us.5.2 = trunc i64 %add1.i.i.i.us.5.2 to i32 - %cmp4.i.i.us.5.2 = icmp sgt i32 %28, %conv.i.i.us.5.2 - br i1 %cmp4.i.i.us.5.2, label %if.then.i.i.us.5.2, label %if.end.i.i.us.5.2 - -if.then.i.i.us.5.2: ; preds = %if.end.i.i.us.5.1 - %add.i.i.us.5.2 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.2 - %idxprom.i.i.us.5.2 = sext i32 %add.i.i.us.5.2 to i64 - %arrayidx.i.i.us.5.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.5.2 - %528 = load float, float* %arrayidx.i.i.us.5.2, align 4, !tbaa !12 - %mul6.i.i.us.5.2 = fmul float %32, %528 - store float %mul6.i.i.us.5.2, float* %arrayidx.i.i.us.5.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.2 - -if.end.i.i.us.5.2: ; preds = %if.then.i.i.us.5.2, %if.end.i.i.us.5.1 - %529 = or i64 %_local_id_x.i.0.us.5, 3 - %add1.i.i.i.us.5.3 = add nuw nsw i64 %529, %mul.i.i.i - %conv.i.i.us.5.3 = trunc i64 %add1.i.i.i.us.5.3 to i32 - %cmp4.i.i.us.5.3 = icmp sgt i32 %28, %conv.i.i.us.5.3 - br i1 %cmp4.i.i.us.5.3, label %if.then.i.i.us.5.3, label %if.end.i.i.us.5.3 - -if.then.i.i.us.5.3: ; preds = %if.end.i.i.us.5.2 - %add.i.i.us.5.3 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.3 - %idxprom.i.i.us.5.3 = sext i32 %add.i.i.us.5.3 to i64 - %arrayidx.i.i.us.5.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.5.3 - %530 = load float, float* %arrayidx.i.i.us.5.3, align 4, !tbaa !12 - %mul6.i.i.us.5.3 = fmul float %32, %530 - store float %mul6.i.i.us.5.3, float* %arrayidx.i.i.us.5.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.3 - -if.end.i.i.us.5.3: ; preds = %if.then.i.i.us.5.3, %if.end.i.i.us.5.2 - %531 = add nuw nsw i64 %_local_id_x.i.0.us.5, 4 - %exitcond33.5.not.3 = icmp eq i64 %531, 32 - br i1 %exitcond33.5.not.3, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !34 - -if.then.i.i.us.4.1: ; preds = %if.end.i.i.us.4 - %add.i.i.us.4.1 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.4.1 - %532 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %mul6.i.i.us.4.1 = fmul float %32, %532 - store float %mul6.i.i.us.4.1, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.1 - -if.end.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.i.i.us.4 - %533 = or i64 %_local_id_x.i.0.us.4, 2 - %add1.i.i.i.us.4.2 = add nuw nsw i64 %533, %mul.i.i.i - %conv.i.i.us.4.2 = trunc i64 %add1.i.i.i.us.4.2 to i32 - %cmp4.i.i.us.4.2 = icmp sgt i32 %28, %conv.i.i.us.4.2 - br i1 %cmp4.i.i.us.4.2, label %if.then.i.i.us.4.2, label %if.end.i.i.us.4.2 - -if.then.i.i.us.4.2: ; preds = %if.end.i.i.us.4.1 - %add.i.i.us.4.2 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.2 - %idxprom.i.i.us.4.2 = sext i32 %add.i.i.us.4.2 to i64 - %arrayidx.i.i.us.4.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.4.2 - %534 = load float, float* %arrayidx.i.i.us.4.2, align 4, !tbaa !12 - %mul6.i.i.us.4.2 = fmul float %32, %534 - store float %mul6.i.i.us.4.2, float* %arrayidx.i.i.us.4.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.2 - -if.end.i.i.us.4.2: ; preds = %if.then.i.i.us.4.2, %if.end.i.i.us.4.1 - %535 = or i64 %_local_id_x.i.0.us.4, 3 - %add1.i.i.i.us.4.3 = add nuw nsw i64 %535, %mul.i.i.i - %conv.i.i.us.4.3 = trunc i64 %add1.i.i.i.us.4.3 to i32 - %cmp4.i.i.us.4.3 = icmp sgt i32 %28, %conv.i.i.us.4.3 - br i1 %cmp4.i.i.us.4.3, label %if.then.i.i.us.4.3, label %if.end.i.i.us.4.3 - -if.then.i.i.us.4.3: ; preds = %if.end.i.i.us.4.2 - %add.i.i.us.4.3 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.3 - %idxprom.i.i.us.4.3 = sext i32 %add.i.i.us.4.3 to i64 - %arrayidx.i.i.us.4.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.4.3 - %536 = load float, float* %arrayidx.i.i.us.4.3, align 4, !tbaa !12 - %mul6.i.i.us.4.3 = fmul float %32, %536 - store float %mul6.i.i.us.4.3, float* %arrayidx.i.i.us.4.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.3 - -if.end.i.i.us.4.3: ; preds = %if.then.i.i.us.4.3, %if.end.i.i.us.4.2 - %537 = add nuw nsw i64 %_local_id_x.i.0.us.4, 4 - %exitcond33.4.not.3 = icmp eq i64 %537, 32 - br i1 %exitcond33.4.not.3, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !35 - -if.then.i.i.us.3.1: ; preds = %if.end.i.i.us.3 - %add.i.i.us.3.1 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3.1 - %538 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %mul6.i.i.us.3.1 = fmul float %32, %538 - store float %mul6.i.i.us.3.1, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.1 - -if.end.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.i.i.us.3 - %539 = or i64 %_local_id_x.i.0.us.3, 2 - %add1.i.i.i.us.3.2 = add nuw nsw i64 %539, %mul.i.i.i - %conv.i.i.us.3.2 = trunc i64 %add1.i.i.i.us.3.2 to i32 - %cmp4.i.i.us.3.2 = icmp sgt i32 %28, %conv.i.i.us.3.2 - br i1 %cmp4.i.i.us.3.2, label %if.then.i.i.us.3.2, label %if.end.i.i.us.3.2 - -if.then.i.i.us.3.2: ; preds = %if.end.i.i.us.3.1 - %add.i.i.us.3.2 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.2 - %idxprom.i.i.us.3.2 = sext i32 %add.i.i.us.3.2 to i64 - %arrayidx.i.i.us.3.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3.2 - %540 = load float, float* %arrayidx.i.i.us.3.2, align 4, !tbaa !12 - %mul6.i.i.us.3.2 = fmul float %32, %540 - store float %mul6.i.i.us.3.2, float* %arrayidx.i.i.us.3.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.2 - -if.end.i.i.us.3.2: ; preds = %if.then.i.i.us.3.2, %if.end.i.i.us.3.1 - %541 = or i64 %_local_id_x.i.0.us.3, 3 - %add1.i.i.i.us.3.3 = add nuw nsw i64 %541, %mul.i.i.i - %conv.i.i.us.3.3 = trunc i64 %add1.i.i.i.us.3.3 to i32 - %cmp4.i.i.us.3.3 = icmp sgt i32 %28, %conv.i.i.us.3.3 - br i1 %cmp4.i.i.us.3.3, label %if.then.i.i.us.3.3, label %if.end.i.i.us.3.3 - -if.then.i.i.us.3.3: ; preds = %if.end.i.i.us.3.2 - %add.i.i.us.3.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.3 - %idxprom.i.i.us.3.3 = sext i32 %add.i.i.us.3.3 to i64 - %arrayidx.i.i.us.3.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3.3 - %542 = load float, float* %arrayidx.i.i.us.3.3, align 4, !tbaa !12 - %mul6.i.i.us.3.3 = fmul float %32, %542 - store float %mul6.i.i.us.3.3, float* %arrayidx.i.i.us.3.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.3 - -if.end.i.i.us.3.3: ; preds = %if.then.i.i.us.3.3, %if.end.i.i.us.3.2 - %543 = add nuw nsw i64 %_local_id_x.i.0.us.3, 4 - %exitcond33.3.not.3 = icmp eq i64 %543, 32 - br i1 %exitcond33.3.not.3, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !36 - -if.then.i.i.us.2.1: ; preds = %if.end.i.i.us.2 - %add.i.i.us.2.1 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2.1 - %544 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %mul6.i.i.us.2.1 = fmul float %32, %544 - store float %mul6.i.i.us.2.1, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.1 - -if.end.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.i.i.us.2 - %545 = or i64 %_local_id_x.i.0.us.2, 2 - %add1.i.i.i.us.2.2 = add nuw nsw i64 %545, %mul.i.i.i - %conv.i.i.us.2.2 = trunc i64 %add1.i.i.i.us.2.2 to i32 - %cmp4.i.i.us.2.2 = icmp sgt i32 %28, %conv.i.i.us.2.2 - br i1 %cmp4.i.i.us.2.2, label %if.then.i.i.us.2.2, label %if.end.i.i.us.2.2 - -if.then.i.i.us.2.2: ; preds = %if.end.i.i.us.2.1 - %add.i.i.us.2.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.2 - %idxprom.i.i.us.2.2 = sext i32 %add.i.i.us.2.2 to i64 - %arrayidx.i.i.us.2.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2.2 - %546 = load float, float* %arrayidx.i.i.us.2.2, align 4, !tbaa !12 - %mul6.i.i.us.2.2 = fmul float %32, %546 - store float %mul6.i.i.us.2.2, float* %arrayidx.i.i.us.2.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.2 - -if.end.i.i.us.2.2: ; preds = %if.then.i.i.us.2.2, %if.end.i.i.us.2.1 - %547 = or i64 %_local_id_x.i.0.us.2, 3 - %add1.i.i.i.us.2.3 = add nuw nsw i64 %547, %mul.i.i.i - %conv.i.i.us.2.3 = trunc i64 %add1.i.i.i.us.2.3 to i32 - %cmp4.i.i.us.2.3 = icmp sgt i32 %28, %conv.i.i.us.2.3 - br i1 %cmp4.i.i.us.2.3, label %if.then.i.i.us.2.3, label %if.end.i.i.us.2.3 - -if.then.i.i.us.2.3: ; preds = %if.end.i.i.us.2.2 - %add.i.i.us.2.3 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.3 - %idxprom.i.i.us.2.3 = sext i32 %add.i.i.us.2.3 to i64 - %arrayidx.i.i.us.2.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2.3 - %548 = load float, float* %arrayidx.i.i.us.2.3, align 4, !tbaa !12 - %mul6.i.i.us.2.3 = fmul float %32, %548 - store float %mul6.i.i.us.2.3, float* %arrayidx.i.i.us.2.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.3 - -if.end.i.i.us.2.3: ; preds = %if.then.i.i.us.2.3, %if.end.i.i.us.2.2 - %549 = add nuw nsw i64 %_local_id_x.i.0.us.2, 4 - %exitcond33.2.not.3 = icmp eq i64 %549, 32 - br i1 %exitcond33.2.not.3, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !37 - -if.then.i.i.us.1.1: ; preds = %if.end.i.i.us.1 - %add.i.i.us.1.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1.1 - %550 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %mul6.i.i.us.1.1 = fmul float %32, %550 - store float %mul6.i.i.us.1.1, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.1 - -if.end.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.i.i.us.1 - %551 = or i64 %_local_id_x.i.0.us.1, 2 - %add1.i.i.i.us.1.2 = add nuw nsw i64 %551, %mul.i.i.i - %conv.i.i.us.1.2 = trunc i64 %add1.i.i.i.us.1.2 to i32 - %cmp4.i.i.us.1.2 = icmp sgt i32 %28, %conv.i.i.us.1.2 - br i1 %cmp4.i.i.us.1.2, label %if.then.i.i.us.1.2, label %if.end.i.i.us.1.2 - -if.then.i.i.us.1.2: ; preds = %if.end.i.i.us.1.1 - %add.i.i.us.1.2 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.2 - %idxprom.i.i.us.1.2 = sext i32 %add.i.i.us.1.2 to i64 - %arrayidx.i.i.us.1.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1.2 - %552 = load float, float* %arrayidx.i.i.us.1.2, align 4, !tbaa !12 - %mul6.i.i.us.1.2 = fmul float %32, %552 - store float %mul6.i.i.us.1.2, float* %arrayidx.i.i.us.1.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.2 - -if.end.i.i.us.1.2: ; preds = %if.then.i.i.us.1.2, %if.end.i.i.us.1.1 - %553 = or i64 %_local_id_x.i.0.us.1, 3 - %add1.i.i.i.us.1.3 = add nuw nsw i64 %553, %mul.i.i.i - %conv.i.i.us.1.3 = trunc i64 %add1.i.i.i.us.1.3 to i32 - %cmp4.i.i.us.1.3 = icmp sgt i32 %28, %conv.i.i.us.1.3 - br i1 %cmp4.i.i.us.1.3, label %if.then.i.i.us.1.3, label %if.end.i.i.us.1.3 - -if.then.i.i.us.1.3: ; preds = %if.end.i.i.us.1.2 - %add.i.i.us.1.3 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.3 - %idxprom.i.i.us.1.3 = sext i32 %add.i.i.us.1.3 to i64 - %arrayidx.i.i.us.1.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1.3 - %554 = load float, float* %arrayidx.i.i.us.1.3, align 4, !tbaa !12 - %mul6.i.i.us.1.3 = fmul float %32, %554 - store float %mul6.i.i.us.1.3, float* %arrayidx.i.i.us.1.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.3 - -if.end.i.i.us.1.3: ; preds = %if.then.i.i.us.1.3, %if.end.i.i.us.1.2 - %555 = add nuw nsw i64 %_local_id_x.i.0.us.1, 4 - %exitcond33.1.not.3 = icmp eq i64 %555, 32 - br i1 %exitcond33.1.not.3, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !38 - -if.then.i.i.us.1213: ; preds = %if.end.i.i.us - %add.i.i.us.1209 = add nsw i32 %mul.i.i.us, %conv.i.i.us.1206 - %idxprom.i.i.us.1210 = sext i32 %add.i.i.us.1209 to i64 - %arrayidx.i.i.us.1211 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1210 - %556 = load float, float* %arrayidx.i.i.us.1211, align 4, !tbaa !12 - %mul6.i.i.us.1212 = fmul float %32, %556 - store float %mul6.i.i.us.1212, float* %arrayidx.i.i.us.1211, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1214 - -if.end.i.i.us.1214: ; preds = %if.then.i.i.us.1213, %if.end.i.i.us - %557 = or i64 %_local_id_x.i.0.us, 2 - %add1.i.i.i.us.2216 = add nuw nsw i64 %557, %mul.i.i.i - %conv.i.i.us.2217 = trunc i64 %add1.i.i.i.us.2216 to i32 - %cmp4.i.i.us.2218 = icmp sgt i32 %28, %conv.i.i.us.2217 - br i1 %cmp4.i.i.us.2218, label %if.then.i.i.us.2224, label %if.end.i.i.us.2225 - -if.then.i.i.us.2224: ; preds = %if.end.i.i.us.1214 - %add.i.i.us.2220 = add nsw i32 %mul.i.i.us, %conv.i.i.us.2217 - %idxprom.i.i.us.2221 = sext i32 %add.i.i.us.2220 to i64 - %arrayidx.i.i.us.2222 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2221 - %558 = load float, float* %arrayidx.i.i.us.2222, align 4, !tbaa !12 - %mul6.i.i.us.2223 = fmul float %32, %558 - store float %mul6.i.i.us.2223, float* %arrayidx.i.i.us.2222, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2225 - -if.end.i.i.us.2225: ; preds = %if.then.i.i.us.2224, %if.end.i.i.us.1214 - %559 = or i64 %_local_id_x.i.0.us, 3 - %add1.i.i.i.us.3227 = add nuw nsw i64 %559, %mul.i.i.i - %conv.i.i.us.3228 = trunc i64 %add1.i.i.i.us.3227 to i32 - %cmp4.i.i.us.3229 = icmp sgt i32 %28, %conv.i.i.us.3228 - br i1 %cmp4.i.i.us.3229, label %if.then.i.i.us.3235, label %if.end.i.i.us.3236 - -if.then.i.i.us.3235: ; preds = %if.end.i.i.us.2225 - %add.i.i.us.3231 = add nsw i32 %mul.i.i.us, %conv.i.i.us.3228 - %idxprom.i.i.us.3232 = sext i32 %add.i.i.us.3231 to i64 - %arrayidx.i.i.us.3233 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3232 - %560 = load float, float* %arrayidx.i.i.us.3233, align 4, !tbaa !12 - %mul6.i.i.us.3234 = fmul float %32, %560 - store float %mul6.i.i.us.3234, float* %arrayidx.i.i.us.3233, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3236 - -if.end.i.i.us.3236: ; preds = %if.then.i.i.us.3235, %if.end.i.i.us.2225 - %561 = add nuw nsw i64 %_local_id_x.i.0.us, 4 - %exitcond33.not.3 = icmp eq i64 %561, 32 - br i1 %exitcond33.not.3, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !39 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mm2_kernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %22 = getelementptr i8*, i8** %0, i64 6 - %23 = bitcast i8** %22 to i32** - %24 = load i32*, i32** %23, align 8 - %25 = load i32, i32* %24, align 4 - %26 = getelementptr i8*, i8** %0, i64 8 - %27 = bitcast i8** %26 to float** - %28 = load float*, float** %27, align 8 - %29 = load float, float* %28, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp739.i.i = icmp sgt i32 %21, 0 - %30 = sext i32 %25 to i64 - %wide.trip.count.i.i = zext i32 %21 to i64 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %17, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %25, %conv2.i.i.us - br i1 %cmp739.i.i, label %pregion_for_entry.pregion_for_init.i.i.us, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - br i1 %cmp.i.i.us, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i.preheader - %31 = trunc i64 %3 to i32 - %32 = mul i32 %25, %31 - %33 = shl i32 %32, 3 - %34 = trunc i64 %2 to i32 - %35 = shl i32 %34, 5 - %36 = add i32 %33, %35 - %37 = icmp sgt i32 %36, 2147483616 - br i1 %37, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.ph: ; preds = %vector.scevcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert37 = insertelement <8 x i32> undef, i32 %25, i32 0 - %broadcast.splat38 = shufflevector <8 x i32> %broadcast.splatinsert37, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert39 = insertelement <8 x float> undef, float %29, i32 0 - %broadcast.splat40 = shufflevector <8 x float> %broadcast.splatinsert39, <8 x float> undef, <8 x i32> zeroinitializer - %38 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %39 = or <8 x i32> %38, - %40 = icmp sgt <8 x i32> %broadcast.splat38, %39 - %41 = extractelement <8 x i32> %39, i32 0 - %42 = add nsw i32 %mul.i.i.us, %41 - %43 = sext i32 %42 to i64 - %44 = getelementptr inbounds float, float* %13, i64 %43 - %45 = bitcast float* %44 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %45, i32 4, <8 x i1> %40, <8 x float> undef), !tbaa !12 - %46 = fmul <8 x float> %broadcast.splat40, %wide.masked.load - %47 = bitcast float* %44 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %46, <8 x float>* %47, i32 4, <8 x i1> %40), !tbaa !12, !llvm.access.group !16 - %48 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %49 = or <8 x i32> %48, - %50 = icmp sgt <8 x i32> %broadcast.splat38, %49 - %51 = extractelement <8 x i32> %49, i32 0 - %52 = add nsw i32 %mul.i.i.us, %51 - %53 = sext i32 %52 to i64 - %54 = getelementptr inbounds float, float* %13, i64 %53 - %55 = bitcast float* %54 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %55, i32 4, <8 x i1> %50, <8 x float> undef), !tbaa !12 - %56 = fmul <8 x float> %broadcast.splat40, %wide.masked.load.1 - %57 = bitcast float* %54 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %56, <8 x float>* %57, i32 4, <8 x i1> %50), !tbaa !12, !llvm.access.group !16 - %58 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %59 = or <8 x i32> %58, - %60 = icmp sgt <8 x i32> %broadcast.splat38, %59 - %61 = extractelement <8 x i32> %59, i32 0 - %62 = add nsw i32 %mul.i.i.us, %61 - %63 = sext i32 %62 to i64 - %64 = getelementptr inbounds float, float* %13, i64 %63 - %65 = bitcast float* %64 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %65, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12 - %66 = fmul <8 x float> %broadcast.splat40, %wide.masked.load.2 - %67 = bitcast float* %64 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %66, <8 x float>* %67, i32 4, <8 x i1> %60), !tbaa !12, !llvm.access.group !16 - %68 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %69 = or <8 x i32> %68, - %70 = icmp sgt <8 x i32> %broadcast.splat38, %69 - %71 = extractelement <8 x i32> %69, i32 0 - %72 = add nsw i32 %mul.i.i.us, %71 - %73 = sext i32 %72 to i64 - %74 = getelementptr inbounds float, float* %13, i64 %73 - %75 = bitcast float* %74 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %75, i32 4, <8 x i1> %70, <8 x float> undef), !tbaa !12 - %76 = fmul <8 x float> %broadcast.splat40, %wide.masked.load.3 - %77 = bitcast float* %74 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %76, <8 x float>* %77, i32 4, <8 x i1> %70), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -pregion_for_entry.pregion_for_init.i.i.us: ; preds = %5 - %mul9.i.i.us = mul nsw i32 %21, %conv2.i.i.us - %78 = sext i32 %mul9.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us - %79 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %79, 1 - %cmp.i.i.us.1 = icmp sgt i32 %17, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %25, %conv2.i.i.us.1 - %mul9.i.i.us.1 = mul nsw i32 %21, %conv2.i.i.us.1 - %80 = sext i32 %mul9.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %83, %if.end.i.i.us.us ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %25, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us - %81 = load float, float* %arrayidx.i.i.us.us, align 4, !tbaa !12 - %mul6.i.i.us.us = fmul float %29, %81 - store float %mul6.i.i.us.us, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %82 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %83 = add nuw nsw i64 %_local_id_x.i.0.us.us, 1 - %exitcond.not = icmp eq i64 %83, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !19 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %84 = phi float [ %90, %for.body.i.i.us.us ], [ %mul6.i.i.us.us, %if.then.i.i.us.us ] - %85 = add nsw i64 %indvars.iv.next.i.i3.us.us, %78 - %arrayidx12.i.i.us.us = getelementptr inbounds float, float* %7, i64 %85 - %86 = load float, float* %arrayidx12.i.i.us.us, align 4, !tbaa !12 - %87 = mul nsw i64 %indvars.iv.next.i.i3.us.us, %30 - %88 = add nsw i64 %87, %82 - %arrayidx16.i.i.us.us = getelementptr inbounds float, float* %10, i64 %88 - %89 = load float, float* %arrayidx16.i.i.us.us, align 4, !tbaa !12 - %90 = tail call float @llvm.fmuladd.f32(float %86, float %89, float %84) #2 - store float %90, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !21 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us.3236, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %558, %if.end.i.i.us.3236 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %25, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add.i.i.us = add nsw i32 %mul.i.i.us, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us - %91 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %mul6.i.i.us = fmul float %29, %91 - store float %mul6.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %92 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1205 = add nuw nsw i64 %92, %mul.i.i.i - %conv.i.i.us.1206 = trunc i64 %add1.i.i.i.us.1205 to i32 - %cmp4.i.i.us.1207 = icmp sgt i32 %25, %conv.i.i.us.1206 - br i1 %cmp4.i.i.us.1207, label %if.then.i.i.us.1213, label %if.end.i.i.us.1214 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us.3236 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i.preheader - %93 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.1 = or i32 %93, 1 - %cmp.i.i.1 = icmp sgt i32 %17, %conv2.i.i.1 - %mul.i.i.1 = mul nsw i32 %25, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck48, label %pregion_for_end.i.i.1 - -vector.scevcheck48: ; preds = %pregion_for_end.i.i - %94 = mul i32 %25, %conv2.i.i.1 - %95 = trunc i64 %2 to i32 - %96 = shl i32 %95, 5 - %97 = add i32 %94, %96 - %98 = icmp sgt i32 %97, 2147483616 - br i1 %98, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph49 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.scevcheck48 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.ph49: ; preds = %vector.scevcheck48 - %broadcast.splatinsert56 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat57 = shufflevector <8 x i64> %broadcast.splatinsert56, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert58 = insertelement <8 x i32> undef, i32 %25, i32 0 - %broadcast.splat59 = shufflevector <8 x i32> %broadcast.splatinsert58, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert61 = insertelement <8 x float> undef, float %29, i32 0 - %broadcast.splat62 = shufflevector <8 x float> %broadcast.splatinsert61, <8 x float> undef, <8 x i32> zeroinitializer - %99 = trunc <8 x i64> %broadcast.splat57 to <8 x i32> - %100 = or <8 x i32> %99, - %101 = icmp sgt <8 x i32> %broadcast.splat59, %100 - %102 = extractelement <8 x i32> %100, i32 0 - %103 = add nsw i32 %mul.i.i.1, %102 - %104 = sext i32 %103 to i64 - %105 = getelementptr inbounds float, float* %13, i64 %104 - %106 = bitcast float* %105 to <8 x float>* - %wide.masked.load60 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %106, i32 4, <8 x i1> %101, <8 x float> undef), !tbaa !12 - %107 = fmul <8 x float> %broadcast.splat62, %wide.masked.load60 - %108 = bitcast float* %105 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %107, <8 x float>* %108, i32 4, <8 x i1> %101), !tbaa !12, !llvm.access.group !16 - %109 = trunc <8 x i64> %broadcast.splat57 to <8 x i32> - %110 = or <8 x i32> %109, - %111 = icmp sgt <8 x i32> %broadcast.splat59, %110 - %112 = extractelement <8 x i32> %110, i32 0 - %113 = add nsw i32 %mul.i.i.1, %112 - %114 = sext i32 %113 to i64 - %115 = getelementptr inbounds float, float* %13, i64 %114 - %116 = bitcast float* %115 to <8 x float>* - %wide.masked.load60.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %116, i32 4, <8 x i1> %111, <8 x float> undef), !tbaa !12 - %117 = fmul <8 x float> %broadcast.splat62, %wide.masked.load60.1 - %118 = bitcast float* %115 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %117, <8 x float>* %118, i32 4, <8 x i1> %111), !tbaa !12, !llvm.access.group !16 - %119 = trunc <8 x i64> %broadcast.splat57 to <8 x i32> - %120 = or <8 x i32> %119, - %121 = icmp sgt <8 x i32> %broadcast.splat59, %120 - %122 = extractelement <8 x i32> %120, i32 0 - %123 = add nsw i32 %mul.i.i.1, %122 - %124 = sext i32 %123 to i64 - %125 = getelementptr inbounds float, float* %13, i64 %124 - %126 = bitcast float* %125 to <8 x float>* - %wide.masked.load60.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %126, i32 4, <8 x i1> %121, <8 x float> undef), !tbaa !12 - %127 = fmul <8 x float> %broadcast.splat62, %wide.masked.load60.2 - %128 = bitcast float* %125 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %127, <8 x float>* %128, i32 4, <8 x i1> %121), !tbaa !12, !llvm.access.group !16 - %129 = trunc <8 x i64> %broadcast.splat57 to <8 x i32> - %130 = or <8 x i32> %129, - %131 = icmp sgt <8 x i32> %broadcast.splat59, %130 - %132 = extractelement <8 x i32> %130, i32 0 - %133 = add nsw i32 %mul.i.i.1, %132 - %134 = sext i32 %133 to i64 - %135 = getelementptr inbounds float, float* %13, i64 %134 - %136 = bitcast float* %135 to <8 x float>* - %wide.masked.load60.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %136, i32 4, <8 x i1> %131, <8 x float> undef), !tbaa !12 - %137 = fmul <8 x float> %broadcast.splat62, %wide.masked.load60.3 - %138 = bitcast float* %135 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %137, <8 x float>* %138, i32 4, <8 x i1> %131), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.1 - -_pocl_kernel_mm2_kernel2.exit.loopexit: ; preds = %if.end.i.i.us.us.7 - br label %_pocl_kernel_mm2_kernel2.exit - -_pocl_kernel_mm2_kernel2.exit.loopexit237: ; preds = %if.end.i.i.us.7.3 - br label %_pocl_kernel_mm2_kernel2.exit - -_pocl_kernel_mm2_kernel2.exit: ; preds = %pregion_for_end.i.i.us.6, %vector.ph181, %pregion_for_end.i.i.6, %_pocl_kernel_mm2_kernel2.exit.loopexit237, %_pocl_kernel_mm2_kernel2.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.i.i.us.1.3, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ %552, %if.end.i.i.us.1.3 ], [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %25, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %add.i.i.us.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1 - %139 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %mul6.i.i.us.1 = fmul float %29, %139 - store float %mul6.i.i.us.1, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %140 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %140, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %25, %conv.i.i.us.1.1 - br i1 %cmp4.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.i.i.us.1.3 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph49, %pregion_for_end.i.i - %141 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.2 = or i32 %141, 2 - %cmp.i.i.2 = icmp sgt i32 %17, %conv2.i.i.2 - %mul.i.i.2 = mul nsw i32 %25, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck70, label %pregion_for_end.i.i.2 - -vector.scevcheck70: ; preds = %pregion_for_end.i.i.1 - %142 = mul i32 %25, %conv2.i.i.2 - %143 = trunc i64 %2 to i32 - %144 = shl i32 %143, 5 - %145 = add i32 %142, %144 - %146 = icmp sgt i32 %145, 2147483616 - br i1 %146, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph71 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.scevcheck70 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.ph71: ; preds = %vector.scevcheck70 - %broadcast.splatinsert78 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat79 = shufflevector <8 x i64> %broadcast.splatinsert78, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert80 = insertelement <8 x i32> undef, i32 %25, i32 0 - %broadcast.splat81 = shufflevector <8 x i32> %broadcast.splatinsert80, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert83 = insertelement <8 x float> undef, float %29, i32 0 - %broadcast.splat84 = shufflevector <8 x float> %broadcast.splatinsert83, <8 x float> undef, <8 x i32> zeroinitializer - %147 = trunc <8 x i64> %broadcast.splat79 to <8 x i32> - %148 = or <8 x i32> %147, - %149 = icmp sgt <8 x i32> %broadcast.splat81, %148 - %150 = extractelement <8 x i32> %148, i32 0 - %151 = add nsw i32 %mul.i.i.2, %150 - %152 = sext i32 %151 to i64 - %153 = getelementptr inbounds float, float* %13, i64 %152 - %154 = bitcast float* %153 to <8 x float>* - %wide.masked.load82 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %154, i32 4, <8 x i1> %149, <8 x float> undef), !tbaa !12 - %155 = fmul <8 x float> %broadcast.splat84, %wide.masked.load82 - %156 = bitcast float* %153 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %155, <8 x float>* %156, i32 4, <8 x i1> %149), !tbaa !12, !llvm.access.group !16 - %157 = trunc <8 x i64> %broadcast.splat79 to <8 x i32> - %158 = or <8 x i32> %157, - %159 = icmp sgt <8 x i32> %broadcast.splat81, %158 - %160 = extractelement <8 x i32> %158, i32 0 - %161 = add nsw i32 %mul.i.i.2, %160 - %162 = sext i32 %161 to i64 - %163 = getelementptr inbounds float, float* %13, i64 %162 - %164 = bitcast float* %163 to <8 x float>* - %wide.masked.load82.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %164, i32 4, <8 x i1> %159, <8 x float> undef), !tbaa !12 - %165 = fmul <8 x float> %broadcast.splat84, %wide.masked.load82.1 - %166 = bitcast float* %163 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %165, <8 x float>* %166, i32 4, <8 x i1> %159), !tbaa !12, !llvm.access.group !16 - %167 = trunc <8 x i64> %broadcast.splat79 to <8 x i32> - %168 = or <8 x i32> %167, - %169 = icmp sgt <8 x i32> %broadcast.splat81, %168 - %170 = extractelement <8 x i32> %168, i32 0 - %171 = add nsw i32 %mul.i.i.2, %170 - %172 = sext i32 %171 to i64 - %173 = getelementptr inbounds float, float* %13, i64 %172 - %174 = bitcast float* %173 to <8 x float>* - %wide.masked.load82.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %174, i32 4, <8 x i1> %169, <8 x float> undef), !tbaa !12 - %175 = fmul <8 x float> %broadcast.splat84, %wide.masked.load82.2 - %176 = bitcast float* %173 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %175, <8 x float>* %176, i32 4, <8 x i1> %169), !tbaa !12, !llvm.access.group !16 - %177 = trunc <8 x i64> %broadcast.splat79 to <8 x i32> - %178 = or <8 x i32> %177, - %179 = icmp sgt <8 x i32> %broadcast.splat81, %178 - %180 = extractelement <8 x i32> %178, i32 0 - %181 = add nsw i32 %mul.i.i.2, %180 - %182 = sext i32 %181 to i64 - %183 = getelementptr inbounds float, float* %13, i64 %182 - %184 = bitcast float* %183 to <8 x float>* - %wide.masked.load82.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %184, i32 4, <8 x i1> %179, <8 x float> undef), !tbaa !12 - %185 = fmul <8 x float> %broadcast.splat84, %wide.masked.load82.3 - %186 = bitcast float* %183 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %185, <8 x float>* %186, i32 4, <8 x i1> %179), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.i.i.us.2.3, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ %546, %if.end.i.i.us.2.3 ], [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %25, %conv.i.i.us.2 - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %add.i.i.us.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2 - %187 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %mul6.i.i.us.2 = fmul float %29, %187 - store float %mul6.i.i.us.2, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %188 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %188, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %25, %conv.i.i.us.2.1 - br i1 %cmp4.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.i.i.us.2.3 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph71, %pregion_for_end.i.i.1 - %189 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.3 = or i32 %189, 3 - %cmp.i.i.3 = icmp sgt i32 %17, %conv2.i.i.3 - %mul.i.i.3 = mul nsw i32 %25, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck92, label %pregion_for_end.i.i.3 - -vector.scevcheck92: ; preds = %pregion_for_end.i.i.2 - %190 = mul i32 %25, %conv2.i.i.3 - %191 = trunc i64 %2 to i32 - %192 = shl i32 %191, 5 - %193 = add i32 %190, %192 - %194 = icmp sgt i32 %193, 2147483616 - br i1 %194, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph93 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.scevcheck92 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.ph93: ; preds = %vector.scevcheck92 - %broadcast.splatinsert100 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat101 = shufflevector <8 x i64> %broadcast.splatinsert100, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert102 = insertelement <8 x i32> undef, i32 %25, i32 0 - %broadcast.splat103 = shufflevector <8 x i32> %broadcast.splatinsert102, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert105 = insertelement <8 x float> undef, float %29, i32 0 - %broadcast.splat106 = shufflevector <8 x float> %broadcast.splatinsert105, <8 x float> undef, <8 x i32> zeroinitializer - %195 = trunc <8 x i64> %broadcast.splat101 to <8 x i32> - %196 = or <8 x i32> %195, - %197 = icmp sgt <8 x i32> %broadcast.splat103, %196 - %198 = extractelement <8 x i32> %196, i32 0 - %199 = add nsw i32 %mul.i.i.3, %198 - %200 = sext i32 %199 to i64 - %201 = getelementptr inbounds float, float* %13, i64 %200 - %202 = bitcast float* %201 to <8 x float>* - %wide.masked.load104 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %202, i32 4, <8 x i1> %197, <8 x float> undef), !tbaa !12 - %203 = fmul <8 x float> %broadcast.splat106, %wide.masked.load104 - %204 = bitcast float* %201 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %203, <8 x float>* %204, i32 4, <8 x i1> %197), !tbaa !12, !llvm.access.group !16 - %205 = trunc <8 x i64> %broadcast.splat101 to <8 x i32> - %206 = or <8 x i32> %205, - %207 = icmp sgt <8 x i32> %broadcast.splat103, %206 - %208 = extractelement <8 x i32> %206, i32 0 - %209 = add nsw i32 %mul.i.i.3, %208 - %210 = sext i32 %209 to i64 - %211 = getelementptr inbounds float, float* %13, i64 %210 - %212 = bitcast float* %211 to <8 x float>* - %wide.masked.load104.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %212, i32 4, <8 x i1> %207, <8 x float> undef), !tbaa !12 - %213 = fmul <8 x float> %broadcast.splat106, %wide.masked.load104.1 - %214 = bitcast float* %211 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %213, <8 x float>* %214, i32 4, <8 x i1> %207), !tbaa !12, !llvm.access.group !16 - %215 = trunc <8 x i64> %broadcast.splat101 to <8 x i32> - %216 = or <8 x i32> %215, - %217 = icmp sgt <8 x i32> %broadcast.splat103, %216 - %218 = extractelement <8 x i32> %216, i32 0 - %219 = add nsw i32 %mul.i.i.3, %218 - %220 = sext i32 %219 to i64 - %221 = getelementptr inbounds float, float* %13, i64 %220 - %222 = bitcast float* %221 to <8 x float>* - %wide.masked.load104.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %222, i32 4, <8 x i1> %217, <8 x float> undef), !tbaa !12 - %223 = fmul <8 x float> %broadcast.splat106, %wide.masked.load104.2 - %224 = bitcast float* %221 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %223, <8 x float>* %224, i32 4, <8 x i1> %217), !tbaa !12, !llvm.access.group !16 - %225 = trunc <8 x i64> %broadcast.splat101 to <8 x i32> - %226 = or <8 x i32> %225, - %227 = icmp sgt <8 x i32> %broadcast.splat103, %226 - %228 = extractelement <8 x i32> %226, i32 0 - %229 = add nsw i32 %mul.i.i.3, %228 - %230 = sext i32 %229 to i64 - %231 = getelementptr inbounds float, float* %13, i64 %230 - %232 = bitcast float* %231 to <8 x float>* - %wide.masked.load104.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %232, i32 4, <8 x i1> %227, <8 x float> undef), !tbaa !12 - %233 = fmul <8 x float> %broadcast.splat106, %wide.masked.load104.3 - %234 = bitcast float* %231 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %233, <8 x float>* %234, i32 4, <8 x i1> %227), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.i.i.us.3.3, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ %540, %if.end.i.i.us.3.3 ], [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %25, %conv.i.i.us.3 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %add.i.i.us.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3 - %235 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %mul6.i.i.us.3 = fmul float %29, %235 - store float %mul6.i.i.us.3, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %236 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %236, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %25, %conv.i.i.us.3.1 - br i1 %cmp4.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.i.i.us.3.3 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph93, %pregion_for_end.i.i.2 - %237 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.4 = or i32 %237, 4 - %cmp.i.i.4 = icmp sgt i32 %17, %conv2.i.i.4 - %mul.i.i.4 = mul nsw i32 %25, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck114, label %pregion_for_end.i.i.4 - -vector.scevcheck114: ; preds = %pregion_for_end.i.i.3 - %238 = mul i32 %25, %conv2.i.i.4 - %239 = trunc i64 %2 to i32 - %240 = shl i32 %239, 5 - %241 = add i32 %238, %240 - %242 = icmp sgt i32 %241, 2147483616 - br i1 %242, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph115 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.scevcheck114 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.ph115: ; preds = %vector.scevcheck114 - %broadcast.splatinsert122 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat123 = shufflevector <8 x i64> %broadcast.splatinsert122, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert124 = insertelement <8 x i32> undef, i32 %25, i32 0 - %broadcast.splat125 = shufflevector <8 x i32> %broadcast.splatinsert124, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert127 = insertelement <8 x float> undef, float %29, i32 0 - %broadcast.splat128 = shufflevector <8 x float> %broadcast.splatinsert127, <8 x float> undef, <8 x i32> zeroinitializer - %243 = trunc <8 x i64> %broadcast.splat123 to <8 x i32> - %244 = or <8 x i32> %243, - %245 = icmp sgt <8 x i32> %broadcast.splat125, %244 - %246 = extractelement <8 x i32> %244, i32 0 - %247 = add nsw i32 %mul.i.i.4, %246 - %248 = sext i32 %247 to i64 - %249 = getelementptr inbounds float, float* %13, i64 %248 - %250 = bitcast float* %249 to <8 x float>* - %wide.masked.load126 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %250, i32 4, <8 x i1> %245, <8 x float> undef), !tbaa !12 - %251 = fmul <8 x float> %broadcast.splat128, %wide.masked.load126 - %252 = bitcast float* %249 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %251, <8 x float>* %252, i32 4, <8 x i1> %245), !tbaa !12, !llvm.access.group !16 - %253 = trunc <8 x i64> %broadcast.splat123 to <8 x i32> - %254 = or <8 x i32> %253, - %255 = icmp sgt <8 x i32> %broadcast.splat125, %254 - %256 = extractelement <8 x i32> %254, i32 0 - %257 = add nsw i32 %mul.i.i.4, %256 - %258 = sext i32 %257 to i64 - %259 = getelementptr inbounds float, float* %13, i64 %258 - %260 = bitcast float* %259 to <8 x float>* - %wide.masked.load126.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %260, i32 4, <8 x i1> %255, <8 x float> undef), !tbaa !12 - %261 = fmul <8 x float> %broadcast.splat128, %wide.masked.load126.1 - %262 = bitcast float* %259 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %261, <8 x float>* %262, i32 4, <8 x i1> %255), !tbaa !12, !llvm.access.group !16 - %263 = trunc <8 x i64> %broadcast.splat123 to <8 x i32> - %264 = or <8 x i32> %263, - %265 = icmp sgt <8 x i32> %broadcast.splat125, %264 - %266 = extractelement <8 x i32> %264, i32 0 - %267 = add nsw i32 %mul.i.i.4, %266 - %268 = sext i32 %267 to i64 - %269 = getelementptr inbounds float, float* %13, i64 %268 - %270 = bitcast float* %269 to <8 x float>* - %wide.masked.load126.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %270, i32 4, <8 x i1> %265, <8 x float> undef), !tbaa !12 - %271 = fmul <8 x float> %broadcast.splat128, %wide.masked.load126.2 - %272 = bitcast float* %269 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %271, <8 x float>* %272, i32 4, <8 x i1> %265), !tbaa !12, !llvm.access.group !16 - %273 = trunc <8 x i64> %broadcast.splat123 to <8 x i32> - %274 = or <8 x i32> %273, - %275 = icmp sgt <8 x i32> %broadcast.splat125, %274 - %276 = extractelement <8 x i32> %274, i32 0 - %277 = add nsw i32 %mul.i.i.4, %276 - %278 = sext i32 %277 to i64 - %279 = getelementptr inbounds float, float* %13, i64 %278 - %280 = bitcast float* %279 to <8 x float>* - %wide.masked.load126.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %280, i32 4, <8 x i1> %275, <8 x float> undef), !tbaa !12 - %281 = fmul <8 x float> %broadcast.splat128, %wide.masked.load126.3 - %282 = bitcast float* %279 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %281, <8 x float>* %282, i32 4, <8 x i1> %275), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.i.i.us.4.3, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ %534, %if.end.i.i.us.4.3 ], [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %25, %conv.i.i.us.4 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %add.i.i.us.4 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.4 - %283 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %mul6.i.i.us.4 = fmul float %29, %283 - store float %mul6.i.i.us.4, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %284 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %284, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %25, %conv.i.i.us.4.1 - br i1 %cmp4.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.i.i.us.4.3 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph115, %pregion_for_end.i.i.3 - %285 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.5 = or i32 %285, 5 - %cmp.i.i.5 = icmp sgt i32 %17, %conv2.i.i.5 - %mul.i.i.5 = mul nsw i32 %25, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck136, label %pregion_for_end.i.i.5 - -vector.scevcheck136: ; preds = %pregion_for_end.i.i.4 - %286 = mul i32 %25, %conv2.i.i.5 - %287 = trunc i64 %2 to i32 - %288 = shl i32 %287, 5 - %289 = add i32 %286, %288 - %290 = icmp sgt i32 %289, 2147483616 - br i1 %290, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph137 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.scevcheck136 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.ph137: ; preds = %vector.scevcheck136 - %broadcast.splatinsert144 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat145 = shufflevector <8 x i64> %broadcast.splatinsert144, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert146 = insertelement <8 x i32> undef, i32 %25, i32 0 - %broadcast.splat147 = shufflevector <8 x i32> %broadcast.splatinsert146, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert149 = insertelement <8 x float> undef, float %29, i32 0 - %broadcast.splat150 = shufflevector <8 x float> %broadcast.splatinsert149, <8 x float> undef, <8 x i32> zeroinitializer - %291 = trunc <8 x i64> %broadcast.splat145 to <8 x i32> - %292 = or <8 x i32> %291, - %293 = icmp sgt <8 x i32> %broadcast.splat147, %292 - %294 = extractelement <8 x i32> %292, i32 0 - %295 = add nsw i32 %mul.i.i.5, %294 - %296 = sext i32 %295 to i64 - %297 = getelementptr inbounds float, float* %13, i64 %296 - %298 = bitcast float* %297 to <8 x float>* - %wide.masked.load148 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %298, i32 4, <8 x i1> %293, <8 x float> undef), !tbaa !12 - %299 = fmul <8 x float> %broadcast.splat150, %wide.masked.load148 - %300 = bitcast float* %297 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %299, <8 x float>* %300, i32 4, <8 x i1> %293), !tbaa !12, !llvm.access.group !16 - %301 = trunc <8 x i64> %broadcast.splat145 to <8 x i32> - %302 = or <8 x i32> %301, - %303 = icmp sgt <8 x i32> %broadcast.splat147, %302 - %304 = extractelement <8 x i32> %302, i32 0 - %305 = add nsw i32 %mul.i.i.5, %304 - %306 = sext i32 %305 to i64 - %307 = getelementptr inbounds float, float* %13, i64 %306 - %308 = bitcast float* %307 to <8 x float>* - %wide.masked.load148.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %308, i32 4, <8 x i1> %303, <8 x float> undef), !tbaa !12 - %309 = fmul <8 x float> %broadcast.splat150, %wide.masked.load148.1 - %310 = bitcast float* %307 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %309, <8 x float>* %310, i32 4, <8 x i1> %303), !tbaa !12, !llvm.access.group !16 - %311 = trunc <8 x i64> %broadcast.splat145 to <8 x i32> - %312 = or <8 x i32> %311, - %313 = icmp sgt <8 x i32> %broadcast.splat147, %312 - %314 = extractelement <8 x i32> %312, i32 0 - %315 = add nsw i32 %mul.i.i.5, %314 - %316 = sext i32 %315 to i64 - %317 = getelementptr inbounds float, float* %13, i64 %316 - %318 = bitcast float* %317 to <8 x float>* - %wide.masked.load148.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %318, i32 4, <8 x i1> %313, <8 x float> undef), !tbaa !12 - %319 = fmul <8 x float> %broadcast.splat150, %wide.masked.load148.2 - %320 = bitcast float* %317 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %319, <8 x float>* %320, i32 4, <8 x i1> %313), !tbaa !12, !llvm.access.group !16 - %321 = trunc <8 x i64> %broadcast.splat145 to <8 x i32> - %322 = or <8 x i32> %321, - %323 = icmp sgt <8 x i32> %broadcast.splat147, %322 - %324 = extractelement <8 x i32> %322, i32 0 - %325 = add nsw i32 %mul.i.i.5, %324 - %326 = sext i32 %325 to i64 - %327 = getelementptr inbounds float, float* %13, i64 %326 - %328 = bitcast float* %327 to <8 x float>* - %wide.masked.load148.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %328, i32 4, <8 x i1> %323, <8 x float> undef), !tbaa !12 - %329 = fmul <8 x float> %broadcast.splat150, %wide.masked.load148.3 - %330 = bitcast float* %327 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %329, <8 x float>* %330, i32 4, <8 x i1> %323), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.i.i.us.5.3, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ %528, %if.end.i.i.us.5.3 ], [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %25, %conv.i.i.us.5 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %add.i.i.us.5 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.5 - %331 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %mul6.i.i.us.5 = fmul float %29, %331 - store float %mul6.i.i.us.5, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %332 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %332, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %25, %conv.i.i.us.5.1 - br i1 %cmp4.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.i.i.us.5.3 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph137, %pregion_for_end.i.i.4 - %333 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.6 = or i32 %333, 6 - %cmp.i.i.6 = icmp sgt i32 %17, %conv2.i.i.6 - %mul.i.i.6 = mul nsw i32 %25, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck158, label %pregion_for_end.i.i.6 - -vector.scevcheck158: ; preds = %pregion_for_end.i.i.5 - %334 = mul i32 %25, %conv2.i.i.6 - %335 = trunc i64 %2 to i32 - %336 = shl i32 %335, 5 - %337 = add i32 %334, %336 - %338 = icmp sgt i32 %337, 2147483616 - br i1 %338, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph159 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.scevcheck158 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.ph159: ; preds = %vector.scevcheck158 - %broadcast.splatinsert166 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat167 = shufflevector <8 x i64> %broadcast.splatinsert166, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert168 = insertelement <8 x i32> undef, i32 %25, i32 0 - %broadcast.splat169 = shufflevector <8 x i32> %broadcast.splatinsert168, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert171 = insertelement <8 x float> undef, float %29, i32 0 - %broadcast.splat172 = shufflevector <8 x float> %broadcast.splatinsert171, <8 x float> undef, <8 x i32> zeroinitializer - %339 = trunc <8 x i64> %broadcast.splat167 to <8 x i32> - %340 = or <8 x i32> %339, - %341 = icmp sgt <8 x i32> %broadcast.splat169, %340 - %342 = extractelement <8 x i32> %340, i32 0 - %343 = add nsw i32 %mul.i.i.6, %342 - %344 = sext i32 %343 to i64 - %345 = getelementptr inbounds float, float* %13, i64 %344 - %346 = bitcast float* %345 to <8 x float>* - %wide.masked.load170 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %346, i32 4, <8 x i1> %341, <8 x float> undef), !tbaa !12 - %347 = fmul <8 x float> %broadcast.splat172, %wide.masked.load170 - %348 = bitcast float* %345 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %347, <8 x float>* %348, i32 4, <8 x i1> %341), !tbaa !12, !llvm.access.group !16 - %349 = trunc <8 x i64> %broadcast.splat167 to <8 x i32> - %350 = or <8 x i32> %349, - %351 = icmp sgt <8 x i32> %broadcast.splat169, %350 - %352 = extractelement <8 x i32> %350, i32 0 - %353 = add nsw i32 %mul.i.i.6, %352 - %354 = sext i32 %353 to i64 - %355 = getelementptr inbounds float, float* %13, i64 %354 - %356 = bitcast float* %355 to <8 x float>* - %wide.masked.load170.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %356, i32 4, <8 x i1> %351, <8 x float> undef), !tbaa !12 - %357 = fmul <8 x float> %broadcast.splat172, %wide.masked.load170.1 - %358 = bitcast float* %355 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %357, <8 x float>* %358, i32 4, <8 x i1> %351), !tbaa !12, !llvm.access.group !16 - %359 = trunc <8 x i64> %broadcast.splat167 to <8 x i32> - %360 = or <8 x i32> %359, - %361 = icmp sgt <8 x i32> %broadcast.splat169, %360 - %362 = extractelement <8 x i32> %360, i32 0 - %363 = add nsw i32 %mul.i.i.6, %362 - %364 = sext i32 %363 to i64 - %365 = getelementptr inbounds float, float* %13, i64 %364 - %366 = bitcast float* %365 to <8 x float>* - %wide.masked.load170.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %366, i32 4, <8 x i1> %361, <8 x float> undef), !tbaa !12 - %367 = fmul <8 x float> %broadcast.splat172, %wide.masked.load170.2 - %368 = bitcast float* %365 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %367, <8 x float>* %368, i32 4, <8 x i1> %361), !tbaa !12, !llvm.access.group !16 - %369 = trunc <8 x i64> %broadcast.splat167 to <8 x i32> - %370 = or <8 x i32> %369, - %371 = icmp sgt <8 x i32> %broadcast.splat169, %370 - %372 = extractelement <8 x i32> %370, i32 0 - %373 = add nsw i32 %mul.i.i.6, %372 - %374 = sext i32 %373 to i64 - %375 = getelementptr inbounds float, float* %13, i64 %374 - %376 = bitcast float* %375 to <8 x float>* - %wide.masked.load170.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %376, i32 4, <8 x i1> %371, <8 x float> undef), !tbaa !12 - %377 = fmul <8 x float> %broadcast.splat172, %wide.masked.load170.3 - %378 = bitcast float* %375 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %377, <8 x float>* %378, i32 4, <8 x i1> %371), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.i.i.us.6.3, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ %522, %if.end.i.i.us.6.3 ], [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %25, %conv.i.i.us.6 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %add.i.i.us.6 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.6 - %379 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %mul6.i.i.us.6 = fmul float %29, %379 - store float %mul6.i.i.us.6, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %380 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %380, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %25, %conv.i.i.us.6.1 - br i1 %cmp4.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.i.i.us.6.3 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph159, %pregion_for_end.i.i.5 - %381 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.7 = or i32 %381, 7 - %cmp.i.i.7 = icmp sgt i32 %17, %conv2.i.i.7 - %mul.i.i.7 = mul nsw i32 %25, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck180, label %_pocl_kernel_mm2_kernel2.exit - -vector.scevcheck180: ; preds = %pregion_for_end.i.i.6 - %382 = mul i32 %25, %conv2.i.i.7 - %383 = trunc i64 %2 to i32 - %384 = shl i32 %383, 5 - %385 = add i32 %382, %384 - %386 = icmp sgt i32 %385, 2147483616 - br i1 %386, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph181 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.scevcheck180 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.ph181: ; preds = %vector.scevcheck180 - %broadcast.splatinsert188 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat189 = shufflevector <8 x i64> %broadcast.splatinsert188, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert190 = insertelement <8 x i32> undef, i32 %25, i32 0 - %broadcast.splat191 = shufflevector <8 x i32> %broadcast.splatinsert190, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert193 = insertelement <8 x float> undef, float %29, i32 0 - %broadcast.splat194 = shufflevector <8 x float> %broadcast.splatinsert193, <8 x float> undef, <8 x i32> zeroinitializer - %387 = trunc <8 x i64> %broadcast.splat189 to <8 x i32> - %388 = or <8 x i32> %387, - %389 = icmp sgt <8 x i32> %broadcast.splat191, %388 - %390 = extractelement <8 x i32> %388, i32 0 - %391 = add nsw i32 %mul.i.i.7, %390 - %392 = sext i32 %391 to i64 - %393 = getelementptr inbounds float, float* %13, i64 %392 - %394 = bitcast float* %393 to <8 x float>* - %wide.masked.load192 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %394, i32 4, <8 x i1> %389, <8 x float> undef), !tbaa !12 - %395 = fmul <8 x float> %broadcast.splat194, %wide.masked.load192 - %396 = bitcast float* %393 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %395, <8 x float>* %396, i32 4, <8 x i1> %389), !tbaa !12, !llvm.access.group !16 - %397 = trunc <8 x i64> %broadcast.splat189 to <8 x i32> - %398 = or <8 x i32> %397, - %399 = icmp sgt <8 x i32> %broadcast.splat191, %398 - %400 = extractelement <8 x i32> %398, i32 0 - %401 = add nsw i32 %mul.i.i.7, %400 - %402 = sext i32 %401 to i64 - %403 = getelementptr inbounds float, float* %13, i64 %402 - %404 = bitcast float* %403 to <8 x float>* - %wide.masked.load192.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %404, i32 4, <8 x i1> %399, <8 x float> undef), !tbaa !12 - %405 = fmul <8 x float> %broadcast.splat194, %wide.masked.load192.1 - %406 = bitcast float* %403 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %405, <8 x float>* %406, i32 4, <8 x i1> %399), !tbaa !12, !llvm.access.group !16 - %407 = trunc <8 x i64> %broadcast.splat189 to <8 x i32> - %408 = or <8 x i32> %407, - %409 = icmp sgt <8 x i32> %broadcast.splat191, %408 - %410 = extractelement <8 x i32> %408, i32 0 - %411 = add nsw i32 %mul.i.i.7, %410 - %412 = sext i32 %411 to i64 - %413 = getelementptr inbounds float, float* %13, i64 %412 - %414 = bitcast float* %413 to <8 x float>* - %wide.masked.load192.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %414, i32 4, <8 x i1> %409, <8 x float> undef), !tbaa !12 - %415 = fmul <8 x float> %broadcast.splat194, %wide.masked.load192.2 - %416 = bitcast float* %413 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %415, <8 x float>* %416, i32 4, <8 x i1> %409), !tbaa !12, !llvm.access.group !16 - %417 = trunc <8 x i64> %broadcast.splat189 to <8 x i32> - %418 = or <8 x i32> %417, - %419 = icmp sgt <8 x i32> %broadcast.splat191, %418 - %420 = extractelement <8 x i32> %418, i32 0 - %421 = add nsw i32 %mul.i.i.7, %420 - %422 = sext i32 %421 to i64 - %423 = getelementptr inbounds float, float* %13, i64 %422 - %424 = bitcast float* %423 to <8 x float>* - %wide.masked.load192.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %424, i32 4, <8 x i1> %419, <8 x float> undef), !tbaa !12 - %425 = fmul <8 x float> %broadcast.splat194, %wide.masked.load192.3 - %426 = bitcast float* %423 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %425, <8 x float>* %426, i32 4, <8 x i1> %419), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_mm2_kernel2.exit - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.i.i.us.7.3, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ %516, %if.end.i.i.us.7.3 ], [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %25, %conv.i.i.us.7 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %add.i.i.us.7 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.7 - %427 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %mul6.i.i.us.7 = fmul float %29, %427 - store float %mul6.i.i.us.7, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %428 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %428, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %25, %conv.i.i.us.7.1 - br i1 %cmp4.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.i.i.us.7.1 - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %438, %if.end.i.i.us.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %25, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.1 - %429 = load float, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12 - %mul6.i.i.us.us.1 = fmul float %29, %429 - store float %mul6.i.i.us.us.1, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %430 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %431 = phi float [ %437, %for.body.i.i.us.us.1 ], [ %mul6.i.i.us.us.1, %if.then.i.i.us.us.1 ] - %432 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %80 - %arrayidx12.i.i.us.us.1 = getelementptr inbounds float, float* %7, i64 %432 - %433 = load float, float* %arrayidx12.i.i.us.us.1, align 4, !tbaa !12 - %434 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1, %30 - %435 = add nsw i64 %434, %430 - %arrayidx16.i.i.us.us.1 = getelementptr inbounds float, float* %10, i64 %435 - %436 = load float, float* %arrayidx16.i.i.us.us.1, align 4, !tbaa !12 - %437 = tail call float @llvm.fmuladd.f32(float %433, float %436, float %431) #2 - store float %437, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !21 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %438 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 1 - %exitcond.not.1 = icmp eq i64 %438, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !19 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %439 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %439, 2 - %cmp.i.i.us.2 = icmp sgt i32 %17, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %25, %conv2.i.i.us.2 - %mul9.i.i.us.2 = mul nsw i32 %21, %conv2.i.i.us.2 - %440 = sext i32 %mul9.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %450, %if.end.i.i.us.us.2 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %25, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.2 - %441 = load float, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12 - %mul6.i.i.us.us.2 = fmul float %29, %441 - store float %mul6.i.i.us.us.2, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %442 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %443 = phi float [ %449, %for.body.i.i.us.us.2 ], [ %mul6.i.i.us.us.2, %if.then.i.i.us.us.2 ] - %444 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %440 - %arrayidx12.i.i.us.us.2 = getelementptr inbounds float, float* %7, i64 %444 - %445 = load float, float* %arrayidx12.i.i.us.us.2, align 4, !tbaa !12 - %446 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2, %30 - %447 = add nsw i64 %446, %442 - %arrayidx16.i.i.us.us.2 = getelementptr inbounds float, float* %10, i64 %447 - %448 = load float, float* %arrayidx16.i.i.us.us.2, align 4, !tbaa !12 - %449 = tail call float @llvm.fmuladd.f32(float %445, float %448, float %443) #2 - store float %449, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !21 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %450 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 1 - %exitcond.not.2 = icmp eq i64 %450, 32 - br i1 %exitcond.not.2, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !19 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %451 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %451, 3 - %cmp.i.i.us.3 = icmp sgt i32 %17, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %25, %conv2.i.i.us.3 - %mul9.i.i.us.3 = mul nsw i32 %21, %conv2.i.i.us.3 - %452 = sext i32 %mul9.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %462, %if.end.i.i.us.us.3 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %25, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.3 - %453 = load float, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12 - %mul6.i.i.us.us.3 = fmul float %29, %453 - store float %mul6.i.i.us.us.3, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %454 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %455 = phi float [ %461, %for.body.i.i.us.us.3 ], [ %mul6.i.i.us.us.3, %if.then.i.i.us.us.3 ] - %456 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %452 - %arrayidx12.i.i.us.us.3 = getelementptr inbounds float, float* %7, i64 %456 - %457 = load float, float* %arrayidx12.i.i.us.us.3, align 4, !tbaa !12 - %458 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3, %30 - %459 = add nsw i64 %458, %454 - %arrayidx16.i.i.us.us.3 = getelementptr inbounds float, float* %10, i64 %459 - %460 = load float, float* %arrayidx16.i.i.us.us.3, align 4, !tbaa !12 - %461 = tail call float @llvm.fmuladd.f32(float %457, float %460, float %455) #2 - store float %461, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !21 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %462 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 1 - %exitcond.not.3 = icmp eq i64 %462, 32 - br i1 %exitcond.not.3, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !19 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %463 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %463, 4 - %cmp.i.i.us.4 = icmp sgt i32 %17, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %25, %conv2.i.i.us.4 - %mul9.i.i.us.4 = mul nsw i32 %21, %conv2.i.i.us.4 - %464 = sext i32 %mul9.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %474, %if.end.i.i.us.us.4 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %25, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.4 - %465 = load float, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12 - %mul6.i.i.us.us.4 = fmul float %29, %465 - store float %mul6.i.i.us.us.4, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %466 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %467 = phi float [ %473, %for.body.i.i.us.us.4 ], [ %mul6.i.i.us.us.4, %if.then.i.i.us.us.4 ] - %468 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %464 - %arrayidx12.i.i.us.us.4 = getelementptr inbounds float, float* %7, i64 %468 - %469 = load float, float* %arrayidx12.i.i.us.us.4, align 4, !tbaa !12 - %470 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4, %30 - %471 = add nsw i64 %470, %466 - %arrayidx16.i.i.us.us.4 = getelementptr inbounds float, float* %10, i64 %471 - %472 = load float, float* %arrayidx16.i.i.us.us.4, align 4, !tbaa !12 - %473 = tail call float @llvm.fmuladd.f32(float %469, float %472, float %467) #2 - store float %473, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !21 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %474 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 1 - %exitcond.not.4 = icmp eq i64 %474, 32 - br i1 %exitcond.not.4, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !19 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %475 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %475, 5 - %cmp.i.i.us.5 = icmp sgt i32 %17, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %25, %conv2.i.i.us.5 - %mul9.i.i.us.5 = mul nsw i32 %21, %conv2.i.i.us.5 - %476 = sext i32 %mul9.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %486, %if.end.i.i.us.us.5 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %25, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.5 - %477 = load float, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12 - %mul6.i.i.us.us.5 = fmul float %29, %477 - store float %mul6.i.i.us.us.5, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %478 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %479 = phi float [ %485, %for.body.i.i.us.us.5 ], [ %mul6.i.i.us.us.5, %if.then.i.i.us.us.5 ] - %480 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %476 - %arrayidx12.i.i.us.us.5 = getelementptr inbounds float, float* %7, i64 %480 - %481 = load float, float* %arrayidx12.i.i.us.us.5, align 4, !tbaa !12 - %482 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5, %30 - %483 = add nsw i64 %482, %478 - %arrayidx16.i.i.us.us.5 = getelementptr inbounds float, float* %10, i64 %483 - %484 = load float, float* %arrayidx16.i.i.us.us.5, align 4, !tbaa !12 - %485 = tail call float @llvm.fmuladd.f32(float %481, float %484, float %479) #2 - store float %485, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !21 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %486 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 1 - %exitcond.not.5 = icmp eq i64 %486, 32 - br i1 %exitcond.not.5, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !19 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %487 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %487, 6 - %cmp.i.i.us.6 = icmp sgt i32 %17, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %25, %conv2.i.i.us.6 - %mul9.i.i.us.6 = mul nsw i32 %21, %conv2.i.i.us.6 - %488 = sext i32 %mul9.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %498, %if.end.i.i.us.us.6 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %25, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.6 - %489 = load float, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12 - %mul6.i.i.us.us.6 = fmul float %29, %489 - store float %mul6.i.i.us.us.6, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %490 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %491 = phi float [ %497, %for.body.i.i.us.us.6 ], [ %mul6.i.i.us.us.6, %if.then.i.i.us.us.6 ] - %492 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %488 - %arrayidx12.i.i.us.us.6 = getelementptr inbounds float, float* %7, i64 %492 - %493 = load float, float* %arrayidx12.i.i.us.us.6, align 4, !tbaa !12 - %494 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6, %30 - %495 = add nsw i64 %494, %490 - %arrayidx16.i.i.us.us.6 = getelementptr inbounds float, float* %10, i64 %495 - %496 = load float, float* %arrayidx16.i.i.us.us.6, align 4, !tbaa !12 - %497 = tail call float @llvm.fmuladd.f32(float %493, float %496, float %491) #2 - store float %497, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !21 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %498 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 1 - %exitcond.not.6 = icmp eq i64 %498, 32 - br i1 %exitcond.not.6, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !19 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %499 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %499, 7 - %cmp.i.i.us.7 = icmp sgt i32 %17, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %25, %conv2.i.i.us.7 - %mul9.i.i.us.7 = mul nsw i32 %21, %conv2.i.i.us.7 - %500 = sext i32 %mul9.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_mm2_kernel2.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %510, %if.end.i.i.us.us.7 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %25, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.7 - %501 = load float, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12 - %mul6.i.i.us.us.7 = fmul float %29, %501 - store float %mul6.i.i.us.us.7, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %502 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %503 = phi float [ %509, %for.body.i.i.us.us.7 ], [ %mul6.i.i.us.us.7, %if.then.i.i.us.us.7 ] - %504 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %500 - %arrayidx12.i.i.us.us.7 = getelementptr inbounds float, float* %7, i64 %504 - %505 = load float, float* %arrayidx12.i.i.us.us.7, align 4, !tbaa !12 - %506 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7, %30 - %507 = add nsw i64 %506, %502 - %arrayidx16.i.i.us.us.7 = getelementptr inbounds float, float* %10, i64 %507 - %508 = load float, float* %arrayidx16.i.i.us.us.7, align 4, !tbaa !12 - %509 = tail call float @llvm.fmuladd.f32(float %505, float %508, float %503) #2 - store float %509, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !21 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %510 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 1 - %exitcond.not.7 = icmp eq i64 %510, 32 - br i1 %exitcond.not.7, label %_pocl_kernel_mm2_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !19 - -if.then.i.i.us.7.1: ; preds = %if.end.i.i.us.7 - %add.i.i.us.7.1 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.7.1 - %511 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %mul6.i.i.us.7.1 = fmul float %29, %511 - store float %mul6.i.i.us.7.1, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.1 - -if.end.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.i.i.us.7 - %512 = or i64 %_local_id_x.i.0.us.7, 2 - %add1.i.i.i.us.7.2 = add nuw nsw i64 %512, %mul.i.i.i - %conv.i.i.us.7.2 = trunc i64 %add1.i.i.i.us.7.2 to i32 - %cmp4.i.i.us.7.2 = icmp sgt i32 %25, %conv.i.i.us.7.2 - br i1 %cmp4.i.i.us.7.2, label %if.then.i.i.us.7.2, label %if.end.i.i.us.7.2 - -if.then.i.i.us.7.2: ; preds = %if.end.i.i.us.7.1 - %add.i.i.us.7.2 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.2 - %idxprom.i.i.us.7.2 = sext i32 %add.i.i.us.7.2 to i64 - %arrayidx.i.i.us.7.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.7.2 - %513 = load float, float* %arrayidx.i.i.us.7.2, align 4, !tbaa !12 - %mul6.i.i.us.7.2 = fmul float %29, %513 - store float %mul6.i.i.us.7.2, float* %arrayidx.i.i.us.7.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.2 - -if.end.i.i.us.7.2: ; preds = %if.then.i.i.us.7.2, %if.end.i.i.us.7.1 - %514 = or i64 %_local_id_x.i.0.us.7, 3 - %add1.i.i.i.us.7.3 = add nuw nsw i64 %514, %mul.i.i.i - %conv.i.i.us.7.3 = trunc i64 %add1.i.i.i.us.7.3 to i32 - %cmp4.i.i.us.7.3 = icmp sgt i32 %25, %conv.i.i.us.7.3 - br i1 %cmp4.i.i.us.7.3, label %if.then.i.i.us.7.3, label %if.end.i.i.us.7.3 - -if.then.i.i.us.7.3: ; preds = %if.end.i.i.us.7.2 - %add.i.i.us.7.3 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.3 - %idxprom.i.i.us.7.3 = sext i32 %add.i.i.us.7.3 to i64 - %arrayidx.i.i.us.7.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.7.3 - %515 = load float, float* %arrayidx.i.i.us.7.3, align 4, !tbaa !12 - %mul6.i.i.us.7.3 = fmul float %29, %515 - store float %mul6.i.i.us.7.3, float* %arrayidx.i.i.us.7.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.3 - -if.end.i.i.us.7.3: ; preds = %if.then.i.i.us.7.3, %if.end.i.i.us.7.2 - %516 = add nuw nsw i64 %_local_id_x.i.0.us.7, 4 - %exitcond33.7.not.3 = icmp eq i64 %516, 32 - br i1 %exitcond33.7.not.3, label %_pocl_kernel_mm2_kernel2.exit.loopexit237, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !40 - -if.then.i.i.us.6.1: ; preds = %if.end.i.i.us.6 - %add.i.i.us.6.1 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.6.1 - %517 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %mul6.i.i.us.6.1 = fmul float %29, %517 - store float %mul6.i.i.us.6.1, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.1 - -if.end.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.i.i.us.6 - %518 = or i64 %_local_id_x.i.0.us.6, 2 - %add1.i.i.i.us.6.2 = add nuw nsw i64 %518, %mul.i.i.i - %conv.i.i.us.6.2 = trunc i64 %add1.i.i.i.us.6.2 to i32 - %cmp4.i.i.us.6.2 = icmp sgt i32 %25, %conv.i.i.us.6.2 - br i1 %cmp4.i.i.us.6.2, label %if.then.i.i.us.6.2, label %if.end.i.i.us.6.2 - -if.then.i.i.us.6.2: ; preds = %if.end.i.i.us.6.1 - %add.i.i.us.6.2 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.2 - %idxprom.i.i.us.6.2 = sext i32 %add.i.i.us.6.2 to i64 - %arrayidx.i.i.us.6.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.6.2 - %519 = load float, float* %arrayidx.i.i.us.6.2, align 4, !tbaa !12 - %mul6.i.i.us.6.2 = fmul float %29, %519 - store float %mul6.i.i.us.6.2, float* %arrayidx.i.i.us.6.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.2 - -if.end.i.i.us.6.2: ; preds = %if.then.i.i.us.6.2, %if.end.i.i.us.6.1 - %520 = or i64 %_local_id_x.i.0.us.6, 3 - %add1.i.i.i.us.6.3 = add nuw nsw i64 %520, %mul.i.i.i - %conv.i.i.us.6.3 = trunc i64 %add1.i.i.i.us.6.3 to i32 - %cmp4.i.i.us.6.3 = icmp sgt i32 %25, %conv.i.i.us.6.3 - br i1 %cmp4.i.i.us.6.3, label %if.then.i.i.us.6.3, label %if.end.i.i.us.6.3 - -if.then.i.i.us.6.3: ; preds = %if.end.i.i.us.6.2 - %add.i.i.us.6.3 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.3 - %idxprom.i.i.us.6.3 = sext i32 %add.i.i.us.6.3 to i64 - %arrayidx.i.i.us.6.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.6.3 - %521 = load float, float* %arrayidx.i.i.us.6.3, align 4, !tbaa !12 - %mul6.i.i.us.6.3 = fmul float %29, %521 - store float %mul6.i.i.us.6.3, float* %arrayidx.i.i.us.6.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.3 - -if.end.i.i.us.6.3: ; preds = %if.then.i.i.us.6.3, %if.end.i.i.us.6.2 - %522 = add nuw nsw i64 %_local_id_x.i.0.us.6, 4 - %exitcond33.6.not.3 = icmp eq i64 %522, 32 - br i1 %exitcond33.6.not.3, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !41 - -if.then.i.i.us.5.1: ; preds = %if.end.i.i.us.5 - %add.i.i.us.5.1 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.5.1 - %523 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %mul6.i.i.us.5.1 = fmul float %29, %523 - store float %mul6.i.i.us.5.1, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.1 - -if.end.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.i.i.us.5 - %524 = or i64 %_local_id_x.i.0.us.5, 2 - %add1.i.i.i.us.5.2 = add nuw nsw i64 %524, %mul.i.i.i - %conv.i.i.us.5.2 = trunc i64 %add1.i.i.i.us.5.2 to i32 - %cmp4.i.i.us.5.2 = icmp sgt i32 %25, %conv.i.i.us.5.2 - br i1 %cmp4.i.i.us.5.2, label %if.then.i.i.us.5.2, label %if.end.i.i.us.5.2 - -if.then.i.i.us.5.2: ; preds = %if.end.i.i.us.5.1 - %add.i.i.us.5.2 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.2 - %idxprom.i.i.us.5.2 = sext i32 %add.i.i.us.5.2 to i64 - %arrayidx.i.i.us.5.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.5.2 - %525 = load float, float* %arrayidx.i.i.us.5.2, align 4, !tbaa !12 - %mul6.i.i.us.5.2 = fmul float %29, %525 - store float %mul6.i.i.us.5.2, float* %arrayidx.i.i.us.5.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.2 - -if.end.i.i.us.5.2: ; preds = %if.then.i.i.us.5.2, %if.end.i.i.us.5.1 - %526 = or i64 %_local_id_x.i.0.us.5, 3 - %add1.i.i.i.us.5.3 = add nuw nsw i64 %526, %mul.i.i.i - %conv.i.i.us.5.3 = trunc i64 %add1.i.i.i.us.5.3 to i32 - %cmp4.i.i.us.5.3 = icmp sgt i32 %25, %conv.i.i.us.5.3 - br i1 %cmp4.i.i.us.5.3, label %if.then.i.i.us.5.3, label %if.end.i.i.us.5.3 - -if.then.i.i.us.5.3: ; preds = %if.end.i.i.us.5.2 - %add.i.i.us.5.3 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.3 - %idxprom.i.i.us.5.3 = sext i32 %add.i.i.us.5.3 to i64 - %arrayidx.i.i.us.5.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.5.3 - %527 = load float, float* %arrayidx.i.i.us.5.3, align 4, !tbaa !12 - %mul6.i.i.us.5.3 = fmul float %29, %527 - store float %mul6.i.i.us.5.3, float* %arrayidx.i.i.us.5.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.3 - -if.end.i.i.us.5.3: ; preds = %if.then.i.i.us.5.3, %if.end.i.i.us.5.2 - %528 = add nuw nsw i64 %_local_id_x.i.0.us.5, 4 - %exitcond33.5.not.3 = icmp eq i64 %528, 32 - br i1 %exitcond33.5.not.3, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !42 - -if.then.i.i.us.4.1: ; preds = %if.end.i.i.us.4 - %add.i.i.us.4.1 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.4.1 - %529 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %mul6.i.i.us.4.1 = fmul float %29, %529 - store float %mul6.i.i.us.4.1, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.1 - -if.end.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.i.i.us.4 - %530 = or i64 %_local_id_x.i.0.us.4, 2 - %add1.i.i.i.us.4.2 = add nuw nsw i64 %530, %mul.i.i.i - %conv.i.i.us.4.2 = trunc i64 %add1.i.i.i.us.4.2 to i32 - %cmp4.i.i.us.4.2 = icmp sgt i32 %25, %conv.i.i.us.4.2 - br i1 %cmp4.i.i.us.4.2, label %if.then.i.i.us.4.2, label %if.end.i.i.us.4.2 - -if.then.i.i.us.4.2: ; preds = %if.end.i.i.us.4.1 - %add.i.i.us.4.2 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.2 - %idxprom.i.i.us.4.2 = sext i32 %add.i.i.us.4.2 to i64 - %arrayidx.i.i.us.4.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.4.2 - %531 = load float, float* %arrayidx.i.i.us.4.2, align 4, !tbaa !12 - %mul6.i.i.us.4.2 = fmul float %29, %531 - store float %mul6.i.i.us.4.2, float* %arrayidx.i.i.us.4.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.2 - -if.end.i.i.us.4.2: ; preds = %if.then.i.i.us.4.2, %if.end.i.i.us.4.1 - %532 = or i64 %_local_id_x.i.0.us.4, 3 - %add1.i.i.i.us.4.3 = add nuw nsw i64 %532, %mul.i.i.i - %conv.i.i.us.4.3 = trunc i64 %add1.i.i.i.us.4.3 to i32 - %cmp4.i.i.us.4.3 = icmp sgt i32 %25, %conv.i.i.us.4.3 - br i1 %cmp4.i.i.us.4.3, label %if.then.i.i.us.4.3, label %if.end.i.i.us.4.3 - -if.then.i.i.us.4.3: ; preds = %if.end.i.i.us.4.2 - %add.i.i.us.4.3 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.3 - %idxprom.i.i.us.4.3 = sext i32 %add.i.i.us.4.3 to i64 - %arrayidx.i.i.us.4.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.4.3 - %533 = load float, float* %arrayidx.i.i.us.4.3, align 4, !tbaa !12 - %mul6.i.i.us.4.3 = fmul float %29, %533 - store float %mul6.i.i.us.4.3, float* %arrayidx.i.i.us.4.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.3 - -if.end.i.i.us.4.3: ; preds = %if.then.i.i.us.4.3, %if.end.i.i.us.4.2 - %534 = add nuw nsw i64 %_local_id_x.i.0.us.4, 4 - %exitcond33.4.not.3 = icmp eq i64 %534, 32 - br i1 %exitcond33.4.not.3, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !43 - -if.then.i.i.us.3.1: ; preds = %if.end.i.i.us.3 - %add.i.i.us.3.1 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3.1 - %535 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %mul6.i.i.us.3.1 = fmul float %29, %535 - store float %mul6.i.i.us.3.1, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.1 - -if.end.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.i.i.us.3 - %536 = or i64 %_local_id_x.i.0.us.3, 2 - %add1.i.i.i.us.3.2 = add nuw nsw i64 %536, %mul.i.i.i - %conv.i.i.us.3.2 = trunc i64 %add1.i.i.i.us.3.2 to i32 - %cmp4.i.i.us.3.2 = icmp sgt i32 %25, %conv.i.i.us.3.2 - br i1 %cmp4.i.i.us.3.2, label %if.then.i.i.us.3.2, label %if.end.i.i.us.3.2 - -if.then.i.i.us.3.2: ; preds = %if.end.i.i.us.3.1 - %add.i.i.us.3.2 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.2 - %idxprom.i.i.us.3.2 = sext i32 %add.i.i.us.3.2 to i64 - %arrayidx.i.i.us.3.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3.2 - %537 = load float, float* %arrayidx.i.i.us.3.2, align 4, !tbaa !12 - %mul6.i.i.us.3.2 = fmul float %29, %537 - store float %mul6.i.i.us.3.2, float* %arrayidx.i.i.us.3.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.2 - -if.end.i.i.us.3.2: ; preds = %if.then.i.i.us.3.2, %if.end.i.i.us.3.1 - %538 = or i64 %_local_id_x.i.0.us.3, 3 - %add1.i.i.i.us.3.3 = add nuw nsw i64 %538, %mul.i.i.i - %conv.i.i.us.3.3 = trunc i64 %add1.i.i.i.us.3.3 to i32 - %cmp4.i.i.us.3.3 = icmp sgt i32 %25, %conv.i.i.us.3.3 - br i1 %cmp4.i.i.us.3.3, label %if.then.i.i.us.3.3, label %if.end.i.i.us.3.3 - -if.then.i.i.us.3.3: ; preds = %if.end.i.i.us.3.2 - %add.i.i.us.3.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.3 - %idxprom.i.i.us.3.3 = sext i32 %add.i.i.us.3.3 to i64 - %arrayidx.i.i.us.3.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3.3 - %539 = load float, float* %arrayidx.i.i.us.3.3, align 4, !tbaa !12 - %mul6.i.i.us.3.3 = fmul float %29, %539 - store float %mul6.i.i.us.3.3, float* %arrayidx.i.i.us.3.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.3 - -if.end.i.i.us.3.3: ; preds = %if.then.i.i.us.3.3, %if.end.i.i.us.3.2 - %540 = add nuw nsw i64 %_local_id_x.i.0.us.3, 4 - %exitcond33.3.not.3 = icmp eq i64 %540, 32 - br i1 %exitcond33.3.not.3, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !44 - -if.then.i.i.us.2.1: ; preds = %if.end.i.i.us.2 - %add.i.i.us.2.1 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2.1 - %541 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %mul6.i.i.us.2.1 = fmul float %29, %541 - store float %mul6.i.i.us.2.1, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.1 - -if.end.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.i.i.us.2 - %542 = or i64 %_local_id_x.i.0.us.2, 2 - %add1.i.i.i.us.2.2 = add nuw nsw i64 %542, %mul.i.i.i - %conv.i.i.us.2.2 = trunc i64 %add1.i.i.i.us.2.2 to i32 - %cmp4.i.i.us.2.2 = icmp sgt i32 %25, %conv.i.i.us.2.2 - br i1 %cmp4.i.i.us.2.2, label %if.then.i.i.us.2.2, label %if.end.i.i.us.2.2 - -if.then.i.i.us.2.2: ; preds = %if.end.i.i.us.2.1 - %add.i.i.us.2.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.2 - %idxprom.i.i.us.2.2 = sext i32 %add.i.i.us.2.2 to i64 - %arrayidx.i.i.us.2.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2.2 - %543 = load float, float* %arrayidx.i.i.us.2.2, align 4, !tbaa !12 - %mul6.i.i.us.2.2 = fmul float %29, %543 - store float %mul6.i.i.us.2.2, float* %arrayidx.i.i.us.2.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.2 - -if.end.i.i.us.2.2: ; preds = %if.then.i.i.us.2.2, %if.end.i.i.us.2.1 - %544 = or i64 %_local_id_x.i.0.us.2, 3 - %add1.i.i.i.us.2.3 = add nuw nsw i64 %544, %mul.i.i.i - %conv.i.i.us.2.3 = trunc i64 %add1.i.i.i.us.2.3 to i32 - %cmp4.i.i.us.2.3 = icmp sgt i32 %25, %conv.i.i.us.2.3 - br i1 %cmp4.i.i.us.2.3, label %if.then.i.i.us.2.3, label %if.end.i.i.us.2.3 - -if.then.i.i.us.2.3: ; preds = %if.end.i.i.us.2.2 - %add.i.i.us.2.3 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.3 - %idxprom.i.i.us.2.3 = sext i32 %add.i.i.us.2.3 to i64 - %arrayidx.i.i.us.2.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2.3 - %545 = load float, float* %arrayidx.i.i.us.2.3, align 4, !tbaa !12 - %mul6.i.i.us.2.3 = fmul float %29, %545 - store float %mul6.i.i.us.2.3, float* %arrayidx.i.i.us.2.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.3 - -if.end.i.i.us.2.3: ; preds = %if.then.i.i.us.2.3, %if.end.i.i.us.2.2 - %546 = add nuw nsw i64 %_local_id_x.i.0.us.2, 4 - %exitcond33.2.not.3 = icmp eq i64 %546, 32 - br i1 %exitcond33.2.not.3, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !45 - -if.then.i.i.us.1.1: ; preds = %if.end.i.i.us.1 - %add.i.i.us.1.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1.1 - %547 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %mul6.i.i.us.1.1 = fmul float %29, %547 - store float %mul6.i.i.us.1.1, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.1 - -if.end.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.i.i.us.1 - %548 = or i64 %_local_id_x.i.0.us.1, 2 - %add1.i.i.i.us.1.2 = add nuw nsw i64 %548, %mul.i.i.i - %conv.i.i.us.1.2 = trunc i64 %add1.i.i.i.us.1.2 to i32 - %cmp4.i.i.us.1.2 = icmp sgt i32 %25, %conv.i.i.us.1.2 - br i1 %cmp4.i.i.us.1.2, label %if.then.i.i.us.1.2, label %if.end.i.i.us.1.2 - -if.then.i.i.us.1.2: ; preds = %if.end.i.i.us.1.1 - %add.i.i.us.1.2 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.2 - %idxprom.i.i.us.1.2 = sext i32 %add.i.i.us.1.2 to i64 - %arrayidx.i.i.us.1.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1.2 - %549 = load float, float* %arrayidx.i.i.us.1.2, align 4, !tbaa !12 - %mul6.i.i.us.1.2 = fmul float %29, %549 - store float %mul6.i.i.us.1.2, float* %arrayidx.i.i.us.1.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.2 - -if.end.i.i.us.1.2: ; preds = %if.then.i.i.us.1.2, %if.end.i.i.us.1.1 - %550 = or i64 %_local_id_x.i.0.us.1, 3 - %add1.i.i.i.us.1.3 = add nuw nsw i64 %550, %mul.i.i.i - %conv.i.i.us.1.3 = trunc i64 %add1.i.i.i.us.1.3 to i32 - %cmp4.i.i.us.1.3 = icmp sgt i32 %25, %conv.i.i.us.1.3 - br i1 %cmp4.i.i.us.1.3, label %if.then.i.i.us.1.3, label %if.end.i.i.us.1.3 - -if.then.i.i.us.1.3: ; preds = %if.end.i.i.us.1.2 - %add.i.i.us.1.3 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.3 - %idxprom.i.i.us.1.3 = sext i32 %add.i.i.us.1.3 to i64 - %arrayidx.i.i.us.1.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1.3 - %551 = load float, float* %arrayidx.i.i.us.1.3, align 4, !tbaa !12 - %mul6.i.i.us.1.3 = fmul float %29, %551 - store float %mul6.i.i.us.1.3, float* %arrayidx.i.i.us.1.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.3 - -if.end.i.i.us.1.3: ; preds = %if.then.i.i.us.1.3, %if.end.i.i.us.1.2 - %552 = add nuw nsw i64 %_local_id_x.i.0.us.1, 4 - %exitcond33.1.not.3 = icmp eq i64 %552, 32 - br i1 %exitcond33.1.not.3, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !46 - -if.then.i.i.us.1213: ; preds = %if.end.i.i.us - %add.i.i.us.1209 = add nsw i32 %mul.i.i.us, %conv.i.i.us.1206 - %idxprom.i.i.us.1210 = sext i32 %add.i.i.us.1209 to i64 - %arrayidx.i.i.us.1211 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1210 - %553 = load float, float* %arrayidx.i.i.us.1211, align 4, !tbaa !12 - %mul6.i.i.us.1212 = fmul float %29, %553 - store float %mul6.i.i.us.1212, float* %arrayidx.i.i.us.1211, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1214 - -if.end.i.i.us.1214: ; preds = %if.then.i.i.us.1213, %if.end.i.i.us - %554 = or i64 %_local_id_x.i.0.us, 2 - %add1.i.i.i.us.2216 = add nuw nsw i64 %554, %mul.i.i.i - %conv.i.i.us.2217 = trunc i64 %add1.i.i.i.us.2216 to i32 - %cmp4.i.i.us.2218 = icmp sgt i32 %25, %conv.i.i.us.2217 - br i1 %cmp4.i.i.us.2218, label %if.then.i.i.us.2224, label %if.end.i.i.us.2225 - -if.then.i.i.us.2224: ; preds = %if.end.i.i.us.1214 - %add.i.i.us.2220 = add nsw i32 %mul.i.i.us, %conv.i.i.us.2217 - %idxprom.i.i.us.2221 = sext i32 %add.i.i.us.2220 to i64 - %arrayidx.i.i.us.2222 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2221 - %555 = load float, float* %arrayidx.i.i.us.2222, align 4, !tbaa !12 - %mul6.i.i.us.2223 = fmul float %29, %555 - store float %mul6.i.i.us.2223, float* %arrayidx.i.i.us.2222, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2225 - -if.end.i.i.us.2225: ; preds = %if.then.i.i.us.2224, %if.end.i.i.us.1214 - %556 = or i64 %_local_id_x.i.0.us, 3 - %add1.i.i.i.us.3227 = add nuw nsw i64 %556, %mul.i.i.i - %conv.i.i.us.3228 = trunc i64 %add1.i.i.i.us.3227 to i32 - %cmp4.i.i.us.3229 = icmp sgt i32 %25, %conv.i.i.us.3228 - br i1 %cmp4.i.i.us.3229, label %if.then.i.i.us.3235, label %if.end.i.i.us.3236 - -if.then.i.i.us.3235: ; preds = %if.end.i.i.us.2225 - %add.i.i.us.3231 = add nsw i32 %mul.i.i.us, %conv.i.i.us.3228 - %idxprom.i.i.us.3232 = sext i32 %add.i.i.us.3231 to i64 - %arrayidx.i.i.us.3233 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3232 - %557 = load float, float* %arrayidx.i.i.us.3233, align 4, !tbaa !12 - %mul6.i.i.us.3234 = fmul float %29, %557 - store float %mul6.i.i.us.3234, float* %arrayidx.i.i.us.3233, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3236 - -if.end.i.i.us.3236: ; preds = %if.then.i.i.us.3235, %if.end.i.i.us.2225 - %558 = add nuw nsw i64 %_local_id_x.i.0.us, 4 - %exitcond33.not.3 = icmp eq i64 %558, 32 - br i1 %exitcond33.not.3, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !47 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #4 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } -attributes #3 = { argmemonly nounwind readonly willreturn } -attributes #4 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int", !"int", !"int", !"DATA_TYPE", !"DATA_TYPE"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int", !"int", !"int", !"float", !"float"} -!9 = !{!"", !"", !"", !"", !"", !"", !"", !"", !""} -!10 = !{!"tmp", !"C", !"D", !"ni", !"nj", !"nk", !"nl", !"alpha", !"beta"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17, !18} -!17 = distinct !{} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.parallel_accesses", !17} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.unroll.disable"} -!23 = distinct !{!23, !20, !24} -!24 = !{!"llvm.loop.isvectorized", i32 1} -!25 = distinct !{!25, !20, !24} -!26 = distinct !{!26, !20, !24} -!27 = distinct !{!27, !20, !24} -!28 = distinct !{!28, !20, !24} -!29 = distinct !{!29, !20, !24} -!30 = distinct !{!30, !20, !24} -!31 = distinct !{!31, !20, !24} -!32 = distinct !{!32, !20, !24} -!33 = distinct !{!33, !20, !24} -!34 = distinct !{!34, !20, !24} -!35 = distinct !{!35, !20, !24} -!36 = distinct !{!36, !20, !24} -!37 = distinct !{!37, !20, !24} -!38 = distinct !{!38, !20, !24} -!39 = distinct !{!39, !20, !24} -!40 = distinct !{!40, !20, !24} -!41 = distinct !{!41, !20, !24} -!42 = distinct !{!42, !20, !24} -!43 = distinct !{!43, !20, !24} -!44 = distinct !{!44, !20, !24} -!45 = distinct !{!45, !20, !24} -!46 = distinct !{!46, !20, !24} -!47 = distinct !{!47, !20, !24} diff --git a/pocl_irs/3DConvolution.ll b/pocl_irs/3DConvolution.ll deleted file mode 100644 index dba6afb..0000000 --- a/pocl_irs/3DConvolution.ll +++ /dev/null @@ -1,1323 +0,0 @@ -; ModuleID = './IG/LJEKIBHBJBLBAGKGFKBMHFMKJDPFDGDLLMHFN/Convolution3D_kernel/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_Convolution3D_kernel(float* nocapture readonly %0, float* nocapture %1, i32 %2, i32 %3, i32 %4, i32 %5, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %6, i64 %7, i64 %8, i64 %9) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %7, 5 - %mul3.i.i = shl i64 %8, 3 - %sub.i = add nsw i32 %2, -1 - %cmp.i = icmp sgt i32 %sub.i, %5 - %sub4.i = add nsw i32 %3, -1 - %sub8.i = add nsw i32 %4, -1 - %cmp12.i = icmp sgt i32 %5, 0 - %mul189.i = mul i32 %5, %3 - %sub20.i = add nsw i32 %5, -1 - %mul.i = mul nsw i32 %4, %3 - %mul21.i = mul nsw i32 %sub20.i, %mul.i - %add27.i = add nuw nsw i32 %5, 1 - %mul29.i = mul nsw i32 %add27.i, %mul.i - %mul84.i = mul nsw i32 %mul.i, %5 - %11 = trunc i64 %8 to i32 - %12 = shl i32 %11, 3 - %13 = add i32 %mul189.i, %12 - %14 = mul i32 %13, %4 - %15 = trunc i64 %7 to i32 - %16 = shl i32 %15, 5 - %17 = add i32 %14, %16 - %18 = trunc i64 %mul.i.i to i32 - %19 = trunc i64 %mul.i.i to i32 - %20 = or i32 %19, 8 - %21 = trunc i64 %mul.i.i to i32 - %22 = or i32 %21, 16 - %23 = trunc i64 %mul.i.i to i32 - %24 = or i32 %23, 24 - %conv.i = trunc i64 %mul.i.i to i32 - %25 = trunc i64 %mul.i.i to i32 - %conv.i.1 = or i32 %25, 1 - %26 = trunc i64 %mul.i.i to i32 - %conv.i.2 = or i32 %26, 2 - %27 = trunc i64 %mul.i.i to i32 - %conv.i.3 = or i32 %27, 3 - %28 = trunc i64 %mul.i.i to i32 - %conv.i.4 = or i32 %28, 4 - %29 = trunc i64 %mul.i.i to i32 - %conv.i.5 = or i32 %29, 5 - %30 = trunc i64 %mul.i.i to i32 - %conv.i.6 = or i32 %30, 6 - %31 = trunc i64 %mul.i.i to i32 - %conv.i.7 = or i32 %31, 7 - %32 = trunc i64 %mul.i.i to i32 - %conv.i.8 = or i32 %32, 8 - %33 = trunc i64 %mul.i.i to i32 - %conv.i.9 = or i32 %33, 9 - %34 = trunc i64 %mul.i.i to i32 - %conv.i.10 = or i32 %34, 10 - %35 = trunc i64 %mul.i.i to i32 - %conv.i.11 = or i32 %35, 11 - %36 = trunc i64 %mul.i.i to i32 - %conv.i.12 = or i32 %36, 12 - %37 = trunc i64 %mul.i.i to i32 - %conv.i.13 = or i32 %37, 13 - %38 = trunc i64 %mul.i.i to i32 - %conv.i.14 = or i32 %38, 14 - %39 = trunc i64 %mul.i.i to i32 - %conv.i.15 = or i32 %39, 15 - %40 = trunc i64 %mul.i.i to i32 - %conv.i.16 = or i32 %40, 16 - %41 = trunc i64 %mul.i.i to i32 - %conv.i.17 = or i32 %41, 17 - %42 = trunc i64 %mul.i.i to i32 - %conv.i.18 = or i32 %42, 18 - %43 = trunc i64 %mul.i.i to i32 - %conv.i.19 = or i32 %43, 19 - %44 = trunc i64 %mul.i.i to i32 - %conv.i.20 = or i32 %44, 20 - %45 = trunc i64 %mul.i.i to i32 - %conv.i.21 = or i32 %45, 21 - %46 = trunc i64 %mul.i.i to i32 - %conv.i.22 = or i32 %46, 22 - %47 = trunc i64 %mul.i.i to i32 - %conv.i.23 = or i32 %47, 23 - %48 = trunc i64 %mul.i.i to i32 - %conv.i.24 = or i32 %48, 24 - %49 = trunc i64 %mul.i.i to i32 - %conv.i.25 = or i32 %49, 25 - %50 = trunc i64 %mul.i.i to i32 - %conv.i.26 = or i32 %50, 26 - %51 = trunc i64 %mul.i.i to i32 - %conv.i.27 = or i32 %51, 27 - %52 = trunc i64 %mul.i.i to i32 - %conv.i.28 = or i32 %52, 28 - %53 = trunc i64 %mul.i.i to i32 - %conv.i.29 = or i32 %53, 29 - %54 = trunc i64 %mul.i.i to i32 - %conv.i.30 = or i32 %54, 30 - %55 = trunc i64 %mul.i.i to i32 - %conv.i.31 = or i32 %55, 31 - br label %pregion_for_entry.pregion_for_init.i - -pregion_for_entry.pregion_for_init.i: ; preds = %pregion_for_end.i, %10 - %_local_id_y.0 = phi i64 [ 0, %10 ], [ %134, %pregion_for_end.i ] - %add6.i.i = add nuw nsw i64 %_local_id_y.0, %mul3.i.i - %conv2.i = trunc i64 %add6.i.i to i32 - %cmp5.i = icmp sgt i32 %sub4.i, %conv2.i - %or.cond334.i = and i1 %cmp.i, %cmp5.i - %cmp15.i = icmp sgt i32 %conv2.i, 0 - %reass.add.i = add i32 %mul189.i, %conv2.i - %reass.mul.i = mul i32 %reass.add.i, %4 - %sub22.i = add nsw i32 %conv2.i, -1 - %mul23.i = mul nsw i32 %sub22.i, %4 - %add.i = add nsw i32 %mul23.i, %mul21.i - %add32.i = add nsw i32 %mul23.i, %mul29.i - %mul97.i = mul nsw i32 %conv2.i, %4 - %add107.i = add nuw nsw i32 %conv2.i, 1 - %mul108.i = mul nsw i32 %add107.i, %4 - br i1 %or.cond334.i, label %pregion_for_entry.entry.i.us.preheader, label %vector.scevcheck - -pregion_for_entry.entry.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i - br label %pregion_for_entry.entry.i.us - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i - %56 = trunc i64 %_local_id_y.0 to i32 - %57 = mul i32 %56, %4 - %58 = add i32 %57, %17 - %59 = icmp sgt i32 %58, 2147483616 - br i1 %59, label %pregion_for_entry.entry.i, label %vector.body - -vector.body: ; preds = %vector.scevcheck - %60 = add i32 %reass.mul.i, %18 - %61 = sext i32 %60 to i64 - %62 = getelementptr inbounds float, float* %1, i64 %61 - %63 = bitcast float* %62 to <8 x i32>* - store <8 x i32> zeroinitializer, <8 x i32>* %63, align 4, !tbaa !12, !llvm.access.group !16 - %64 = add i32 %reass.mul.i, %20 - %65 = sext i32 %64 to i64 - %66 = getelementptr inbounds float, float* %1, i64 %65 - %67 = bitcast float* %66 to <8 x i32>* - store <8 x i32> zeroinitializer, <8 x i32>* %67, align 4, !tbaa !12, !llvm.access.group !16 - %68 = add i32 %reass.mul.i, %22 - %69 = sext i32 %68 to i64 - %70 = getelementptr inbounds float, float* %1, i64 %69 - %71 = bitcast float* %70 to <8 x i32>* - store <8 x i32> zeroinitializer, <8 x i32>* %71, align 4, !tbaa !12, !llvm.access.group !16 - %72 = add i32 %reass.mul.i, %24 - %73 = sext i32 %72 to i64 - %74 = getelementptr inbounds float, float* %1, i64 %73 - %75 = bitcast float* %74 to <8 x i32>* - store <8 x i32> zeroinitializer, <8 x i32>* %75, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %101, %if.end.r_exit.i.us ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp9.i.us = icmp sgt i32 %sub8.i, %conv.i.us - %or.cond.i.us = and i1 %cmp12.i, %cmp9.i.us - %or.cond195.i.us = and i1 %cmp15.i, %or.cond.i.us - %cmp18.i.us = icmp sgt i32 %conv.i.us, 0 - %or.cond196.i.us = and i1 %cmp18.i.us, %or.cond195.i.us - br i1 %or.cond196.i.us, label %if.then.i.us, label %if.else.i.us - -if.else.i.us: ; preds = %pregion_for_entry.entry.i.us - %add192.i.us = add i32 %reass.mul.i, %conv.i.us - %idxprom193.i.us = sext i32 %add192.i.us to i64 - br label %if.end.r_exit.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %sub24.i.us = add nsw i32 %conv.i.us, -1 - %add25.i.us = add nsw i32 %sub24.i.us, %add.i - %idxprom.i.us = sext i32 %add25.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us - %76 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %add34.i.us = add nsw i32 %sub24.i.us, %add32.i - %idxprom35.i.us = sext i32 %add34.i.us to i64 - %arrayidx36.i.us = getelementptr inbounds float, float* %0, i64 %idxprom35.i.us - %77 = load float, float* %arrayidx36.i.us, align 4, !tbaa !12 - %mul37.i.us = fmul float %77, 4.000000e+00 - %78 = tail call float @llvm.fmuladd.f32(float %76, float 2.000000e+00, float %mul37.i.us) #2 - %79 = tail call float @llvm.fmuladd.f32(float %76, float 5.000000e+00, float %78) #2 - %80 = tail call float @llvm.fmuladd.f32(float %77, float 7.000000e+00, float %79) #2 - %81 = tail call float @llvm.fmuladd.f32(float %76, float -8.000000e+00, float %80) #2 - %82 = tail call float @llvm.fmuladd.f32(float %77, float 1.000000e+01, float %81) #2 - %add87.i.us = add i32 %mul84.i, %conv.i.us - %add89.i.us = add i32 %add87.i.us, %mul23.i - %idxprom90.i.us = sext i32 %add89.i.us to i64 - %arrayidx91.i.us = getelementptr inbounds float, float* %0, i64 %idxprom90.i.us - %83 = load float, float* %arrayidx91.i.us, align 4, !tbaa !12 - %84 = tail call float @llvm.fmuladd.f32(float %83, float -3.000000e+00, float %82) #2 - %add100.i.us = add i32 %add87.i.us, %mul97.i - %idxprom101.i.us = sext i32 %add100.i.us to i64 - %arrayidx102.i.us = getelementptr inbounds float, float* %0, i64 %idxprom101.i.us - %85 = load float, float* %arrayidx102.i.us, align 4, !tbaa !12 - %86 = tail call float @llvm.fmuladd.f32(float %85, float 6.000000e+00, float %84) #2 - %add111.i.us = add i32 %add87.i.us, %mul108.i - %idxprom112.i.us = sext i32 %add111.i.us to i64 - %arrayidx113.i.us = getelementptr inbounds float, float* %0, i64 %idxprom112.i.us - %87 = load float, float* %arrayidx113.i.us, align 4, !tbaa !12 - %88 = tail call float @llvm.fmuladd.f32(float %87, float -9.000000e+00, float %86) #2 - %add121.i.us = add nuw nsw i32 %conv.i.us, 1 - %add122.i.us = add nsw i32 %add121.i.us, %add.i - %idxprom123.i.us = sext i32 %add122.i.us to i64 - %arrayidx124.i.us = getelementptr inbounds float, float* %0, i64 %idxprom123.i.us - %89 = load float, float* %arrayidx124.i.us, align 4, !tbaa !12 - %90 = tail call float @llvm.fmuladd.f32(float %89, float 2.000000e+00, float %88) #2 - %add133.i.us = add nsw i32 %add121.i.us, %add32.i - %idxprom134.i.us = sext i32 %add133.i.us to i64 - %arrayidx135.i.us = getelementptr inbounds float, float* %0, i64 %idxprom134.i.us - %91 = load float, float* %arrayidx135.i.us, align 4, !tbaa !12 - %92 = tail call float @llvm.fmuladd.f32(float %91, float 4.000000e+00, float %90) #2 - %add142.i.us = add i32 %add121.i.us, %mul21.i - %add144.i.us = add i32 %add142.i.us, %mul97.i - %idxprom145.i.us = sext i32 %add144.i.us to i64 - %arrayidx146.i.us = getelementptr inbounds float, float* %0, i64 %idxprom145.i.us - %93 = load float, float* %arrayidx146.i.us, align 4, !tbaa !12 - %94 = tail call float @llvm.fmuladd.f32(float %93, float 5.000000e+00, float %92) #2 - %add153.i.us = add i32 %add121.i.us, %mul29.i - %add155.i.us = add i32 %add153.i.us, %mul97.i - %idxprom156.i.us = sext i32 %add155.i.us to i64 - %arrayidx157.i.us = getelementptr inbounds float, float* %0, i64 %idxprom156.i.us - %95 = load float, float* %arrayidx157.i.us, align 4, !tbaa !12 - %96 = tail call float @llvm.fmuladd.f32(float %95, float 7.000000e+00, float %94) #2 - %add166.i.us = add i32 %add142.i.us, %mul108.i - %idxprom167.i.us = sext i32 %add166.i.us to i64 - %arrayidx168.i.us = getelementptr inbounds float, float* %0, i64 %idxprom167.i.us - %97 = load float, float* %arrayidx168.i.us, align 4, !tbaa !12 - %98 = tail call float @llvm.fmuladd.f32(float %97, float -8.000000e+00, float %96) #2 - %add177.i.us = add i32 %add153.i.us, %mul108.i - %idxprom178.i.us = sext i32 %add177.i.us to i64 - %arrayidx179.i.us = getelementptr inbounds float, float* %0, i64 %idxprom178.i.us - %99 = load float, float* %arrayidx179.i.us, align 4, !tbaa !12 - %100 = tail call float @llvm.fmuladd.f32(float %99, float 1.000000e+01, float %98) #2 - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %if.then.i.us, %if.else.i.us - %idxprom193.i.us.sink = phi i64 [ %idxprom101.i.us, %if.then.i.us ], [ %idxprom193.i.us, %if.else.i.us ] - %.sink = phi float [ %100, %if.then.i.us ], [ 0.000000e+00, %if.else.i.us ] - %arrayidx194.i.us = getelementptr inbounds float, float* %1, i64 %idxprom193.i.us.sink - store float %.sink, float* %arrayidx194.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %101 = add nuw nsw i64 %_local_id_x.0.us, 1 - %exitcond2.not = icmp eq i64 %101, 32 - br i1 %exitcond2.not, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !19 - -pregion_for_entry.entry.i: ; preds = %vector.scevcheck - %add192.i = add i32 %reass.mul.i, %conv.i - %idxprom193.i = sext i32 %add192.i to i64 - %arrayidx194.i = getelementptr inbounds float, float* %1, i64 %idxprom193.i - %102 = bitcast float* %arrayidx194.i to i32* - store i32 0, i32* %102, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.1 = add i32 %reass.mul.i, %conv.i.1 - %idxprom193.i.1 = sext i32 %add192.i.1 to i64 - %arrayidx194.i.1 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.1 - %103 = bitcast float* %arrayidx194.i.1 to i32* - store i32 0, i32* %103, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.2 = add i32 %reass.mul.i, %conv.i.2 - %idxprom193.i.2 = sext i32 %add192.i.2 to i64 - %arrayidx194.i.2 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.2 - %104 = bitcast float* %arrayidx194.i.2 to i32* - store i32 0, i32* %104, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.3 = add i32 %reass.mul.i, %conv.i.3 - %idxprom193.i.3 = sext i32 %add192.i.3 to i64 - %arrayidx194.i.3 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.3 - %105 = bitcast float* %arrayidx194.i.3 to i32* - store i32 0, i32* %105, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.4 = add i32 %reass.mul.i, %conv.i.4 - %idxprom193.i.4 = sext i32 %add192.i.4 to i64 - %arrayidx194.i.4 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.4 - %106 = bitcast float* %arrayidx194.i.4 to i32* - store i32 0, i32* %106, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.5 = add i32 %reass.mul.i, %conv.i.5 - %idxprom193.i.5 = sext i32 %add192.i.5 to i64 - %arrayidx194.i.5 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.5 - %107 = bitcast float* %arrayidx194.i.5 to i32* - store i32 0, i32* %107, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.6 = add i32 %reass.mul.i, %conv.i.6 - %idxprom193.i.6 = sext i32 %add192.i.6 to i64 - %arrayidx194.i.6 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.6 - %108 = bitcast float* %arrayidx194.i.6 to i32* - store i32 0, i32* %108, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.7 = add i32 %reass.mul.i, %conv.i.7 - %idxprom193.i.7 = sext i32 %add192.i.7 to i64 - %arrayidx194.i.7 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.7 - %109 = bitcast float* %arrayidx194.i.7 to i32* - store i32 0, i32* %109, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.8 = add i32 %reass.mul.i, %conv.i.8 - %idxprom193.i.8 = sext i32 %add192.i.8 to i64 - %arrayidx194.i.8 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.8 - %110 = bitcast float* %arrayidx194.i.8 to i32* - store i32 0, i32* %110, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.9 = add i32 %reass.mul.i, %conv.i.9 - %idxprom193.i.9 = sext i32 %add192.i.9 to i64 - %arrayidx194.i.9 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.9 - %111 = bitcast float* %arrayidx194.i.9 to i32* - store i32 0, i32* %111, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.10 = add i32 %reass.mul.i, %conv.i.10 - %idxprom193.i.10 = sext i32 %add192.i.10 to i64 - %arrayidx194.i.10 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.10 - %112 = bitcast float* %arrayidx194.i.10 to i32* - store i32 0, i32* %112, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.11 = add i32 %reass.mul.i, %conv.i.11 - %idxprom193.i.11 = sext i32 %add192.i.11 to i64 - %arrayidx194.i.11 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.11 - %113 = bitcast float* %arrayidx194.i.11 to i32* - store i32 0, i32* %113, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.12 = add i32 %reass.mul.i, %conv.i.12 - %idxprom193.i.12 = sext i32 %add192.i.12 to i64 - %arrayidx194.i.12 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.12 - %114 = bitcast float* %arrayidx194.i.12 to i32* - store i32 0, i32* %114, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.13 = add i32 %reass.mul.i, %conv.i.13 - %idxprom193.i.13 = sext i32 %add192.i.13 to i64 - %arrayidx194.i.13 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.13 - %115 = bitcast float* %arrayidx194.i.13 to i32* - store i32 0, i32* %115, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.14 = add i32 %reass.mul.i, %conv.i.14 - %idxprom193.i.14 = sext i32 %add192.i.14 to i64 - %arrayidx194.i.14 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.14 - %116 = bitcast float* %arrayidx194.i.14 to i32* - store i32 0, i32* %116, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.15 = add i32 %reass.mul.i, %conv.i.15 - %idxprom193.i.15 = sext i32 %add192.i.15 to i64 - %arrayidx194.i.15 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.15 - %117 = bitcast float* %arrayidx194.i.15 to i32* - store i32 0, i32* %117, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.16 = add i32 %reass.mul.i, %conv.i.16 - %idxprom193.i.16 = sext i32 %add192.i.16 to i64 - %arrayidx194.i.16 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.16 - %118 = bitcast float* %arrayidx194.i.16 to i32* - store i32 0, i32* %118, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.17 = add i32 %reass.mul.i, %conv.i.17 - %idxprom193.i.17 = sext i32 %add192.i.17 to i64 - %arrayidx194.i.17 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.17 - %119 = bitcast float* %arrayidx194.i.17 to i32* - store i32 0, i32* %119, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.18 = add i32 %reass.mul.i, %conv.i.18 - %idxprom193.i.18 = sext i32 %add192.i.18 to i64 - %arrayidx194.i.18 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.18 - %120 = bitcast float* %arrayidx194.i.18 to i32* - store i32 0, i32* %120, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.19 = add i32 %reass.mul.i, %conv.i.19 - %idxprom193.i.19 = sext i32 %add192.i.19 to i64 - %arrayidx194.i.19 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.19 - %121 = bitcast float* %arrayidx194.i.19 to i32* - store i32 0, i32* %121, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.20 = add i32 %reass.mul.i, %conv.i.20 - %idxprom193.i.20 = sext i32 %add192.i.20 to i64 - %arrayidx194.i.20 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.20 - %122 = bitcast float* %arrayidx194.i.20 to i32* - store i32 0, i32* %122, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.21 = add i32 %reass.mul.i, %conv.i.21 - %idxprom193.i.21 = sext i32 %add192.i.21 to i64 - %arrayidx194.i.21 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.21 - %123 = bitcast float* %arrayidx194.i.21 to i32* - store i32 0, i32* %123, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.22 = add i32 %reass.mul.i, %conv.i.22 - %idxprom193.i.22 = sext i32 %add192.i.22 to i64 - %arrayidx194.i.22 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.22 - %124 = bitcast float* %arrayidx194.i.22 to i32* - store i32 0, i32* %124, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.23 = add i32 %reass.mul.i, %conv.i.23 - %idxprom193.i.23 = sext i32 %add192.i.23 to i64 - %arrayidx194.i.23 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.23 - %125 = bitcast float* %arrayidx194.i.23 to i32* - store i32 0, i32* %125, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.24 = add i32 %reass.mul.i, %conv.i.24 - %idxprom193.i.24 = sext i32 %add192.i.24 to i64 - %arrayidx194.i.24 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.24 - %126 = bitcast float* %arrayidx194.i.24 to i32* - store i32 0, i32* %126, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.25 = add i32 %reass.mul.i, %conv.i.25 - %idxprom193.i.25 = sext i32 %add192.i.25 to i64 - %arrayidx194.i.25 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.25 - %127 = bitcast float* %arrayidx194.i.25 to i32* - store i32 0, i32* %127, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.26 = add i32 %reass.mul.i, %conv.i.26 - %idxprom193.i.26 = sext i32 %add192.i.26 to i64 - %arrayidx194.i.26 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.26 - %128 = bitcast float* %arrayidx194.i.26 to i32* - store i32 0, i32* %128, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.27 = add i32 %reass.mul.i, %conv.i.27 - %idxprom193.i.27 = sext i32 %add192.i.27 to i64 - %arrayidx194.i.27 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.27 - %129 = bitcast float* %arrayidx194.i.27 to i32* - store i32 0, i32* %129, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.28 = add i32 %reass.mul.i, %conv.i.28 - %idxprom193.i.28 = sext i32 %add192.i.28 to i64 - %arrayidx194.i.28 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.28 - %130 = bitcast float* %arrayidx194.i.28 to i32* - store i32 0, i32* %130, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.29 = add i32 %reass.mul.i, %conv.i.29 - %idxprom193.i.29 = sext i32 %add192.i.29 to i64 - %arrayidx194.i.29 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.29 - %131 = bitcast float* %arrayidx194.i.29 to i32* - store i32 0, i32* %131, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.30 = add i32 %reass.mul.i, %conv.i.30 - %idxprom193.i.30 = sext i32 %add192.i.30 to i64 - %arrayidx194.i.30 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.30 - %132 = bitcast float* %arrayidx194.i.30 to i32* - store i32 0, i32* %132, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.31 = add i32 %reass.mul.i, %conv.i.31 - %idxprom193.i.31 = sext i32 %add192.i.31 to i64 - %arrayidx194.i.31 = getelementptr inbounds float, float* %1, i64 %idxprom193.i.31 - %133 = bitcast float* %arrayidx194.i.31 to i32* - store i32 0, i32* %133, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i - -pregion_for_end.i.loopexit: ; preds = %if.end.r_exit.i.us - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %pregion_for_entry.entry.i, %vector.body - %134 = add nuw nsw i64 %_local_id_y.0, 1 - %exitcond3.not = icmp eq i64 %134, 8 - br i1 %exitcond3.not, label %Convolution3D_kernel.exit, label %pregion_for_entry.pregion_for_init.i, !llvm.loop !21 - -Convolution3D_kernel.exit: ; preds = %pregion_for_end.i - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_Convolution3D_kernel_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 5 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %sub.i.i = add nsw i32 %16, -1 - %cmp.i.i = icmp sgt i32 %sub.i.i, %28 - %sub4.i.i = add nsw i32 %20, -1 - %sub8.i.i = add nsw i32 %24, -1 - %cmp12.i.i = icmp sgt i32 %28, 0 - %mul189.i.i = mul i32 %28, %20 - %sub20.i.i = add nsw i32 %28, -1 - %mul.i.i = mul nsw i32 %24, %20 - %mul21.i.i = mul nsw i32 %sub20.i.i, %mul.i.i - %add27.i.i = add nuw nsw i32 %28, 1 - %mul29.i.i = mul nsw i32 %add27.i.i, %mul.i.i - %mul84.i.i = mul nsw i32 %28, %mul.i.i - %29 = trunc i64 %3 to i32 - %30 = shl i32 %29, 3 - %31 = add i32 %mul189.i.i, %30 - %32 = mul i32 %24, %31 - %33 = trunc i64 %2 to i32 - %34 = shl i32 %33, 5 - %35 = add i32 %32, %34 - %36 = trunc i64 %mul.i.i.i to i32 - %37 = trunc i64 %mul.i.i.i to i32 - %38 = or i32 %37, 8 - %39 = trunc i64 %mul.i.i.i to i32 - %40 = or i32 %39, 16 - %41 = trunc i64 %mul.i.i.i to i32 - %42 = or i32 %41, 24 - %conv.i.i = trunc i64 %mul.i.i.i to i32 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.1 = or i32 %43, 1 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.2 = or i32 %44, 2 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.3 = or i32 %45, 3 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.4 = or i32 %46, 4 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.5 = or i32 %47, 5 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.6 = or i32 %48, 6 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.7 = or i32 %49, 7 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.8 = or i32 %50, 8 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.9 = or i32 %51, 9 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.10 = or i32 %52, 10 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.11 = or i32 %53, 11 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.12 = or i32 %54, 12 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.13 = or i32 %55, 13 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.14 = or i32 %56, 14 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.15 = or i32 %57, 15 - %58 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.16 = or i32 %58, 16 - %59 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.17 = or i32 %59, 17 - %60 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.18 = or i32 %60, 18 - %61 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.19 = or i32 %61, 19 - %62 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.20 = or i32 %62, 20 - %63 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.21 = or i32 %63, 21 - %64 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.22 = or i32 %64, 22 - %65 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.23 = or i32 %65, 23 - %66 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.24 = or i32 %66, 24 - %67 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.25 = or i32 %67, 25 - %68 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.26 = or i32 %68, 26 - %69 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.27 = or i32 %69, 27 - %70 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.28 = or i32 %70, 28 - %71 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.29 = or i32 %71, 29 - %72 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.30 = or i32 %72, 30 - %73 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.31 = or i32 %73, 31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %152, %pregion_for_end.i.i ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp5.i.i = icmp sgt i32 %sub4.i.i, %conv2.i.i - %or.cond334.i.i = and i1 %cmp.i.i, %cmp5.i.i - %cmp15.i.i = icmp sgt i32 %conv2.i.i, 0 - %reass.add.i.i = add i32 %mul189.i.i, %conv2.i.i - %reass.mul.i.i = mul i32 %reass.add.i.i, %24 - %sub22.i.i = add nsw i32 %conv2.i.i, -1 - %mul23.i.i = mul nsw i32 %sub22.i.i, %24 - %add.i.i = add nsw i32 %mul23.i.i, %mul21.i.i - %add32.i.i = add nsw i32 %mul23.i.i, %mul29.i.i - %mul97.i.i = mul nsw i32 %24, %conv2.i.i - %add107.i.i = add nuw nsw i32 %conv2.i.i, 1 - %mul108.i.i = mul nsw i32 %add107.i.i, %24 - br i1 %or.cond334.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.scevcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br label %pregion_for_entry.entry.i.i.us - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %74 = trunc i64 %_local_id_y.i.0 to i32 - %75 = mul i32 %24, %74 - %76 = add i32 %75, %35 - %77 = icmp sgt i32 %76, 2147483616 - br i1 %77, label %pregion_for_entry.entry.i.i, label %vector.body - -vector.body: ; preds = %vector.scevcheck - %78 = add i32 %reass.mul.i.i, %36 - %79 = sext i32 %78 to i64 - %80 = getelementptr inbounds float, float* %12, i64 %79 - %81 = bitcast float* %80 to <8 x i32>* - store <8 x i32> zeroinitializer, <8 x i32>* %81, align 4, !tbaa !12, !llvm.access.group !16 - %82 = add i32 %reass.mul.i.i, %38 - %83 = sext i32 %82 to i64 - %84 = getelementptr inbounds float, float* %12, i64 %83 - %85 = bitcast float* %84 to <8 x i32>* - store <8 x i32> zeroinitializer, <8 x i32>* %85, align 4, !tbaa !12, !llvm.access.group !16 - %86 = add i32 %reass.mul.i.i, %40 - %87 = sext i32 %86 to i64 - %88 = getelementptr inbounds float, float* %12, i64 %87 - %89 = bitcast float* %88 to <8 x i32>* - store <8 x i32> zeroinitializer, <8 x i32>* %89, align 4, !tbaa !12, !llvm.access.group !16 - %90 = add i32 %reass.mul.i.i, %42 - %91 = sext i32 %90 to i64 - %92 = getelementptr inbounds float, float* %12, i64 %91 - %93 = bitcast float* %92 to <8 x i32>* - store <8 x i32> zeroinitializer, <8 x i32>* %93, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %119, %if.end.r_exit.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp9.i.i.us = icmp sgt i32 %sub8.i.i, %conv.i.i.us - %or.cond.i.i.us = and i1 %cmp12.i.i, %cmp9.i.i.us - %or.cond195.i.i.us = and i1 %cmp15.i.i, %or.cond.i.i.us - %cmp18.i.i.us = icmp sgt i32 %conv.i.i.us, 0 - %or.cond196.i.i.us = and i1 %cmp18.i.i.us, %or.cond195.i.i.us - br i1 %or.cond196.i.i.us, label %if.then.i.i.us, label %if.else.i.i.us - -if.else.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add192.i.i.us = add i32 %reass.mul.i.i, %conv.i.i.us - %idxprom193.i.i.us = sext i32 %add192.i.i.us to i64 - br label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sub24.i.i.us = add nsw i32 %conv.i.i.us, -1 - %add25.i.i.us = add nsw i32 %sub24.i.i.us, %add.i.i - %idxprom.i.i.us = sext i32 %add25.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us - %94 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %add34.i.i.us = add nsw i32 %sub24.i.i.us, %add32.i.i - %idxprom35.i.i.us = sext i32 %add34.i.i.us to i64 - %arrayidx36.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom35.i.i.us - %95 = load float, float* %arrayidx36.i.i.us, align 4, !tbaa !12 - %mul37.i.i.us = fmul float %95, 4.000000e+00 - %96 = tail call float @llvm.fmuladd.f32(float %94, float 2.000000e+00, float %mul37.i.i.us) #2 - %97 = tail call float @llvm.fmuladd.f32(float %94, float 5.000000e+00, float %96) #2 - %98 = tail call float @llvm.fmuladd.f32(float %95, float 7.000000e+00, float %97) #2 - %99 = tail call float @llvm.fmuladd.f32(float %94, float -8.000000e+00, float %98) #2 - %100 = tail call float @llvm.fmuladd.f32(float %95, float 1.000000e+01, float %99) #2 - %add87.i.i.us = add i32 %mul84.i.i, %conv.i.i.us - %add89.i.i.us = add i32 %add87.i.i.us, %mul23.i.i - %idxprom90.i.i.us = sext i32 %add89.i.i.us to i64 - %arrayidx91.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom90.i.i.us - %101 = load float, float* %arrayidx91.i.i.us, align 4, !tbaa !12 - %102 = tail call float @llvm.fmuladd.f32(float %101, float -3.000000e+00, float %100) #2 - %add100.i.i.us = add i32 %add87.i.i.us, %mul97.i.i - %idxprom101.i.i.us = sext i32 %add100.i.i.us to i64 - %arrayidx102.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom101.i.i.us - %103 = load float, float* %arrayidx102.i.i.us, align 4, !tbaa !12 - %104 = tail call float @llvm.fmuladd.f32(float %103, float 6.000000e+00, float %102) #2 - %add111.i.i.us = add i32 %add87.i.i.us, %mul108.i.i - %idxprom112.i.i.us = sext i32 %add111.i.i.us to i64 - %arrayidx113.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom112.i.i.us - %105 = load float, float* %arrayidx113.i.i.us, align 4, !tbaa !12 - %106 = tail call float @llvm.fmuladd.f32(float %105, float -9.000000e+00, float %104) #2 - %add121.i.i.us = add nuw nsw i32 %conv.i.i.us, 1 - %add122.i.i.us = add nsw i32 %add121.i.i.us, %add.i.i - %idxprom123.i.i.us = sext i32 %add122.i.i.us to i64 - %arrayidx124.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom123.i.i.us - %107 = load float, float* %arrayidx124.i.i.us, align 4, !tbaa !12 - %108 = tail call float @llvm.fmuladd.f32(float %107, float 2.000000e+00, float %106) #2 - %add133.i.i.us = add nsw i32 %add121.i.i.us, %add32.i.i - %idxprom134.i.i.us = sext i32 %add133.i.i.us to i64 - %arrayidx135.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom134.i.i.us - %109 = load float, float* %arrayidx135.i.i.us, align 4, !tbaa !12 - %110 = tail call float @llvm.fmuladd.f32(float %109, float 4.000000e+00, float %108) #2 - %add142.i.i.us = add i32 %add121.i.i.us, %mul21.i.i - %add144.i.i.us = add i32 %add142.i.i.us, %mul97.i.i - %idxprom145.i.i.us = sext i32 %add144.i.i.us to i64 - %arrayidx146.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom145.i.i.us - %111 = load float, float* %arrayidx146.i.i.us, align 4, !tbaa !12 - %112 = tail call float @llvm.fmuladd.f32(float %111, float 5.000000e+00, float %110) #2 - %add153.i.i.us = add i32 %add121.i.i.us, %mul29.i.i - %add155.i.i.us = add i32 %add153.i.i.us, %mul97.i.i - %idxprom156.i.i.us = sext i32 %add155.i.i.us to i64 - %arrayidx157.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom156.i.i.us - %113 = load float, float* %arrayidx157.i.i.us, align 4, !tbaa !12 - %114 = tail call float @llvm.fmuladd.f32(float %113, float 7.000000e+00, float %112) #2 - %add166.i.i.us = add i32 %add142.i.i.us, %mul108.i.i - %idxprom167.i.i.us = sext i32 %add166.i.i.us to i64 - %arrayidx168.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom167.i.i.us - %115 = load float, float* %arrayidx168.i.i.us, align 4, !tbaa !12 - %116 = tail call float @llvm.fmuladd.f32(float %115, float -8.000000e+00, float %114) #2 - %add177.i.i.us = add i32 %add153.i.i.us, %mul108.i.i - %idxprom178.i.i.us = sext i32 %add177.i.i.us to i64 - %arrayidx179.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom178.i.i.us - %117 = load float, float* %arrayidx179.i.i.us, align 4, !tbaa !12 - %118 = tail call float @llvm.fmuladd.f32(float %117, float 1.000000e+01, float %116) #2 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %if.else.i.i.us - %idxprom193.i.i.us.sink = phi i64 [ %idxprom101.i.i.us, %if.then.i.i.us ], [ %idxprom193.i.i.us, %if.else.i.i.us ] - %.sink = phi float [ %118, %if.then.i.i.us ], [ 0.000000e+00, %if.else.i.i.us ] - %arrayidx194.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.us.sink - store float %.sink, float* %arrayidx194.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %119 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond2.not = icmp eq i64 %119, 32 - br i1 %exitcond2.not, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !19 - -pregion_for_entry.entry.i.i: ; preds = %vector.scevcheck - %add192.i.i = add i32 %reass.mul.i.i, %conv.i.i - %idxprom193.i.i = sext i32 %add192.i.i to i64 - %arrayidx194.i.i = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i - %120 = bitcast float* %arrayidx194.i.i to i32* - store i32 0, i32* %120, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.1 = add i32 %reass.mul.i.i, %conv.i.i.1 - %idxprom193.i.i.1 = sext i32 %add192.i.i.1 to i64 - %arrayidx194.i.i.1 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.1 - %121 = bitcast float* %arrayidx194.i.i.1 to i32* - store i32 0, i32* %121, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.2 = add i32 %reass.mul.i.i, %conv.i.i.2 - %idxprom193.i.i.2 = sext i32 %add192.i.i.2 to i64 - %arrayidx194.i.i.2 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.2 - %122 = bitcast float* %arrayidx194.i.i.2 to i32* - store i32 0, i32* %122, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.3 = add i32 %reass.mul.i.i, %conv.i.i.3 - %idxprom193.i.i.3 = sext i32 %add192.i.i.3 to i64 - %arrayidx194.i.i.3 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.3 - %123 = bitcast float* %arrayidx194.i.i.3 to i32* - store i32 0, i32* %123, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.4 = add i32 %reass.mul.i.i, %conv.i.i.4 - %idxprom193.i.i.4 = sext i32 %add192.i.i.4 to i64 - %arrayidx194.i.i.4 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.4 - %124 = bitcast float* %arrayidx194.i.i.4 to i32* - store i32 0, i32* %124, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.5 = add i32 %reass.mul.i.i, %conv.i.i.5 - %idxprom193.i.i.5 = sext i32 %add192.i.i.5 to i64 - %arrayidx194.i.i.5 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.5 - %125 = bitcast float* %arrayidx194.i.i.5 to i32* - store i32 0, i32* %125, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.6 = add i32 %reass.mul.i.i, %conv.i.i.6 - %idxprom193.i.i.6 = sext i32 %add192.i.i.6 to i64 - %arrayidx194.i.i.6 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.6 - %126 = bitcast float* %arrayidx194.i.i.6 to i32* - store i32 0, i32* %126, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.7 = add i32 %reass.mul.i.i, %conv.i.i.7 - %idxprom193.i.i.7 = sext i32 %add192.i.i.7 to i64 - %arrayidx194.i.i.7 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.7 - %127 = bitcast float* %arrayidx194.i.i.7 to i32* - store i32 0, i32* %127, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.8 = add i32 %reass.mul.i.i, %conv.i.i.8 - %idxprom193.i.i.8 = sext i32 %add192.i.i.8 to i64 - %arrayidx194.i.i.8 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.8 - %128 = bitcast float* %arrayidx194.i.i.8 to i32* - store i32 0, i32* %128, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.9 = add i32 %reass.mul.i.i, %conv.i.i.9 - %idxprom193.i.i.9 = sext i32 %add192.i.i.9 to i64 - %arrayidx194.i.i.9 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.9 - %129 = bitcast float* %arrayidx194.i.i.9 to i32* - store i32 0, i32* %129, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.10 = add i32 %reass.mul.i.i, %conv.i.i.10 - %idxprom193.i.i.10 = sext i32 %add192.i.i.10 to i64 - %arrayidx194.i.i.10 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.10 - %130 = bitcast float* %arrayidx194.i.i.10 to i32* - store i32 0, i32* %130, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.11 = add i32 %reass.mul.i.i, %conv.i.i.11 - %idxprom193.i.i.11 = sext i32 %add192.i.i.11 to i64 - %arrayidx194.i.i.11 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.11 - %131 = bitcast float* %arrayidx194.i.i.11 to i32* - store i32 0, i32* %131, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.12 = add i32 %reass.mul.i.i, %conv.i.i.12 - %idxprom193.i.i.12 = sext i32 %add192.i.i.12 to i64 - %arrayidx194.i.i.12 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.12 - %132 = bitcast float* %arrayidx194.i.i.12 to i32* - store i32 0, i32* %132, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.13 = add i32 %reass.mul.i.i, %conv.i.i.13 - %idxprom193.i.i.13 = sext i32 %add192.i.i.13 to i64 - %arrayidx194.i.i.13 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.13 - %133 = bitcast float* %arrayidx194.i.i.13 to i32* - store i32 0, i32* %133, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.14 = add i32 %reass.mul.i.i, %conv.i.i.14 - %idxprom193.i.i.14 = sext i32 %add192.i.i.14 to i64 - %arrayidx194.i.i.14 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.14 - %134 = bitcast float* %arrayidx194.i.i.14 to i32* - store i32 0, i32* %134, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.15 = add i32 %reass.mul.i.i, %conv.i.i.15 - %idxprom193.i.i.15 = sext i32 %add192.i.i.15 to i64 - %arrayidx194.i.i.15 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.15 - %135 = bitcast float* %arrayidx194.i.i.15 to i32* - store i32 0, i32* %135, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.16 = add i32 %reass.mul.i.i, %conv.i.i.16 - %idxprom193.i.i.16 = sext i32 %add192.i.i.16 to i64 - %arrayidx194.i.i.16 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.16 - %136 = bitcast float* %arrayidx194.i.i.16 to i32* - store i32 0, i32* %136, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.17 = add i32 %reass.mul.i.i, %conv.i.i.17 - %idxprom193.i.i.17 = sext i32 %add192.i.i.17 to i64 - %arrayidx194.i.i.17 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.17 - %137 = bitcast float* %arrayidx194.i.i.17 to i32* - store i32 0, i32* %137, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.18 = add i32 %reass.mul.i.i, %conv.i.i.18 - %idxprom193.i.i.18 = sext i32 %add192.i.i.18 to i64 - %arrayidx194.i.i.18 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.18 - %138 = bitcast float* %arrayidx194.i.i.18 to i32* - store i32 0, i32* %138, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.19 = add i32 %reass.mul.i.i, %conv.i.i.19 - %idxprom193.i.i.19 = sext i32 %add192.i.i.19 to i64 - %arrayidx194.i.i.19 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.19 - %139 = bitcast float* %arrayidx194.i.i.19 to i32* - store i32 0, i32* %139, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.20 = add i32 %reass.mul.i.i, %conv.i.i.20 - %idxprom193.i.i.20 = sext i32 %add192.i.i.20 to i64 - %arrayidx194.i.i.20 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.20 - %140 = bitcast float* %arrayidx194.i.i.20 to i32* - store i32 0, i32* %140, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.21 = add i32 %reass.mul.i.i, %conv.i.i.21 - %idxprom193.i.i.21 = sext i32 %add192.i.i.21 to i64 - %arrayidx194.i.i.21 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.21 - %141 = bitcast float* %arrayidx194.i.i.21 to i32* - store i32 0, i32* %141, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.22 = add i32 %reass.mul.i.i, %conv.i.i.22 - %idxprom193.i.i.22 = sext i32 %add192.i.i.22 to i64 - %arrayidx194.i.i.22 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.22 - %142 = bitcast float* %arrayidx194.i.i.22 to i32* - store i32 0, i32* %142, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.23 = add i32 %reass.mul.i.i, %conv.i.i.23 - %idxprom193.i.i.23 = sext i32 %add192.i.i.23 to i64 - %arrayidx194.i.i.23 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.23 - %143 = bitcast float* %arrayidx194.i.i.23 to i32* - store i32 0, i32* %143, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.24 = add i32 %reass.mul.i.i, %conv.i.i.24 - %idxprom193.i.i.24 = sext i32 %add192.i.i.24 to i64 - %arrayidx194.i.i.24 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.24 - %144 = bitcast float* %arrayidx194.i.i.24 to i32* - store i32 0, i32* %144, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.25 = add i32 %reass.mul.i.i, %conv.i.i.25 - %idxprom193.i.i.25 = sext i32 %add192.i.i.25 to i64 - %arrayidx194.i.i.25 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.25 - %145 = bitcast float* %arrayidx194.i.i.25 to i32* - store i32 0, i32* %145, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.26 = add i32 %reass.mul.i.i, %conv.i.i.26 - %idxprom193.i.i.26 = sext i32 %add192.i.i.26 to i64 - %arrayidx194.i.i.26 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.26 - %146 = bitcast float* %arrayidx194.i.i.26 to i32* - store i32 0, i32* %146, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.27 = add i32 %reass.mul.i.i, %conv.i.i.27 - %idxprom193.i.i.27 = sext i32 %add192.i.i.27 to i64 - %arrayidx194.i.i.27 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.27 - %147 = bitcast float* %arrayidx194.i.i.27 to i32* - store i32 0, i32* %147, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.28 = add i32 %reass.mul.i.i, %conv.i.i.28 - %idxprom193.i.i.28 = sext i32 %add192.i.i.28 to i64 - %arrayidx194.i.i.28 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.28 - %148 = bitcast float* %arrayidx194.i.i.28 to i32* - store i32 0, i32* %148, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.29 = add i32 %reass.mul.i.i, %conv.i.i.29 - %idxprom193.i.i.29 = sext i32 %add192.i.i.29 to i64 - %arrayidx194.i.i.29 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.29 - %149 = bitcast float* %arrayidx194.i.i.29 to i32* - store i32 0, i32* %149, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.30 = add i32 %reass.mul.i.i, %conv.i.i.30 - %idxprom193.i.i.30 = sext i32 %add192.i.i.30 to i64 - %arrayidx194.i.i.30 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.30 - %150 = bitcast float* %arrayidx194.i.i.30 to i32* - store i32 0, i32* %150, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.31 = add i32 %reass.mul.i.i, %conv.i.i.31 - %idxprom193.i.i.31 = sext i32 %add192.i.i.31 to i64 - %arrayidx194.i.i.31 = getelementptr inbounds float, float* %12, i64 %idxprom193.i.i.31 - %151 = bitcast float* %arrayidx194.i.i.31 to i32* - store i32 0, i32* %151, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -pregion_for_end.i.i.loopexit: ; preds = %if.end.r_exit.i.i.us - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %pregion_for_entry.entry.i.i, %vector.body - %152 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond3.not = icmp eq i64 %152, 8 - br i1 %exitcond3.not, label %_pocl_kernel_Convolution3D_kernel.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_Convolution3D_kernel.exit: ; preds = %pregion_for_end.i.i - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_Convolution3D_kernel_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to i32** - %13 = load i32*, i32** %12, align 8 - %14 = load i32, i32* %13, align 4 - %15 = getelementptr i8*, i8** %0, i64 3 - %16 = bitcast i8** %15 to i32** - %17 = load i32*, i32** %16, align 8 - %18 = load i32, i32* %17, align 4 - %19 = getelementptr i8*, i8** %0, i64 4 - %20 = bitcast i8** %19 to i32** - %21 = load i32*, i32** %20, align 8 - %22 = load i32, i32* %21, align 4 - %23 = getelementptr i8*, i8** %0, i64 5 - %24 = bitcast i8** %23 to i32** - %25 = load i32*, i32** %24, align 8 - %26 = load i32, i32* %25, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %sub.i.i = add nsw i32 %14, -1 - %cmp.i.i = icmp sgt i32 %sub.i.i, %26 - %sub4.i.i = add nsw i32 %18, -1 - %sub8.i.i = add nsw i32 %22, -1 - %cmp12.i.i = icmp sgt i32 %26, 0 - %mul189.i.i = mul i32 %26, %18 - %sub20.i.i = add nsw i32 %26, -1 - %mul.i.i = mul nsw i32 %22, %18 - %mul21.i.i = mul nsw i32 %sub20.i.i, %mul.i.i - %add27.i.i = add nuw nsw i32 %26, 1 - %mul29.i.i = mul nsw i32 %add27.i.i, %mul.i.i - %mul84.i.i = mul nsw i32 %26, %mul.i.i - %27 = trunc i64 %3 to i32 - %28 = shl i32 %27, 3 - %29 = add i32 %mul189.i.i, %28 - %30 = mul i32 %22, %29 - %31 = trunc i64 %2 to i32 - %32 = shl i32 %31, 5 - %33 = add i32 %30, %32 - %34 = trunc i64 %mul.i.i.i to i32 - %35 = trunc i64 %mul.i.i.i to i32 - %36 = or i32 %35, 8 - %37 = trunc i64 %mul.i.i.i to i32 - %38 = or i32 %37, 16 - %39 = trunc i64 %mul.i.i.i to i32 - %40 = or i32 %39, 24 - %conv.i.i = trunc i64 %mul.i.i.i to i32 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.1 = or i32 %41, 1 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.2 = or i32 %42, 2 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.3 = or i32 %43, 3 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.4 = or i32 %44, 4 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.5 = or i32 %45, 5 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.6 = or i32 %46, 6 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.7 = or i32 %47, 7 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.8 = or i32 %48, 8 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.9 = or i32 %49, 9 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.10 = or i32 %50, 10 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.11 = or i32 %51, 11 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.12 = or i32 %52, 12 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.13 = or i32 %53, 13 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.14 = or i32 %54, 14 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.15 = or i32 %55, 15 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.16 = or i32 %56, 16 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.17 = or i32 %57, 17 - %58 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.18 = or i32 %58, 18 - %59 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.19 = or i32 %59, 19 - %60 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.20 = or i32 %60, 20 - %61 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.21 = or i32 %61, 21 - %62 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.22 = or i32 %62, 22 - %63 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.23 = or i32 %63, 23 - %64 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.24 = or i32 %64, 24 - %65 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.25 = or i32 %65, 25 - %66 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.26 = or i32 %66, 26 - %67 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.27 = or i32 %67, 27 - %68 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.28 = or i32 %68, 28 - %69 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.29 = or i32 %69, 29 - %70 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.30 = or i32 %70, 30 - %71 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.31 = or i32 %71, 31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %150, %pregion_for_end.i.i ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp5.i.i = icmp sgt i32 %sub4.i.i, %conv2.i.i - %or.cond334.i.i = and i1 %cmp.i.i, %cmp5.i.i - %cmp15.i.i = icmp sgt i32 %conv2.i.i, 0 - %reass.add.i.i = add i32 %mul189.i.i, %conv2.i.i - %reass.mul.i.i = mul i32 %reass.add.i.i, %22 - %sub22.i.i = add nsw i32 %conv2.i.i, -1 - %mul23.i.i = mul nsw i32 %sub22.i.i, %22 - %add.i.i = add nsw i32 %mul23.i.i, %mul21.i.i - %add32.i.i = add nsw i32 %mul23.i.i, %mul29.i.i - %mul97.i.i = mul nsw i32 %22, %conv2.i.i - %add107.i.i = add nuw nsw i32 %conv2.i.i, 1 - %mul108.i.i = mul nsw i32 %add107.i.i, %22 - br i1 %or.cond334.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.scevcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br label %pregion_for_entry.entry.i.i.us - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %72 = trunc i64 %_local_id_y.i.0 to i32 - %73 = mul i32 %22, %72 - %74 = add i32 %73, %33 - %75 = icmp sgt i32 %74, 2147483616 - br i1 %75, label %pregion_for_entry.entry.i.i, label %vector.body - -vector.body: ; preds = %vector.scevcheck - %76 = add i32 %reass.mul.i.i, %34 - %77 = sext i32 %76 to i64 - %78 = getelementptr inbounds float, float* %10, i64 %77 - %79 = bitcast float* %78 to <8 x i32>* - store <8 x i32> zeroinitializer, <8 x i32>* %79, align 4, !tbaa !12, !llvm.access.group !16 - %80 = add i32 %reass.mul.i.i, %36 - %81 = sext i32 %80 to i64 - %82 = getelementptr inbounds float, float* %10, i64 %81 - %83 = bitcast float* %82 to <8 x i32>* - store <8 x i32> zeroinitializer, <8 x i32>* %83, align 4, !tbaa !12, !llvm.access.group !16 - %84 = add i32 %reass.mul.i.i, %38 - %85 = sext i32 %84 to i64 - %86 = getelementptr inbounds float, float* %10, i64 %85 - %87 = bitcast float* %86 to <8 x i32>* - store <8 x i32> zeroinitializer, <8 x i32>* %87, align 4, !tbaa !12, !llvm.access.group !16 - %88 = add i32 %reass.mul.i.i, %40 - %89 = sext i32 %88 to i64 - %90 = getelementptr inbounds float, float* %10, i64 %89 - %91 = bitcast float* %90 to <8 x i32>* - store <8 x i32> zeroinitializer, <8 x i32>* %91, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %117, %if.end.r_exit.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp9.i.i.us = icmp sgt i32 %sub8.i.i, %conv.i.i.us - %or.cond.i.i.us = and i1 %cmp12.i.i, %cmp9.i.i.us - %or.cond195.i.i.us = and i1 %cmp15.i.i, %or.cond.i.i.us - %cmp18.i.i.us = icmp sgt i32 %conv.i.i.us, 0 - %or.cond196.i.i.us = and i1 %cmp18.i.i.us, %or.cond195.i.i.us - br i1 %or.cond196.i.i.us, label %if.then.i.i.us, label %if.else.i.i.us - -if.else.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add192.i.i.us = add i32 %reass.mul.i.i, %conv.i.i.us - %idxprom193.i.i.us = sext i32 %add192.i.i.us to i64 - br label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sub24.i.i.us = add nsw i32 %conv.i.i.us, -1 - %add25.i.i.us = add nsw i32 %sub24.i.i.us, %add.i.i - %idxprom.i.i.us = sext i32 %add25.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us - %92 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %add34.i.i.us = add nsw i32 %sub24.i.i.us, %add32.i.i - %idxprom35.i.i.us = sext i32 %add34.i.i.us to i64 - %arrayidx36.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom35.i.i.us - %93 = load float, float* %arrayidx36.i.i.us, align 4, !tbaa !12 - %mul37.i.i.us = fmul float %93, 4.000000e+00 - %94 = tail call float @llvm.fmuladd.f32(float %92, float 2.000000e+00, float %mul37.i.i.us) #2 - %95 = tail call float @llvm.fmuladd.f32(float %92, float 5.000000e+00, float %94) #2 - %96 = tail call float @llvm.fmuladd.f32(float %93, float 7.000000e+00, float %95) #2 - %97 = tail call float @llvm.fmuladd.f32(float %92, float -8.000000e+00, float %96) #2 - %98 = tail call float @llvm.fmuladd.f32(float %93, float 1.000000e+01, float %97) #2 - %add87.i.i.us = add i32 %mul84.i.i, %conv.i.i.us - %add89.i.i.us = add i32 %add87.i.i.us, %mul23.i.i - %idxprom90.i.i.us = sext i32 %add89.i.i.us to i64 - %arrayidx91.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom90.i.i.us - %99 = load float, float* %arrayidx91.i.i.us, align 4, !tbaa !12 - %100 = tail call float @llvm.fmuladd.f32(float %99, float -3.000000e+00, float %98) #2 - %add100.i.i.us = add i32 %add87.i.i.us, %mul97.i.i - %idxprom101.i.i.us = sext i32 %add100.i.i.us to i64 - %arrayidx102.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom101.i.i.us - %101 = load float, float* %arrayidx102.i.i.us, align 4, !tbaa !12 - %102 = tail call float @llvm.fmuladd.f32(float %101, float 6.000000e+00, float %100) #2 - %add111.i.i.us = add i32 %add87.i.i.us, %mul108.i.i - %idxprom112.i.i.us = sext i32 %add111.i.i.us to i64 - %arrayidx113.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom112.i.i.us - %103 = load float, float* %arrayidx113.i.i.us, align 4, !tbaa !12 - %104 = tail call float @llvm.fmuladd.f32(float %103, float -9.000000e+00, float %102) #2 - %add121.i.i.us = add nuw nsw i32 %conv.i.i.us, 1 - %add122.i.i.us = add nsw i32 %add121.i.i.us, %add.i.i - %idxprom123.i.i.us = sext i32 %add122.i.i.us to i64 - %arrayidx124.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom123.i.i.us - %105 = load float, float* %arrayidx124.i.i.us, align 4, !tbaa !12 - %106 = tail call float @llvm.fmuladd.f32(float %105, float 2.000000e+00, float %104) #2 - %add133.i.i.us = add nsw i32 %add121.i.i.us, %add32.i.i - %idxprom134.i.i.us = sext i32 %add133.i.i.us to i64 - %arrayidx135.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom134.i.i.us - %107 = load float, float* %arrayidx135.i.i.us, align 4, !tbaa !12 - %108 = tail call float @llvm.fmuladd.f32(float %107, float 4.000000e+00, float %106) #2 - %add142.i.i.us = add i32 %add121.i.i.us, %mul21.i.i - %add144.i.i.us = add i32 %add142.i.i.us, %mul97.i.i - %idxprom145.i.i.us = sext i32 %add144.i.i.us to i64 - %arrayidx146.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom145.i.i.us - %109 = load float, float* %arrayidx146.i.i.us, align 4, !tbaa !12 - %110 = tail call float @llvm.fmuladd.f32(float %109, float 5.000000e+00, float %108) #2 - %add153.i.i.us = add i32 %add121.i.i.us, %mul29.i.i - %add155.i.i.us = add i32 %add153.i.i.us, %mul97.i.i - %idxprom156.i.i.us = sext i32 %add155.i.i.us to i64 - %arrayidx157.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom156.i.i.us - %111 = load float, float* %arrayidx157.i.i.us, align 4, !tbaa !12 - %112 = tail call float @llvm.fmuladd.f32(float %111, float 7.000000e+00, float %110) #2 - %add166.i.i.us = add i32 %add142.i.i.us, %mul108.i.i - %idxprom167.i.i.us = sext i32 %add166.i.i.us to i64 - %arrayidx168.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom167.i.i.us - %113 = load float, float* %arrayidx168.i.i.us, align 4, !tbaa !12 - %114 = tail call float @llvm.fmuladd.f32(float %113, float -8.000000e+00, float %112) #2 - %add177.i.i.us = add i32 %add153.i.i.us, %mul108.i.i - %idxprom178.i.i.us = sext i32 %add177.i.i.us to i64 - %arrayidx179.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom178.i.i.us - %115 = load float, float* %arrayidx179.i.i.us, align 4, !tbaa !12 - %116 = tail call float @llvm.fmuladd.f32(float %115, float 1.000000e+01, float %114) #2 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %if.else.i.i.us - %idxprom193.i.i.us.sink = phi i64 [ %idxprom101.i.i.us, %if.then.i.i.us ], [ %idxprom193.i.i.us, %if.else.i.i.us ] - %.sink = phi float [ %116, %if.then.i.i.us ], [ 0.000000e+00, %if.else.i.i.us ] - %arrayidx194.i.i.us = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.us.sink - store float %.sink, float* %arrayidx194.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %117 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond2.not = icmp eq i64 %117, 32 - br i1 %exitcond2.not, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !19 - -pregion_for_entry.entry.i.i: ; preds = %vector.scevcheck - %add192.i.i = add i32 %reass.mul.i.i, %conv.i.i - %idxprom193.i.i = sext i32 %add192.i.i to i64 - %arrayidx194.i.i = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i - %118 = bitcast float* %arrayidx194.i.i to i32* - store i32 0, i32* %118, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.1 = add i32 %reass.mul.i.i, %conv.i.i.1 - %idxprom193.i.i.1 = sext i32 %add192.i.i.1 to i64 - %arrayidx194.i.i.1 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.1 - %119 = bitcast float* %arrayidx194.i.i.1 to i32* - store i32 0, i32* %119, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.2 = add i32 %reass.mul.i.i, %conv.i.i.2 - %idxprom193.i.i.2 = sext i32 %add192.i.i.2 to i64 - %arrayidx194.i.i.2 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.2 - %120 = bitcast float* %arrayidx194.i.i.2 to i32* - store i32 0, i32* %120, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.3 = add i32 %reass.mul.i.i, %conv.i.i.3 - %idxprom193.i.i.3 = sext i32 %add192.i.i.3 to i64 - %arrayidx194.i.i.3 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.3 - %121 = bitcast float* %arrayidx194.i.i.3 to i32* - store i32 0, i32* %121, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.4 = add i32 %reass.mul.i.i, %conv.i.i.4 - %idxprom193.i.i.4 = sext i32 %add192.i.i.4 to i64 - %arrayidx194.i.i.4 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.4 - %122 = bitcast float* %arrayidx194.i.i.4 to i32* - store i32 0, i32* %122, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.5 = add i32 %reass.mul.i.i, %conv.i.i.5 - %idxprom193.i.i.5 = sext i32 %add192.i.i.5 to i64 - %arrayidx194.i.i.5 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.5 - %123 = bitcast float* %arrayidx194.i.i.5 to i32* - store i32 0, i32* %123, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.6 = add i32 %reass.mul.i.i, %conv.i.i.6 - %idxprom193.i.i.6 = sext i32 %add192.i.i.6 to i64 - %arrayidx194.i.i.6 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.6 - %124 = bitcast float* %arrayidx194.i.i.6 to i32* - store i32 0, i32* %124, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.7 = add i32 %reass.mul.i.i, %conv.i.i.7 - %idxprom193.i.i.7 = sext i32 %add192.i.i.7 to i64 - %arrayidx194.i.i.7 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.7 - %125 = bitcast float* %arrayidx194.i.i.7 to i32* - store i32 0, i32* %125, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.8 = add i32 %reass.mul.i.i, %conv.i.i.8 - %idxprom193.i.i.8 = sext i32 %add192.i.i.8 to i64 - %arrayidx194.i.i.8 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.8 - %126 = bitcast float* %arrayidx194.i.i.8 to i32* - store i32 0, i32* %126, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.9 = add i32 %reass.mul.i.i, %conv.i.i.9 - %idxprom193.i.i.9 = sext i32 %add192.i.i.9 to i64 - %arrayidx194.i.i.9 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.9 - %127 = bitcast float* %arrayidx194.i.i.9 to i32* - store i32 0, i32* %127, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.10 = add i32 %reass.mul.i.i, %conv.i.i.10 - %idxprom193.i.i.10 = sext i32 %add192.i.i.10 to i64 - %arrayidx194.i.i.10 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.10 - %128 = bitcast float* %arrayidx194.i.i.10 to i32* - store i32 0, i32* %128, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.11 = add i32 %reass.mul.i.i, %conv.i.i.11 - %idxprom193.i.i.11 = sext i32 %add192.i.i.11 to i64 - %arrayidx194.i.i.11 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.11 - %129 = bitcast float* %arrayidx194.i.i.11 to i32* - store i32 0, i32* %129, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.12 = add i32 %reass.mul.i.i, %conv.i.i.12 - %idxprom193.i.i.12 = sext i32 %add192.i.i.12 to i64 - %arrayidx194.i.i.12 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.12 - %130 = bitcast float* %arrayidx194.i.i.12 to i32* - store i32 0, i32* %130, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.13 = add i32 %reass.mul.i.i, %conv.i.i.13 - %idxprom193.i.i.13 = sext i32 %add192.i.i.13 to i64 - %arrayidx194.i.i.13 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.13 - %131 = bitcast float* %arrayidx194.i.i.13 to i32* - store i32 0, i32* %131, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.14 = add i32 %reass.mul.i.i, %conv.i.i.14 - %idxprom193.i.i.14 = sext i32 %add192.i.i.14 to i64 - %arrayidx194.i.i.14 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.14 - %132 = bitcast float* %arrayidx194.i.i.14 to i32* - store i32 0, i32* %132, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.15 = add i32 %reass.mul.i.i, %conv.i.i.15 - %idxprom193.i.i.15 = sext i32 %add192.i.i.15 to i64 - %arrayidx194.i.i.15 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.15 - %133 = bitcast float* %arrayidx194.i.i.15 to i32* - store i32 0, i32* %133, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.16 = add i32 %reass.mul.i.i, %conv.i.i.16 - %idxprom193.i.i.16 = sext i32 %add192.i.i.16 to i64 - %arrayidx194.i.i.16 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.16 - %134 = bitcast float* %arrayidx194.i.i.16 to i32* - store i32 0, i32* %134, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.17 = add i32 %reass.mul.i.i, %conv.i.i.17 - %idxprom193.i.i.17 = sext i32 %add192.i.i.17 to i64 - %arrayidx194.i.i.17 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.17 - %135 = bitcast float* %arrayidx194.i.i.17 to i32* - store i32 0, i32* %135, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.18 = add i32 %reass.mul.i.i, %conv.i.i.18 - %idxprom193.i.i.18 = sext i32 %add192.i.i.18 to i64 - %arrayidx194.i.i.18 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.18 - %136 = bitcast float* %arrayidx194.i.i.18 to i32* - store i32 0, i32* %136, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.19 = add i32 %reass.mul.i.i, %conv.i.i.19 - %idxprom193.i.i.19 = sext i32 %add192.i.i.19 to i64 - %arrayidx194.i.i.19 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.19 - %137 = bitcast float* %arrayidx194.i.i.19 to i32* - store i32 0, i32* %137, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.20 = add i32 %reass.mul.i.i, %conv.i.i.20 - %idxprom193.i.i.20 = sext i32 %add192.i.i.20 to i64 - %arrayidx194.i.i.20 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.20 - %138 = bitcast float* %arrayidx194.i.i.20 to i32* - store i32 0, i32* %138, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.21 = add i32 %reass.mul.i.i, %conv.i.i.21 - %idxprom193.i.i.21 = sext i32 %add192.i.i.21 to i64 - %arrayidx194.i.i.21 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.21 - %139 = bitcast float* %arrayidx194.i.i.21 to i32* - store i32 0, i32* %139, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.22 = add i32 %reass.mul.i.i, %conv.i.i.22 - %idxprom193.i.i.22 = sext i32 %add192.i.i.22 to i64 - %arrayidx194.i.i.22 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.22 - %140 = bitcast float* %arrayidx194.i.i.22 to i32* - store i32 0, i32* %140, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.23 = add i32 %reass.mul.i.i, %conv.i.i.23 - %idxprom193.i.i.23 = sext i32 %add192.i.i.23 to i64 - %arrayidx194.i.i.23 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.23 - %141 = bitcast float* %arrayidx194.i.i.23 to i32* - store i32 0, i32* %141, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.24 = add i32 %reass.mul.i.i, %conv.i.i.24 - %idxprom193.i.i.24 = sext i32 %add192.i.i.24 to i64 - %arrayidx194.i.i.24 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.24 - %142 = bitcast float* %arrayidx194.i.i.24 to i32* - store i32 0, i32* %142, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.25 = add i32 %reass.mul.i.i, %conv.i.i.25 - %idxprom193.i.i.25 = sext i32 %add192.i.i.25 to i64 - %arrayidx194.i.i.25 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.25 - %143 = bitcast float* %arrayidx194.i.i.25 to i32* - store i32 0, i32* %143, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.26 = add i32 %reass.mul.i.i, %conv.i.i.26 - %idxprom193.i.i.26 = sext i32 %add192.i.i.26 to i64 - %arrayidx194.i.i.26 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.26 - %144 = bitcast float* %arrayidx194.i.i.26 to i32* - store i32 0, i32* %144, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.27 = add i32 %reass.mul.i.i, %conv.i.i.27 - %idxprom193.i.i.27 = sext i32 %add192.i.i.27 to i64 - %arrayidx194.i.i.27 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.27 - %145 = bitcast float* %arrayidx194.i.i.27 to i32* - store i32 0, i32* %145, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.28 = add i32 %reass.mul.i.i, %conv.i.i.28 - %idxprom193.i.i.28 = sext i32 %add192.i.i.28 to i64 - %arrayidx194.i.i.28 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.28 - %146 = bitcast float* %arrayidx194.i.i.28 to i32* - store i32 0, i32* %146, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.29 = add i32 %reass.mul.i.i, %conv.i.i.29 - %idxprom193.i.i.29 = sext i32 %add192.i.i.29 to i64 - %arrayidx194.i.i.29 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.29 - %147 = bitcast float* %arrayidx194.i.i.29 to i32* - store i32 0, i32* %147, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.30 = add i32 %reass.mul.i.i, %conv.i.i.30 - %idxprom193.i.i.30 = sext i32 %add192.i.i.30 to i64 - %arrayidx194.i.i.30 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.30 - %148 = bitcast float* %arrayidx194.i.i.30 to i32* - store i32 0, i32* %148, align 4, !tbaa !12, !llvm.access.group !16 - %add192.i.i.31 = add i32 %reass.mul.i.i, %conv.i.i.31 - %idxprom193.i.i.31 = sext i32 %add192.i.i.31 to i64 - %arrayidx194.i.i.31 = getelementptr inbounds float, float* %10, i64 %idxprom193.i.i.31 - %149 = bitcast float* %arrayidx194.i.i.31 to i32* - store i32 0, i32* %149, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -pregion_for_end.i.i.loopexit: ; preds = %if.end.r_exit.i.i.us - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %pregion_for_entry.entry.i.i, %vector.body - %150 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond3.not = icmp eq i64 %150, 8 - br i1 %exitcond3.not, label %_pocl_kernel_Convolution3D_kernel.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_Convolution3D_kernel.exit: ; preds = %pregion_for_end.i.i - ret void -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 0, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int", !"int", !"int"} -!8 = !{!"float*", !"float*", !"int", !"int", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !""} -!10 = !{!"A", !"B", !"ni", !"nj", !"nk", !"i"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17, !18} -!17 = distinct !{} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.parallel_accesses", !17} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !18} diff --git a/pocl_irs/3mm_kernel1.ll b/pocl_irs/3mm_kernel1.ll deleted file mode 100644 index 1d3665b..0000000 --- a/pocl_irs/3mm_kernel1.ll +++ /dev/null @@ -1,3583 +0,0 @@ -; ModuleID = './CF/DAJJACGEKBAPHIEIKFDOEEKGMOBCBEEPDHBEI/mm3_kernel1/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_mm3_kernel1(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, i32 %4, i32 %5, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %6, i64 %7, i64 %8, i64 %9) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %7, 5 - %mul3.i.i = shl i64 %8, 3 - %cmp638.i = icmp sgt i32 %5, 0 - %11 = sext i32 %4 to i64 - %wide.trip.count.i = zext i32 %5 to i64 - br i1 %cmp638.i, label %pregion_for_entry.pregion_for_init.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.preheader - -pregion_for_entry.pregion_for_init.i.preheader: ; preds = %10 - %conv.i.us = trunc i64 %mul.i.i to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %4 - %12 = trunc i64 %mul.i.i to i32 - %conv.i.us.1 = or i32 %12, 1 - %cmp4.i.us.1 = icmp slt i32 %conv.i.us.1, %4 - %13 = trunc i64 %mul.i.i to i32 - %conv.i.us.2 = or i32 %13, 2 - %cmp4.i.us.2 = icmp slt i32 %conv.i.us.2, %4 - %14 = trunc i64 %mul.i.i to i32 - %conv.i.us.3 = or i32 %14, 3 - %cmp4.i.us.3 = icmp slt i32 %conv.i.us.3, %4 - %15 = trunc i64 %mul.i.i to i32 - %conv.i.us.4 = or i32 %15, 4 - %cmp4.i.us.4 = icmp slt i32 %conv.i.us.4, %4 - %16 = trunc i64 %mul.i.i to i32 - %conv.i.us.5 = or i32 %16, 5 - %cmp4.i.us.5 = icmp slt i32 %conv.i.us.5, %4 - %17 = trunc i64 %mul.i.i to i32 - %conv.i.us.6 = or i32 %17, 6 - %cmp4.i.us.6 = icmp slt i32 %conv.i.us.6, %4 - %18 = trunc i64 %mul.i.i to i32 - %conv.i.us.7 = or i32 %18, 7 - %cmp4.i.us.7 = icmp slt i32 %conv.i.us.7, %4 - %19 = trunc i64 %mul.i.i to i32 - %conv.i.us.8 = or i32 %19, 8 - %cmp4.i.us.8 = icmp slt i32 %conv.i.us.8, %4 - %20 = trunc i64 %mul.i.i to i32 - %conv.i.us.9 = or i32 %20, 9 - %cmp4.i.us.9 = icmp slt i32 %conv.i.us.9, %4 - %21 = trunc i64 %mul.i.i to i32 - %conv.i.us.10 = or i32 %21, 10 - %cmp4.i.us.10 = icmp slt i32 %conv.i.us.10, %4 - %22 = trunc i64 %mul.i.i to i32 - %conv.i.us.11 = or i32 %22, 11 - %cmp4.i.us.11 = icmp slt i32 %conv.i.us.11, %4 - %23 = trunc i64 %mul.i.i to i32 - %conv.i.us.12 = or i32 %23, 12 - %cmp4.i.us.12 = icmp slt i32 %conv.i.us.12, %4 - %24 = trunc i64 %mul.i.i to i32 - %conv.i.us.13 = or i32 %24, 13 - %cmp4.i.us.13 = icmp slt i32 %conv.i.us.13, %4 - %25 = trunc i64 %mul.i.i to i32 - %conv.i.us.14 = or i32 %25, 14 - %cmp4.i.us.14 = icmp slt i32 %conv.i.us.14, %4 - %26 = trunc i64 %mul.i.i to i32 - %conv.i.us.15 = or i32 %26, 15 - %cmp4.i.us.15 = icmp slt i32 %conv.i.us.15, %4 - %27 = trunc i64 %mul.i.i to i32 - %conv.i.us.16 = or i32 %27, 16 - %cmp4.i.us.16 = icmp slt i32 %conv.i.us.16, %4 - %28 = trunc i64 %mul.i.i to i32 - %conv.i.us.17 = or i32 %28, 17 - %cmp4.i.us.17 = icmp slt i32 %conv.i.us.17, %4 - %29 = trunc i64 %mul.i.i to i32 - %conv.i.us.18 = or i32 %29, 18 - %cmp4.i.us.18 = icmp slt i32 %conv.i.us.18, %4 - %30 = trunc i64 %mul.i.i to i32 - %conv.i.us.19 = or i32 %30, 19 - %cmp4.i.us.19 = icmp slt i32 %conv.i.us.19, %4 - %31 = trunc i64 %mul.i.i to i32 - %conv.i.us.20 = or i32 %31, 20 - %cmp4.i.us.20 = icmp slt i32 %conv.i.us.20, %4 - %32 = trunc i64 %mul.i.i to i32 - %conv.i.us.21 = or i32 %32, 21 - %cmp4.i.us.21 = icmp slt i32 %conv.i.us.21, %4 - %33 = trunc i64 %mul.i.i to i32 - %conv.i.us.22 = or i32 %33, 22 - %cmp4.i.us.22 = icmp slt i32 %conv.i.us.22, %4 - %34 = trunc i64 %mul.i.i to i32 - %conv.i.us.23 = or i32 %34, 23 - %cmp4.i.us.23 = icmp slt i32 %conv.i.us.23, %4 - %35 = trunc i64 %mul.i.i to i32 - %conv.i.us.24 = or i32 %35, 24 - %cmp4.i.us.24 = icmp slt i32 %conv.i.us.24, %4 - %36 = trunc i64 %mul.i.i to i32 - %conv.i.us.25 = or i32 %36, 25 - %cmp4.i.us.25 = icmp slt i32 %conv.i.us.25, %4 - %37 = trunc i64 %mul.i.i to i32 - %conv.i.us.26 = or i32 %37, 26 - %cmp4.i.us.26 = icmp slt i32 %conv.i.us.26, %4 - %38 = trunc i64 %mul.i.i to i32 - %conv.i.us.27 = or i32 %38, 27 - %cmp4.i.us.27 = icmp slt i32 %conv.i.us.27, %4 - %39 = trunc i64 %mul.i.i to i32 - %conv.i.us.28 = or i32 %39, 28 - %cmp4.i.us.28 = icmp slt i32 %conv.i.us.28, %4 - %40 = trunc i64 %mul.i.i to i32 - %conv.i.us.29 = or i32 %40, 29 - %cmp4.i.us.29 = icmp slt i32 %conv.i.us.29, %4 - %41 = trunc i64 %mul.i.i to i32 - %conv.i.us.30 = or i32 %41, 30 - %cmp4.i.us.30 = icmp slt i32 %conv.i.us.30, %4 - %42 = trunc i64 %mul.i.i to i32 - %conv.i.us.31 = or i32 %42, 31 - %cmp4.i.us.31 = icmp slt i32 %conv.i.us.31, %4 - br label %pregion_for_entry.pregion_for_init.i - -pregion_for_entry.pregion_for_init.i.us.preheader: ; preds = %10 - %conv2.i.us = trunc i64 %mul3.i.i to i32 - %cmp.i.us = icmp slt i32 %conv2.i.us, %3 - %mul.i.us = mul nsw i32 %conv2.i.us, %4 - %mul8.i.us = mul nsw i32 %conv2.i.us, %5 - %43 = sext i32 %mul8.i.us to i64 - br i1 %cmp.i.us, label %pregion_for_entry.entry.i.us.us.preheader, label %pregion_for_end.i.us - -pregion_for_entry.entry.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.us.preheader - br label %pregion_for_entry.entry.i.us.us - -pregion_for_end.i.us.loopexit: ; preds = %if.end.i.us.us.153 - br label %pregion_for_end.i.us - -pregion_for_end.i.us: ; preds = %pregion_for_end.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.us.preheader - %44 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.1 = or i32 %44, 1 - %cmp.i.us.1 = icmp slt i32 %conv2.i.us.1, %3 - %mul.i.us.1 = mul nsw i32 %conv2.i.us.1, %4 - %mul8.i.us.1 = mul nsw i32 %conv2.i.us.1, %5 - %45 = sext i32 %mul8.i.us.1 to i64 - br i1 %cmp.i.us.1, label %pregion_for_entry.entry.i.us.us.1.preheader, label %pregion_for_end.i.us.1 - -pregion_for_entry.entry.i.us.us.1.preheader: ; preds = %pregion_for_end.i.us - br label %pregion_for_entry.entry.i.us.us.1 - -pregion_for_entry.entry.i.us.us: ; preds = %if.end.i.us.us.153, %pregion_for_entry.entry.i.us.us.preheader - %_local_id_x.0.us.us = phi i64 [ %202, %if.end.i.us.us.153 ], [ 0, %pregion_for_entry.entry.i.us.us.preheader ] - %add1.i.i.us.us = add nuw nsw i64 %_local_id_x.0.us.us, %mul.i.i - %conv.i.us.us = trunc i64 %add1.i.i.us.us to i32 - %cmp4.i.us.us = icmp slt i32 %conv.i.us.us, %4 - br i1 %cmp4.i.us.us, label %if.then.i.us.us, label %if.end.i.us.us - -if.then.i.us.us: ; preds = %pregion_for_entry.entry.i.us.us - %add.i.us.us = add nsw i32 %mul.i.us, %conv.i.us.us - %idxprom.i.us.us = sext i32 %add.i.us.us to i64 - %arrayidx.i.us.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us - store float 0.000000e+00, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us = shl i64 %add1.i.i.us.us, 32 - %46 = ashr exact i64 %sext.i.us.us, 32 - br label %for.body.i.us.us - -if.end.i.us.us.loopexit: ; preds = %for.body.i.us.us - br label %if.end.i.us.us - -if.end.i.us.us: ; preds = %if.end.i.us.us.loopexit, %pregion_for_entry.entry.i.us.us - %47 = or i64 %_local_id_x.0.us.us, 1 - %add1.i.i.us.us.138 = add nuw nsw i64 %47, %mul.i.i - %conv.i.us.us.139 = trunc i64 %add1.i.i.us.us.138 to i32 - %cmp4.i.us.us.140 = icmp slt i32 %conv.i.us.us.139, %4 - br i1 %cmp4.i.us.us.140, label %if.then.i.us.us.146, label %if.end.i.us.us.153 - -for.body.i.us.us: ; preds = %for.body.i.us.us, %if.then.i.us.us - %indvars.iv.next.i3.us.us = phi i64 [ %indvars.iv.next.i.us.us, %for.body.i.us.us ], [ 0, %if.then.i.us.us ] - %48 = phi float [ %54, %for.body.i.us.us ], [ 0.000000e+00, %if.then.i.us.us ] - %49 = add nsw i64 %indvars.iv.next.i3.us.us, %43 - %arrayidx11.i.us.us = getelementptr inbounds float, float* %0, i64 %49 - %50 = load float, float* %arrayidx11.i.us.us, align 4, !tbaa !12 - %51 = mul nsw i64 %indvars.iv.next.i3.us.us, %11 - %52 = add nsw i64 %51, %46 - %arrayidx15.i.us.us = getelementptr inbounds float, float* %1, i64 %52 - %53 = load float, float* %arrayidx15.i.us.us, align 4, !tbaa !12 - %54 = tail call float @llvm.fmuladd.f32(float %50, float %53, float %48) #2 - store float %54, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us = add nuw nsw i64 %indvars.iv.next.i3.us.us, 1 - %exitcond.not.i.us.us = icmp eq i64 %indvars.iv.next.i.us.us, %wide.trip.count.i - br i1 %exitcond.not.i.us.us, label %if.end.i.us.us.loopexit, label %for.body.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i: ; preds = %pregion_for_end.i, %pregion_for_entry.pregion_for_init.i.preheader - %_local_id_y.0 = phi i64 [ %55, %pregion_for_end.i ], [ 0, %pregion_for_entry.pregion_for_init.i.preheader ] - %add6.i.i = add nuw nsw i64 %_local_id_y.0, %mul3.i.i - %conv2.i = trunc i64 %add6.i.i to i32 - %cmp.i = icmp slt i32 %conv2.i, %3 - %mul.i = mul nsw i32 %conv2.i, %4 - br i1 %cmp.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_end.i - -pregion_for_entry.entry.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i - br i1 %cmp4.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us.preheader - %add.i.us = add nsw i32 %mul.i, %conv.i.us - %idxprom.i.us = sext i32 %add.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us - store float 0.000000e+00, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us - -if.end.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us.preheader - br i1 %cmp4.i.us.1, label %if.then.i.us.1, label %if.end.i.us.1 - -pregion_for_end.i: ; preds = %if.then.i.us.31, %if.end.i.us.30, %pregion_for_entry.pregion_for_init.i - %55 = add nuw nsw i64 %_local_id_y.0, 1 - %exitcond33.not = icmp eq i64 %55, 8 - br i1 %exitcond33.not, label %mm3_kernel1.exit.loopexit54, label %pregion_for_entry.pregion_for_init.i, !llvm.loop !21 - -mm3_kernel1.exit.loopexit: ; preds = %if.end.i.us.us.7.1 - br label %mm3_kernel1.exit - -mm3_kernel1.exit.loopexit54: ; preds = %pregion_for_end.i - br label %mm3_kernel1.exit - -mm3_kernel1.exit: ; preds = %pregion_for_end.i.us.6, %mm3_kernel1.exit.loopexit54, %mm3_kernel1.exit.loopexit - ret void - -pregion_for_entry.entry.i.us.us.1: ; preds = %if.end.i.us.us.1.1, %pregion_for_entry.entry.i.us.us.1.preheader - %_local_id_x.0.us.us.1 = phi i64 [ %193, %if.end.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.us.us.1.preheader ] - %add1.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.0.us.us.1, %mul.i.i - %conv.i.us.us.1 = trunc i64 %add1.i.i.us.us.1 to i32 - %cmp4.i.us.us.1 = icmp slt i32 %conv.i.us.us.1, %4 - br i1 %cmp4.i.us.us.1, label %if.then.i.us.us.1, label %if.end.i.us.us.1 - -if.then.i.us.us.1: ; preds = %pregion_for_entry.entry.i.us.us.1 - %add.i.us.us.1 = add nsw i32 %mul.i.us.1, %conv.i.us.us.1 - %idxprom.i.us.us.1 = sext i32 %add.i.us.us.1 to i64 - %arrayidx.i.us.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.1 = shl i64 %add1.i.i.us.us.1, 32 - %56 = ashr exact i64 %sext.i.us.us.1, 32 - br label %for.body.i.us.us.1 - -for.body.i.us.us.1: ; preds = %for.body.i.us.us.1, %if.then.i.us.us.1 - %indvars.iv.next.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.us.us.1, %for.body.i.us.us.1 ], [ 0, %if.then.i.us.us.1 ] - %57 = phi float [ %63, %for.body.i.us.us.1 ], [ 0.000000e+00, %if.then.i.us.us.1 ] - %58 = add nsw i64 %indvars.iv.next.i3.us.us.1, %45 - %arrayidx11.i.us.us.1 = getelementptr inbounds float, float* %0, i64 %58 - %59 = load float, float* %arrayidx11.i.us.us.1, align 4, !tbaa !12 - %60 = mul nsw i64 %indvars.iv.next.i3.us.us.1, %11 - %61 = add nsw i64 %60, %56 - %arrayidx15.i.us.us.1 = getelementptr inbounds float, float* %1, i64 %61 - %62 = load float, float* %arrayidx15.i.us.us.1, align 4, !tbaa !12 - %63 = tail call float @llvm.fmuladd.f32(float %59, float %62, float %57) #2 - store float %63, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1, 1 - %exitcond.not.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.us.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.1, label %if.end.i.us.us.1.loopexit, label %for.body.i.us.us.1, !llvm.loop !19 - -if.end.i.us.us.1.loopexit: ; preds = %for.body.i.us.us.1 - br label %if.end.i.us.us.1 - -if.end.i.us.us.1: ; preds = %if.end.i.us.us.1.loopexit, %pregion_for_entry.entry.i.us.us.1 - %64 = or i64 %_local_id_x.0.us.us.1, 1 - %add1.i.i.us.us.1.1 = add nuw nsw i64 %64, %mul.i.i - %conv.i.us.us.1.1 = trunc i64 %add1.i.i.us.us.1.1 to i32 - %cmp4.i.us.us.1.1 = icmp slt i32 %conv.i.us.us.1.1, %4 - br i1 %cmp4.i.us.us.1.1, label %if.then.i.us.us.1.1, label %if.end.i.us.us.1.1 - -pregion_for_end.i.us.1.loopexit: ; preds = %if.end.i.us.us.1.1 - br label %pregion_for_end.i.us.1 - -pregion_for_end.i.us.1: ; preds = %pregion_for_end.i.us.1.loopexit, %pregion_for_end.i.us - %65 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.2 = or i32 %65, 2 - %cmp.i.us.2 = icmp slt i32 %conv2.i.us.2, %3 - %mul.i.us.2 = mul nsw i32 %conv2.i.us.2, %4 - %mul8.i.us.2 = mul nsw i32 %conv2.i.us.2, %5 - %66 = sext i32 %mul8.i.us.2 to i64 - br i1 %cmp.i.us.2, label %pregion_for_entry.entry.i.us.us.2.preheader, label %pregion_for_end.i.us.2 - -pregion_for_entry.entry.i.us.us.2.preheader: ; preds = %pregion_for_end.i.us.1 - br label %pregion_for_entry.entry.i.us.us.2 - -pregion_for_entry.entry.i.us.us.2: ; preds = %if.end.i.us.us.2.1, %pregion_for_entry.entry.i.us.us.2.preheader - %_local_id_x.0.us.us.2 = phi i64 [ %184, %if.end.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.us.us.2.preheader ] - %add1.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.0.us.us.2, %mul.i.i - %conv.i.us.us.2 = trunc i64 %add1.i.i.us.us.2 to i32 - %cmp4.i.us.us.2 = icmp slt i32 %conv.i.us.us.2, %4 - br i1 %cmp4.i.us.us.2, label %if.then.i.us.us.2, label %if.end.i.us.us.2 - -if.then.i.us.us.2: ; preds = %pregion_for_entry.entry.i.us.us.2 - %add.i.us.us.2 = add nsw i32 %mul.i.us.2, %conv.i.us.us.2 - %idxprom.i.us.us.2 = sext i32 %add.i.us.us.2 to i64 - %arrayidx.i.us.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.2 = shl i64 %add1.i.i.us.us.2, 32 - %67 = ashr exact i64 %sext.i.us.us.2, 32 - br label %for.body.i.us.us.2 - -for.body.i.us.us.2: ; preds = %for.body.i.us.us.2, %if.then.i.us.us.2 - %indvars.iv.next.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.us.us.2, %for.body.i.us.us.2 ], [ 0, %if.then.i.us.us.2 ] - %68 = phi float [ %74, %for.body.i.us.us.2 ], [ 0.000000e+00, %if.then.i.us.us.2 ] - %69 = add nsw i64 %indvars.iv.next.i3.us.us.2, %66 - %arrayidx11.i.us.us.2 = getelementptr inbounds float, float* %0, i64 %69 - %70 = load float, float* %arrayidx11.i.us.us.2, align 4, !tbaa !12 - %71 = mul nsw i64 %indvars.iv.next.i3.us.us.2, %11 - %72 = add nsw i64 %71, %67 - %arrayidx15.i.us.us.2 = getelementptr inbounds float, float* %1, i64 %72 - %73 = load float, float* %arrayidx15.i.us.us.2, align 4, !tbaa !12 - %74 = tail call float @llvm.fmuladd.f32(float %70, float %73, float %68) #2 - store float %74, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2, 1 - %exitcond.not.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.us.us.2, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.2, label %if.end.i.us.us.2.loopexit, label %for.body.i.us.us.2, !llvm.loop !19 - -if.end.i.us.us.2.loopexit: ; preds = %for.body.i.us.us.2 - br label %if.end.i.us.us.2 - -if.end.i.us.us.2: ; preds = %if.end.i.us.us.2.loopexit, %pregion_for_entry.entry.i.us.us.2 - %75 = or i64 %_local_id_x.0.us.us.2, 1 - %add1.i.i.us.us.2.1 = add nuw nsw i64 %75, %mul.i.i - %conv.i.us.us.2.1 = trunc i64 %add1.i.i.us.us.2.1 to i32 - %cmp4.i.us.us.2.1 = icmp slt i32 %conv.i.us.us.2.1, %4 - br i1 %cmp4.i.us.us.2.1, label %if.then.i.us.us.2.1, label %if.end.i.us.us.2.1 - -pregion_for_end.i.us.2.loopexit: ; preds = %if.end.i.us.us.2.1 - br label %pregion_for_end.i.us.2 - -pregion_for_end.i.us.2: ; preds = %pregion_for_end.i.us.2.loopexit, %pregion_for_end.i.us.1 - %76 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.3 = or i32 %76, 3 - %cmp.i.us.3 = icmp slt i32 %conv2.i.us.3, %3 - %mul.i.us.3 = mul nsw i32 %conv2.i.us.3, %4 - %mul8.i.us.3 = mul nsw i32 %conv2.i.us.3, %5 - %77 = sext i32 %mul8.i.us.3 to i64 - br i1 %cmp.i.us.3, label %pregion_for_entry.entry.i.us.us.3.preheader, label %pregion_for_end.i.us.3 - -pregion_for_entry.entry.i.us.us.3.preheader: ; preds = %pregion_for_end.i.us.2 - br label %pregion_for_entry.entry.i.us.us.3 - -pregion_for_entry.entry.i.us.us.3: ; preds = %if.end.i.us.us.3.1, %pregion_for_entry.entry.i.us.us.3.preheader - %_local_id_x.0.us.us.3 = phi i64 [ %175, %if.end.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.us.us.3.preheader ] - %add1.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.0.us.us.3, %mul.i.i - %conv.i.us.us.3 = trunc i64 %add1.i.i.us.us.3 to i32 - %cmp4.i.us.us.3 = icmp slt i32 %conv.i.us.us.3, %4 - br i1 %cmp4.i.us.us.3, label %if.then.i.us.us.3, label %if.end.i.us.us.3 - -if.then.i.us.us.3: ; preds = %pregion_for_entry.entry.i.us.us.3 - %add.i.us.us.3 = add nsw i32 %mul.i.us.3, %conv.i.us.us.3 - %idxprom.i.us.us.3 = sext i32 %add.i.us.us.3 to i64 - %arrayidx.i.us.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.3 = shl i64 %add1.i.i.us.us.3, 32 - %78 = ashr exact i64 %sext.i.us.us.3, 32 - br label %for.body.i.us.us.3 - -for.body.i.us.us.3: ; preds = %for.body.i.us.us.3, %if.then.i.us.us.3 - %indvars.iv.next.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.us.us.3, %for.body.i.us.us.3 ], [ 0, %if.then.i.us.us.3 ] - %79 = phi float [ %85, %for.body.i.us.us.3 ], [ 0.000000e+00, %if.then.i.us.us.3 ] - %80 = add nsw i64 %indvars.iv.next.i3.us.us.3, %77 - %arrayidx11.i.us.us.3 = getelementptr inbounds float, float* %0, i64 %80 - %81 = load float, float* %arrayidx11.i.us.us.3, align 4, !tbaa !12 - %82 = mul nsw i64 %indvars.iv.next.i3.us.us.3, %11 - %83 = add nsw i64 %82, %78 - %arrayidx15.i.us.us.3 = getelementptr inbounds float, float* %1, i64 %83 - %84 = load float, float* %arrayidx15.i.us.us.3, align 4, !tbaa !12 - %85 = tail call float @llvm.fmuladd.f32(float %81, float %84, float %79) #2 - store float %85, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3, 1 - %exitcond.not.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.us.us.3, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.3, label %if.end.i.us.us.3.loopexit, label %for.body.i.us.us.3, !llvm.loop !19 - -if.end.i.us.us.3.loopexit: ; preds = %for.body.i.us.us.3 - br label %if.end.i.us.us.3 - -if.end.i.us.us.3: ; preds = %if.end.i.us.us.3.loopexit, %pregion_for_entry.entry.i.us.us.3 - %86 = or i64 %_local_id_x.0.us.us.3, 1 - %add1.i.i.us.us.3.1 = add nuw nsw i64 %86, %mul.i.i - %conv.i.us.us.3.1 = trunc i64 %add1.i.i.us.us.3.1 to i32 - %cmp4.i.us.us.3.1 = icmp slt i32 %conv.i.us.us.3.1, %4 - br i1 %cmp4.i.us.us.3.1, label %if.then.i.us.us.3.1, label %if.end.i.us.us.3.1 - -pregion_for_end.i.us.3.loopexit: ; preds = %if.end.i.us.us.3.1 - br label %pregion_for_end.i.us.3 - -pregion_for_end.i.us.3: ; preds = %pregion_for_end.i.us.3.loopexit, %pregion_for_end.i.us.2 - %87 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.4 = or i32 %87, 4 - %cmp.i.us.4 = icmp slt i32 %conv2.i.us.4, %3 - %mul.i.us.4 = mul nsw i32 %conv2.i.us.4, %4 - %mul8.i.us.4 = mul nsw i32 %conv2.i.us.4, %5 - %88 = sext i32 %mul8.i.us.4 to i64 - br i1 %cmp.i.us.4, label %pregion_for_entry.entry.i.us.us.4.preheader, label %pregion_for_end.i.us.4 - -pregion_for_entry.entry.i.us.us.4.preheader: ; preds = %pregion_for_end.i.us.3 - br label %pregion_for_entry.entry.i.us.us.4 - -pregion_for_entry.entry.i.us.us.4: ; preds = %if.end.i.us.us.4.1, %pregion_for_entry.entry.i.us.us.4.preheader - %_local_id_x.0.us.us.4 = phi i64 [ %166, %if.end.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.us.us.4.preheader ] - %add1.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.0.us.us.4, %mul.i.i - %conv.i.us.us.4 = trunc i64 %add1.i.i.us.us.4 to i32 - %cmp4.i.us.us.4 = icmp slt i32 %conv.i.us.us.4, %4 - br i1 %cmp4.i.us.us.4, label %if.then.i.us.us.4, label %if.end.i.us.us.4 - -if.then.i.us.us.4: ; preds = %pregion_for_entry.entry.i.us.us.4 - %add.i.us.us.4 = add nsw i32 %mul.i.us.4, %conv.i.us.us.4 - %idxprom.i.us.us.4 = sext i32 %add.i.us.us.4 to i64 - %arrayidx.i.us.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.4 = shl i64 %add1.i.i.us.us.4, 32 - %89 = ashr exact i64 %sext.i.us.us.4, 32 - br label %for.body.i.us.us.4 - -for.body.i.us.us.4: ; preds = %for.body.i.us.us.4, %if.then.i.us.us.4 - %indvars.iv.next.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.us.us.4, %for.body.i.us.us.4 ], [ 0, %if.then.i.us.us.4 ] - %90 = phi float [ %96, %for.body.i.us.us.4 ], [ 0.000000e+00, %if.then.i.us.us.4 ] - %91 = add nsw i64 %indvars.iv.next.i3.us.us.4, %88 - %arrayidx11.i.us.us.4 = getelementptr inbounds float, float* %0, i64 %91 - %92 = load float, float* %arrayidx11.i.us.us.4, align 4, !tbaa !12 - %93 = mul nsw i64 %indvars.iv.next.i3.us.us.4, %11 - %94 = add nsw i64 %93, %89 - %arrayidx15.i.us.us.4 = getelementptr inbounds float, float* %1, i64 %94 - %95 = load float, float* %arrayidx15.i.us.us.4, align 4, !tbaa !12 - %96 = tail call float @llvm.fmuladd.f32(float %92, float %95, float %90) #2 - store float %96, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4, 1 - %exitcond.not.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.us.us.4, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.4, label %if.end.i.us.us.4.loopexit, label %for.body.i.us.us.4, !llvm.loop !19 - -if.end.i.us.us.4.loopexit: ; preds = %for.body.i.us.us.4 - br label %if.end.i.us.us.4 - -if.end.i.us.us.4: ; preds = %if.end.i.us.us.4.loopexit, %pregion_for_entry.entry.i.us.us.4 - %97 = or i64 %_local_id_x.0.us.us.4, 1 - %add1.i.i.us.us.4.1 = add nuw nsw i64 %97, %mul.i.i - %conv.i.us.us.4.1 = trunc i64 %add1.i.i.us.us.4.1 to i32 - %cmp4.i.us.us.4.1 = icmp slt i32 %conv.i.us.us.4.1, %4 - br i1 %cmp4.i.us.us.4.1, label %if.then.i.us.us.4.1, label %if.end.i.us.us.4.1 - -pregion_for_end.i.us.4.loopexit: ; preds = %if.end.i.us.us.4.1 - br label %pregion_for_end.i.us.4 - -pregion_for_end.i.us.4: ; preds = %pregion_for_end.i.us.4.loopexit, %pregion_for_end.i.us.3 - %98 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.5 = or i32 %98, 5 - %cmp.i.us.5 = icmp slt i32 %conv2.i.us.5, %3 - %mul.i.us.5 = mul nsw i32 %conv2.i.us.5, %4 - %mul8.i.us.5 = mul nsw i32 %conv2.i.us.5, %5 - %99 = sext i32 %mul8.i.us.5 to i64 - br i1 %cmp.i.us.5, label %pregion_for_entry.entry.i.us.us.5.preheader, label %pregion_for_end.i.us.5 - -pregion_for_entry.entry.i.us.us.5.preheader: ; preds = %pregion_for_end.i.us.4 - br label %pregion_for_entry.entry.i.us.us.5 - -pregion_for_entry.entry.i.us.us.5: ; preds = %if.end.i.us.us.5.1, %pregion_for_entry.entry.i.us.us.5.preheader - %_local_id_x.0.us.us.5 = phi i64 [ %157, %if.end.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.us.us.5.preheader ] - %add1.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.0.us.us.5, %mul.i.i - %conv.i.us.us.5 = trunc i64 %add1.i.i.us.us.5 to i32 - %cmp4.i.us.us.5 = icmp slt i32 %conv.i.us.us.5, %4 - br i1 %cmp4.i.us.us.5, label %if.then.i.us.us.5, label %if.end.i.us.us.5 - -if.then.i.us.us.5: ; preds = %pregion_for_entry.entry.i.us.us.5 - %add.i.us.us.5 = add nsw i32 %mul.i.us.5, %conv.i.us.us.5 - %idxprom.i.us.us.5 = sext i32 %add.i.us.us.5 to i64 - %arrayidx.i.us.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.5 = shl i64 %add1.i.i.us.us.5, 32 - %100 = ashr exact i64 %sext.i.us.us.5, 32 - br label %for.body.i.us.us.5 - -for.body.i.us.us.5: ; preds = %for.body.i.us.us.5, %if.then.i.us.us.5 - %indvars.iv.next.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.us.us.5, %for.body.i.us.us.5 ], [ 0, %if.then.i.us.us.5 ] - %101 = phi float [ %107, %for.body.i.us.us.5 ], [ 0.000000e+00, %if.then.i.us.us.5 ] - %102 = add nsw i64 %indvars.iv.next.i3.us.us.5, %99 - %arrayidx11.i.us.us.5 = getelementptr inbounds float, float* %0, i64 %102 - %103 = load float, float* %arrayidx11.i.us.us.5, align 4, !tbaa !12 - %104 = mul nsw i64 %indvars.iv.next.i3.us.us.5, %11 - %105 = add nsw i64 %104, %100 - %arrayidx15.i.us.us.5 = getelementptr inbounds float, float* %1, i64 %105 - %106 = load float, float* %arrayidx15.i.us.us.5, align 4, !tbaa !12 - %107 = tail call float @llvm.fmuladd.f32(float %103, float %106, float %101) #2 - store float %107, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5, 1 - %exitcond.not.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.us.us.5, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.5, label %if.end.i.us.us.5.loopexit, label %for.body.i.us.us.5, !llvm.loop !19 - -if.end.i.us.us.5.loopexit: ; preds = %for.body.i.us.us.5 - br label %if.end.i.us.us.5 - -if.end.i.us.us.5: ; preds = %if.end.i.us.us.5.loopexit, %pregion_for_entry.entry.i.us.us.5 - %108 = or i64 %_local_id_x.0.us.us.5, 1 - %add1.i.i.us.us.5.1 = add nuw nsw i64 %108, %mul.i.i - %conv.i.us.us.5.1 = trunc i64 %add1.i.i.us.us.5.1 to i32 - %cmp4.i.us.us.5.1 = icmp slt i32 %conv.i.us.us.5.1, %4 - br i1 %cmp4.i.us.us.5.1, label %if.then.i.us.us.5.1, label %if.end.i.us.us.5.1 - -pregion_for_end.i.us.5.loopexit: ; preds = %if.end.i.us.us.5.1 - br label %pregion_for_end.i.us.5 - -pregion_for_end.i.us.5: ; preds = %pregion_for_end.i.us.5.loopexit, %pregion_for_end.i.us.4 - %109 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.6 = or i32 %109, 6 - %cmp.i.us.6 = icmp slt i32 %conv2.i.us.6, %3 - %mul.i.us.6 = mul nsw i32 %conv2.i.us.6, %4 - %mul8.i.us.6 = mul nsw i32 %conv2.i.us.6, %5 - %110 = sext i32 %mul8.i.us.6 to i64 - br i1 %cmp.i.us.6, label %pregion_for_entry.entry.i.us.us.6.preheader, label %pregion_for_end.i.us.6 - -pregion_for_entry.entry.i.us.us.6.preheader: ; preds = %pregion_for_end.i.us.5 - br label %pregion_for_entry.entry.i.us.us.6 - -pregion_for_entry.entry.i.us.us.6: ; preds = %if.end.i.us.us.6.1, %pregion_for_entry.entry.i.us.us.6.preheader - %_local_id_x.0.us.us.6 = phi i64 [ %148, %if.end.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.us.us.6.preheader ] - %add1.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.0.us.us.6, %mul.i.i - %conv.i.us.us.6 = trunc i64 %add1.i.i.us.us.6 to i32 - %cmp4.i.us.us.6 = icmp slt i32 %conv.i.us.us.6, %4 - br i1 %cmp4.i.us.us.6, label %if.then.i.us.us.6, label %if.end.i.us.us.6 - -if.then.i.us.us.6: ; preds = %pregion_for_entry.entry.i.us.us.6 - %add.i.us.us.6 = add nsw i32 %mul.i.us.6, %conv.i.us.us.6 - %idxprom.i.us.us.6 = sext i32 %add.i.us.us.6 to i64 - %arrayidx.i.us.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.6 = shl i64 %add1.i.i.us.us.6, 32 - %111 = ashr exact i64 %sext.i.us.us.6, 32 - br label %for.body.i.us.us.6 - -for.body.i.us.us.6: ; preds = %for.body.i.us.us.6, %if.then.i.us.us.6 - %indvars.iv.next.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.us.us.6, %for.body.i.us.us.6 ], [ 0, %if.then.i.us.us.6 ] - %112 = phi float [ %118, %for.body.i.us.us.6 ], [ 0.000000e+00, %if.then.i.us.us.6 ] - %113 = add nsw i64 %indvars.iv.next.i3.us.us.6, %110 - %arrayidx11.i.us.us.6 = getelementptr inbounds float, float* %0, i64 %113 - %114 = load float, float* %arrayidx11.i.us.us.6, align 4, !tbaa !12 - %115 = mul nsw i64 %indvars.iv.next.i3.us.us.6, %11 - %116 = add nsw i64 %115, %111 - %arrayidx15.i.us.us.6 = getelementptr inbounds float, float* %1, i64 %116 - %117 = load float, float* %arrayidx15.i.us.us.6, align 4, !tbaa !12 - %118 = tail call float @llvm.fmuladd.f32(float %114, float %117, float %112) #2 - store float %118, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6, 1 - %exitcond.not.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.us.us.6, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.6, label %if.end.i.us.us.6.loopexit, label %for.body.i.us.us.6, !llvm.loop !19 - -if.end.i.us.us.6.loopexit: ; preds = %for.body.i.us.us.6 - br label %if.end.i.us.us.6 - -if.end.i.us.us.6: ; preds = %if.end.i.us.us.6.loopexit, %pregion_for_entry.entry.i.us.us.6 - %119 = or i64 %_local_id_x.0.us.us.6, 1 - %add1.i.i.us.us.6.1 = add nuw nsw i64 %119, %mul.i.i - %conv.i.us.us.6.1 = trunc i64 %add1.i.i.us.us.6.1 to i32 - %cmp4.i.us.us.6.1 = icmp slt i32 %conv.i.us.us.6.1, %4 - br i1 %cmp4.i.us.us.6.1, label %if.then.i.us.us.6.1, label %if.end.i.us.us.6.1 - -pregion_for_end.i.us.6.loopexit: ; preds = %if.end.i.us.us.6.1 - br label %pregion_for_end.i.us.6 - -pregion_for_end.i.us.6: ; preds = %pregion_for_end.i.us.6.loopexit, %pregion_for_end.i.us.5 - %120 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.7 = or i32 %120, 7 - %cmp.i.us.7 = icmp slt i32 %conv2.i.us.7, %3 - %mul.i.us.7 = mul nsw i32 %conv2.i.us.7, %4 - %mul8.i.us.7 = mul nsw i32 %conv2.i.us.7, %5 - %121 = sext i32 %mul8.i.us.7 to i64 - br i1 %cmp.i.us.7, label %pregion_for_entry.entry.i.us.us.7.preheader, label %mm3_kernel1.exit - -pregion_for_entry.entry.i.us.us.7.preheader: ; preds = %pregion_for_end.i.us.6 - br label %pregion_for_entry.entry.i.us.us.7 - -pregion_for_entry.entry.i.us.us.7: ; preds = %if.end.i.us.us.7.1, %pregion_for_entry.entry.i.us.us.7.preheader - %_local_id_x.0.us.us.7 = phi i64 [ %139, %if.end.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.us.us.7.preheader ] - %add1.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.0.us.us.7, %mul.i.i - %conv.i.us.us.7 = trunc i64 %add1.i.i.us.us.7 to i32 - %cmp4.i.us.us.7 = icmp slt i32 %conv.i.us.us.7, %4 - br i1 %cmp4.i.us.us.7, label %if.then.i.us.us.7, label %if.end.i.us.us.7 - -if.then.i.us.us.7: ; preds = %pregion_for_entry.entry.i.us.us.7 - %add.i.us.us.7 = add nsw i32 %mul.i.us.7, %conv.i.us.us.7 - %idxprom.i.us.us.7 = sext i32 %add.i.us.us.7 to i64 - %arrayidx.i.us.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.7 = shl i64 %add1.i.i.us.us.7, 32 - %122 = ashr exact i64 %sext.i.us.us.7, 32 - br label %for.body.i.us.us.7 - -for.body.i.us.us.7: ; preds = %for.body.i.us.us.7, %if.then.i.us.us.7 - %indvars.iv.next.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.us.us.7, %for.body.i.us.us.7 ], [ 0, %if.then.i.us.us.7 ] - %123 = phi float [ %129, %for.body.i.us.us.7 ], [ 0.000000e+00, %if.then.i.us.us.7 ] - %124 = add nsw i64 %indvars.iv.next.i3.us.us.7, %121 - %arrayidx11.i.us.us.7 = getelementptr inbounds float, float* %0, i64 %124 - %125 = load float, float* %arrayidx11.i.us.us.7, align 4, !tbaa !12 - %126 = mul nsw i64 %indvars.iv.next.i3.us.us.7, %11 - %127 = add nsw i64 %126, %122 - %arrayidx15.i.us.us.7 = getelementptr inbounds float, float* %1, i64 %127 - %128 = load float, float* %arrayidx15.i.us.us.7, align 4, !tbaa !12 - %129 = tail call float @llvm.fmuladd.f32(float %125, float %128, float %123) #2 - store float %129, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7, 1 - %exitcond.not.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.us.us.7, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.7, label %if.end.i.us.us.7.loopexit, label %for.body.i.us.us.7, !llvm.loop !19 - -if.end.i.us.us.7.loopexit: ; preds = %for.body.i.us.us.7 - br label %if.end.i.us.us.7 - -if.end.i.us.us.7: ; preds = %if.end.i.us.us.7.loopexit, %pregion_for_entry.entry.i.us.us.7 - %130 = or i64 %_local_id_x.0.us.us.7, 1 - %add1.i.i.us.us.7.1 = add nuw nsw i64 %130, %mul.i.i - %conv.i.us.us.7.1 = trunc i64 %add1.i.i.us.us.7.1 to i32 - %cmp4.i.us.us.7.1 = icmp slt i32 %conv.i.us.us.7.1, %4 - br i1 %cmp4.i.us.us.7.1, label %if.then.i.us.us.7.1, label %if.end.i.us.us.7.1 - -if.then.i.us.1: ; preds = %if.end.i.us - %add.i.us.1 = add nsw i32 %mul.i, %conv.i.us.1 - %idxprom.i.us.1 = sext i32 %add.i.us.1 to i64 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1 - -if.end.i.us.1: ; preds = %if.then.i.us.1, %if.end.i.us - br i1 %cmp4.i.us.2, label %if.then.i.us.2, label %if.end.i.us.2 - -if.then.i.us.2: ; preds = %if.end.i.us.1 - %add.i.us.2 = add nsw i32 %mul.i, %conv.i.us.2 - %idxprom.i.us.2 = sext i32 %add.i.us.2 to i64 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2 - -if.end.i.us.2: ; preds = %if.then.i.us.2, %if.end.i.us.1 - br i1 %cmp4.i.us.3, label %if.then.i.us.3, label %if.end.i.us.3 - -if.then.i.us.3: ; preds = %if.end.i.us.2 - %add.i.us.3 = add nsw i32 %mul.i, %conv.i.us.3 - %idxprom.i.us.3 = sext i32 %add.i.us.3 to i64 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3 - -if.end.i.us.3: ; preds = %if.then.i.us.3, %if.end.i.us.2 - br i1 %cmp4.i.us.4, label %if.then.i.us.4, label %if.end.i.us.4 - -if.then.i.us.4: ; preds = %if.end.i.us.3 - %add.i.us.4 = add nsw i32 %mul.i, %conv.i.us.4 - %idxprom.i.us.4 = sext i32 %add.i.us.4 to i64 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4 - -if.end.i.us.4: ; preds = %if.then.i.us.4, %if.end.i.us.3 - br i1 %cmp4.i.us.5, label %if.then.i.us.5, label %if.end.i.us.5 - -if.then.i.us.5: ; preds = %if.end.i.us.4 - %add.i.us.5 = add nsw i32 %mul.i, %conv.i.us.5 - %idxprom.i.us.5 = sext i32 %add.i.us.5 to i64 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5 - -if.end.i.us.5: ; preds = %if.then.i.us.5, %if.end.i.us.4 - br i1 %cmp4.i.us.6, label %if.then.i.us.6, label %if.end.i.us.6 - -if.then.i.us.6: ; preds = %if.end.i.us.5 - %add.i.us.6 = add nsw i32 %mul.i, %conv.i.us.6 - %idxprom.i.us.6 = sext i32 %add.i.us.6 to i64 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6 - -if.end.i.us.6: ; preds = %if.then.i.us.6, %if.end.i.us.5 - br i1 %cmp4.i.us.7, label %if.then.i.us.7, label %if.end.i.us.7 - -if.then.i.us.7: ; preds = %if.end.i.us.6 - %add.i.us.7 = add nsw i32 %mul.i, %conv.i.us.7 - %idxprom.i.us.7 = sext i32 %add.i.us.7 to i64 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7 - -if.end.i.us.7: ; preds = %if.then.i.us.7, %if.end.i.us.6 - br i1 %cmp4.i.us.8, label %if.then.i.us.8, label %if.end.i.us.8 - -if.then.i.us.8: ; preds = %if.end.i.us.7 - %add.i.us.8 = add nsw i32 %mul.i, %conv.i.us.8 - %idxprom.i.us.8 = sext i32 %add.i.us.8 to i64 - %arrayidx.i.us.8 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.8 - -if.end.i.us.8: ; preds = %if.then.i.us.8, %if.end.i.us.7 - br i1 %cmp4.i.us.9, label %if.then.i.us.9, label %if.end.i.us.9 - -if.then.i.us.9: ; preds = %if.end.i.us.8 - %add.i.us.9 = add nsw i32 %mul.i, %conv.i.us.9 - %idxprom.i.us.9 = sext i32 %add.i.us.9 to i64 - %arrayidx.i.us.9 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.9 - -if.end.i.us.9: ; preds = %if.then.i.us.9, %if.end.i.us.8 - br i1 %cmp4.i.us.10, label %if.then.i.us.10, label %if.end.i.us.10 - -if.then.i.us.10: ; preds = %if.end.i.us.9 - %add.i.us.10 = add nsw i32 %mul.i, %conv.i.us.10 - %idxprom.i.us.10 = sext i32 %add.i.us.10 to i64 - %arrayidx.i.us.10 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.10 - -if.end.i.us.10: ; preds = %if.then.i.us.10, %if.end.i.us.9 - br i1 %cmp4.i.us.11, label %if.then.i.us.11, label %if.end.i.us.11 - -if.then.i.us.11: ; preds = %if.end.i.us.10 - %add.i.us.11 = add nsw i32 %mul.i, %conv.i.us.11 - %idxprom.i.us.11 = sext i32 %add.i.us.11 to i64 - %arrayidx.i.us.11 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.11 - -if.end.i.us.11: ; preds = %if.then.i.us.11, %if.end.i.us.10 - br i1 %cmp4.i.us.12, label %if.then.i.us.12, label %if.end.i.us.12 - -if.then.i.us.12: ; preds = %if.end.i.us.11 - %add.i.us.12 = add nsw i32 %mul.i, %conv.i.us.12 - %idxprom.i.us.12 = sext i32 %add.i.us.12 to i64 - %arrayidx.i.us.12 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.12 - -if.end.i.us.12: ; preds = %if.then.i.us.12, %if.end.i.us.11 - br i1 %cmp4.i.us.13, label %if.then.i.us.13, label %if.end.i.us.13 - -if.then.i.us.13: ; preds = %if.end.i.us.12 - %add.i.us.13 = add nsw i32 %mul.i, %conv.i.us.13 - %idxprom.i.us.13 = sext i32 %add.i.us.13 to i64 - %arrayidx.i.us.13 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.13 - -if.end.i.us.13: ; preds = %if.then.i.us.13, %if.end.i.us.12 - br i1 %cmp4.i.us.14, label %if.then.i.us.14, label %if.end.i.us.14 - -if.then.i.us.14: ; preds = %if.end.i.us.13 - %add.i.us.14 = add nsw i32 %mul.i, %conv.i.us.14 - %idxprom.i.us.14 = sext i32 %add.i.us.14 to i64 - %arrayidx.i.us.14 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.14 - -if.end.i.us.14: ; preds = %if.then.i.us.14, %if.end.i.us.13 - br i1 %cmp4.i.us.15, label %if.then.i.us.15, label %if.end.i.us.15 - -if.then.i.us.15: ; preds = %if.end.i.us.14 - %add.i.us.15 = add nsw i32 %mul.i, %conv.i.us.15 - %idxprom.i.us.15 = sext i32 %add.i.us.15 to i64 - %arrayidx.i.us.15 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.15 - -if.end.i.us.15: ; preds = %if.then.i.us.15, %if.end.i.us.14 - br i1 %cmp4.i.us.16, label %if.then.i.us.16, label %if.end.i.us.16 - -if.then.i.us.16: ; preds = %if.end.i.us.15 - %add.i.us.16 = add nsw i32 %mul.i, %conv.i.us.16 - %idxprom.i.us.16 = sext i32 %add.i.us.16 to i64 - %arrayidx.i.us.16 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.16 - -if.end.i.us.16: ; preds = %if.then.i.us.16, %if.end.i.us.15 - br i1 %cmp4.i.us.17, label %if.then.i.us.17, label %if.end.i.us.17 - -if.then.i.us.17: ; preds = %if.end.i.us.16 - %add.i.us.17 = add nsw i32 %mul.i, %conv.i.us.17 - %idxprom.i.us.17 = sext i32 %add.i.us.17 to i64 - %arrayidx.i.us.17 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.17 - -if.end.i.us.17: ; preds = %if.then.i.us.17, %if.end.i.us.16 - br i1 %cmp4.i.us.18, label %if.then.i.us.18, label %if.end.i.us.18 - -if.then.i.us.18: ; preds = %if.end.i.us.17 - %add.i.us.18 = add nsw i32 %mul.i, %conv.i.us.18 - %idxprom.i.us.18 = sext i32 %add.i.us.18 to i64 - %arrayidx.i.us.18 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.18 - -if.end.i.us.18: ; preds = %if.then.i.us.18, %if.end.i.us.17 - br i1 %cmp4.i.us.19, label %if.then.i.us.19, label %if.end.i.us.19 - -if.then.i.us.19: ; preds = %if.end.i.us.18 - %add.i.us.19 = add nsw i32 %mul.i, %conv.i.us.19 - %idxprom.i.us.19 = sext i32 %add.i.us.19 to i64 - %arrayidx.i.us.19 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.19 - -if.end.i.us.19: ; preds = %if.then.i.us.19, %if.end.i.us.18 - br i1 %cmp4.i.us.20, label %if.then.i.us.20, label %if.end.i.us.20 - -if.then.i.us.20: ; preds = %if.end.i.us.19 - %add.i.us.20 = add nsw i32 %mul.i, %conv.i.us.20 - %idxprom.i.us.20 = sext i32 %add.i.us.20 to i64 - %arrayidx.i.us.20 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.20 - -if.end.i.us.20: ; preds = %if.then.i.us.20, %if.end.i.us.19 - br i1 %cmp4.i.us.21, label %if.then.i.us.21, label %if.end.i.us.21 - -if.then.i.us.21: ; preds = %if.end.i.us.20 - %add.i.us.21 = add nsw i32 %mul.i, %conv.i.us.21 - %idxprom.i.us.21 = sext i32 %add.i.us.21 to i64 - %arrayidx.i.us.21 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.21 - -if.end.i.us.21: ; preds = %if.then.i.us.21, %if.end.i.us.20 - br i1 %cmp4.i.us.22, label %if.then.i.us.22, label %if.end.i.us.22 - -if.then.i.us.22: ; preds = %if.end.i.us.21 - %add.i.us.22 = add nsw i32 %mul.i, %conv.i.us.22 - %idxprom.i.us.22 = sext i32 %add.i.us.22 to i64 - %arrayidx.i.us.22 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.22 - -if.end.i.us.22: ; preds = %if.then.i.us.22, %if.end.i.us.21 - br i1 %cmp4.i.us.23, label %if.then.i.us.23, label %if.end.i.us.23 - -if.then.i.us.23: ; preds = %if.end.i.us.22 - %add.i.us.23 = add nsw i32 %mul.i, %conv.i.us.23 - %idxprom.i.us.23 = sext i32 %add.i.us.23 to i64 - %arrayidx.i.us.23 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.23 - -if.end.i.us.23: ; preds = %if.then.i.us.23, %if.end.i.us.22 - br i1 %cmp4.i.us.24, label %if.then.i.us.24, label %if.end.i.us.24 - -if.then.i.us.24: ; preds = %if.end.i.us.23 - %add.i.us.24 = add nsw i32 %mul.i, %conv.i.us.24 - %idxprom.i.us.24 = sext i32 %add.i.us.24 to i64 - %arrayidx.i.us.24 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.24 - -if.end.i.us.24: ; preds = %if.then.i.us.24, %if.end.i.us.23 - br i1 %cmp4.i.us.25, label %if.then.i.us.25, label %if.end.i.us.25 - -if.then.i.us.25: ; preds = %if.end.i.us.24 - %add.i.us.25 = add nsw i32 %mul.i, %conv.i.us.25 - %idxprom.i.us.25 = sext i32 %add.i.us.25 to i64 - %arrayidx.i.us.25 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.25 - -if.end.i.us.25: ; preds = %if.then.i.us.25, %if.end.i.us.24 - br i1 %cmp4.i.us.26, label %if.then.i.us.26, label %if.end.i.us.26 - -if.then.i.us.26: ; preds = %if.end.i.us.25 - %add.i.us.26 = add nsw i32 %mul.i, %conv.i.us.26 - %idxprom.i.us.26 = sext i32 %add.i.us.26 to i64 - %arrayidx.i.us.26 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.26 - -if.end.i.us.26: ; preds = %if.then.i.us.26, %if.end.i.us.25 - br i1 %cmp4.i.us.27, label %if.then.i.us.27, label %if.end.i.us.27 - -if.then.i.us.27: ; preds = %if.end.i.us.26 - %add.i.us.27 = add nsw i32 %mul.i, %conv.i.us.27 - %idxprom.i.us.27 = sext i32 %add.i.us.27 to i64 - %arrayidx.i.us.27 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.27 - -if.end.i.us.27: ; preds = %if.then.i.us.27, %if.end.i.us.26 - br i1 %cmp4.i.us.28, label %if.then.i.us.28, label %if.end.i.us.28 - -if.then.i.us.28: ; preds = %if.end.i.us.27 - %add.i.us.28 = add nsw i32 %mul.i, %conv.i.us.28 - %idxprom.i.us.28 = sext i32 %add.i.us.28 to i64 - %arrayidx.i.us.28 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.28 - -if.end.i.us.28: ; preds = %if.then.i.us.28, %if.end.i.us.27 - br i1 %cmp4.i.us.29, label %if.then.i.us.29, label %if.end.i.us.29 - -if.then.i.us.29: ; preds = %if.end.i.us.28 - %add.i.us.29 = add nsw i32 %mul.i, %conv.i.us.29 - %idxprom.i.us.29 = sext i32 %add.i.us.29 to i64 - %arrayidx.i.us.29 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.29 - -if.end.i.us.29: ; preds = %if.then.i.us.29, %if.end.i.us.28 - br i1 %cmp4.i.us.30, label %if.then.i.us.30, label %if.end.i.us.30 - -if.then.i.us.30: ; preds = %if.end.i.us.29 - %add.i.us.30 = add nsw i32 %mul.i, %conv.i.us.30 - %idxprom.i.us.30 = sext i32 %add.i.us.30 to i64 - %arrayidx.i.us.30 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.30 - -if.end.i.us.30: ; preds = %if.then.i.us.30, %if.end.i.us.29 - br i1 %cmp4.i.us.31, label %if.then.i.us.31, label %pregion_for_end.i - -if.then.i.us.31: ; preds = %if.end.i.us.30 - %add.i.us.31 = add nsw i32 %mul.i, %conv.i.us.31 - %idxprom.i.us.31 = sext i32 %add.i.us.31 to i64 - %arrayidx.i.us.31 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i - -if.then.i.us.us.7.1: ; preds = %if.end.i.us.us.7 - %add.i.us.us.7.1 = add nsw i32 %mul.i.us.7, %conv.i.us.us.7.1 - %idxprom.i.us.us.7.1 = sext i32 %add.i.us.us.7.1 to i64 - %arrayidx.i.us.us.7.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.7.1 = shl i64 %add1.i.i.us.us.7.1, 32 - %131 = ashr exact i64 %sext.i.us.us.7.1, 32 - br label %for.body.i.us.us.7.1 - -for.body.i.us.us.7.1: ; preds = %for.body.i.us.us.7.1, %if.then.i.us.us.7.1 - %indvars.iv.next.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.us.us.7.1, %for.body.i.us.us.7.1 ], [ 0, %if.then.i.us.us.7.1 ] - %132 = phi float [ %138, %for.body.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.us.us.7.1 ] - %133 = add nsw i64 %indvars.iv.next.i3.us.us.7.1, %121 - %arrayidx11.i.us.us.7.1 = getelementptr inbounds float, float* %0, i64 %133 - %134 = load float, float* %arrayidx11.i.us.us.7.1, align 4, !tbaa !12 - %135 = mul nsw i64 %indvars.iv.next.i3.us.us.7.1, %11 - %136 = add nsw i64 %135, %131 - %arrayidx15.i.us.us.7.1 = getelementptr inbounds float, float* %1, i64 %136 - %137 = load float, float* %arrayidx15.i.us.us.7.1, align 4, !tbaa !12 - %138 = tail call float @llvm.fmuladd.f32(float %134, float %137, float %132) #2 - store float %138, float* %arrayidx.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7.1, 1 - %exitcond.not.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.us.us.7.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.7.1, label %if.end.i.us.us.7.1.loopexit, label %for.body.i.us.us.7.1, !llvm.loop !19 - -if.end.i.us.us.7.1.loopexit: ; preds = %for.body.i.us.us.7.1 - br label %if.end.i.us.us.7.1 - -if.end.i.us.us.7.1: ; preds = %if.end.i.us.us.7.1.loopexit, %if.end.i.us.us.7 - %139 = add nuw nsw i64 %_local_id_x.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %139, 32 - br i1 %exitcond.7.not.1, label %mm3_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.us.us.7, !llvm.loop !23 - -if.then.i.us.us.6.1: ; preds = %if.end.i.us.us.6 - %add.i.us.us.6.1 = add nsw i32 %mul.i.us.6, %conv.i.us.us.6.1 - %idxprom.i.us.us.6.1 = sext i32 %add.i.us.us.6.1 to i64 - %arrayidx.i.us.us.6.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.6.1 = shl i64 %add1.i.i.us.us.6.1, 32 - %140 = ashr exact i64 %sext.i.us.us.6.1, 32 - br label %for.body.i.us.us.6.1 - -for.body.i.us.us.6.1: ; preds = %for.body.i.us.us.6.1, %if.then.i.us.us.6.1 - %indvars.iv.next.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.us.us.6.1, %for.body.i.us.us.6.1 ], [ 0, %if.then.i.us.us.6.1 ] - %141 = phi float [ %147, %for.body.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.us.us.6.1 ] - %142 = add nsw i64 %indvars.iv.next.i3.us.us.6.1, %110 - %arrayidx11.i.us.us.6.1 = getelementptr inbounds float, float* %0, i64 %142 - %143 = load float, float* %arrayidx11.i.us.us.6.1, align 4, !tbaa !12 - %144 = mul nsw i64 %indvars.iv.next.i3.us.us.6.1, %11 - %145 = add nsw i64 %144, %140 - %arrayidx15.i.us.us.6.1 = getelementptr inbounds float, float* %1, i64 %145 - %146 = load float, float* %arrayidx15.i.us.us.6.1, align 4, !tbaa !12 - %147 = tail call float @llvm.fmuladd.f32(float %143, float %146, float %141) #2 - store float %147, float* %arrayidx.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6.1, 1 - %exitcond.not.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.us.us.6.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.6.1, label %if.end.i.us.us.6.1.loopexit, label %for.body.i.us.us.6.1, !llvm.loop !19 - -if.end.i.us.us.6.1.loopexit: ; preds = %for.body.i.us.us.6.1 - br label %if.end.i.us.us.6.1 - -if.end.i.us.us.6.1: ; preds = %if.end.i.us.us.6.1.loopexit, %if.end.i.us.us.6 - %148 = add nuw nsw i64 %_local_id_x.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %148, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.us.6.loopexit, label %pregion_for_entry.entry.i.us.us.6, !llvm.loop !23 - -if.then.i.us.us.5.1: ; preds = %if.end.i.us.us.5 - %add.i.us.us.5.1 = add nsw i32 %mul.i.us.5, %conv.i.us.us.5.1 - %idxprom.i.us.us.5.1 = sext i32 %add.i.us.us.5.1 to i64 - %arrayidx.i.us.us.5.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.5.1 = shl i64 %add1.i.i.us.us.5.1, 32 - %149 = ashr exact i64 %sext.i.us.us.5.1, 32 - br label %for.body.i.us.us.5.1 - -for.body.i.us.us.5.1: ; preds = %for.body.i.us.us.5.1, %if.then.i.us.us.5.1 - %indvars.iv.next.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.us.us.5.1, %for.body.i.us.us.5.1 ], [ 0, %if.then.i.us.us.5.1 ] - %150 = phi float [ %156, %for.body.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.us.us.5.1 ] - %151 = add nsw i64 %indvars.iv.next.i3.us.us.5.1, %99 - %arrayidx11.i.us.us.5.1 = getelementptr inbounds float, float* %0, i64 %151 - %152 = load float, float* %arrayidx11.i.us.us.5.1, align 4, !tbaa !12 - %153 = mul nsw i64 %indvars.iv.next.i3.us.us.5.1, %11 - %154 = add nsw i64 %153, %149 - %arrayidx15.i.us.us.5.1 = getelementptr inbounds float, float* %1, i64 %154 - %155 = load float, float* %arrayidx15.i.us.us.5.1, align 4, !tbaa !12 - %156 = tail call float @llvm.fmuladd.f32(float %152, float %155, float %150) #2 - store float %156, float* %arrayidx.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5.1, 1 - %exitcond.not.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.us.us.5.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.5.1, label %if.end.i.us.us.5.1.loopexit, label %for.body.i.us.us.5.1, !llvm.loop !19 - -if.end.i.us.us.5.1.loopexit: ; preds = %for.body.i.us.us.5.1 - br label %if.end.i.us.us.5.1 - -if.end.i.us.us.5.1: ; preds = %if.end.i.us.us.5.1.loopexit, %if.end.i.us.us.5 - %157 = add nuw nsw i64 %_local_id_x.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %157, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.us.5.loopexit, label %pregion_for_entry.entry.i.us.us.5, !llvm.loop !23 - -if.then.i.us.us.4.1: ; preds = %if.end.i.us.us.4 - %add.i.us.us.4.1 = add nsw i32 %mul.i.us.4, %conv.i.us.us.4.1 - %idxprom.i.us.us.4.1 = sext i32 %add.i.us.us.4.1 to i64 - %arrayidx.i.us.us.4.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.4.1 = shl i64 %add1.i.i.us.us.4.1, 32 - %158 = ashr exact i64 %sext.i.us.us.4.1, 32 - br label %for.body.i.us.us.4.1 - -for.body.i.us.us.4.1: ; preds = %for.body.i.us.us.4.1, %if.then.i.us.us.4.1 - %indvars.iv.next.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.us.us.4.1, %for.body.i.us.us.4.1 ], [ 0, %if.then.i.us.us.4.1 ] - %159 = phi float [ %165, %for.body.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.us.us.4.1 ] - %160 = add nsw i64 %indvars.iv.next.i3.us.us.4.1, %88 - %arrayidx11.i.us.us.4.1 = getelementptr inbounds float, float* %0, i64 %160 - %161 = load float, float* %arrayidx11.i.us.us.4.1, align 4, !tbaa !12 - %162 = mul nsw i64 %indvars.iv.next.i3.us.us.4.1, %11 - %163 = add nsw i64 %162, %158 - %arrayidx15.i.us.us.4.1 = getelementptr inbounds float, float* %1, i64 %163 - %164 = load float, float* %arrayidx15.i.us.us.4.1, align 4, !tbaa !12 - %165 = tail call float @llvm.fmuladd.f32(float %161, float %164, float %159) #2 - store float %165, float* %arrayidx.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4.1, 1 - %exitcond.not.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.us.us.4.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.4.1, label %if.end.i.us.us.4.1.loopexit, label %for.body.i.us.us.4.1, !llvm.loop !19 - -if.end.i.us.us.4.1.loopexit: ; preds = %for.body.i.us.us.4.1 - br label %if.end.i.us.us.4.1 - -if.end.i.us.us.4.1: ; preds = %if.end.i.us.us.4.1.loopexit, %if.end.i.us.us.4 - %166 = add nuw nsw i64 %_local_id_x.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %166, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.us.4.loopexit, label %pregion_for_entry.entry.i.us.us.4, !llvm.loop !23 - -if.then.i.us.us.3.1: ; preds = %if.end.i.us.us.3 - %add.i.us.us.3.1 = add nsw i32 %mul.i.us.3, %conv.i.us.us.3.1 - %idxprom.i.us.us.3.1 = sext i32 %add.i.us.us.3.1 to i64 - %arrayidx.i.us.us.3.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.3.1 = shl i64 %add1.i.i.us.us.3.1, 32 - %167 = ashr exact i64 %sext.i.us.us.3.1, 32 - br label %for.body.i.us.us.3.1 - -for.body.i.us.us.3.1: ; preds = %for.body.i.us.us.3.1, %if.then.i.us.us.3.1 - %indvars.iv.next.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.us.us.3.1, %for.body.i.us.us.3.1 ], [ 0, %if.then.i.us.us.3.1 ] - %168 = phi float [ %174, %for.body.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.us.us.3.1 ] - %169 = add nsw i64 %indvars.iv.next.i3.us.us.3.1, %77 - %arrayidx11.i.us.us.3.1 = getelementptr inbounds float, float* %0, i64 %169 - %170 = load float, float* %arrayidx11.i.us.us.3.1, align 4, !tbaa !12 - %171 = mul nsw i64 %indvars.iv.next.i3.us.us.3.1, %11 - %172 = add nsw i64 %171, %167 - %arrayidx15.i.us.us.3.1 = getelementptr inbounds float, float* %1, i64 %172 - %173 = load float, float* %arrayidx15.i.us.us.3.1, align 4, !tbaa !12 - %174 = tail call float @llvm.fmuladd.f32(float %170, float %173, float %168) #2 - store float %174, float* %arrayidx.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3.1, 1 - %exitcond.not.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.us.us.3.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.3.1, label %if.end.i.us.us.3.1.loopexit, label %for.body.i.us.us.3.1, !llvm.loop !19 - -if.end.i.us.us.3.1.loopexit: ; preds = %for.body.i.us.us.3.1 - br label %if.end.i.us.us.3.1 - -if.end.i.us.us.3.1: ; preds = %if.end.i.us.us.3.1.loopexit, %if.end.i.us.us.3 - %175 = add nuw nsw i64 %_local_id_x.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %175, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.us.3.loopexit, label %pregion_for_entry.entry.i.us.us.3, !llvm.loop !23 - -if.then.i.us.us.2.1: ; preds = %if.end.i.us.us.2 - %add.i.us.us.2.1 = add nsw i32 %mul.i.us.2, %conv.i.us.us.2.1 - %idxprom.i.us.us.2.1 = sext i32 %add.i.us.us.2.1 to i64 - %arrayidx.i.us.us.2.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.2.1 = shl i64 %add1.i.i.us.us.2.1, 32 - %176 = ashr exact i64 %sext.i.us.us.2.1, 32 - br label %for.body.i.us.us.2.1 - -for.body.i.us.us.2.1: ; preds = %for.body.i.us.us.2.1, %if.then.i.us.us.2.1 - %indvars.iv.next.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.us.us.2.1, %for.body.i.us.us.2.1 ], [ 0, %if.then.i.us.us.2.1 ] - %177 = phi float [ %183, %for.body.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.us.us.2.1 ] - %178 = add nsw i64 %indvars.iv.next.i3.us.us.2.1, %66 - %arrayidx11.i.us.us.2.1 = getelementptr inbounds float, float* %0, i64 %178 - %179 = load float, float* %arrayidx11.i.us.us.2.1, align 4, !tbaa !12 - %180 = mul nsw i64 %indvars.iv.next.i3.us.us.2.1, %11 - %181 = add nsw i64 %180, %176 - %arrayidx15.i.us.us.2.1 = getelementptr inbounds float, float* %1, i64 %181 - %182 = load float, float* %arrayidx15.i.us.us.2.1, align 4, !tbaa !12 - %183 = tail call float @llvm.fmuladd.f32(float %179, float %182, float %177) #2 - store float %183, float* %arrayidx.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2.1, 1 - %exitcond.not.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.us.us.2.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.2.1, label %if.end.i.us.us.2.1.loopexit, label %for.body.i.us.us.2.1, !llvm.loop !19 - -if.end.i.us.us.2.1.loopexit: ; preds = %for.body.i.us.us.2.1 - br label %if.end.i.us.us.2.1 - -if.end.i.us.us.2.1: ; preds = %if.end.i.us.us.2.1.loopexit, %if.end.i.us.us.2 - %184 = add nuw nsw i64 %_local_id_x.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %184, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.us.2.loopexit, label %pregion_for_entry.entry.i.us.us.2, !llvm.loop !23 - -if.then.i.us.us.1.1: ; preds = %if.end.i.us.us.1 - %add.i.us.us.1.1 = add nsw i32 %mul.i.us.1, %conv.i.us.us.1.1 - %idxprom.i.us.us.1.1 = sext i32 %add.i.us.us.1.1 to i64 - %arrayidx.i.us.us.1.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.1.1 = shl i64 %add1.i.i.us.us.1.1, 32 - %185 = ashr exact i64 %sext.i.us.us.1.1, 32 - br label %for.body.i.us.us.1.1 - -for.body.i.us.us.1.1: ; preds = %for.body.i.us.us.1.1, %if.then.i.us.us.1.1 - %indvars.iv.next.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.us.us.1.1, %for.body.i.us.us.1.1 ], [ 0, %if.then.i.us.us.1.1 ] - %186 = phi float [ %192, %for.body.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.us.us.1.1 ] - %187 = add nsw i64 %indvars.iv.next.i3.us.us.1.1, %45 - %arrayidx11.i.us.us.1.1 = getelementptr inbounds float, float* %0, i64 %187 - %188 = load float, float* %arrayidx11.i.us.us.1.1, align 4, !tbaa !12 - %189 = mul nsw i64 %indvars.iv.next.i3.us.us.1.1, %11 - %190 = add nsw i64 %189, %185 - %arrayidx15.i.us.us.1.1 = getelementptr inbounds float, float* %1, i64 %190 - %191 = load float, float* %arrayidx15.i.us.us.1.1, align 4, !tbaa !12 - %192 = tail call float @llvm.fmuladd.f32(float %188, float %191, float %186) #2 - store float %192, float* %arrayidx.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1.1, 1 - %exitcond.not.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.us.us.1.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.1.1, label %if.end.i.us.us.1.1.loopexit, label %for.body.i.us.us.1.1, !llvm.loop !19 - -if.end.i.us.us.1.1.loopexit: ; preds = %for.body.i.us.us.1.1 - br label %if.end.i.us.us.1.1 - -if.end.i.us.us.1.1: ; preds = %if.end.i.us.us.1.1.loopexit, %if.end.i.us.us.1 - %193 = add nuw nsw i64 %_local_id_x.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %193, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.us.1.loopexit, label %pregion_for_entry.entry.i.us.us.1, !llvm.loop !23 - -if.then.i.us.us.146: ; preds = %if.end.i.us.us - %add.i.us.us.142 = add nsw i32 %mul.i.us, %conv.i.us.us.139 - %idxprom.i.us.us.143 = sext i32 %add.i.us.us.142 to i64 - %arrayidx.i.us.us.144 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.143 - store float 0.000000e+00, float* %arrayidx.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.145 = shl i64 %add1.i.i.us.us.138, 32 - %194 = ashr exact i64 %sext.i.us.us.145, 32 - br label %for.body.i.us.us.152 - -for.body.i.us.us.152: ; preds = %for.body.i.us.us.152, %if.then.i.us.us.146 - %indvars.iv.next.i3.us.us.147 = phi i64 [ %indvars.iv.next.i.us.us.150, %for.body.i.us.us.152 ], [ 0, %if.then.i.us.us.146 ] - %195 = phi float [ %201, %for.body.i.us.us.152 ], [ 0.000000e+00, %if.then.i.us.us.146 ] - %196 = add nsw i64 %indvars.iv.next.i3.us.us.147, %43 - %arrayidx11.i.us.us.148 = getelementptr inbounds float, float* %0, i64 %196 - %197 = load float, float* %arrayidx11.i.us.us.148, align 4, !tbaa !12 - %198 = mul nsw i64 %indvars.iv.next.i3.us.us.147, %11 - %199 = add nsw i64 %198, %194 - %arrayidx15.i.us.us.149 = getelementptr inbounds float, float* %1, i64 %199 - %200 = load float, float* %arrayidx15.i.us.us.149, align 4, !tbaa !12 - %201 = tail call float @llvm.fmuladd.f32(float %197, float %200, float %195) #2 - store float %201, float* %arrayidx.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.150 = add nuw nsw i64 %indvars.iv.next.i3.us.us.147, 1 - %exitcond.not.i.us.us.151 = icmp eq i64 %indvars.iv.next.i.us.us.150, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.151, label %if.end.i.us.us.153.loopexit, label %for.body.i.us.us.152, !llvm.loop !19 - -if.end.i.us.us.153.loopexit: ; preds = %for.body.i.us.us.152 - br label %if.end.i.us.us.153 - -if.end.i.us.us.153: ; preds = %if.end.i.us.us.153.loopexit, %if.end.i.us.us - %202 = add nuw nsw i64 %_local_id_x.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %202, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.us.loopexit, label %pregion_for_entry.entry.i.us.us, !llvm.loop !23 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mm3_kernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 5 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp638.i.i = icmp sgt i32 %28, 0 - %29 = sext i32 %24 to i64 - %wide.trip.count.i.i = zext i32 %28 to i64 - br i1 %cmp638.i.i, label %pregion_for_entry.pregion_for_init.i.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - %conv.i.i.us = trunc i64 %mul.i.i.i to i32 - %cmp4.i.i.us = icmp sgt i32 %24, %conv.i.i.us - %30 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.1 = or i32 %30, 1 - %cmp4.i.i.us.1 = icmp sgt i32 %24, %conv.i.i.us.1 - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.2 = or i32 %31, 2 - %cmp4.i.i.us.2 = icmp sgt i32 %24, %conv.i.i.us.2 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.3 = or i32 %32, 3 - %cmp4.i.i.us.3 = icmp sgt i32 %24, %conv.i.i.us.3 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.4 = or i32 %33, 4 - %cmp4.i.i.us.4 = icmp sgt i32 %24, %conv.i.i.us.4 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.5 = or i32 %34, 5 - %cmp4.i.i.us.5 = icmp sgt i32 %24, %conv.i.i.us.5 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.6 = or i32 %35, 6 - %cmp4.i.i.us.6 = icmp sgt i32 %24, %conv.i.i.us.6 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.7 = or i32 %36, 7 - %cmp4.i.i.us.7 = icmp sgt i32 %24, %conv.i.i.us.7 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.8 = or i32 %37, 8 - %cmp4.i.i.us.8 = icmp sgt i32 %24, %conv.i.i.us.8 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.9 = or i32 %38, 9 - %cmp4.i.i.us.9 = icmp sgt i32 %24, %conv.i.i.us.9 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.10 = or i32 %39, 10 - %cmp4.i.i.us.10 = icmp sgt i32 %24, %conv.i.i.us.10 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.11 = or i32 %40, 11 - %cmp4.i.i.us.11 = icmp sgt i32 %24, %conv.i.i.us.11 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.12 = or i32 %41, 12 - %cmp4.i.i.us.12 = icmp sgt i32 %24, %conv.i.i.us.12 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.13 = or i32 %42, 13 - %cmp4.i.i.us.13 = icmp sgt i32 %24, %conv.i.i.us.13 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.14 = or i32 %43, 14 - %cmp4.i.i.us.14 = icmp sgt i32 %24, %conv.i.i.us.14 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.15 = or i32 %44, 15 - %cmp4.i.i.us.15 = icmp sgt i32 %24, %conv.i.i.us.15 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.16 = or i32 %45, 16 - %cmp4.i.i.us.16 = icmp sgt i32 %24, %conv.i.i.us.16 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.17 = or i32 %46, 17 - %cmp4.i.i.us.17 = icmp sgt i32 %24, %conv.i.i.us.17 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.18 = or i32 %47, 18 - %cmp4.i.i.us.18 = icmp sgt i32 %24, %conv.i.i.us.18 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.19 = or i32 %48, 19 - %cmp4.i.i.us.19 = icmp sgt i32 %24, %conv.i.i.us.19 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.20 = or i32 %49, 20 - %cmp4.i.i.us.20 = icmp sgt i32 %24, %conv.i.i.us.20 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.21 = or i32 %50, 21 - %cmp4.i.i.us.21 = icmp sgt i32 %24, %conv.i.i.us.21 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.22 = or i32 %51, 22 - %cmp4.i.i.us.22 = icmp sgt i32 %24, %conv.i.i.us.22 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.23 = or i32 %52, 23 - %cmp4.i.i.us.23 = icmp sgt i32 %24, %conv.i.i.us.23 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.24 = or i32 %53, 24 - %cmp4.i.i.us.24 = icmp sgt i32 %24, %conv.i.i.us.24 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.25 = or i32 %54, 25 - %cmp4.i.i.us.25 = icmp sgt i32 %24, %conv.i.i.us.25 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.26 = or i32 %55, 26 - %cmp4.i.i.us.26 = icmp sgt i32 %24, %conv.i.i.us.26 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.27 = or i32 %56, 27 - %cmp4.i.i.us.27 = icmp sgt i32 %24, %conv.i.i.us.27 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.28 = or i32 %57, 28 - %cmp4.i.i.us.28 = icmp sgt i32 %24, %conv.i.i.us.28 - %58 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.29 = or i32 %58, 29 - %cmp4.i.i.us.29 = icmp sgt i32 %24, %conv.i.i.us.29 - %59 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.30 = or i32 %59, 30 - %cmp4.i.i.us.30 = icmp sgt i32 %24, %conv.i.i.us.30 - %60 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.31 = or i32 %60, 31 - %cmp4.i.i.us.31 = icmp sgt i32 %24, %conv.i.i.us.31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i.us.preheader: ; preds = %5 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %20, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %24, %conv2.i.i.us - %mul8.i.i.us = mul nsw i32 %28, %conv2.i.i.us - %61 = sext i32 %mul8.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us.preheader - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us.153 - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us.preheader - %62 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %62, 1 - %cmp.i.i.us.1 = icmp sgt i32 %20, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %24, %conv2.i.i.us.1 - %mul8.i.i.us.1 = mul nsw i32 %28, %conv2.i.i.us.1 - %63 = sext i32 %mul8.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us.153, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %220, %if.end.i.i.us.us.153 ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %24, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us - store float 0.000000e+00, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %64 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %65 = or i64 %_local_id_x.i.0.us.us, 1 - %add1.i.i.i.us.us.138 = add nuw nsw i64 %65, %mul.i.i.i - %conv.i.i.us.us.139 = trunc i64 %add1.i.i.i.us.us.138 to i32 - %cmp4.i.i.us.us.140 = icmp sgt i32 %24, %conv.i.i.us.us.139 - br i1 %cmp4.i.i.us.us.140, label %if.then.i.i.us.us.146, label %if.end.i.i.us.us.153 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %66 = phi float [ %72, %for.body.i.i.us.us ], [ 0.000000e+00, %if.then.i.i.us.us ] - %67 = add nsw i64 %indvars.iv.next.i.i3.us.us, %61 - %arrayidx11.i.i.us.us = getelementptr inbounds float, float* %8, i64 %67 - %68 = load float, float* %arrayidx11.i.i.us.us, align 4, !tbaa !12 - %69 = mul nsw i64 %indvars.iv.next.i.i3.us.us, %29 - %70 = add nsw i64 %69, %64 - %arrayidx15.i.i.us.us = getelementptr inbounds float, float* %12, i64 %70 - %71 = load float, float* %arrayidx15.i.i.us.us, align 4, !tbaa !12 - %72 = tail call float @llvm.fmuladd.f32(float %68, float %71, float %66) #2 - store float %72, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %pregion_for_entry.pregion_for_init.i.i.preheader - %_local_id_y.i.0 = phi i64 [ %73, %pregion_for_end.i.i ], [ 0, %pregion_for_entry.pregion_for_init.i.i.preheader ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %20, %conv2.i.i - %mul.i.i = mul nsw i32 %24, %conv2.i.i - br i1 %cmp.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us.preheader - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -pregion_for_end.i.i: ; preds = %if.then.i.i.us.31, %if.end.i.i.us.30, %pregion_for_entry.pregion_for_init.i.i - %73 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond33.not = icmp eq i64 %73, 8 - br i1 %exitcond33.not, label %_pocl_kernel_mm3_kernel1.exit.loopexit54, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_mm3_kernel1.exit.loopexit: ; preds = %if.end.i.i.us.us.7.1 - br label %_pocl_kernel_mm3_kernel1.exit - -_pocl_kernel_mm3_kernel1.exit.loopexit54: ; preds = %pregion_for_end.i.i - br label %_pocl_kernel_mm3_kernel1.exit - -_pocl_kernel_mm3_kernel1.exit: ; preds = %pregion_for_end.i.i.us.6, %_pocl_kernel_mm3_kernel1.exit.loopexit54, %_pocl_kernel_mm3_kernel1.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %211, %if.end.i.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %24, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %74 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %75 = phi float [ %81, %for.body.i.i.us.us.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1 ] - %76 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %63 - %arrayidx11.i.i.us.us.1 = getelementptr inbounds float, float* %8, i64 %76 - %77 = load float, float* %arrayidx11.i.i.us.us.1, align 4, !tbaa !12 - %78 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1, %29 - %79 = add nsw i64 %78, %74 - %arrayidx15.i.i.us.us.1 = getelementptr inbounds float, float* %12, i64 %79 - %80 = load float, float* %arrayidx15.i.i.us.us.1, align 4, !tbaa !12 - %81 = tail call float @llvm.fmuladd.f32(float %77, float %80, float %75) #2 - store float %81, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !19 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %82 = or i64 %_local_id_x.i.0.us.us.1, 1 - %add1.i.i.i.us.us.1.1 = add nuw nsw i64 %82, %mul.i.i.i - %conv.i.i.us.us.1.1 = trunc i64 %add1.i.i.i.us.us.1.1 to i32 - %cmp4.i.i.us.us.1.1 = icmp sgt i32 %24, %conv.i.i.us.us.1.1 - br i1 %cmp4.i.i.us.us.1.1, label %if.then.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %83 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %83, 2 - %cmp.i.i.us.2 = icmp sgt i32 %20, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %24, %conv2.i.i.us.2 - %mul8.i.i.us.2 = mul nsw i32 %28, %conv2.i.i.us.2 - %84 = sext i32 %mul8.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.1, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %202, %if.end.i.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %24, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %85 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %86 = phi float [ %92, %for.body.i.i.us.us.2 ], [ 0.000000e+00, %if.then.i.i.us.us.2 ] - %87 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %84 - %arrayidx11.i.i.us.us.2 = getelementptr inbounds float, float* %8, i64 %87 - %88 = load float, float* %arrayidx11.i.i.us.us.2, align 4, !tbaa !12 - %89 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2, %29 - %90 = add nsw i64 %89, %85 - %arrayidx15.i.i.us.us.2 = getelementptr inbounds float, float* %12, i64 %90 - %91 = load float, float* %arrayidx15.i.i.us.us.2, align 4, !tbaa !12 - %92 = tail call float @llvm.fmuladd.f32(float %88, float %91, float %86) #2 - store float %92, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !19 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %93 = or i64 %_local_id_x.i.0.us.us.2, 1 - %add1.i.i.i.us.us.2.1 = add nuw nsw i64 %93, %mul.i.i.i - %conv.i.i.us.us.2.1 = trunc i64 %add1.i.i.i.us.us.2.1 to i32 - %cmp4.i.i.us.us.2.1 = icmp sgt i32 %24, %conv.i.i.us.us.2.1 - br i1 %cmp4.i.i.us.us.2.1, label %if.then.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2.1 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %94 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %94, 3 - %cmp.i.i.us.3 = icmp sgt i32 %20, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %24, %conv2.i.i.us.3 - %mul8.i.i.us.3 = mul nsw i32 %28, %conv2.i.i.us.3 - %95 = sext i32 %mul8.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.1, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %193, %if.end.i.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %24, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %96 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %97 = phi float [ %103, %for.body.i.i.us.us.3 ], [ 0.000000e+00, %if.then.i.i.us.us.3 ] - %98 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %95 - %arrayidx11.i.i.us.us.3 = getelementptr inbounds float, float* %8, i64 %98 - %99 = load float, float* %arrayidx11.i.i.us.us.3, align 4, !tbaa !12 - %100 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3, %29 - %101 = add nsw i64 %100, %96 - %arrayidx15.i.i.us.us.3 = getelementptr inbounds float, float* %12, i64 %101 - %102 = load float, float* %arrayidx15.i.i.us.us.3, align 4, !tbaa !12 - %103 = tail call float @llvm.fmuladd.f32(float %99, float %102, float %97) #2 - store float %103, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !19 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %104 = or i64 %_local_id_x.i.0.us.us.3, 1 - %add1.i.i.i.us.us.3.1 = add nuw nsw i64 %104, %mul.i.i.i - %conv.i.i.us.us.3.1 = trunc i64 %add1.i.i.i.us.us.3.1 to i32 - %cmp4.i.i.us.us.3.1 = icmp sgt i32 %24, %conv.i.i.us.us.3.1 - br i1 %cmp4.i.i.us.us.3.1, label %if.then.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3.1 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %105 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %105, 4 - %cmp.i.i.us.4 = icmp sgt i32 %20, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %24, %conv2.i.i.us.4 - %mul8.i.i.us.4 = mul nsw i32 %28, %conv2.i.i.us.4 - %106 = sext i32 %mul8.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.1, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %184, %if.end.i.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %24, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %107 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %108 = phi float [ %114, %for.body.i.i.us.us.4 ], [ 0.000000e+00, %if.then.i.i.us.us.4 ] - %109 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %106 - %arrayidx11.i.i.us.us.4 = getelementptr inbounds float, float* %8, i64 %109 - %110 = load float, float* %arrayidx11.i.i.us.us.4, align 4, !tbaa !12 - %111 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4, %29 - %112 = add nsw i64 %111, %107 - %arrayidx15.i.i.us.us.4 = getelementptr inbounds float, float* %12, i64 %112 - %113 = load float, float* %arrayidx15.i.i.us.us.4, align 4, !tbaa !12 - %114 = tail call float @llvm.fmuladd.f32(float %110, float %113, float %108) #2 - store float %114, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !19 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %115 = or i64 %_local_id_x.i.0.us.us.4, 1 - %add1.i.i.i.us.us.4.1 = add nuw nsw i64 %115, %mul.i.i.i - %conv.i.i.us.us.4.1 = trunc i64 %add1.i.i.i.us.us.4.1 to i32 - %cmp4.i.i.us.us.4.1 = icmp sgt i32 %24, %conv.i.i.us.us.4.1 - br i1 %cmp4.i.i.us.us.4.1, label %if.then.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4.1 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %116 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %116, 5 - %cmp.i.i.us.5 = icmp sgt i32 %20, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %24, %conv2.i.i.us.5 - %mul8.i.i.us.5 = mul nsw i32 %28, %conv2.i.i.us.5 - %117 = sext i32 %mul8.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.1, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %175, %if.end.i.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %24, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %118 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %119 = phi float [ %125, %for.body.i.i.us.us.5 ], [ 0.000000e+00, %if.then.i.i.us.us.5 ] - %120 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %117 - %arrayidx11.i.i.us.us.5 = getelementptr inbounds float, float* %8, i64 %120 - %121 = load float, float* %arrayidx11.i.i.us.us.5, align 4, !tbaa !12 - %122 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5, %29 - %123 = add nsw i64 %122, %118 - %arrayidx15.i.i.us.us.5 = getelementptr inbounds float, float* %12, i64 %123 - %124 = load float, float* %arrayidx15.i.i.us.us.5, align 4, !tbaa !12 - %125 = tail call float @llvm.fmuladd.f32(float %121, float %124, float %119) #2 - store float %125, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !19 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %126 = or i64 %_local_id_x.i.0.us.us.5, 1 - %add1.i.i.i.us.us.5.1 = add nuw nsw i64 %126, %mul.i.i.i - %conv.i.i.us.us.5.1 = trunc i64 %add1.i.i.i.us.us.5.1 to i32 - %cmp4.i.i.us.us.5.1 = icmp sgt i32 %24, %conv.i.i.us.us.5.1 - br i1 %cmp4.i.i.us.us.5.1, label %if.then.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5.1 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %127 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %127, 6 - %cmp.i.i.us.6 = icmp sgt i32 %20, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %24, %conv2.i.i.us.6 - %mul8.i.i.us.6 = mul nsw i32 %28, %conv2.i.i.us.6 - %128 = sext i32 %mul8.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.1, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %166, %if.end.i.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %24, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %129 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %130 = phi float [ %136, %for.body.i.i.us.us.6 ], [ 0.000000e+00, %if.then.i.i.us.us.6 ] - %131 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %128 - %arrayidx11.i.i.us.us.6 = getelementptr inbounds float, float* %8, i64 %131 - %132 = load float, float* %arrayidx11.i.i.us.us.6, align 4, !tbaa !12 - %133 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6, %29 - %134 = add nsw i64 %133, %129 - %arrayidx15.i.i.us.us.6 = getelementptr inbounds float, float* %12, i64 %134 - %135 = load float, float* %arrayidx15.i.i.us.us.6, align 4, !tbaa !12 - %136 = tail call float @llvm.fmuladd.f32(float %132, float %135, float %130) #2 - store float %136, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !19 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %137 = or i64 %_local_id_x.i.0.us.us.6, 1 - %add1.i.i.i.us.us.6.1 = add nuw nsw i64 %137, %mul.i.i.i - %conv.i.i.us.us.6.1 = trunc i64 %add1.i.i.i.us.us.6.1 to i32 - %cmp4.i.i.us.us.6.1 = icmp sgt i32 %24, %conv.i.i.us.us.6.1 - br i1 %cmp4.i.i.us.us.6.1, label %if.then.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6.1 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %138 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %138, 7 - %cmp.i.i.us.7 = icmp sgt i32 %20, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %24, %conv2.i.i.us.7 - %mul8.i.i.us.7 = mul nsw i32 %28, %conv2.i.i.us.7 - %139 = sext i32 %mul8.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_mm3_kernel1.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.1, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %157, %if.end.i.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %24, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %140 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %141 = phi float [ %147, %for.body.i.i.us.us.7 ], [ 0.000000e+00, %if.then.i.i.us.us.7 ] - %142 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %139 - %arrayidx11.i.i.us.us.7 = getelementptr inbounds float, float* %8, i64 %142 - %143 = load float, float* %arrayidx11.i.i.us.us.7, align 4, !tbaa !12 - %144 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7, %29 - %145 = add nsw i64 %144, %140 - %arrayidx15.i.i.us.us.7 = getelementptr inbounds float, float* %12, i64 %145 - %146 = load float, float* %arrayidx15.i.i.us.us.7, align 4, !tbaa !12 - %147 = tail call float @llvm.fmuladd.f32(float %143, float %146, float %141) #2 - store float %147, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !19 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %148 = or i64 %_local_id_x.i.0.us.us.7, 1 - %add1.i.i.i.us.us.7.1 = add nuw nsw i64 %148, %mul.i.i.i - %conv.i.i.us.us.7.1 = trunc i64 %add1.i.i.i.us.us.7.1 to i32 - %cmp4.i.i.us.us.7.1 = icmp sgt i32 %24, %conv.i.i.us.us.7.1 - br i1 %cmp4.i.i.us.us.7.1, label %if.then.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1 - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %add.i.i.us.1 = add nsw i32 %mul.i.i, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %if.end.i.i.us - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %if.end.i.i.us.1 - %add.i.i.us.2 = add nsw i32 %mul.i.i, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %if.end.i.i.us.1 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %if.end.i.i.us.2 - %add.i.i.us.3 = add nsw i32 %mul.i.i, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %if.end.i.i.us.2 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %if.end.i.i.us.3 - %add.i.i.us.4 = add nsw i32 %mul.i.i, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %if.end.i.i.us.3 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %if.end.i.i.us.4 - %add.i.i.us.5 = add nsw i32 %mul.i.i, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %if.end.i.i.us.4 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %if.end.i.i.us.5 - %add.i.i.us.6 = add nsw i32 %mul.i.i, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %if.end.i.i.us.5 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %if.end.i.i.us.6 - %add.i.i.us.7 = add nsw i32 %mul.i.i, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %if.end.i.i.us.6 - br i1 %cmp4.i.i.us.8, label %if.then.i.i.us.8, label %if.end.i.i.us.8 - -if.then.i.i.us.8: ; preds = %if.end.i.i.us.7 - %add.i.i.us.8 = add nsw i32 %mul.i.i, %conv.i.i.us.8 - %idxprom.i.i.us.8 = sext i32 %add.i.i.us.8 to i64 - %arrayidx.i.i.us.8 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.8 - -if.end.i.i.us.8: ; preds = %if.then.i.i.us.8, %if.end.i.i.us.7 - br i1 %cmp4.i.i.us.9, label %if.then.i.i.us.9, label %if.end.i.i.us.9 - -if.then.i.i.us.9: ; preds = %if.end.i.i.us.8 - %add.i.i.us.9 = add nsw i32 %mul.i.i, %conv.i.i.us.9 - %idxprom.i.i.us.9 = sext i32 %add.i.i.us.9 to i64 - %arrayidx.i.i.us.9 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.9 - -if.end.i.i.us.9: ; preds = %if.then.i.i.us.9, %if.end.i.i.us.8 - br i1 %cmp4.i.i.us.10, label %if.then.i.i.us.10, label %if.end.i.i.us.10 - -if.then.i.i.us.10: ; preds = %if.end.i.i.us.9 - %add.i.i.us.10 = add nsw i32 %mul.i.i, %conv.i.i.us.10 - %idxprom.i.i.us.10 = sext i32 %add.i.i.us.10 to i64 - %arrayidx.i.i.us.10 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.10 - -if.end.i.i.us.10: ; preds = %if.then.i.i.us.10, %if.end.i.i.us.9 - br i1 %cmp4.i.i.us.11, label %if.then.i.i.us.11, label %if.end.i.i.us.11 - -if.then.i.i.us.11: ; preds = %if.end.i.i.us.10 - %add.i.i.us.11 = add nsw i32 %mul.i.i, %conv.i.i.us.11 - %idxprom.i.i.us.11 = sext i32 %add.i.i.us.11 to i64 - %arrayidx.i.i.us.11 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.11 - -if.end.i.i.us.11: ; preds = %if.then.i.i.us.11, %if.end.i.i.us.10 - br i1 %cmp4.i.i.us.12, label %if.then.i.i.us.12, label %if.end.i.i.us.12 - -if.then.i.i.us.12: ; preds = %if.end.i.i.us.11 - %add.i.i.us.12 = add nsw i32 %mul.i.i, %conv.i.i.us.12 - %idxprom.i.i.us.12 = sext i32 %add.i.i.us.12 to i64 - %arrayidx.i.i.us.12 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.12 - -if.end.i.i.us.12: ; preds = %if.then.i.i.us.12, %if.end.i.i.us.11 - br i1 %cmp4.i.i.us.13, label %if.then.i.i.us.13, label %if.end.i.i.us.13 - -if.then.i.i.us.13: ; preds = %if.end.i.i.us.12 - %add.i.i.us.13 = add nsw i32 %mul.i.i, %conv.i.i.us.13 - %idxprom.i.i.us.13 = sext i32 %add.i.i.us.13 to i64 - %arrayidx.i.i.us.13 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.13 - -if.end.i.i.us.13: ; preds = %if.then.i.i.us.13, %if.end.i.i.us.12 - br i1 %cmp4.i.i.us.14, label %if.then.i.i.us.14, label %if.end.i.i.us.14 - -if.then.i.i.us.14: ; preds = %if.end.i.i.us.13 - %add.i.i.us.14 = add nsw i32 %mul.i.i, %conv.i.i.us.14 - %idxprom.i.i.us.14 = sext i32 %add.i.i.us.14 to i64 - %arrayidx.i.i.us.14 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.14 - -if.end.i.i.us.14: ; preds = %if.then.i.i.us.14, %if.end.i.i.us.13 - br i1 %cmp4.i.i.us.15, label %if.then.i.i.us.15, label %if.end.i.i.us.15 - -if.then.i.i.us.15: ; preds = %if.end.i.i.us.14 - %add.i.i.us.15 = add nsw i32 %mul.i.i, %conv.i.i.us.15 - %idxprom.i.i.us.15 = sext i32 %add.i.i.us.15 to i64 - %arrayidx.i.i.us.15 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.15 - -if.end.i.i.us.15: ; preds = %if.then.i.i.us.15, %if.end.i.i.us.14 - br i1 %cmp4.i.i.us.16, label %if.then.i.i.us.16, label %if.end.i.i.us.16 - -if.then.i.i.us.16: ; preds = %if.end.i.i.us.15 - %add.i.i.us.16 = add nsw i32 %mul.i.i, %conv.i.i.us.16 - %idxprom.i.i.us.16 = sext i32 %add.i.i.us.16 to i64 - %arrayidx.i.i.us.16 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.16 - -if.end.i.i.us.16: ; preds = %if.then.i.i.us.16, %if.end.i.i.us.15 - br i1 %cmp4.i.i.us.17, label %if.then.i.i.us.17, label %if.end.i.i.us.17 - -if.then.i.i.us.17: ; preds = %if.end.i.i.us.16 - %add.i.i.us.17 = add nsw i32 %mul.i.i, %conv.i.i.us.17 - %idxprom.i.i.us.17 = sext i32 %add.i.i.us.17 to i64 - %arrayidx.i.i.us.17 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.17 - -if.end.i.i.us.17: ; preds = %if.then.i.i.us.17, %if.end.i.i.us.16 - br i1 %cmp4.i.i.us.18, label %if.then.i.i.us.18, label %if.end.i.i.us.18 - -if.then.i.i.us.18: ; preds = %if.end.i.i.us.17 - %add.i.i.us.18 = add nsw i32 %mul.i.i, %conv.i.i.us.18 - %idxprom.i.i.us.18 = sext i32 %add.i.i.us.18 to i64 - %arrayidx.i.i.us.18 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.18 - -if.end.i.i.us.18: ; preds = %if.then.i.i.us.18, %if.end.i.i.us.17 - br i1 %cmp4.i.i.us.19, label %if.then.i.i.us.19, label %if.end.i.i.us.19 - -if.then.i.i.us.19: ; preds = %if.end.i.i.us.18 - %add.i.i.us.19 = add nsw i32 %mul.i.i, %conv.i.i.us.19 - %idxprom.i.i.us.19 = sext i32 %add.i.i.us.19 to i64 - %arrayidx.i.i.us.19 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.19 - -if.end.i.i.us.19: ; preds = %if.then.i.i.us.19, %if.end.i.i.us.18 - br i1 %cmp4.i.i.us.20, label %if.then.i.i.us.20, label %if.end.i.i.us.20 - -if.then.i.i.us.20: ; preds = %if.end.i.i.us.19 - %add.i.i.us.20 = add nsw i32 %mul.i.i, %conv.i.i.us.20 - %idxprom.i.i.us.20 = sext i32 %add.i.i.us.20 to i64 - %arrayidx.i.i.us.20 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.20 - -if.end.i.i.us.20: ; preds = %if.then.i.i.us.20, %if.end.i.i.us.19 - br i1 %cmp4.i.i.us.21, label %if.then.i.i.us.21, label %if.end.i.i.us.21 - -if.then.i.i.us.21: ; preds = %if.end.i.i.us.20 - %add.i.i.us.21 = add nsw i32 %mul.i.i, %conv.i.i.us.21 - %idxprom.i.i.us.21 = sext i32 %add.i.i.us.21 to i64 - %arrayidx.i.i.us.21 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.21 - -if.end.i.i.us.21: ; preds = %if.then.i.i.us.21, %if.end.i.i.us.20 - br i1 %cmp4.i.i.us.22, label %if.then.i.i.us.22, label %if.end.i.i.us.22 - -if.then.i.i.us.22: ; preds = %if.end.i.i.us.21 - %add.i.i.us.22 = add nsw i32 %mul.i.i, %conv.i.i.us.22 - %idxprom.i.i.us.22 = sext i32 %add.i.i.us.22 to i64 - %arrayidx.i.i.us.22 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.22 - -if.end.i.i.us.22: ; preds = %if.then.i.i.us.22, %if.end.i.i.us.21 - br i1 %cmp4.i.i.us.23, label %if.then.i.i.us.23, label %if.end.i.i.us.23 - -if.then.i.i.us.23: ; preds = %if.end.i.i.us.22 - %add.i.i.us.23 = add nsw i32 %mul.i.i, %conv.i.i.us.23 - %idxprom.i.i.us.23 = sext i32 %add.i.i.us.23 to i64 - %arrayidx.i.i.us.23 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.23 - -if.end.i.i.us.23: ; preds = %if.then.i.i.us.23, %if.end.i.i.us.22 - br i1 %cmp4.i.i.us.24, label %if.then.i.i.us.24, label %if.end.i.i.us.24 - -if.then.i.i.us.24: ; preds = %if.end.i.i.us.23 - %add.i.i.us.24 = add nsw i32 %mul.i.i, %conv.i.i.us.24 - %idxprom.i.i.us.24 = sext i32 %add.i.i.us.24 to i64 - %arrayidx.i.i.us.24 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.24 - -if.end.i.i.us.24: ; preds = %if.then.i.i.us.24, %if.end.i.i.us.23 - br i1 %cmp4.i.i.us.25, label %if.then.i.i.us.25, label %if.end.i.i.us.25 - -if.then.i.i.us.25: ; preds = %if.end.i.i.us.24 - %add.i.i.us.25 = add nsw i32 %mul.i.i, %conv.i.i.us.25 - %idxprom.i.i.us.25 = sext i32 %add.i.i.us.25 to i64 - %arrayidx.i.i.us.25 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.25 - -if.end.i.i.us.25: ; preds = %if.then.i.i.us.25, %if.end.i.i.us.24 - br i1 %cmp4.i.i.us.26, label %if.then.i.i.us.26, label %if.end.i.i.us.26 - -if.then.i.i.us.26: ; preds = %if.end.i.i.us.25 - %add.i.i.us.26 = add nsw i32 %mul.i.i, %conv.i.i.us.26 - %idxprom.i.i.us.26 = sext i32 %add.i.i.us.26 to i64 - %arrayidx.i.i.us.26 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.26 - -if.end.i.i.us.26: ; preds = %if.then.i.i.us.26, %if.end.i.i.us.25 - br i1 %cmp4.i.i.us.27, label %if.then.i.i.us.27, label %if.end.i.i.us.27 - -if.then.i.i.us.27: ; preds = %if.end.i.i.us.26 - %add.i.i.us.27 = add nsw i32 %mul.i.i, %conv.i.i.us.27 - %idxprom.i.i.us.27 = sext i32 %add.i.i.us.27 to i64 - %arrayidx.i.i.us.27 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.27 - -if.end.i.i.us.27: ; preds = %if.then.i.i.us.27, %if.end.i.i.us.26 - br i1 %cmp4.i.i.us.28, label %if.then.i.i.us.28, label %if.end.i.i.us.28 - -if.then.i.i.us.28: ; preds = %if.end.i.i.us.27 - %add.i.i.us.28 = add nsw i32 %mul.i.i, %conv.i.i.us.28 - %idxprom.i.i.us.28 = sext i32 %add.i.i.us.28 to i64 - %arrayidx.i.i.us.28 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.28 - -if.end.i.i.us.28: ; preds = %if.then.i.i.us.28, %if.end.i.i.us.27 - br i1 %cmp4.i.i.us.29, label %if.then.i.i.us.29, label %if.end.i.i.us.29 - -if.then.i.i.us.29: ; preds = %if.end.i.i.us.28 - %add.i.i.us.29 = add nsw i32 %mul.i.i, %conv.i.i.us.29 - %idxprom.i.i.us.29 = sext i32 %add.i.i.us.29 to i64 - %arrayidx.i.i.us.29 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.29 - -if.end.i.i.us.29: ; preds = %if.then.i.i.us.29, %if.end.i.i.us.28 - br i1 %cmp4.i.i.us.30, label %if.then.i.i.us.30, label %if.end.i.i.us.30 - -if.then.i.i.us.30: ; preds = %if.end.i.i.us.29 - %add.i.i.us.30 = add nsw i32 %mul.i.i, %conv.i.i.us.30 - %idxprom.i.i.us.30 = sext i32 %add.i.i.us.30 to i64 - %arrayidx.i.i.us.30 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.30 - -if.end.i.i.us.30: ; preds = %if.then.i.i.us.30, %if.end.i.i.us.29 - br i1 %cmp4.i.i.us.31, label %if.then.i.i.us.31, label %pregion_for_end.i.i - -if.then.i.i.us.31: ; preds = %if.end.i.i.us.30 - %add.i.i.us.31 = add nsw i32 %mul.i.i, %conv.i.i.us.31 - %idxprom.i.i.us.31 = sext i32 %add.i.i.us.31 to i64 - %arrayidx.i.i.us.31 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -if.then.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7 - %add.i.i.us.us.7.1 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7.1 - %idxprom.i.i.us.us.7.1 = sext i32 %add.i.i.us.us.7.1 to i64 - %arrayidx.i.i.us.us.7.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7.1 = shl i64 %add1.i.i.i.us.us.7.1, 32 - %149 = ashr exact i64 %sext.i.i.us.us.7.1, 32 - br label %for.body.i.i.us.us.7.1 - -for.body.i.i.us.us.7.1: ; preds = %for.body.i.i.us.us.7.1, %if.then.i.i.us.us.7.1 - %indvars.iv.next.i.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.i.us.us.7.1, %for.body.i.i.us.us.7.1 ], [ 0, %if.then.i.i.us.us.7.1 ] - %150 = phi float [ %156, %for.body.i.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.i.us.us.7.1 ] - %151 = add nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %139 - %arrayidx11.i.i.us.us.7.1 = getelementptr inbounds float, float* %8, i64 %151 - %152 = load float, float* %arrayidx11.i.i.us.us.7.1, align 4, !tbaa !12 - %153 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %29 - %154 = add nsw i64 %153, %149 - %arrayidx15.i.i.us.us.7.1 = getelementptr inbounds float, float* %12, i64 %154 - %155 = load float, float* %arrayidx15.i.i.us.us.7.1, align 4, !tbaa !12 - %156 = tail call float @llvm.fmuladd.f32(float %152, float %155, float %150) #2 - store float %156, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7.1, 1 - %exitcond.not.i.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.7.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1.loopexit, label %for.body.i.i.us.us.7.1, !llvm.loop !19 - -if.end.i.i.us.us.7.1.loopexit: ; preds = %for.body.i.i.us.us.7.1 - br label %if.end.i.i.us.us.7.1 - -if.end.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7.1.loopexit, %if.end.i.i.us.us.7 - %157 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %157, 32 - br i1 %exitcond.7.not.1, label %_pocl_kernel_mm3_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !23 - -if.then.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6 - %add.i.i.us.us.6.1 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6.1 - %idxprom.i.i.us.us.6.1 = sext i32 %add.i.i.us.us.6.1 to i64 - %arrayidx.i.i.us.us.6.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6.1 = shl i64 %add1.i.i.i.us.us.6.1, 32 - %158 = ashr exact i64 %sext.i.i.us.us.6.1, 32 - br label %for.body.i.i.us.us.6.1 - -for.body.i.i.us.us.6.1: ; preds = %for.body.i.i.us.us.6.1, %if.then.i.i.us.us.6.1 - %indvars.iv.next.i.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.i.us.us.6.1, %for.body.i.i.us.us.6.1 ], [ 0, %if.then.i.i.us.us.6.1 ] - %159 = phi float [ %165, %for.body.i.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.i.us.us.6.1 ] - %160 = add nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %128 - %arrayidx11.i.i.us.us.6.1 = getelementptr inbounds float, float* %8, i64 %160 - %161 = load float, float* %arrayidx11.i.i.us.us.6.1, align 4, !tbaa !12 - %162 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %29 - %163 = add nsw i64 %162, %158 - %arrayidx15.i.i.us.us.6.1 = getelementptr inbounds float, float* %12, i64 %163 - %164 = load float, float* %arrayidx15.i.i.us.us.6.1, align 4, !tbaa !12 - %165 = tail call float @llvm.fmuladd.f32(float %161, float %164, float %159) #2 - store float %165, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6.1, 1 - %exitcond.not.i.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.6.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1.loopexit, label %for.body.i.i.us.us.6.1, !llvm.loop !19 - -if.end.i.i.us.us.6.1.loopexit: ; preds = %for.body.i.i.us.us.6.1 - br label %if.end.i.i.us.us.6.1 - -if.end.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6.1.loopexit, %if.end.i.i.us.us.6 - %166 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %166, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !23 - -if.then.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5 - %add.i.i.us.us.5.1 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5.1 - %idxprom.i.i.us.us.5.1 = sext i32 %add.i.i.us.us.5.1 to i64 - %arrayidx.i.i.us.us.5.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5.1 = shl i64 %add1.i.i.i.us.us.5.1, 32 - %167 = ashr exact i64 %sext.i.i.us.us.5.1, 32 - br label %for.body.i.i.us.us.5.1 - -for.body.i.i.us.us.5.1: ; preds = %for.body.i.i.us.us.5.1, %if.then.i.i.us.us.5.1 - %indvars.iv.next.i.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.i.us.us.5.1, %for.body.i.i.us.us.5.1 ], [ 0, %if.then.i.i.us.us.5.1 ] - %168 = phi float [ %174, %for.body.i.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.i.us.us.5.1 ] - %169 = add nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %117 - %arrayidx11.i.i.us.us.5.1 = getelementptr inbounds float, float* %8, i64 %169 - %170 = load float, float* %arrayidx11.i.i.us.us.5.1, align 4, !tbaa !12 - %171 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %29 - %172 = add nsw i64 %171, %167 - %arrayidx15.i.i.us.us.5.1 = getelementptr inbounds float, float* %12, i64 %172 - %173 = load float, float* %arrayidx15.i.i.us.us.5.1, align 4, !tbaa !12 - %174 = tail call float @llvm.fmuladd.f32(float %170, float %173, float %168) #2 - store float %174, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5.1, 1 - %exitcond.not.i.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.5.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1.loopexit, label %for.body.i.i.us.us.5.1, !llvm.loop !19 - -if.end.i.i.us.us.5.1.loopexit: ; preds = %for.body.i.i.us.us.5.1 - br label %if.end.i.i.us.us.5.1 - -if.end.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5.1.loopexit, %if.end.i.i.us.us.5 - %175 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %175, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !23 - -if.then.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4 - %add.i.i.us.us.4.1 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4.1 - %idxprom.i.i.us.us.4.1 = sext i32 %add.i.i.us.us.4.1 to i64 - %arrayidx.i.i.us.us.4.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4.1 = shl i64 %add1.i.i.i.us.us.4.1, 32 - %176 = ashr exact i64 %sext.i.i.us.us.4.1, 32 - br label %for.body.i.i.us.us.4.1 - -for.body.i.i.us.us.4.1: ; preds = %for.body.i.i.us.us.4.1, %if.then.i.i.us.us.4.1 - %indvars.iv.next.i.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.i.us.us.4.1, %for.body.i.i.us.us.4.1 ], [ 0, %if.then.i.i.us.us.4.1 ] - %177 = phi float [ %183, %for.body.i.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.i.us.us.4.1 ] - %178 = add nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %106 - %arrayidx11.i.i.us.us.4.1 = getelementptr inbounds float, float* %8, i64 %178 - %179 = load float, float* %arrayidx11.i.i.us.us.4.1, align 4, !tbaa !12 - %180 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %29 - %181 = add nsw i64 %180, %176 - %arrayidx15.i.i.us.us.4.1 = getelementptr inbounds float, float* %12, i64 %181 - %182 = load float, float* %arrayidx15.i.i.us.us.4.1, align 4, !tbaa !12 - %183 = tail call float @llvm.fmuladd.f32(float %179, float %182, float %177) #2 - store float %183, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4.1, 1 - %exitcond.not.i.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.4.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1.loopexit, label %for.body.i.i.us.us.4.1, !llvm.loop !19 - -if.end.i.i.us.us.4.1.loopexit: ; preds = %for.body.i.i.us.us.4.1 - br label %if.end.i.i.us.us.4.1 - -if.end.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4.1.loopexit, %if.end.i.i.us.us.4 - %184 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %184, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !23 - -if.then.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3 - %add.i.i.us.us.3.1 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3.1 - %idxprom.i.i.us.us.3.1 = sext i32 %add.i.i.us.us.3.1 to i64 - %arrayidx.i.i.us.us.3.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3.1 = shl i64 %add1.i.i.i.us.us.3.1, 32 - %185 = ashr exact i64 %sext.i.i.us.us.3.1, 32 - br label %for.body.i.i.us.us.3.1 - -for.body.i.i.us.us.3.1: ; preds = %for.body.i.i.us.us.3.1, %if.then.i.i.us.us.3.1 - %indvars.iv.next.i.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.i.us.us.3.1, %for.body.i.i.us.us.3.1 ], [ 0, %if.then.i.i.us.us.3.1 ] - %186 = phi float [ %192, %for.body.i.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.i.us.us.3.1 ] - %187 = add nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %95 - %arrayidx11.i.i.us.us.3.1 = getelementptr inbounds float, float* %8, i64 %187 - %188 = load float, float* %arrayidx11.i.i.us.us.3.1, align 4, !tbaa !12 - %189 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %29 - %190 = add nsw i64 %189, %185 - %arrayidx15.i.i.us.us.3.1 = getelementptr inbounds float, float* %12, i64 %190 - %191 = load float, float* %arrayidx15.i.i.us.us.3.1, align 4, !tbaa !12 - %192 = tail call float @llvm.fmuladd.f32(float %188, float %191, float %186) #2 - store float %192, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3.1, 1 - %exitcond.not.i.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.3.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1.loopexit, label %for.body.i.i.us.us.3.1, !llvm.loop !19 - -if.end.i.i.us.us.3.1.loopexit: ; preds = %for.body.i.i.us.us.3.1 - br label %if.end.i.i.us.us.3.1 - -if.end.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3.1.loopexit, %if.end.i.i.us.us.3 - %193 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %193, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !23 - -if.then.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2 - %add.i.i.us.us.2.1 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2.1 - %idxprom.i.i.us.us.2.1 = sext i32 %add.i.i.us.us.2.1 to i64 - %arrayidx.i.i.us.us.2.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2.1 = shl i64 %add1.i.i.i.us.us.2.1, 32 - %194 = ashr exact i64 %sext.i.i.us.us.2.1, 32 - br label %for.body.i.i.us.us.2.1 - -for.body.i.i.us.us.2.1: ; preds = %for.body.i.i.us.us.2.1, %if.then.i.i.us.us.2.1 - %indvars.iv.next.i.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.i.us.us.2.1, %for.body.i.i.us.us.2.1 ], [ 0, %if.then.i.i.us.us.2.1 ] - %195 = phi float [ %201, %for.body.i.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.i.us.us.2.1 ] - %196 = add nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %84 - %arrayidx11.i.i.us.us.2.1 = getelementptr inbounds float, float* %8, i64 %196 - %197 = load float, float* %arrayidx11.i.i.us.us.2.1, align 4, !tbaa !12 - %198 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %29 - %199 = add nsw i64 %198, %194 - %arrayidx15.i.i.us.us.2.1 = getelementptr inbounds float, float* %12, i64 %199 - %200 = load float, float* %arrayidx15.i.i.us.us.2.1, align 4, !tbaa !12 - %201 = tail call float @llvm.fmuladd.f32(float %197, float %200, float %195) #2 - store float %201, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2.1, 1 - %exitcond.not.i.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.2.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1.loopexit, label %for.body.i.i.us.us.2.1, !llvm.loop !19 - -if.end.i.i.us.us.2.1.loopexit: ; preds = %for.body.i.i.us.us.2.1 - br label %if.end.i.i.us.us.2.1 - -if.end.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2.1.loopexit, %if.end.i.i.us.us.2 - %202 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %202, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !23 - -if.then.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1 - %add.i.i.us.us.1.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1.1 - %idxprom.i.i.us.us.1.1 = sext i32 %add.i.i.us.us.1.1 to i64 - %arrayidx.i.i.us.us.1.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1.1 = shl i64 %add1.i.i.i.us.us.1.1, 32 - %203 = ashr exact i64 %sext.i.i.us.us.1.1, 32 - br label %for.body.i.i.us.us.1.1 - -for.body.i.i.us.us.1.1: ; preds = %for.body.i.i.us.us.1.1, %if.then.i.i.us.us.1.1 - %indvars.iv.next.i.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1.1, %for.body.i.i.us.us.1.1 ], [ 0, %if.then.i.i.us.us.1.1 ] - %204 = phi float [ %210, %for.body.i.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1.1 ] - %205 = add nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %63 - %arrayidx11.i.i.us.us.1.1 = getelementptr inbounds float, float* %8, i64 %205 - %206 = load float, float* %arrayidx11.i.i.us.us.1.1, align 4, !tbaa !12 - %207 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %29 - %208 = add nsw i64 %207, %203 - %arrayidx15.i.i.us.us.1.1 = getelementptr inbounds float, float* %12, i64 %208 - %209 = load float, float* %arrayidx15.i.i.us.us.1.1, align 4, !tbaa !12 - %210 = tail call float @llvm.fmuladd.f32(float %206, float %209, float %204) #2 - store float %210, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1.1, 1 - %exitcond.not.i.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1.loopexit, label %for.body.i.i.us.us.1.1, !llvm.loop !19 - -if.end.i.i.us.us.1.1.loopexit: ; preds = %for.body.i.i.us.us.1.1 - br label %if.end.i.i.us.us.1.1 - -if.end.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1.1.loopexit, %if.end.i.i.us.us.1 - %211 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %211, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !23 - -if.then.i.i.us.us.146: ; preds = %if.end.i.i.us.us - %add.i.i.us.us.142 = add nsw i32 %mul.i.i.us, %conv.i.i.us.us.139 - %idxprom.i.i.us.us.143 = sext i32 %add.i.i.us.us.142 to i64 - %arrayidx.i.i.us.us.144 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.143 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.145 = shl i64 %add1.i.i.i.us.us.138, 32 - %212 = ashr exact i64 %sext.i.i.us.us.145, 32 - br label %for.body.i.i.us.us.152 - -for.body.i.i.us.us.152: ; preds = %for.body.i.i.us.us.152, %if.then.i.i.us.us.146 - %indvars.iv.next.i.i3.us.us.147 = phi i64 [ %indvars.iv.next.i.i.us.us.150, %for.body.i.i.us.us.152 ], [ 0, %if.then.i.i.us.us.146 ] - %213 = phi float [ %219, %for.body.i.i.us.us.152 ], [ 0.000000e+00, %if.then.i.i.us.us.146 ] - %214 = add nsw i64 %indvars.iv.next.i.i3.us.us.147, %61 - %arrayidx11.i.i.us.us.148 = getelementptr inbounds float, float* %8, i64 %214 - %215 = load float, float* %arrayidx11.i.i.us.us.148, align 4, !tbaa !12 - %216 = mul nsw i64 %indvars.iv.next.i.i3.us.us.147, %29 - %217 = add nsw i64 %216, %212 - %arrayidx15.i.i.us.us.149 = getelementptr inbounds float, float* %12, i64 %217 - %218 = load float, float* %arrayidx15.i.i.us.us.149, align 4, !tbaa !12 - %219 = tail call float @llvm.fmuladd.f32(float %215, float %218, float %213) #2 - store float %219, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.150 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.147, 1 - %exitcond.not.i.i.us.us.151 = icmp eq i64 %indvars.iv.next.i.i.us.us.150, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.151, label %if.end.i.i.us.us.153.loopexit, label %for.body.i.i.us.us.152, !llvm.loop !19 - -if.end.i.i.us.us.153.loopexit: ; preds = %for.body.i.i.us.us.152 - br label %if.end.i.i.us.us.153 - -if.end.i.i.us.us.153: ; preds = %if.end.i.i.us.us.153.loopexit, %if.end.i.i.us.us - %220 = add nuw nsw i64 %_local_id_x.i.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %220, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !23 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mm3_kernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %22 = getelementptr i8*, i8** %0, i64 5 - %23 = bitcast i8** %22 to i32** - %24 = load i32*, i32** %23, align 8 - %25 = load i32, i32* %24, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp638.i.i = icmp sgt i32 %25, 0 - %26 = sext i32 %21 to i64 - %wide.trip.count.i.i = zext i32 %25 to i64 - br i1 %cmp638.i.i, label %pregion_for_entry.pregion_for_init.i.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - %conv.i.i.us = trunc i64 %mul.i.i.i to i32 - %cmp4.i.i.us = icmp sgt i32 %21, %conv.i.i.us - %27 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.1 = or i32 %27, 1 - %cmp4.i.i.us.1 = icmp sgt i32 %21, %conv.i.i.us.1 - %28 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.2 = or i32 %28, 2 - %cmp4.i.i.us.2 = icmp sgt i32 %21, %conv.i.i.us.2 - %29 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.3 = or i32 %29, 3 - %cmp4.i.i.us.3 = icmp sgt i32 %21, %conv.i.i.us.3 - %30 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.4 = or i32 %30, 4 - %cmp4.i.i.us.4 = icmp sgt i32 %21, %conv.i.i.us.4 - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.5 = or i32 %31, 5 - %cmp4.i.i.us.5 = icmp sgt i32 %21, %conv.i.i.us.5 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.6 = or i32 %32, 6 - %cmp4.i.i.us.6 = icmp sgt i32 %21, %conv.i.i.us.6 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.7 = or i32 %33, 7 - %cmp4.i.i.us.7 = icmp sgt i32 %21, %conv.i.i.us.7 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.8 = or i32 %34, 8 - %cmp4.i.i.us.8 = icmp sgt i32 %21, %conv.i.i.us.8 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.9 = or i32 %35, 9 - %cmp4.i.i.us.9 = icmp sgt i32 %21, %conv.i.i.us.9 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.10 = or i32 %36, 10 - %cmp4.i.i.us.10 = icmp sgt i32 %21, %conv.i.i.us.10 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.11 = or i32 %37, 11 - %cmp4.i.i.us.11 = icmp sgt i32 %21, %conv.i.i.us.11 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.12 = or i32 %38, 12 - %cmp4.i.i.us.12 = icmp sgt i32 %21, %conv.i.i.us.12 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.13 = or i32 %39, 13 - %cmp4.i.i.us.13 = icmp sgt i32 %21, %conv.i.i.us.13 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.14 = or i32 %40, 14 - %cmp4.i.i.us.14 = icmp sgt i32 %21, %conv.i.i.us.14 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.15 = or i32 %41, 15 - %cmp4.i.i.us.15 = icmp sgt i32 %21, %conv.i.i.us.15 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.16 = or i32 %42, 16 - %cmp4.i.i.us.16 = icmp sgt i32 %21, %conv.i.i.us.16 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.17 = or i32 %43, 17 - %cmp4.i.i.us.17 = icmp sgt i32 %21, %conv.i.i.us.17 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.18 = or i32 %44, 18 - %cmp4.i.i.us.18 = icmp sgt i32 %21, %conv.i.i.us.18 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.19 = or i32 %45, 19 - %cmp4.i.i.us.19 = icmp sgt i32 %21, %conv.i.i.us.19 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.20 = or i32 %46, 20 - %cmp4.i.i.us.20 = icmp sgt i32 %21, %conv.i.i.us.20 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.21 = or i32 %47, 21 - %cmp4.i.i.us.21 = icmp sgt i32 %21, %conv.i.i.us.21 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.22 = or i32 %48, 22 - %cmp4.i.i.us.22 = icmp sgt i32 %21, %conv.i.i.us.22 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.23 = or i32 %49, 23 - %cmp4.i.i.us.23 = icmp sgt i32 %21, %conv.i.i.us.23 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.24 = or i32 %50, 24 - %cmp4.i.i.us.24 = icmp sgt i32 %21, %conv.i.i.us.24 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.25 = or i32 %51, 25 - %cmp4.i.i.us.25 = icmp sgt i32 %21, %conv.i.i.us.25 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.26 = or i32 %52, 26 - %cmp4.i.i.us.26 = icmp sgt i32 %21, %conv.i.i.us.26 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.27 = or i32 %53, 27 - %cmp4.i.i.us.27 = icmp sgt i32 %21, %conv.i.i.us.27 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.28 = or i32 %54, 28 - %cmp4.i.i.us.28 = icmp sgt i32 %21, %conv.i.i.us.28 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.29 = or i32 %55, 29 - %cmp4.i.i.us.29 = icmp sgt i32 %21, %conv.i.i.us.29 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.30 = or i32 %56, 30 - %cmp4.i.i.us.30 = icmp sgt i32 %21, %conv.i.i.us.30 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.31 = or i32 %57, 31 - %cmp4.i.i.us.31 = icmp sgt i32 %21, %conv.i.i.us.31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i.us.preheader: ; preds = %5 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %17, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %21, %conv2.i.i.us - %mul8.i.i.us = mul nsw i32 %25, %conv2.i.i.us - %58 = sext i32 %mul8.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us.preheader - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us.153 - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us.preheader - %59 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %59, 1 - %cmp.i.i.us.1 = icmp sgt i32 %17, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %21, %conv2.i.i.us.1 - %mul8.i.i.us.1 = mul nsw i32 %25, %conv2.i.i.us.1 - %60 = sext i32 %mul8.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us.153, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %217, %if.end.i.i.us.us.153 ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %21, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us - store float 0.000000e+00, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %61 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %62 = or i64 %_local_id_x.i.0.us.us, 1 - %add1.i.i.i.us.us.138 = add nuw nsw i64 %62, %mul.i.i.i - %conv.i.i.us.us.139 = trunc i64 %add1.i.i.i.us.us.138 to i32 - %cmp4.i.i.us.us.140 = icmp sgt i32 %21, %conv.i.i.us.us.139 - br i1 %cmp4.i.i.us.us.140, label %if.then.i.i.us.us.146, label %if.end.i.i.us.us.153 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %63 = phi float [ %69, %for.body.i.i.us.us ], [ 0.000000e+00, %if.then.i.i.us.us ] - %64 = add nsw i64 %indvars.iv.next.i.i3.us.us, %58 - %arrayidx11.i.i.us.us = getelementptr inbounds float, float* %7, i64 %64 - %65 = load float, float* %arrayidx11.i.i.us.us, align 4, !tbaa !12 - %66 = mul nsw i64 %indvars.iv.next.i.i3.us.us, %26 - %67 = add nsw i64 %66, %61 - %arrayidx15.i.i.us.us = getelementptr inbounds float, float* %10, i64 %67 - %68 = load float, float* %arrayidx15.i.i.us.us, align 4, !tbaa !12 - %69 = tail call float @llvm.fmuladd.f32(float %65, float %68, float %63) #2 - store float %69, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %pregion_for_entry.pregion_for_init.i.i.preheader - %_local_id_y.i.0 = phi i64 [ %70, %pregion_for_end.i.i ], [ 0, %pregion_for_entry.pregion_for_init.i.i.preheader ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %17, %conv2.i.i - %mul.i.i = mul nsw i32 %21, %conv2.i.i - br i1 %cmp.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us.preheader - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -pregion_for_end.i.i: ; preds = %if.then.i.i.us.31, %if.end.i.i.us.30, %pregion_for_entry.pregion_for_init.i.i - %70 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond33.not = icmp eq i64 %70, 8 - br i1 %exitcond33.not, label %_pocl_kernel_mm3_kernel1.exit.loopexit54, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_mm3_kernel1.exit.loopexit: ; preds = %if.end.i.i.us.us.7.1 - br label %_pocl_kernel_mm3_kernel1.exit - -_pocl_kernel_mm3_kernel1.exit.loopexit54: ; preds = %pregion_for_end.i.i - br label %_pocl_kernel_mm3_kernel1.exit - -_pocl_kernel_mm3_kernel1.exit: ; preds = %pregion_for_end.i.i.us.6, %_pocl_kernel_mm3_kernel1.exit.loopexit54, %_pocl_kernel_mm3_kernel1.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %208, %if.end.i.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %21, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %71 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %72 = phi float [ %78, %for.body.i.i.us.us.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1 ] - %73 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %60 - %arrayidx11.i.i.us.us.1 = getelementptr inbounds float, float* %7, i64 %73 - %74 = load float, float* %arrayidx11.i.i.us.us.1, align 4, !tbaa !12 - %75 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1, %26 - %76 = add nsw i64 %75, %71 - %arrayidx15.i.i.us.us.1 = getelementptr inbounds float, float* %10, i64 %76 - %77 = load float, float* %arrayidx15.i.i.us.us.1, align 4, !tbaa !12 - %78 = tail call float @llvm.fmuladd.f32(float %74, float %77, float %72) #2 - store float %78, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !19 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %79 = or i64 %_local_id_x.i.0.us.us.1, 1 - %add1.i.i.i.us.us.1.1 = add nuw nsw i64 %79, %mul.i.i.i - %conv.i.i.us.us.1.1 = trunc i64 %add1.i.i.i.us.us.1.1 to i32 - %cmp4.i.i.us.us.1.1 = icmp sgt i32 %21, %conv.i.i.us.us.1.1 - br i1 %cmp4.i.i.us.us.1.1, label %if.then.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %80 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %80, 2 - %cmp.i.i.us.2 = icmp sgt i32 %17, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %21, %conv2.i.i.us.2 - %mul8.i.i.us.2 = mul nsw i32 %25, %conv2.i.i.us.2 - %81 = sext i32 %mul8.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.1, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %199, %if.end.i.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %21, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %82 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %83 = phi float [ %89, %for.body.i.i.us.us.2 ], [ 0.000000e+00, %if.then.i.i.us.us.2 ] - %84 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %81 - %arrayidx11.i.i.us.us.2 = getelementptr inbounds float, float* %7, i64 %84 - %85 = load float, float* %arrayidx11.i.i.us.us.2, align 4, !tbaa !12 - %86 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2, %26 - %87 = add nsw i64 %86, %82 - %arrayidx15.i.i.us.us.2 = getelementptr inbounds float, float* %10, i64 %87 - %88 = load float, float* %arrayidx15.i.i.us.us.2, align 4, !tbaa !12 - %89 = tail call float @llvm.fmuladd.f32(float %85, float %88, float %83) #2 - store float %89, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !19 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %90 = or i64 %_local_id_x.i.0.us.us.2, 1 - %add1.i.i.i.us.us.2.1 = add nuw nsw i64 %90, %mul.i.i.i - %conv.i.i.us.us.2.1 = trunc i64 %add1.i.i.i.us.us.2.1 to i32 - %cmp4.i.i.us.us.2.1 = icmp sgt i32 %21, %conv.i.i.us.us.2.1 - br i1 %cmp4.i.i.us.us.2.1, label %if.then.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2.1 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %91 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %91, 3 - %cmp.i.i.us.3 = icmp sgt i32 %17, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %21, %conv2.i.i.us.3 - %mul8.i.i.us.3 = mul nsw i32 %25, %conv2.i.i.us.3 - %92 = sext i32 %mul8.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.1, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %190, %if.end.i.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %21, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %93 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %94 = phi float [ %100, %for.body.i.i.us.us.3 ], [ 0.000000e+00, %if.then.i.i.us.us.3 ] - %95 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %92 - %arrayidx11.i.i.us.us.3 = getelementptr inbounds float, float* %7, i64 %95 - %96 = load float, float* %arrayidx11.i.i.us.us.3, align 4, !tbaa !12 - %97 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3, %26 - %98 = add nsw i64 %97, %93 - %arrayidx15.i.i.us.us.3 = getelementptr inbounds float, float* %10, i64 %98 - %99 = load float, float* %arrayidx15.i.i.us.us.3, align 4, !tbaa !12 - %100 = tail call float @llvm.fmuladd.f32(float %96, float %99, float %94) #2 - store float %100, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !19 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %101 = or i64 %_local_id_x.i.0.us.us.3, 1 - %add1.i.i.i.us.us.3.1 = add nuw nsw i64 %101, %mul.i.i.i - %conv.i.i.us.us.3.1 = trunc i64 %add1.i.i.i.us.us.3.1 to i32 - %cmp4.i.i.us.us.3.1 = icmp sgt i32 %21, %conv.i.i.us.us.3.1 - br i1 %cmp4.i.i.us.us.3.1, label %if.then.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3.1 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %102 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %102, 4 - %cmp.i.i.us.4 = icmp sgt i32 %17, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %21, %conv2.i.i.us.4 - %mul8.i.i.us.4 = mul nsw i32 %25, %conv2.i.i.us.4 - %103 = sext i32 %mul8.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.1, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %181, %if.end.i.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %21, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %104 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %105 = phi float [ %111, %for.body.i.i.us.us.4 ], [ 0.000000e+00, %if.then.i.i.us.us.4 ] - %106 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %103 - %arrayidx11.i.i.us.us.4 = getelementptr inbounds float, float* %7, i64 %106 - %107 = load float, float* %arrayidx11.i.i.us.us.4, align 4, !tbaa !12 - %108 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4, %26 - %109 = add nsw i64 %108, %104 - %arrayidx15.i.i.us.us.4 = getelementptr inbounds float, float* %10, i64 %109 - %110 = load float, float* %arrayidx15.i.i.us.us.4, align 4, !tbaa !12 - %111 = tail call float @llvm.fmuladd.f32(float %107, float %110, float %105) #2 - store float %111, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !19 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %112 = or i64 %_local_id_x.i.0.us.us.4, 1 - %add1.i.i.i.us.us.4.1 = add nuw nsw i64 %112, %mul.i.i.i - %conv.i.i.us.us.4.1 = trunc i64 %add1.i.i.i.us.us.4.1 to i32 - %cmp4.i.i.us.us.4.1 = icmp sgt i32 %21, %conv.i.i.us.us.4.1 - br i1 %cmp4.i.i.us.us.4.1, label %if.then.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4.1 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %113 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %113, 5 - %cmp.i.i.us.5 = icmp sgt i32 %17, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %21, %conv2.i.i.us.5 - %mul8.i.i.us.5 = mul nsw i32 %25, %conv2.i.i.us.5 - %114 = sext i32 %mul8.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.1, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %172, %if.end.i.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %21, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %115 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %116 = phi float [ %122, %for.body.i.i.us.us.5 ], [ 0.000000e+00, %if.then.i.i.us.us.5 ] - %117 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %114 - %arrayidx11.i.i.us.us.5 = getelementptr inbounds float, float* %7, i64 %117 - %118 = load float, float* %arrayidx11.i.i.us.us.5, align 4, !tbaa !12 - %119 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5, %26 - %120 = add nsw i64 %119, %115 - %arrayidx15.i.i.us.us.5 = getelementptr inbounds float, float* %10, i64 %120 - %121 = load float, float* %arrayidx15.i.i.us.us.5, align 4, !tbaa !12 - %122 = tail call float @llvm.fmuladd.f32(float %118, float %121, float %116) #2 - store float %122, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !19 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %123 = or i64 %_local_id_x.i.0.us.us.5, 1 - %add1.i.i.i.us.us.5.1 = add nuw nsw i64 %123, %mul.i.i.i - %conv.i.i.us.us.5.1 = trunc i64 %add1.i.i.i.us.us.5.1 to i32 - %cmp4.i.i.us.us.5.1 = icmp sgt i32 %21, %conv.i.i.us.us.5.1 - br i1 %cmp4.i.i.us.us.5.1, label %if.then.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5.1 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %124 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %124, 6 - %cmp.i.i.us.6 = icmp sgt i32 %17, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %21, %conv2.i.i.us.6 - %mul8.i.i.us.6 = mul nsw i32 %25, %conv2.i.i.us.6 - %125 = sext i32 %mul8.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.1, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %163, %if.end.i.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %21, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %126 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %127 = phi float [ %133, %for.body.i.i.us.us.6 ], [ 0.000000e+00, %if.then.i.i.us.us.6 ] - %128 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %125 - %arrayidx11.i.i.us.us.6 = getelementptr inbounds float, float* %7, i64 %128 - %129 = load float, float* %arrayidx11.i.i.us.us.6, align 4, !tbaa !12 - %130 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6, %26 - %131 = add nsw i64 %130, %126 - %arrayidx15.i.i.us.us.6 = getelementptr inbounds float, float* %10, i64 %131 - %132 = load float, float* %arrayidx15.i.i.us.us.6, align 4, !tbaa !12 - %133 = tail call float @llvm.fmuladd.f32(float %129, float %132, float %127) #2 - store float %133, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !19 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %134 = or i64 %_local_id_x.i.0.us.us.6, 1 - %add1.i.i.i.us.us.6.1 = add nuw nsw i64 %134, %mul.i.i.i - %conv.i.i.us.us.6.1 = trunc i64 %add1.i.i.i.us.us.6.1 to i32 - %cmp4.i.i.us.us.6.1 = icmp sgt i32 %21, %conv.i.i.us.us.6.1 - br i1 %cmp4.i.i.us.us.6.1, label %if.then.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6.1 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %135 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %135, 7 - %cmp.i.i.us.7 = icmp sgt i32 %17, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %21, %conv2.i.i.us.7 - %mul8.i.i.us.7 = mul nsw i32 %25, %conv2.i.i.us.7 - %136 = sext i32 %mul8.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_mm3_kernel1.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.1, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %154, %if.end.i.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %21, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %137 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %138 = phi float [ %144, %for.body.i.i.us.us.7 ], [ 0.000000e+00, %if.then.i.i.us.us.7 ] - %139 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %136 - %arrayidx11.i.i.us.us.7 = getelementptr inbounds float, float* %7, i64 %139 - %140 = load float, float* %arrayidx11.i.i.us.us.7, align 4, !tbaa !12 - %141 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7, %26 - %142 = add nsw i64 %141, %137 - %arrayidx15.i.i.us.us.7 = getelementptr inbounds float, float* %10, i64 %142 - %143 = load float, float* %arrayidx15.i.i.us.us.7, align 4, !tbaa !12 - %144 = tail call float @llvm.fmuladd.f32(float %140, float %143, float %138) #2 - store float %144, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !19 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %145 = or i64 %_local_id_x.i.0.us.us.7, 1 - %add1.i.i.i.us.us.7.1 = add nuw nsw i64 %145, %mul.i.i.i - %conv.i.i.us.us.7.1 = trunc i64 %add1.i.i.i.us.us.7.1 to i32 - %cmp4.i.i.us.us.7.1 = icmp sgt i32 %21, %conv.i.i.us.us.7.1 - br i1 %cmp4.i.i.us.us.7.1, label %if.then.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1 - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %add.i.i.us.1 = add nsw i32 %mul.i.i, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %if.end.i.i.us - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %if.end.i.i.us.1 - %add.i.i.us.2 = add nsw i32 %mul.i.i, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %if.end.i.i.us.1 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %if.end.i.i.us.2 - %add.i.i.us.3 = add nsw i32 %mul.i.i, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %if.end.i.i.us.2 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %if.end.i.i.us.3 - %add.i.i.us.4 = add nsw i32 %mul.i.i, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %if.end.i.i.us.3 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %if.end.i.i.us.4 - %add.i.i.us.5 = add nsw i32 %mul.i.i, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %if.end.i.i.us.4 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %if.end.i.i.us.5 - %add.i.i.us.6 = add nsw i32 %mul.i.i, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %if.end.i.i.us.5 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %if.end.i.i.us.6 - %add.i.i.us.7 = add nsw i32 %mul.i.i, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %if.end.i.i.us.6 - br i1 %cmp4.i.i.us.8, label %if.then.i.i.us.8, label %if.end.i.i.us.8 - -if.then.i.i.us.8: ; preds = %if.end.i.i.us.7 - %add.i.i.us.8 = add nsw i32 %mul.i.i, %conv.i.i.us.8 - %idxprom.i.i.us.8 = sext i32 %add.i.i.us.8 to i64 - %arrayidx.i.i.us.8 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.8 - -if.end.i.i.us.8: ; preds = %if.then.i.i.us.8, %if.end.i.i.us.7 - br i1 %cmp4.i.i.us.9, label %if.then.i.i.us.9, label %if.end.i.i.us.9 - -if.then.i.i.us.9: ; preds = %if.end.i.i.us.8 - %add.i.i.us.9 = add nsw i32 %mul.i.i, %conv.i.i.us.9 - %idxprom.i.i.us.9 = sext i32 %add.i.i.us.9 to i64 - %arrayidx.i.i.us.9 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.9 - -if.end.i.i.us.9: ; preds = %if.then.i.i.us.9, %if.end.i.i.us.8 - br i1 %cmp4.i.i.us.10, label %if.then.i.i.us.10, label %if.end.i.i.us.10 - -if.then.i.i.us.10: ; preds = %if.end.i.i.us.9 - %add.i.i.us.10 = add nsw i32 %mul.i.i, %conv.i.i.us.10 - %idxprom.i.i.us.10 = sext i32 %add.i.i.us.10 to i64 - %arrayidx.i.i.us.10 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.10 - -if.end.i.i.us.10: ; preds = %if.then.i.i.us.10, %if.end.i.i.us.9 - br i1 %cmp4.i.i.us.11, label %if.then.i.i.us.11, label %if.end.i.i.us.11 - -if.then.i.i.us.11: ; preds = %if.end.i.i.us.10 - %add.i.i.us.11 = add nsw i32 %mul.i.i, %conv.i.i.us.11 - %idxprom.i.i.us.11 = sext i32 %add.i.i.us.11 to i64 - %arrayidx.i.i.us.11 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.11 - -if.end.i.i.us.11: ; preds = %if.then.i.i.us.11, %if.end.i.i.us.10 - br i1 %cmp4.i.i.us.12, label %if.then.i.i.us.12, label %if.end.i.i.us.12 - -if.then.i.i.us.12: ; preds = %if.end.i.i.us.11 - %add.i.i.us.12 = add nsw i32 %mul.i.i, %conv.i.i.us.12 - %idxprom.i.i.us.12 = sext i32 %add.i.i.us.12 to i64 - %arrayidx.i.i.us.12 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.12 - -if.end.i.i.us.12: ; preds = %if.then.i.i.us.12, %if.end.i.i.us.11 - br i1 %cmp4.i.i.us.13, label %if.then.i.i.us.13, label %if.end.i.i.us.13 - -if.then.i.i.us.13: ; preds = %if.end.i.i.us.12 - %add.i.i.us.13 = add nsw i32 %mul.i.i, %conv.i.i.us.13 - %idxprom.i.i.us.13 = sext i32 %add.i.i.us.13 to i64 - %arrayidx.i.i.us.13 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.13 - -if.end.i.i.us.13: ; preds = %if.then.i.i.us.13, %if.end.i.i.us.12 - br i1 %cmp4.i.i.us.14, label %if.then.i.i.us.14, label %if.end.i.i.us.14 - -if.then.i.i.us.14: ; preds = %if.end.i.i.us.13 - %add.i.i.us.14 = add nsw i32 %mul.i.i, %conv.i.i.us.14 - %idxprom.i.i.us.14 = sext i32 %add.i.i.us.14 to i64 - %arrayidx.i.i.us.14 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.14 - -if.end.i.i.us.14: ; preds = %if.then.i.i.us.14, %if.end.i.i.us.13 - br i1 %cmp4.i.i.us.15, label %if.then.i.i.us.15, label %if.end.i.i.us.15 - -if.then.i.i.us.15: ; preds = %if.end.i.i.us.14 - %add.i.i.us.15 = add nsw i32 %mul.i.i, %conv.i.i.us.15 - %idxprom.i.i.us.15 = sext i32 %add.i.i.us.15 to i64 - %arrayidx.i.i.us.15 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.15 - -if.end.i.i.us.15: ; preds = %if.then.i.i.us.15, %if.end.i.i.us.14 - br i1 %cmp4.i.i.us.16, label %if.then.i.i.us.16, label %if.end.i.i.us.16 - -if.then.i.i.us.16: ; preds = %if.end.i.i.us.15 - %add.i.i.us.16 = add nsw i32 %mul.i.i, %conv.i.i.us.16 - %idxprom.i.i.us.16 = sext i32 %add.i.i.us.16 to i64 - %arrayidx.i.i.us.16 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.16 - -if.end.i.i.us.16: ; preds = %if.then.i.i.us.16, %if.end.i.i.us.15 - br i1 %cmp4.i.i.us.17, label %if.then.i.i.us.17, label %if.end.i.i.us.17 - -if.then.i.i.us.17: ; preds = %if.end.i.i.us.16 - %add.i.i.us.17 = add nsw i32 %mul.i.i, %conv.i.i.us.17 - %idxprom.i.i.us.17 = sext i32 %add.i.i.us.17 to i64 - %arrayidx.i.i.us.17 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.17 - -if.end.i.i.us.17: ; preds = %if.then.i.i.us.17, %if.end.i.i.us.16 - br i1 %cmp4.i.i.us.18, label %if.then.i.i.us.18, label %if.end.i.i.us.18 - -if.then.i.i.us.18: ; preds = %if.end.i.i.us.17 - %add.i.i.us.18 = add nsw i32 %mul.i.i, %conv.i.i.us.18 - %idxprom.i.i.us.18 = sext i32 %add.i.i.us.18 to i64 - %arrayidx.i.i.us.18 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.18 - -if.end.i.i.us.18: ; preds = %if.then.i.i.us.18, %if.end.i.i.us.17 - br i1 %cmp4.i.i.us.19, label %if.then.i.i.us.19, label %if.end.i.i.us.19 - -if.then.i.i.us.19: ; preds = %if.end.i.i.us.18 - %add.i.i.us.19 = add nsw i32 %mul.i.i, %conv.i.i.us.19 - %idxprom.i.i.us.19 = sext i32 %add.i.i.us.19 to i64 - %arrayidx.i.i.us.19 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.19 - -if.end.i.i.us.19: ; preds = %if.then.i.i.us.19, %if.end.i.i.us.18 - br i1 %cmp4.i.i.us.20, label %if.then.i.i.us.20, label %if.end.i.i.us.20 - -if.then.i.i.us.20: ; preds = %if.end.i.i.us.19 - %add.i.i.us.20 = add nsw i32 %mul.i.i, %conv.i.i.us.20 - %idxprom.i.i.us.20 = sext i32 %add.i.i.us.20 to i64 - %arrayidx.i.i.us.20 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.20 - -if.end.i.i.us.20: ; preds = %if.then.i.i.us.20, %if.end.i.i.us.19 - br i1 %cmp4.i.i.us.21, label %if.then.i.i.us.21, label %if.end.i.i.us.21 - -if.then.i.i.us.21: ; preds = %if.end.i.i.us.20 - %add.i.i.us.21 = add nsw i32 %mul.i.i, %conv.i.i.us.21 - %idxprom.i.i.us.21 = sext i32 %add.i.i.us.21 to i64 - %arrayidx.i.i.us.21 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.21 - -if.end.i.i.us.21: ; preds = %if.then.i.i.us.21, %if.end.i.i.us.20 - br i1 %cmp4.i.i.us.22, label %if.then.i.i.us.22, label %if.end.i.i.us.22 - -if.then.i.i.us.22: ; preds = %if.end.i.i.us.21 - %add.i.i.us.22 = add nsw i32 %mul.i.i, %conv.i.i.us.22 - %idxprom.i.i.us.22 = sext i32 %add.i.i.us.22 to i64 - %arrayidx.i.i.us.22 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.22 - -if.end.i.i.us.22: ; preds = %if.then.i.i.us.22, %if.end.i.i.us.21 - br i1 %cmp4.i.i.us.23, label %if.then.i.i.us.23, label %if.end.i.i.us.23 - -if.then.i.i.us.23: ; preds = %if.end.i.i.us.22 - %add.i.i.us.23 = add nsw i32 %mul.i.i, %conv.i.i.us.23 - %idxprom.i.i.us.23 = sext i32 %add.i.i.us.23 to i64 - %arrayidx.i.i.us.23 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.23 - -if.end.i.i.us.23: ; preds = %if.then.i.i.us.23, %if.end.i.i.us.22 - br i1 %cmp4.i.i.us.24, label %if.then.i.i.us.24, label %if.end.i.i.us.24 - -if.then.i.i.us.24: ; preds = %if.end.i.i.us.23 - %add.i.i.us.24 = add nsw i32 %mul.i.i, %conv.i.i.us.24 - %idxprom.i.i.us.24 = sext i32 %add.i.i.us.24 to i64 - %arrayidx.i.i.us.24 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.24 - -if.end.i.i.us.24: ; preds = %if.then.i.i.us.24, %if.end.i.i.us.23 - br i1 %cmp4.i.i.us.25, label %if.then.i.i.us.25, label %if.end.i.i.us.25 - -if.then.i.i.us.25: ; preds = %if.end.i.i.us.24 - %add.i.i.us.25 = add nsw i32 %mul.i.i, %conv.i.i.us.25 - %idxprom.i.i.us.25 = sext i32 %add.i.i.us.25 to i64 - %arrayidx.i.i.us.25 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.25 - -if.end.i.i.us.25: ; preds = %if.then.i.i.us.25, %if.end.i.i.us.24 - br i1 %cmp4.i.i.us.26, label %if.then.i.i.us.26, label %if.end.i.i.us.26 - -if.then.i.i.us.26: ; preds = %if.end.i.i.us.25 - %add.i.i.us.26 = add nsw i32 %mul.i.i, %conv.i.i.us.26 - %idxprom.i.i.us.26 = sext i32 %add.i.i.us.26 to i64 - %arrayidx.i.i.us.26 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.26 - -if.end.i.i.us.26: ; preds = %if.then.i.i.us.26, %if.end.i.i.us.25 - br i1 %cmp4.i.i.us.27, label %if.then.i.i.us.27, label %if.end.i.i.us.27 - -if.then.i.i.us.27: ; preds = %if.end.i.i.us.26 - %add.i.i.us.27 = add nsw i32 %mul.i.i, %conv.i.i.us.27 - %idxprom.i.i.us.27 = sext i32 %add.i.i.us.27 to i64 - %arrayidx.i.i.us.27 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.27 - -if.end.i.i.us.27: ; preds = %if.then.i.i.us.27, %if.end.i.i.us.26 - br i1 %cmp4.i.i.us.28, label %if.then.i.i.us.28, label %if.end.i.i.us.28 - -if.then.i.i.us.28: ; preds = %if.end.i.i.us.27 - %add.i.i.us.28 = add nsw i32 %mul.i.i, %conv.i.i.us.28 - %idxprom.i.i.us.28 = sext i32 %add.i.i.us.28 to i64 - %arrayidx.i.i.us.28 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.28 - -if.end.i.i.us.28: ; preds = %if.then.i.i.us.28, %if.end.i.i.us.27 - br i1 %cmp4.i.i.us.29, label %if.then.i.i.us.29, label %if.end.i.i.us.29 - -if.then.i.i.us.29: ; preds = %if.end.i.i.us.28 - %add.i.i.us.29 = add nsw i32 %mul.i.i, %conv.i.i.us.29 - %idxprom.i.i.us.29 = sext i32 %add.i.i.us.29 to i64 - %arrayidx.i.i.us.29 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.29 - -if.end.i.i.us.29: ; preds = %if.then.i.i.us.29, %if.end.i.i.us.28 - br i1 %cmp4.i.i.us.30, label %if.then.i.i.us.30, label %if.end.i.i.us.30 - -if.then.i.i.us.30: ; preds = %if.end.i.i.us.29 - %add.i.i.us.30 = add nsw i32 %mul.i.i, %conv.i.i.us.30 - %idxprom.i.i.us.30 = sext i32 %add.i.i.us.30 to i64 - %arrayidx.i.i.us.30 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.30 - -if.end.i.i.us.30: ; preds = %if.then.i.i.us.30, %if.end.i.i.us.29 - br i1 %cmp4.i.i.us.31, label %if.then.i.i.us.31, label %pregion_for_end.i.i - -if.then.i.i.us.31: ; preds = %if.end.i.i.us.30 - %add.i.i.us.31 = add nsw i32 %mul.i.i, %conv.i.i.us.31 - %idxprom.i.i.us.31 = sext i32 %add.i.i.us.31 to i64 - %arrayidx.i.i.us.31 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -if.then.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7 - %add.i.i.us.us.7.1 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7.1 - %idxprom.i.i.us.us.7.1 = sext i32 %add.i.i.us.us.7.1 to i64 - %arrayidx.i.i.us.us.7.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7.1 = shl i64 %add1.i.i.i.us.us.7.1, 32 - %146 = ashr exact i64 %sext.i.i.us.us.7.1, 32 - br label %for.body.i.i.us.us.7.1 - -for.body.i.i.us.us.7.1: ; preds = %for.body.i.i.us.us.7.1, %if.then.i.i.us.us.7.1 - %indvars.iv.next.i.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.i.us.us.7.1, %for.body.i.i.us.us.7.1 ], [ 0, %if.then.i.i.us.us.7.1 ] - %147 = phi float [ %153, %for.body.i.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.i.us.us.7.1 ] - %148 = add nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %136 - %arrayidx11.i.i.us.us.7.1 = getelementptr inbounds float, float* %7, i64 %148 - %149 = load float, float* %arrayidx11.i.i.us.us.7.1, align 4, !tbaa !12 - %150 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %26 - %151 = add nsw i64 %150, %146 - %arrayidx15.i.i.us.us.7.1 = getelementptr inbounds float, float* %10, i64 %151 - %152 = load float, float* %arrayidx15.i.i.us.us.7.1, align 4, !tbaa !12 - %153 = tail call float @llvm.fmuladd.f32(float %149, float %152, float %147) #2 - store float %153, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7.1, 1 - %exitcond.not.i.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.7.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1.loopexit, label %for.body.i.i.us.us.7.1, !llvm.loop !19 - -if.end.i.i.us.us.7.1.loopexit: ; preds = %for.body.i.i.us.us.7.1 - br label %if.end.i.i.us.us.7.1 - -if.end.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7.1.loopexit, %if.end.i.i.us.us.7 - %154 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %154, 32 - br i1 %exitcond.7.not.1, label %_pocl_kernel_mm3_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !23 - -if.then.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6 - %add.i.i.us.us.6.1 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6.1 - %idxprom.i.i.us.us.6.1 = sext i32 %add.i.i.us.us.6.1 to i64 - %arrayidx.i.i.us.us.6.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6.1 = shl i64 %add1.i.i.i.us.us.6.1, 32 - %155 = ashr exact i64 %sext.i.i.us.us.6.1, 32 - br label %for.body.i.i.us.us.6.1 - -for.body.i.i.us.us.6.1: ; preds = %for.body.i.i.us.us.6.1, %if.then.i.i.us.us.6.1 - %indvars.iv.next.i.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.i.us.us.6.1, %for.body.i.i.us.us.6.1 ], [ 0, %if.then.i.i.us.us.6.1 ] - %156 = phi float [ %162, %for.body.i.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.i.us.us.6.1 ] - %157 = add nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %125 - %arrayidx11.i.i.us.us.6.1 = getelementptr inbounds float, float* %7, i64 %157 - %158 = load float, float* %arrayidx11.i.i.us.us.6.1, align 4, !tbaa !12 - %159 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %26 - %160 = add nsw i64 %159, %155 - %arrayidx15.i.i.us.us.6.1 = getelementptr inbounds float, float* %10, i64 %160 - %161 = load float, float* %arrayidx15.i.i.us.us.6.1, align 4, !tbaa !12 - %162 = tail call float @llvm.fmuladd.f32(float %158, float %161, float %156) #2 - store float %162, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6.1, 1 - %exitcond.not.i.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.6.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1.loopexit, label %for.body.i.i.us.us.6.1, !llvm.loop !19 - -if.end.i.i.us.us.6.1.loopexit: ; preds = %for.body.i.i.us.us.6.1 - br label %if.end.i.i.us.us.6.1 - -if.end.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6.1.loopexit, %if.end.i.i.us.us.6 - %163 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %163, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !23 - -if.then.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5 - %add.i.i.us.us.5.1 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5.1 - %idxprom.i.i.us.us.5.1 = sext i32 %add.i.i.us.us.5.1 to i64 - %arrayidx.i.i.us.us.5.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5.1 = shl i64 %add1.i.i.i.us.us.5.1, 32 - %164 = ashr exact i64 %sext.i.i.us.us.5.1, 32 - br label %for.body.i.i.us.us.5.1 - -for.body.i.i.us.us.5.1: ; preds = %for.body.i.i.us.us.5.1, %if.then.i.i.us.us.5.1 - %indvars.iv.next.i.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.i.us.us.5.1, %for.body.i.i.us.us.5.1 ], [ 0, %if.then.i.i.us.us.5.1 ] - %165 = phi float [ %171, %for.body.i.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.i.us.us.5.1 ] - %166 = add nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %114 - %arrayidx11.i.i.us.us.5.1 = getelementptr inbounds float, float* %7, i64 %166 - %167 = load float, float* %arrayidx11.i.i.us.us.5.1, align 4, !tbaa !12 - %168 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %26 - %169 = add nsw i64 %168, %164 - %arrayidx15.i.i.us.us.5.1 = getelementptr inbounds float, float* %10, i64 %169 - %170 = load float, float* %arrayidx15.i.i.us.us.5.1, align 4, !tbaa !12 - %171 = tail call float @llvm.fmuladd.f32(float %167, float %170, float %165) #2 - store float %171, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5.1, 1 - %exitcond.not.i.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.5.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1.loopexit, label %for.body.i.i.us.us.5.1, !llvm.loop !19 - -if.end.i.i.us.us.5.1.loopexit: ; preds = %for.body.i.i.us.us.5.1 - br label %if.end.i.i.us.us.5.1 - -if.end.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5.1.loopexit, %if.end.i.i.us.us.5 - %172 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %172, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !23 - -if.then.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4 - %add.i.i.us.us.4.1 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4.1 - %idxprom.i.i.us.us.4.1 = sext i32 %add.i.i.us.us.4.1 to i64 - %arrayidx.i.i.us.us.4.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4.1 = shl i64 %add1.i.i.i.us.us.4.1, 32 - %173 = ashr exact i64 %sext.i.i.us.us.4.1, 32 - br label %for.body.i.i.us.us.4.1 - -for.body.i.i.us.us.4.1: ; preds = %for.body.i.i.us.us.4.1, %if.then.i.i.us.us.4.1 - %indvars.iv.next.i.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.i.us.us.4.1, %for.body.i.i.us.us.4.1 ], [ 0, %if.then.i.i.us.us.4.1 ] - %174 = phi float [ %180, %for.body.i.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.i.us.us.4.1 ] - %175 = add nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %103 - %arrayidx11.i.i.us.us.4.1 = getelementptr inbounds float, float* %7, i64 %175 - %176 = load float, float* %arrayidx11.i.i.us.us.4.1, align 4, !tbaa !12 - %177 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %26 - %178 = add nsw i64 %177, %173 - %arrayidx15.i.i.us.us.4.1 = getelementptr inbounds float, float* %10, i64 %178 - %179 = load float, float* %arrayidx15.i.i.us.us.4.1, align 4, !tbaa !12 - %180 = tail call float @llvm.fmuladd.f32(float %176, float %179, float %174) #2 - store float %180, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4.1, 1 - %exitcond.not.i.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.4.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1.loopexit, label %for.body.i.i.us.us.4.1, !llvm.loop !19 - -if.end.i.i.us.us.4.1.loopexit: ; preds = %for.body.i.i.us.us.4.1 - br label %if.end.i.i.us.us.4.1 - -if.end.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4.1.loopexit, %if.end.i.i.us.us.4 - %181 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %181, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !23 - -if.then.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3 - %add.i.i.us.us.3.1 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3.1 - %idxprom.i.i.us.us.3.1 = sext i32 %add.i.i.us.us.3.1 to i64 - %arrayidx.i.i.us.us.3.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3.1 = shl i64 %add1.i.i.i.us.us.3.1, 32 - %182 = ashr exact i64 %sext.i.i.us.us.3.1, 32 - br label %for.body.i.i.us.us.3.1 - -for.body.i.i.us.us.3.1: ; preds = %for.body.i.i.us.us.3.1, %if.then.i.i.us.us.3.1 - %indvars.iv.next.i.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.i.us.us.3.1, %for.body.i.i.us.us.3.1 ], [ 0, %if.then.i.i.us.us.3.1 ] - %183 = phi float [ %189, %for.body.i.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.i.us.us.3.1 ] - %184 = add nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %92 - %arrayidx11.i.i.us.us.3.1 = getelementptr inbounds float, float* %7, i64 %184 - %185 = load float, float* %arrayidx11.i.i.us.us.3.1, align 4, !tbaa !12 - %186 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %26 - %187 = add nsw i64 %186, %182 - %arrayidx15.i.i.us.us.3.1 = getelementptr inbounds float, float* %10, i64 %187 - %188 = load float, float* %arrayidx15.i.i.us.us.3.1, align 4, !tbaa !12 - %189 = tail call float @llvm.fmuladd.f32(float %185, float %188, float %183) #2 - store float %189, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3.1, 1 - %exitcond.not.i.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.3.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1.loopexit, label %for.body.i.i.us.us.3.1, !llvm.loop !19 - -if.end.i.i.us.us.3.1.loopexit: ; preds = %for.body.i.i.us.us.3.1 - br label %if.end.i.i.us.us.3.1 - -if.end.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3.1.loopexit, %if.end.i.i.us.us.3 - %190 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %190, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !23 - -if.then.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2 - %add.i.i.us.us.2.1 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2.1 - %idxprom.i.i.us.us.2.1 = sext i32 %add.i.i.us.us.2.1 to i64 - %arrayidx.i.i.us.us.2.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2.1 = shl i64 %add1.i.i.i.us.us.2.1, 32 - %191 = ashr exact i64 %sext.i.i.us.us.2.1, 32 - br label %for.body.i.i.us.us.2.1 - -for.body.i.i.us.us.2.1: ; preds = %for.body.i.i.us.us.2.1, %if.then.i.i.us.us.2.1 - %indvars.iv.next.i.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.i.us.us.2.1, %for.body.i.i.us.us.2.1 ], [ 0, %if.then.i.i.us.us.2.1 ] - %192 = phi float [ %198, %for.body.i.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.i.us.us.2.1 ] - %193 = add nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %81 - %arrayidx11.i.i.us.us.2.1 = getelementptr inbounds float, float* %7, i64 %193 - %194 = load float, float* %arrayidx11.i.i.us.us.2.1, align 4, !tbaa !12 - %195 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %26 - %196 = add nsw i64 %195, %191 - %arrayidx15.i.i.us.us.2.1 = getelementptr inbounds float, float* %10, i64 %196 - %197 = load float, float* %arrayidx15.i.i.us.us.2.1, align 4, !tbaa !12 - %198 = tail call float @llvm.fmuladd.f32(float %194, float %197, float %192) #2 - store float %198, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2.1, 1 - %exitcond.not.i.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.2.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1.loopexit, label %for.body.i.i.us.us.2.1, !llvm.loop !19 - -if.end.i.i.us.us.2.1.loopexit: ; preds = %for.body.i.i.us.us.2.1 - br label %if.end.i.i.us.us.2.1 - -if.end.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2.1.loopexit, %if.end.i.i.us.us.2 - %199 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %199, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !23 - -if.then.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1 - %add.i.i.us.us.1.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1.1 - %idxprom.i.i.us.us.1.1 = sext i32 %add.i.i.us.us.1.1 to i64 - %arrayidx.i.i.us.us.1.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1.1 = shl i64 %add1.i.i.i.us.us.1.1, 32 - %200 = ashr exact i64 %sext.i.i.us.us.1.1, 32 - br label %for.body.i.i.us.us.1.1 - -for.body.i.i.us.us.1.1: ; preds = %for.body.i.i.us.us.1.1, %if.then.i.i.us.us.1.1 - %indvars.iv.next.i.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1.1, %for.body.i.i.us.us.1.1 ], [ 0, %if.then.i.i.us.us.1.1 ] - %201 = phi float [ %207, %for.body.i.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1.1 ] - %202 = add nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %60 - %arrayidx11.i.i.us.us.1.1 = getelementptr inbounds float, float* %7, i64 %202 - %203 = load float, float* %arrayidx11.i.i.us.us.1.1, align 4, !tbaa !12 - %204 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %26 - %205 = add nsw i64 %204, %200 - %arrayidx15.i.i.us.us.1.1 = getelementptr inbounds float, float* %10, i64 %205 - %206 = load float, float* %arrayidx15.i.i.us.us.1.1, align 4, !tbaa !12 - %207 = tail call float @llvm.fmuladd.f32(float %203, float %206, float %201) #2 - store float %207, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1.1, 1 - %exitcond.not.i.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1.loopexit, label %for.body.i.i.us.us.1.1, !llvm.loop !19 - -if.end.i.i.us.us.1.1.loopexit: ; preds = %for.body.i.i.us.us.1.1 - br label %if.end.i.i.us.us.1.1 - -if.end.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1.1.loopexit, %if.end.i.i.us.us.1 - %208 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %208, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !23 - -if.then.i.i.us.us.146: ; preds = %if.end.i.i.us.us - %add.i.i.us.us.142 = add nsw i32 %mul.i.i.us, %conv.i.i.us.us.139 - %idxprom.i.i.us.us.143 = sext i32 %add.i.i.us.us.142 to i64 - %arrayidx.i.i.us.us.144 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.143 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.145 = shl i64 %add1.i.i.i.us.us.138, 32 - %209 = ashr exact i64 %sext.i.i.us.us.145, 32 - br label %for.body.i.i.us.us.152 - -for.body.i.i.us.us.152: ; preds = %for.body.i.i.us.us.152, %if.then.i.i.us.us.146 - %indvars.iv.next.i.i3.us.us.147 = phi i64 [ %indvars.iv.next.i.i.us.us.150, %for.body.i.i.us.us.152 ], [ 0, %if.then.i.i.us.us.146 ] - %210 = phi float [ %216, %for.body.i.i.us.us.152 ], [ 0.000000e+00, %if.then.i.i.us.us.146 ] - %211 = add nsw i64 %indvars.iv.next.i.i3.us.us.147, %58 - %arrayidx11.i.i.us.us.148 = getelementptr inbounds float, float* %7, i64 %211 - %212 = load float, float* %arrayidx11.i.i.us.us.148, align 4, !tbaa !12 - %213 = mul nsw i64 %indvars.iv.next.i.i3.us.us.147, %26 - %214 = add nsw i64 %213, %209 - %arrayidx15.i.i.us.us.149 = getelementptr inbounds float, float* %10, i64 %214 - %215 = load float, float* %arrayidx15.i.i.us.us.149, align 4, !tbaa !12 - %216 = tail call float @llvm.fmuladd.f32(float %212, float %215, float %210) #2 - store float %216, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.150 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.147, 1 - %exitcond.not.i.i.us.us.151 = icmp eq i64 %indvars.iv.next.i.i.us.us.150, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.151, label %if.end.i.i.us.us.153.loopexit, label %for.body.i.i.us.us.152, !llvm.loop !19 - -if.end.i.i.us.us.153.loopexit: ; preds = %for.body.i.i.us.us.152 - br label %if.end.i.i.us.us.153 - -if.end.i.i.us.us.153: ; preds = %if.end.i.i.us.us.153.loopexit, %if.end.i.i.us.us - %217 = add nuw nsw i64 %_local_id_x.i.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %217, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !23 -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !""} -!10 = !{!"A", !"B", !"E", !"ni", !"nj", !"nk"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17, !18} -!17 = distinct !{} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.unroll.disable"} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !18} -!23 = distinct !{!23, !24} -!24 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/3mm_kernel2.ll b/pocl_irs/3mm_kernel2.ll deleted file mode 100644 index 7654a8f..0000000 --- a/pocl_irs/3mm_kernel2.ll +++ /dev/null @@ -1,3583 +0,0 @@ -; ModuleID = './CF/DAJJACGEKBAPHIEIKFDOEEKGMOBCBEEPDHBEI/mm3_kernel2/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_mm3_kernel2(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, i32 %4, i32 %5, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %6, i64 %7, i64 %8, i64 %9) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %7, 5 - %mul3.i.i = shl i64 %8, 3 - %cmp638.i = icmp sgt i32 %5, 0 - %11 = sext i32 %4 to i64 - %wide.trip.count.i = zext i32 %5 to i64 - br i1 %cmp638.i, label %pregion_for_entry.pregion_for_init.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.preheader - -pregion_for_entry.pregion_for_init.i.preheader: ; preds = %10 - %conv.i.us = trunc i64 %mul.i.i to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %4 - %12 = trunc i64 %mul.i.i to i32 - %conv.i.us.1 = or i32 %12, 1 - %cmp4.i.us.1 = icmp slt i32 %conv.i.us.1, %4 - %13 = trunc i64 %mul.i.i to i32 - %conv.i.us.2 = or i32 %13, 2 - %cmp4.i.us.2 = icmp slt i32 %conv.i.us.2, %4 - %14 = trunc i64 %mul.i.i to i32 - %conv.i.us.3 = or i32 %14, 3 - %cmp4.i.us.3 = icmp slt i32 %conv.i.us.3, %4 - %15 = trunc i64 %mul.i.i to i32 - %conv.i.us.4 = or i32 %15, 4 - %cmp4.i.us.4 = icmp slt i32 %conv.i.us.4, %4 - %16 = trunc i64 %mul.i.i to i32 - %conv.i.us.5 = or i32 %16, 5 - %cmp4.i.us.5 = icmp slt i32 %conv.i.us.5, %4 - %17 = trunc i64 %mul.i.i to i32 - %conv.i.us.6 = or i32 %17, 6 - %cmp4.i.us.6 = icmp slt i32 %conv.i.us.6, %4 - %18 = trunc i64 %mul.i.i to i32 - %conv.i.us.7 = or i32 %18, 7 - %cmp4.i.us.7 = icmp slt i32 %conv.i.us.7, %4 - %19 = trunc i64 %mul.i.i to i32 - %conv.i.us.8 = or i32 %19, 8 - %cmp4.i.us.8 = icmp slt i32 %conv.i.us.8, %4 - %20 = trunc i64 %mul.i.i to i32 - %conv.i.us.9 = or i32 %20, 9 - %cmp4.i.us.9 = icmp slt i32 %conv.i.us.9, %4 - %21 = trunc i64 %mul.i.i to i32 - %conv.i.us.10 = or i32 %21, 10 - %cmp4.i.us.10 = icmp slt i32 %conv.i.us.10, %4 - %22 = trunc i64 %mul.i.i to i32 - %conv.i.us.11 = or i32 %22, 11 - %cmp4.i.us.11 = icmp slt i32 %conv.i.us.11, %4 - %23 = trunc i64 %mul.i.i to i32 - %conv.i.us.12 = or i32 %23, 12 - %cmp4.i.us.12 = icmp slt i32 %conv.i.us.12, %4 - %24 = trunc i64 %mul.i.i to i32 - %conv.i.us.13 = or i32 %24, 13 - %cmp4.i.us.13 = icmp slt i32 %conv.i.us.13, %4 - %25 = trunc i64 %mul.i.i to i32 - %conv.i.us.14 = or i32 %25, 14 - %cmp4.i.us.14 = icmp slt i32 %conv.i.us.14, %4 - %26 = trunc i64 %mul.i.i to i32 - %conv.i.us.15 = or i32 %26, 15 - %cmp4.i.us.15 = icmp slt i32 %conv.i.us.15, %4 - %27 = trunc i64 %mul.i.i to i32 - %conv.i.us.16 = or i32 %27, 16 - %cmp4.i.us.16 = icmp slt i32 %conv.i.us.16, %4 - %28 = trunc i64 %mul.i.i to i32 - %conv.i.us.17 = or i32 %28, 17 - %cmp4.i.us.17 = icmp slt i32 %conv.i.us.17, %4 - %29 = trunc i64 %mul.i.i to i32 - %conv.i.us.18 = or i32 %29, 18 - %cmp4.i.us.18 = icmp slt i32 %conv.i.us.18, %4 - %30 = trunc i64 %mul.i.i to i32 - %conv.i.us.19 = or i32 %30, 19 - %cmp4.i.us.19 = icmp slt i32 %conv.i.us.19, %4 - %31 = trunc i64 %mul.i.i to i32 - %conv.i.us.20 = or i32 %31, 20 - %cmp4.i.us.20 = icmp slt i32 %conv.i.us.20, %4 - %32 = trunc i64 %mul.i.i to i32 - %conv.i.us.21 = or i32 %32, 21 - %cmp4.i.us.21 = icmp slt i32 %conv.i.us.21, %4 - %33 = trunc i64 %mul.i.i to i32 - %conv.i.us.22 = or i32 %33, 22 - %cmp4.i.us.22 = icmp slt i32 %conv.i.us.22, %4 - %34 = trunc i64 %mul.i.i to i32 - %conv.i.us.23 = or i32 %34, 23 - %cmp4.i.us.23 = icmp slt i32 %conv.i.us.23, %4 - %35 = trunc i64 %mul.i.i to i32 - %conv.i.us.24 = or i32 %35, 24 - %cmp4.i.us.24 = icmp slt i32 %conv.i.us.24, %4 - %36 = trunc i64 %mul.i.i to i32 - %conv.i.us.25 = or i32 %36, 25 - %cmp4.i.us.25 = icmp slt i32 %conv.i.us.25, %4 - %37 = trunc i64 %mul.i.i to i32 - %conv.i.us.26 = or i32 %37, 26 - %cmp4.i.us.26 = icmp slt i32 %conv.i.us.26, %4 - %38 = trunc i64 %mul.i.i to i32 - %conv.i.us.27 = or i32 %38, 27 - %cmp4.i.us.27 = icmp slt i32 %conv.i.us.27, %4 - %39 = trunc i64 %mul.i.i to i32 - %conv.i.us.28 = or i32 %39, 28 - %cmp4.i.us.28 = icmp slt i32 %conv.i.us.28, %4 - %40 = trunc i64 %mul.i.i to i32 - %conv.i.us.29 = or i32 %40, 29 - %cmp4.i.us.29 = icmp slt i32 %conv.i.us.29, %4 - %41 = trunc i64 %mul.i.i to i32 - %conv.i.us.30 = or i32 %41, 30 - %cmp4.i.us.30 = icmp slt i32 %conv.i.us.30, %4 - %42 = trunc i64 %mul.i.i to i32 - %conv.i.us.31 = or i32 %42, 31 - %cmp4.i.us.31 = icmp slt i32 %conv.i.us.31, %4 - br label %pregion_for_entry.pregion_for_init.i - -pregion_for_entry.pregion_for_init.i.us.preheader: ; preds = %10 - %conv2.i.us = trunc i64 %mul3.i.i to i32 - %cmp.i.us = icmp slt i32 %conv2.i.us, %3 - %mul.i.us = mul nsw i32 %conv2.i.us, %4 - %mul8.i.us = mul nsw i32 %conv2.i.us, %5 - %43 = sext i32 %mul8.i.us to i64 - br i1 %cmp.i.us, label %pregion_for_entry.entry.i.us.us.preheader, label %pregion_for_end.i.us - -pregion_for_entry.entry.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.us.preheader - br label %pregion_for_entry.entry.i.us.us - -pregion_for_end.i.us.loopexit: ; preds = %if.end.i.us.us.153 - br label %pregion_for_end.i.us - -pregion_for_end.i.us: ; preds = %pregion_for_end.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.us.preheader - %44 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.1 = or i32 %44, 1 - %cmp.i.us.1 = icmp slt i32 %conv2.i.us.1, %3 - %mul.i.us.1 = mul nsw i32 %conv2.i.us.1, %4 - %mul8.i.us.1 = mul nsw i32 %conv2.i.us.1, %5 - %45 = sext i32 %mul8.i.us.1 to i64 - br i1 %cmp.i.us.1, label %pregion_for_entry.entry.i.us.us.1.preheader, label %pregion_for_end.i.us.1 - -pregion_for_entry.entry.i.us.us.1.preheader: ; preds = %pregion_for_end.i.us - br label %pregion_for_entry.entry.i.us.us.1 - -pregion_for_entry.entry.i.us.us: ; preds = %if.end.i.us.us.153, %pregion_for_entry.entry.i.us.us.preheader - %_local_id_x.0.us.us = phi i64 [ %202, %if.end.i.us.us.153 ], [ 0, %pregion_for_entry.entry.i.us.us.preheader ] - %add1.i.i.us.us = add nuw nsw i64 %_local_id_x.0.us.us, %mul.i.i - %conv.i.us.us = trunc i64 %add1.i.i.us.us to i32 - %cmp4.i.us.us = icmp slt i32 %conv.i.us.us, %4 - br i1 %cmp4.i.us.us, label %if.then.i.us.us, label %if.end.i.us.us - -if.then.i.us.us: ; preds = %pregion_for_entry.entry.i.us.us - %add.i.us.us = add nsw i32 %mul.i.us, %conv.i.us.us - %idxprom.i.us.us = sext i32 %add.i.us.us to i64 - %arrayidx.i.us.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us - store float 0.000000e+00, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us = shl i64 %add1.i.i.us.us, 32 - %46 = ashr exact i64 %sext.i.us.us, 32 - br label %for.body.i.us.us - -if.end.i.us.us.loopexit: ; preds = %for.body.i.us.us - br label %if.end.i.us.us - -if.end.i.us.us: ; preds = %if.end.i.us.us.loopexit, %pregion_for_entry.entry.i.us.us - %47 = or i64 %_local_id_x.0.us.us, 1 - %add1.i.i.us.us.138 = add nuw nsw i64 %47, %mul.i.i - %conv.i.us.us.139 = trunc i64 %add1.i.i.us.us.138 to i32 - %cmp4.i.us.us.140 = icmp slt i32 %conv.i.us.us.139, %4 - br i1 %cmp4.i.us.us.140, label %if.then.i.us.us.146, label %if.end.i.us.us.153 - -for.body.i.us.us: ; preds = %for.body.i.us.us, %if.then.i.us.us - %indvars.iv.next.i3.us.us = phi i64 [ %indvars.iv.next.i.us.us, %for.body.i.us.us ], [ 0, %if.then.i.us.us ] - %48 = phi float [ %54, %for.body.i.us.us ], [ 0.000000e+00, %if.then.i.us.us ] - %49 = add nsw i64 %indvars.iv.next.i3.us.us, %43 - %arrayidx11.i.us.us = getelementptr inbounds float, float* %0, i64 %49 - %50 = load float, float* %arrayidx11.i.us.us, align 4, !tbaa !12 - %51 = mul nsw i64 %indvars.iv.next.i3.us.us, %11 - %52 = add nsw i64 %51, %46 - %arrayidx15.i.us.us = getelementptr inbounds float, float* %1, i64 %52 - %53 = load float, float* %arrayidx15.i.us.us, align 4, !tbaa !12 - %54 = tail call float @llvm.fmuladd.f32(float %50, float %53, float %48) #2 - store float %54, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us = add nuw nsw i64 %indvars.iv.next.i3.us.us, 1 - %exitcond.not.i.us.us = icmp eq i64 %indvars.iv.next.i.us.us, %wide.trip.count.i - br i1 %exitcond.not.i.us.us, label %if.end.i.us.us.loopexit, label %for.body.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i: ; preds = %pregion_for_end.i, %pregion_for_entry.pregion_for_init.i.preheader - %_local_id_y.0 = phi i64 [ %55, %pregion_for_end.i ], [ 0, %pregion_for_entry.pregion_for_init.i.preheader ] - %add6.i.i = add nuw nsw i64 %_local_id_y.0, %mul3.i.i - %conv2.i = trunc i64 %add6.i.i to i32 - %cmp.i = icmp slt i32 %conv2.i, %3 - %mul.i = mul nsw i32 %conv2.i, %4 - br i1 %cmp.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_end.i - -pregion_for_entry.entry.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i - br i1 %cmp4.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us.preheader - %add.i.us = add nsw i32 %mul.i, %conv.i.us - %idxprom.i.us = sext i32 %add.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us - store float 0.000000e+00, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us - -if.end.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us.preheader - br i1 %cmp4.i.us.1, label %if.then.i.us.1, label %if.end.i.us.1 - -pregion_for_end.i: ; preds = %if.then.i.us.31, %if.end.i.us.30, %pregion_for_entry.pregion_for_init.i - %55 = add nuw nsw i64 %_local_id_y.0, 1 - %exitcond33.not = icmp eq i64 %55, 8 - br i1 %exitcond33.not, label %mm3_kernel2.exit.loopexit54, label %pregion_for_entry.pregion_for_init.i, !llvm.loop !21 - -mm3_kernel2.exit.loopexit: ; preds = %if.end.i.us.us.7.1 - br label %mm3_kernel2.exit - -mm3_kernel2.exit.loopexit54: ; preds = %pregion_for_end.i - br label %mm3_kernel2.exit - -mm3_kernel2.exit: ; preds = %pregion_for_end.i.us.6, %mm3_kernel2.exit.loopexit54, %mm3_kernel2.exit.loopexit - ret void - -pregion_for_entry.entry.i.us.us.1: ; preds = %if.end.i.us.us.1.1, %pregion_for_entry.entry.i.us.us.1.preheader - %_local_id_x.0.us.us.1 = phi i64 [ %193, %if.end.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.us.us.1.preheader ] - %add1.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.0.us.us.1, %mul.i.i - %conv.i.us.us.1 = trunc i64 %add1.i.i.us.us.1 to i32 - %cmp4.i.us.us.1 = icmp slt i32 %conv.i.us.us.1, %4 - br i1 %cmp4.i.us.us.1, label %if.then.i.us.us.1, label %if.end.i.us.us.1 - -if.then.i.us.us.1: ; preds = %pregion_for_entry.entry.i.us.us.1 - %add.i.us.us.1 = add nsw i32 %mul.i.us.1, %conv.i.us.us.1 - %idxprom.i.us.us.1 = sext i32 %add.i.us.us.1 to i64 - %arrayidx.i.us.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.1 = shl i64 %add1.i.i.us.us.1, 32 - %56 = ashr exact i64 %sext.i.us.us.1, 32 - br label %for.body.i.us.us.1 - -for.body.i.us.us.1: ; preds = %for.body.i.us.us.1, %if.then.i.us.us.1 - %indvars.iv.next.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.us.us.1, %for.body.i.us.us.1 ], [ 0, %if.then.i.us.us.1 ] - %57 = phi float [ %63, %for.body.i.us.us.1 ], [ 0.000000e+00, %if.then.i.us.us.1 ] - %58 = add nsw i64 %indvars.iv.next.i3.us.us.1, %45 - %arrayidx11.i.us.us.1 = getelementptr inbounds float, float* %0, i64 %58 - %59 = load float, float* %arrayidx11.i.us.us.1, align 4, !tbaa !12 - %60 = mul nsw i64 %indvars.iv.next.i3.us.us.1, %11 - %61 = add nsw i64 %60, %56 - %arrayidx15.i.us.us.1 = getelementptr inbounds float, float* %1, i64 %61 - %62 = load float, float* %arrayidx15.i.us.us.1, align 4, !tbaa !12 - %63 = tail call float @llvm.fmuladd.f32(float %59, float %62, float %57) #2 - store float %63, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1, 1 - %exitcond.not.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.us.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.1, label %if.end.i.us.us.1.loopexit, label %for.body.i.us.us.1, !llvm.loop !19 - -if.end.i.us.us.1.loopexit: ; preds = %for.body.i.us.us.1 - br label %if.end.i.us.us.1 - -if.end.i.us.us.1: ; preds = %if.end.i.us.us.1.loopexit, %pregion_for_entry.entry.i.us.us.1 - %64 = or i64 %_local_id_x.0.us.us.1, 1 - %add1.i.i.us.us.1.1 = add nuw nsw i64 %64, %mul.i.i - %conv.i.us.us.1.1 = trunc i64 %add1.i.i.us.us.1.1 to i32 - %cmp4.i.us.us.1.1 = icmp slt i32 %conv.i.us.us.1.1, %4 - br i1 %cmp4.i.us.us.1.1, label %if.then.i.us.us.1.1, label %if.end.i.us.us.1.1 - -pregion_for_end.i.us.1.loopexit: ; preds = %if.end.i.us.us.1.1 - br label %pregion_for_end.i.us.1 - -pregion_for_end.i.us.1: ; preds = %pregion_for_end.i.us.1.loopexit, %pregion_for_end.i.us - %65 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.2 = or i32 %65, 2 - %cmp.i.us.2 = icmp slt i32 %conv2.i.us.2, %3 - %mul.i.us.2 = mul nsw i32 %conv2.i.us.2, %4 - %mul8.i.us.2 = mul nsw i32 %conv2.i.us.2, %5 - %66 = sext i32 %mul8.i.us.2 to i64 - br i1 %cmp.i.us.2, label %pregion_for_entry.entry.i.us.us.2.preheader, label %pregion_for_end.i.us.2 - -pregion_for_entry.entry.i.us.us.2.preheader: ; preds = %pregion_for_end.i.us.1 - br label %pregion_for_entry.entry.i.us.us.2 - -pregion_for_entry.entry.i.us.us.2: ; preds = %if.end.i.us.us.2.1, %pregion_for_entry.entry.i.us.us.2.preheader - %_local_id_x.0.us.us.2 = phi i64 [ %184, %if.end.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.us.us.2.preheader ] - %add1.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.0.us.us.2, %mul.i.i - %conv.i.us.us.2 = trunc i64 %add1.i.i.us.us.2 to i32 - %cmp4.i.us.us.2 = icmp slt i32 %conv.i.us.us.2, %4 - br i1 %cmp4.i.us.us.2, label %if.then.i.us.us.2, label %if.end.i.us.us.2 - -if.then.i.us.us.2: ; preds = %pregion_for_entry.entry.i.us.us.2 - %add.i.us.us.2 = add nsw i32 %mul.i.us.2, %conv.i.us.us.2 - %idxprom.i.us.us.2 = sext i32 %add.i.us.us.2 to i64 - %arrayidx.i.us.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.2 = shl i64 %add1.i.i.us.us.2, 32 - %67 = ashr exact i64 %sext.i.us.us.2, 32 - br label %for.body.i.us.us.2 - -for.body.i.us.us.2: ; preds = %for.body.i.us.us.2, %if.then.i.us.us.2 - %indvars.iv.next.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.us.us.2, %for.body.i.us.us.2 ], [ 0, %if.then.i.us.us.2 ] - %68 = phi float [ %74, %for.body.i.us.us.2 ], [ 0.000000e+00, %if.then.i.us.us.2 ] - %69 = add nsw i64 %indvars.iv.next.i3.us.us.2, %66 - %arrayidx11.i.us.us.2 = getelementptr inbounds float, float* %0, i64 %69 - %70 = load float, float* %arrayidx11.i.us.us.2, align 4, !tbaa !12 - %71 = mul nsw i64 %indvars.iv.next.i3.us.us.2, %11 - %72 = add nsw i64 %71, %67 - %arrayidx15.i.us.us.2 = getelementptr inbounds float, float* %1, i64 %72 - %73 = load float, float* %arrayidx15.i.us.us.2, align 4, !tbaa !12 - %74 = tail call float @llvm.fmuladd.f32(float %70, float %73, float %68) #2 - store float %74, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2, 1 - %exitcond.not.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.us.us.2, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.2, label %if.end.i.us.us.2.loopexit, label %for.body.i.us.us.2, !llvm.loop !19 - -if.end.i.us.us.2.loopexit: ; preds = %for.body.i.us.us.2 - br label %if.end.i.us.us.2 - -if.end.i.us.us.2: ; preds = %if.end.i.us.us.2.loopexit, %pregion_for_entry.entry.i.us.us.2 - %75 = or i64 %_local_id_x.0.us.us.2, 1 - %add1.i.i.us.us.2.1 = add nuw nsw i64 %75, %mul.i.i - %conv.i.us.us.2.1 = trunc i64 %add1.i.i.us.us.2.1 to i32 - %cmp4.i.us.us.2.1 = icmp slt i32 %conv.i.us.us.2.1, %4 - br i1 %cmp4.i.us.us.2.1, label %if.then.i.us.us.2.1, label %if.end.i.us.us.2.1 - -pregion_for_end.i.us.2.loopexit: ; preds = %if.end.i.us.us.2.1 - br label %pregion_for_end.i.us.2 - -pregion_for_end.i.us.2: ; preds = %pregion_for_end.i.us.2.loopexit, %pregion_for_end.i.us.1 - %76 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.3 = or i32 %76, 3 - %cmp.i.us.3 = icmp slt i32 %conv2.i.us.3, %3 - %mul.i.us.3 = mul nsw i32 %conv2.i.us.3, %4 - %mul8.i.us.3 = mul nsw i32 %conv2.i.us.3, %5 - %77 = sext i32 %mul8.i.us.3 to i64 - br i1 %cmp.i.us.3, label %pregion_for_entry.entry.i.us.us.3.preheader, label %pregion_for_end.i.us.3 - -pregion_for_entry.entry.i.us.us.3.preheader: ; preds = %pregion_for_end.i.us.2 - br label %pregion_for_entry.entry.i.us.us.3 - -pregion_for_entry.entry.i.us.us.3: ; preds = %if.end.i.us.us.3.1, %pregion_for_entry.entry.i.us.us.3.preheader - %_local_id_x.0.us.us.3 = phi i64 [ %175, %if.end.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.us.us.3.preheader ] - %add1.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.0.us.us.3, %mul.i.i - %conv.i.us.us.3 = trunc i64 %add1.i.i.us.us.3 to i32 - %cmp4.i.us.us.3 = icmp slt i32 %conv.i.us.us.3, %4 - br i1 %cmp4.i.us.us.3, label %if.then.i.us.us.3, label %if.end.i.us.us.3 - -if.then.i.us.us.3: ; preds = %pregion_for_entry.entry.i.us.us.3 - %add.i.us.us.3 = add nsw i32 %mul.i.us.3, %conv.i.us.us.3 - %idxprom.i.us.us.3 = sext i32 %add.i.us.us.3 to i64 - %arrayidx.i.us.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.3 = shl i64 %add1.i.i.us.us.3, 32 - %78 = ashr exact i64 %sext.i.us.us.3, 32 - br label %for.body.i.us.us.3 - -for.body.i.us.us.3: ; preds = %for.body.i.us.us.3, %if.then.i.us.us.3 - %indvars.iv.next.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.us.us.3, %for.body.i.us.us.3 ], [ 0, %if.then.i.us.us.3 ] - %79 = phi float [ %85, %for.body.i.us.us.3 ], [ 0.000000e+00, %if.then.i.us.us.3 ] - %80 = add nsw i64 %indvars.iv.next.i3.us.us.3, %77 - %arrayidx11.i.us.us.3 = getelementptr inbounds float, float* %0, i64 %80 - %81 = load float, float* %arrayidx11.i.us.us.3, align 4, !tbaa !12 - %82 = mul nsw i64 %indvars.iv.next.i3.us.us.3, %11 - %83 = add nsw i64 %82, %78 - %arrayidx15.i.us.us.3 = getelementptr inbounds float, float* %1, i64 %83 - %84 = load float, float* %arrayidx15.i.us.us.3, align 4, !tbaa !12 - %85 = tail call float @llvm.fmuladd.f32(float %81, float %84, float %79) #2 - store float %85, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3, 1 - %exitcond.not.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.us.us.3, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.3, label %if.end.i.us.us.3.loopexit, label %for.body.i.us.us.3, !llvm.loop !19 - -if.end.i.us.us.3.loopexit: ; preds = %for.body.i.us.us.3 - br label %if.end.i.us.us.3 - -if.end.i.us.us.3: ; preds = %if.end.i.us.us.3.loopexit, %pregion_for_entry.entry.i.us.us.3 - %86 = or i64 %_local_id_x.0.us.us.3, 1 - %add1.i.i.us.us.3.1 = add nuw nsw i64 %86, %mul.i.i - %conv.i.us.us.3.1 = trunc i64 %add1.i.i.us.us.3.1 to i32 - %cmp4.i.us.us.3.1 = icmp slt i32 %conv.i.us.us.3.1, %4 - br i1 %cmp4.i.us.us.3.1, label %if.then.i.us.us.3.1, label %if.end.i.us.us.3.1 - -pregion_for_end.i.us.3.loopexit: ; preds = %if.end.i.us.us.3.1 - br label %pregion_for_end.i.us.3 - -pregion_for_end.i.us.3: ; preds = %pregion_for_end.i.us.3.loopexit, %pregion_for_end.i.us.2 - %87 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.4 = or i32 %87, 4 - %cmp.i.us.4 = icmp slt i32 %conv2.i.us.4, %3 - %mul.i.us.4 = mul nsw i32 %conv2.i.us.4, %4 - %mul8.i.us.4 = mul nsw i32 %conv2.i.us.4, %5 - %88 = sext i32 %mul8.i.us.4 to i64 - br i1 %cmp.i.us.4, label %pregion_for_entry.entry.i.us.us.4.preheader, label %pregion_for_end.i.us.4 - -pregion_for_entry.entry.i.us.us.4.preheader: ; preds = %pregion_for_end.i.us.3 - br label %pregion_for_entry.entry.i.us.us.4 - -pregion_for_entry.entry.i.us.us.4: ; preds = %if.end.i.us.us.4.1, %pregion_for_entry.entry.i.us.us.4.preheader - %_local_id_x.0.us.us.4 = phi i64 [ %166, %if.end.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.us.us.4.preheader ] - %add1.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.0.us.us.4, %mul.i.i - %conv.i.us.us.4 = trunc i64 %add1.i.i.us.us.4 to i32 - %cmp4.i.us.us.4 = icmp slt i32 %conv.i.us.us.4, %4 - br i1 %cmp4.i.us.us.4, label %if.then.i.us.us.4, label %if.end.i.us.us.4 - -if.then.i.us.us.4: ; preds = %pregion_for_entry.entry.i.us.us.4 - %add.i.us.us.4 = add nsw i32 %mul.i.us.4, %conv.i.us.us.4 - %idxprom.i.us.us.4 = sext i32 %add.i.us.us.4 to i64 - %arrayidx.i.us.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.4 = shl i64 %add1.i.i.us.us.4, 32 - %89 = ashr exact i64 %sext.i.us.us.4, 32 - br label %for.body.i.us.us.4 - -for.body.i.us.us.4: ; preds = %for.body.i.us.us.4, %if.then.i.us.us.4 - %indvars.iv.next.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.us.us.4, %for.body.i.us.us.4 ], [ 0, %if.then.i.us.us.4 ] - %90 = phi float [ %96, %for.body.i.us.us.4 ], [ 0.000000e+00, %if.then.i.us.us.4 ] - %91 = add nsw i64 %indvars.iv.next.i3.us.us.4, %88 - %arrayidx11.i.us.us.4 = getelementptr inbounds float, float* %0, i64 %91 - %92 = load float, float* %arrayidx11.i.us.us.4, align 4, !tbaa !12 - %93 = mul nsw i64 %indvars.iv.next.i3.us.us.4, %11 - %94 = add nsw i64 %93, %89 - %arrayidx15.i.us.us.4 = getelementptr inbounds float, float* %1, i64 %94 - %95 = load float, float* %arrayidx15.i.us.us.4, align 4, !tbaa !12 - %96 = tail call float @llvm.fmuladd.f32(float %92, float %95, float %90) #2 - store float %96, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4, 1 - %exitcond.not.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.us.us.4, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.4, label %if.end.i.us.us.4.loopexit, label %for.body.i.us.us.4, !llvm.loop !19 - -if.end.i.us.us.4.loopexit: ; preds = %for.body.i.us.us.4 - br label %if.end.i.us.us.4 - -if.end.i.us.us.4: ; preds = %if.end.i.us.us.4.loopexit, %pregion_for_entry.entry.i.us.us.4 - %97 = or i64 %_local_id_x.0.us.us.4, 1 - %add1.i.i.us.us.4.1 = add nuw nsw i64 %97, %mul.i.i - %conv.i.us.us.4.1 = trunc i64 %add1.i.i.us.us.4.1 to i32 - %cmp4.i.us.us.4.1 = icmp slt i32 %conv.i.us.us.4.1, %4 - br i1 %cmp4.i.us.us.4.1, label %if.then.i.us.us.4.1, label %if.end.i.us.us.4.1 - -pregion_for_end.i.us.4.loopexit: ; preds = %if.end.i.us.us.4.1 - br label %pregion_for_end.i.us.4 - -pregion_for_end.i.us.4: ; preds = %pregion_for_end.i.us.4.loopexit, %pregion_for_end.i.us.3 - %98 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.5 = or i32 %98, 5 - %cmp.i.us.5 = icmp slt i32 %conv2.i.us.5, %3 - %mul.i.us.5 = mul nsw i32 %conv2.i.us.5, %4 - %mul8.i.us.5 = mul nsw i32 %conv2.i.us.5, %5 - %99 = sext i32 %mul8.i.us.5 to i64 - br i1 %cmp.i.us.5, label %pregion_for_entry.entry.i.us.us.5.preheader, label %pregion_for_end.i.us.5 - -pregion_for_entry.entry.i.us.us.5.preheader: ; preds = %pregion_for_end.i.us.4 - br label %pregion_for_entry.entry.i.us.us.5 - -pregion_for_entry.entry.i.us.us.5: ; preds = %if.end.i.us.us.5.1, %pregion_for_entry.entry.i.us.us.5.preheader - %_local_id_x.0.us.us.5 = phi i64 [ %157, %if.end.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.us.us.5.preheader ] - %add1.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.0.us.us.5, %mul.i.i - %conv.i.us.us.5 = trunc i64 %add1.i.i.us.us.5 to i32 - %cmp4.i.us.us.5 = icmp slt i32 %conv.i.us.us.5, %4 - br i1 %cmp4.i.us.us.5, label %if.then.i.us.us.5, label %if.end.i.us.us.5 - -if.then.i.us.us.5: ; preds = %pregion_for_entry.entry.i.us.us.5 - %add.i.us.us.5 = add nsw i32 %mul.i.us.5, %conv.i.us.us.5 - %idxprom.i.us.us.5 = sext i32 %add.i.us.us.5 to i64 - %arrayidx.i.us.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.5 = shl i64 %add1.i.i.us.us.5, 32 - %100 = ashr exact i64 %sext.i.us.us.5, 32 - br label %for.body.i.us.us.5 - -for.body.i.us.us.5: ; preds = %for.body.i.us.us.5, %if.then.i.us.us.5 - %indvars.iv.next.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.us.us.5, %for.body.i.us.us.5 ], [ 0, %if.then.i.us.us.5 ] - %101 = phi float [ %107, %for.body.i.us.us.5 ], [ 0.000000e+00, %if.then.i.us.us.5 ] - %102 = add nsw i64 %indvars.iv.next.i3.us.us.5, %99 - %arrayidx11.i.us.us.5 = getelementptr inbounds float, float* %0, i64 %102 - %103 = load float, float* %arrayidx11.i.us.us.5, align 4, !tbaa !12 - %104 = mul nsw i64 %indvars.iv.next.i3.us.us.5, %11 - %105 = add nsw i64 %104, %100 - %arrayidx15.i.us.us.5 = getelementptr inbounds float, float* %1, i64 %105 - %106 = load float, float* %arrayidx15.i.us.us.5, align 4, !tbaa !12 - %107 = tail call float @llvm.fmuladd.f32(float %103, float %106, float %101) #2 - store float %107, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5, 1 - %exitcond.not.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.us.us.5, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.5, label %if.end.i.us.us.5.loopexit, label %for.body.i.us.us.5, !llvm.loop !19 - -if.end.i.us.us.5.loopexit: ; preds = %for.body.i.us.us.5 - br label %if.end.i.us.us.5 - -if.end.i.us.us.5: ; preds = %if.end.i.us.us.5.loopexit, %pregion_for_entry.entry.i.us.us.5 - %108 = or i64 %_local_id_x.0.us.us.5, 1 - %add1.i.i.us.us.5.1 = add nuw nsw i64 %108, %mul.i.i - %conv.i.us.us.5.1 = trunc i64 %add1.i.i.us.us.5.1 to i32 - %cmp4.i.us.us.5.1 = icmp slt i32 %conv.i.us.us.5.1, %4 - br i1 %cmp4.i.us.us.5.1, label %if.then.i.us.us.5.1, label %if.end.i.us.us.5.1 - -pregion_for_end.i.us.5.loopexit: ; preds = %if.end.i.us.us.5.1 - br label %pregion_for_end.i.us.5 - -pregion_for_end.i.us.5: ; preds = %pregion_for_end.i.us.5.loopexit, %pregion_for_end.i.us.4 - %109 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.6 = or i32 %109, 6 - %cmp.i.us.6 = icmp slt i32 %conv2.i.us.6, %3 - %mul.i.us.6 = mul nsw i32 %conv2.i.us.6, %4 - %mul8.i.us.6 = mul nsw i32 %conv2.i.us.6, %5 - %110 = sext i32 %mul8.i.us.6 to i64 - br i1 %cmp.i.us.6, label %pregion_for_entry.entry.i.us.us.6.preheader, label %pregion_for_end.i.us.6 - -pregion_for_entry.entry.i.us.us.6.preheader: ; preds = %pregion_for_end.i.us.5 - br label %pregion_for_entry.entry.i.us.us.6 - -pregion_for_entry.entry.i.us.us.6: ; preds = %if.end.i.us.us.6.1, %pregion_for_entry.entry.i.us.us.6.preheader - %_local_id_x.0.us.us.6 = phi i64 [ %148, %if.end.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.us.us.6.preheader ] - %add1.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.0.us.us.6, %mul.i.i - %conv.i.us.us.6 = trunc i64 %add1.i.i.us.us.6 to i32 - %cmp4.i.us.us.6 = icmp slt i32 %conv.i.us.us.6, %4 - br i1 %cmp4.i.us.us.6, label %if.then.i.us.us.6, label %if.end.i.us.us.6 - -if.then.i.us.us.6: ; preds = %pregion_for_entry.entry.i.us.us.6 - %add.i.us.us.6 = add nsw i32 %mul.i.us.6, %conv.i.us.us.6 - %idxprom.i.us.us.6 = sext i32 %add.i.us.us.6 to i64 - %arrayidx.i.us.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.6 = shl i64 %add1.i.i.us.us.6, 32 - %111 = ashr exact i64 %sext.i.us.us.6, 32 - br label %for.body.i.us.us.6 - -for.body.i.us.us.6: ; preds = %for.body.i.us.us.6, %if.then.i.us.us.6 - %indvars.iv.next.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.us.us.6, %for.body.i.us.us.6 ], [ 0, %if.then.i.us.us.6 ] - %112 = phi float [ %118, %for.body.i.us.us.6 ], [ 0.000000e+00, %if.then.i.us.us.6 ] - %113 = add nsw i64 %indvars.iv.next.i3.us.us.6, %110 - %arrayidx11.i.us.us.6 = getelementptr inbounds float, float* %0, i64 %113 - %114 = load float, float* %arrayidx11.i.us.us.6, align 4, !tbaa !12 - %115 = mul nsw i64 %indvars.iv.next.i3.us.us.6, %11 - %116 = add nsw i64 %115, %111 - %arrayidx15.i.us.us.6 = getelementptr inbounds float, float* %1, i64 %116 - %117 = load float, float* %arrayidx15.i.us.us.6, align 4, !tbaa !12 - %118 = tail call float @llvm.fmuladd.f32(float %114, float %117, float %112) #2 - store float %118, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6, 1 - %exitcond.not.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.us.us.6, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.6, label %if.end.i.us.us.6.loopexit, label %for.body.i.us.us.6, !llvm.loop !19 - -if.end.i.us.us.6.loopexit: ; preds = %for.body.i.us.us.6 - br label %if.end.i.us.us.6 - -if.end.i.us.us.6: ; preds = %if.end.i.us.us.6.loopexit, %pregion_for_entry.entry.i.us.us.6 - %119 = or i64 %_local_id_x.0.us.us.6, 1 - %add1.i.i.us.us.6.1 = add nuw nsw i64 %119, %mul.i.i - %conv.i.us.us.6.1 = trunc i64 %add1.i.i.us.us.6.1 to i32 - %cmp4.i.us.us.6.1 = icmp slt i32 %conv.i.us.us.6.1, %4 - br i1 %cmp4.i.us.us.6.1, label %if.then.i.us.us.6.1, label %if.end.i.us.us.6.1 - -pregion_for_end.i.us.6.loopexit: ; preds = %if.end.i.us.us.6.1 - br label %pregion_for_end.i.us.6 - -pregion_for_end.i.us.6: ; preds = %pregion_for_end.i.us.6.loopexit, %pregion_for_end.i.us.5 - %120 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.7 = or i32 %120, 7 - %cmp.i.us.7 = icmp slt i32 %conv2.i.us.7, %3 - %mul.i.us.7 = mul nsw i32 %conv2.i.us.7, %4 - %mul8.i.us.7 = mul nsw i32 %conv2.i.us.7, %5 - %121 = sext i32 %mul8.i.us.7 to i64 - br i1 %cmp.i.us.7, label %pregion_for_entry.entry.i.us.us.7.preheader, label %mm3_kernel2.exit - -pregion_for_entry.entry.i.us.us.7.preheader: ; preds = %pregion_for_end.i.us.6 - br label %pregion_for_entry.entry.i.us.us.7 - -pregion_for_entry.entry.i.us.us.7: ; preds = %if.end.i.us.us.7.1, %pregion_for_entry.entry.i.us.us.7.preheader - %_local_id_x.0.us.us.7 = phi i64 [ %139, %if.end.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.us.us.7.preheader ] - %add1.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.0.us.us.7, %mul.i.i - %conv.i.us.us.7 = trunc i64 %add1.i.i.us.us.7 to i32 - %cmp4.i.us.us.7 = icmp slt i32 %conv.i.us.us.7, %4 - br i1 %cmp4.i.us.us.7, label %if.then.i.us.us.7, label %if.end.i.us.us.7 - -if.then.i.us.us.7: ; preds = %pregion_for_entry.entry.i.us.us.7 - %add.i.us.us.7 = add nsw i32 %mul.i.us.7, %conv.i.us.us.7 - %idxprom.i.us.us.7 = sext i32 %add.i.us.us.7 to i64 - %arrayidx.i.us.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.7 = shl i64 %add1.i.i.us.us.7, 32 - %122 = ashr exact i64 %sext.i.us.us.7, 32 - br label %for.body.i.us.us.7 - -for.body.i.us.us.7: ; preds = %for.body.i.us.us.7, %if.then.i.us.us.7 - %indvars.iv.next.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.us.us.7, %for.body.i.us.us.7 ], [ 0, %if.then.i.us.us.7 ] - %123 = phi float [ %129, %for.body.i.us.us.7 ], [ 0.000000e+00, %if.then.i.us.us.7 ] - %124 = add nsw i64 %indvars.iv.next.i3.us.us.7, %121 - %arrayidx11.i.us.us.7 = getelementptr inbounds float, float* %0, i64 %124 - %125 = load float, float* %arrayidx11.i.us.us.7, align 4, !tbaa !12 - %126 = mul nsw i64 %indvars.iv.next.i3.us.us.7, %11 - %127 = add nsw i64 %126, %122 - %arrayidx15.i.us.us.7 = getelementptr inbounds float, float* %1, i64 %127 - %128 = load float, float* %arrayidx15.i.us.us.7, align 4, !tbaa !12 - %129 = tail call float @llvm.fmuladd.f32(float %125, float %128, float %123) #2 - store float %129, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7, 1 - %exitcond.not.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.us.us.7, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.7, label %if.end.i.us.us.7.loopexit, label %for.body.i.us.us.7, !llvm.loop !19 - -if.end.i.us.us.7.loopexit: ; preds = %for.body.i.us.us.7 - br label %if.end.i.us.us.7 - -if.end.i.us.us.7: ; preds = %if.end.i.us.us.7.loopexit, %pregion_for_entry.entry.i.us.us.7 - %130 = or i64 %_local_id_x.0.us.us.7, 1 - %add1.i.i.us.us.7.1 = add nuw nsw i64 %130, %mul.i.i - %conv.i.us.us.7.1 = trunc i64 %add1.i.i.us.us.7.1 to i32 - %cmp4.i.us.us.7.1 = icmp slt i32 %conv.i.us.us.7.1, %4 - br i1 %cmp4.i.us.us.7.1, label %if.then.i.us.us.7.1, label %if.end.i.us.us.7.1 - -if.then.i.us.1: ; preds = %if.end.i.us - %add.i.us.1 = add nsw i32 %mul.i, %conv.i.us.1 - %idxprom.i.us.1 = sext i32 %add.i.us.1 to i64 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1 - -if.end.i.us.1: ; preds = %if.then.i.us.1, %if.end.i.us - br i1 %cmp4.i.us.2, label %if.then.i.us.2, label %if.end.i.us.2 - -if.then.i.us.2: ; preds = %if.end.i.us.1 - %add.i.us.2 = add nsw i32 %mul.i, %conv.i.us.2 - %idxprom.i.us.2 = sext i32 %add.i.us.2 to i64 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2 - -if.end.i.us.2: ; preds = %if.then.i.us.2, %if.end.i.us.1 - br i1 %cmp4.i.us.3, label %if.then.i.us.3, label %if.end.i.us.3 - -if.then.i.us.3: ; preds = %if.end.i.us.2 - %add.i.us.3 = add nsw i32 %mul.i, %conv.i.us.3 - %idxprom.i.us.3 = sext i32 %add.i.us.3 to i64 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3 - -if.end.i.us.3: ; preds = %if.then.i.us.3, %if.end.i.us.2 - br i1 %cmp4.i.us.4, label %if.then.i.us.4, label %if.end.i.us.4 - -if.then.i.us.4: ; preds = %if.end.i.us.3 - %add.i.us.4 = add nsw i32 %mul.i, %conv.i.us.4 - %idxprom.i.us.4 = sext i32 %add.i.us.4 to i64 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4 - -if.end.i.us.4: ; preds = %if.then.i.us.4, %if.end.i.us.3 - br i1 %cmp4.i.us.5, label %if.then.i.us.5, label %if.end.i.us.5 - -if.then.i.us.5: ; preds = %if.end.i.us.4 - %add.i.us.5 = add nsw i32 %mul.i, %conv.i.us.5 - %idxprom.i.us.5 = sext i32 %add.i.us.5 to i64 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5 - -if.end.i.us.5: ; preds = %if.then.i.us.5, %if.end.i.us.4 - br i1 %cmp4.i.us.6, label %if.then.i.us.6, label %if.end.i.us.6 - -if.then.i.us.6: ; preds = %if.end.i.us.5 - %add.i.us.6 = add nsw i32 %mul.i, %conv.i.us.6 - %idxprom.i.us.6 = sext i32 %add.i.us.6 to i64 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6 - -if.end.i.us.6: ; preds = %if.then.i.us.6, %if.end.i.us.5 - br i1 %cmp4.i.us.7, label %if.then.i.us.7, label %if.end.i.us.7 - -if.then.i.us.7: ; preds = %if.end.i.us.6 - %add.i.us.7 = add nsw i32 %mul.i, %conv.i.us.7 - %idxprom.i.us.7 = sext i32 %add.i.us.7 to i64 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7 - -if.end.i.us.7: ; preds = %if.then.i.us.7, %if.end.i.us.6 - br i1 %cmp4.i.us.8, label %if.then.i.us.8, label %if.end.i.us.8 - -if.then.i.us.8: ; preds = %if.end.i.us.7 - %add.i.us.8 = add nsw i32 %mul.i, %conv.i.us.8 - %idxprom.i.us.8 = sext i32 %add.i.us.8 to i64 - %arrayidx.i.us.8 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.8 - -if.end.i.us.8: ; preds = %if.then.i.us.8, %if.end.i.us.7 - br i1 %cmp4.i.us.9, label %if.then.i.us.9, label %if.end.i.us.9 - -if.then.i.us.9: ; preds = %if.end.i.us.8 - %add.i.us.9 = add nsw i32 %mul.i, %conv.i.us.9 - %idxprom.i.us.9 = sext i32 %add.i.us.9 to i64 - %arrayidx.i.us.9 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.9 - -if.end.i.us.9: ; preds = %if.then.i.us.9, %if.end.i.us.8 - br i1 %cmp4.i.us.10, label %if.then.i.us.10, label %if.end.i.us.10 - -if.then.i.us.10: ; preds = %if.end.i.us.9 - %add.i.us.10 = add nsw i32 %mul.i, %conv.i.us.10 - %idxprom.i.us.10 = sext i32 %add.i.us.10 to i64 - %arrayidx.i.us.10 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.10 - -if.end.i.us.10: ; preds = %if.then.i.us.10, %if.end.i.us.9 - br i1 %cmp4.i.us.11, label %if.then.i.us.11, label %if.end.i.us.11 - -if.then.i.us.11: ; preds = %if.end.i.us.10 - %add.i.us.11 = add nsw i32 %mul.i, %conv.i.us.11 - %idxprom.i.us.11 = sext i32 %add.i.us.11 to i64 - %arrayidx.i.us.11 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.11 - -if.end.i.us.11: ; preds = %if.then.i.us.11, %if.end.i.us.10 - br i1 %cmp4.i.us.12, label %if.then.i.us.12, label %if.end.i.us.12 - -if.then.i.us.12: ; preds = %if.end.i.us.11 - %add.i.us.12 = add nsw i32 %mul.i, %conv.i.us.12 - %idxprom.i.us.12 = sext i32 %add.i.us.12 to i64 - %arrayidx.i.us.12 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.12 - -if.end.i.us.12: ; preds = %if.then.i.us.12, %if.end.i.us.11 - br i1 %cmp4.i.us.13, label %if.then.i.us.13, label %if.end.i.us.13 - -if.then.i.us.13: ; preds = %if.end.i.us.12 - %add.i.us.13 = add nsw i32 %mul.i, %conv.i.us.13 - %idxprom.i.us.13 = sext i32 %add.i.us.13 to i64 - %arrayidx.i.us.13 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.13 - -if.end.i.us.13: ; preds = %if.then.i.us.13, %if.end.i.us.12 - br i1 %cmp4.i.us.14, label %if.then.i.us.14, label %if.end.i.us.14 - -if.then.i.us.14: ; preds = %if.end.i.us.13 - %add.i.us.14 = add nsw i32 %mul.i, %conv.i.us.14 - %idxprom.i.us.14 = sext i32 %add.i.us.14 to i64 - %arrayidx.i.us.14 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.14 - -if.end.i.us.14: ; preds = %if.then.i.us.14, %if.end.i.us.13 - br i1 %cmp4.i.us.15, label %if.then.i.us.15, label %if.end.i.us.15 - -if.then.i.us.15: ; preds = %if.end.i.us.14 - %add.i.us.15 = add nsw i32 %mul.i, %conv.i.us.15 - %idxprom.i.us.15 = sext i32 %add.i.us.15 to i64 - %arrayidx.i.us.15 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.15 - -if.end.i.us.15: ; preds = %if.then.i.us.15, %if.end.i.us.14 - br i1 %cmp4.i.us.16, label %if.then.i.us.16, label %if.end.i.us.16 - -if.then.i.us.16: ; preds = %if.end.i.us.15 - %add.i.us.16 = add nsw i32 %mul.i, %conv.i.us.16 - %idxprom.i.us.16 = sext i32 %add.i.us.16 to i64 - %arrayidx.i.us.16 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.16 - -if.end.i.us.16: ; preds = %if.then.i.us.16, %if.end.i.us.15 - br i1 %cmp4.i.us.17, label %if.then.i.us.17, label %if.end.i.us.17 - -if.then.i.us.17: ; preds = %if.end.i.us.16 - %add.i.us.17 = add nsw i32 %mul.i, %conv.i.us.17 - %idxprom.i.us.17 = sext i32 %add.i.us.17 to i64 - %arrayidx.i.us.17 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.17 - -if.end.i.us.17: ; preds = %if.then.i.us.17, %if.end.i.us.16 - br i1 %cmp4.i.us.18, label %if.then.i.us.18, label %if.end.i.us.18 - -if.then.i.us.18: ; preds = %if.end.i.us.17 - %add.i.us.18 = add nsw i32 %mul.i, %conv.i.us.18 - %idxprom.i.us.18 = sext i32 %add.i.us.18 to i64 - %arrayidx.i.us.18 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.18 - -if.end.i.us.18: ; preds = %if.then.i.us.18, %if.end.i.us.17 - br i1 %cmp4.i.us.19, label %if.then.i.us.19, label %if.end.i.us.19 - -if.then.i.us.19: ; preds = %if.end.i.us.18 - %add.i.us.19 = add nsw i32 %mul.i, %conv.i.us.19 - %idxprom.i.us.19 = sext i32 %add.i.us.19 to i64 - %arrayidx.i.us.19 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.19 - -if.end.i.us.19: ; preds = %if.then.i.us.19, %if.end.i.us.18 - br i1 %cmp4.i.us.20, label %if.then.i.us.20, label %if.end.i.us.20 - -if.then.i.us.20: ; preds = %if.end.i.us.19 - %add.i.us.20 = add nsw i32 %mul.i, %conv.i.us.20 - %idxprom.i.us.20 = sext i32 %add.i.us.20 to i64 - %arrayidx.i.us.20 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.20 - -if.end.i.us.20: ; preds = %if.then.i.us.20, %if.end.i.us.19 - br i1 %cmp4.i.us.21, label %if.then.i.us.21, label %if.end.i.us.21 - -if.then.i.us.21: ; preds = %if.end.i.us.20 - %add.i.us.21 = add nsw i32 %mul.i, %conv.i.us.21 - %idxprom.i.us.21 = sext i32 %add.i.us.21 to i64 - %arrayidx.i.us.21 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.21 - -if.end.i.us.21: ; preds = %if.then.i.us.21, %if.end.i.us.20 - br i1 %cmp4.i.us.22, label %if.then.i.us.22, label %if.end.i.us.22 - -if.then.i.us.22: ; preds = %if.end.i.us.21 - %add.i.us.22 = add nsw i32 %mul.i, %conv.i.us.22 - %idxprom.i.us.22 = sext i32 %add.i.us.22 to i64 - %arrayidx.i.us.22 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.22 - -if.end.i.us.22: ; preds = %if.then.i.us.22, %if.end.i.us.21 - br i1 %cmp4.i.us.23, label %if.then.i.us.23, label %if.end.i.us.23 - -if.then.i.us.23: ; preds = %if.end.i.us.22 - %add.i.us.23 = add nsw i32 %mul.i, %conv.i.us.23 - %idxprom.i.us.23 = sext i32 %add.i.us.23 to i64 - %arrayidx.i.us.23 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.23 - -if.end.i.us.23: ; preds = %if.then.i.us.23, %if.end.i.us.22 - br i1 %cmp4.i.us.24, label %if.then.i.us.24, label %if.end.i.us.24 - -if.then.i.us.24: ; preds = %if.end.i.us.23 - %add.i.us.24 = add nsw i32 %mul.i, %conv.i.us.24 - %idxprom.i.us.24 = sext i32 %add.i.us.24 to i64 - %arrayidx.i.us.24 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.24 - -if.end.i.us.24: ; preds = %if.then.i.us.24, %if.end.i.us.23 - br i1 %cmp4.i.us.25, label %if.then.i.us.25, label %if.end.i.us.25 - -if.then.i.us.25: ; preds = %if.end.i.us.24 - %add.i.us.25 = add nsw i32 %mul.i, %conv.i.us.25 - %idxprom.i.us.25 = sext i32 %add.i.us.25 to i64 - %arrayidx.i.us.25 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.25 - -if.end.i.us.25: ; preds = %if.then.i.us.25, %if.end.i.us.24 - br i1 %cmp4.i.us.26, label %if.then.i.us.26, label %if.end.i.us.26 - -if.then.i.us.26: ; preds = %if.end.i.us.25 - %add.i.us.26 = add nsw i32 %mul.i, %conv.i.us.26 - %idxprom.i.us.26 = sext i32 %add.i.us.26 to i64 - %arrayidx.i.us.26 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.26 - -if.end.i.us.26: ; preds = %if.then.i.us.26, %if.end.i.us.25 - br i1 %cmp4.i.us.27, label %if.then.i.us.27, label %if.end.i.us.27 - -if.then.i.us.27: ; preds = %if.end.i.us.26 - %add.i.us.27 = add nsw i32 %mul.i, %conv.i.us.27 - %idxprom.i.us.27 = sext i32 %add.i.us.27 to i64 - %arrayidx.i.us.27 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.27 - -if.end.i.us.27: ; preds = %if.then.i.us.27, %if.end.i.us.26 - br i1 %cmp4.i.us.28, label %if.then.i.us.28, label %if.end.i.us.28 - -if.then.i.us.28: ; preds = %if.end.i.us.27 - %add.i.us.28 = add nsw i32 %mul.i, %conv.i.us.28 - %idxprom.i.us.28 = sext i32 %add.i.us.28 to i64 - %arrayidx.i.us.28 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.28 - -if.end.i.us.28: ; preds = %if.then.i.us.28, %if.end.i.us.27 - br i1 %cmp4.i.us.29, label %if.then.i.us.29, label %if.end.i.us.29 - -if.then.i.us.29: ; preds = %if.end.i.us.28 - %add.i.us.29 = add nsw i32 %mul.i, %conv.i.us.29 - %idxprom.i.us.29 = sext i32 %add.i.us.29 to i64 - %arrayidx.i.us.29 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.29 - -if.end.i.us.29: ; preds = %if.then.i.us.29, %if.end.i.us.28 - br i1 %cmp4.i.us.30, label %if.then.i.us.30, label %if.end.i.us.30 - -if.then.i.us.30: ; preds = %if.end.i.us.29 - %add.i.us.30 = add nsw i32 %mul.i, %conv.i.us.30 - %idxprom.i.us.30 = sext i32 %add.i.us.30 to i64 - %arrayidx.i.us.30 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.30 - -if.end.i.us.30: ; preds = %if.then.i.us.30, %if.end.i.us.29 - br i1 %cmp4.i.us.31, label %if.then.i.us.31, label %pregion_for_end.i - -if.then.i.us.31: ; preds = %if.end.i.us.30 - %add.i.us.31 = add nsw i32 %mul.i, %conv.i.us.31 - %idxprom.i.us.31 = sext i32 %add.i.us.31 to i64 - %arrayidx.i.us.31 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i - -if.then.i.us.us.7.1: ; preds = %if.end.i.us.us.7 - %add.i.us.us.7.1 = add nsw i32 %mul.i.us.7, %conv.i.us.us.7.1 - %idxprom.i.us.us.7.1 = sext i32 %add.i.us.us.7.1 to i64 - %arrayidx.i.us.us.7.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.7.1 = shl i64 %add1.i.i.us.us.7.1, 32 - %131 = ashr exact i64 %sext.i.us.us.7.1, 32 - br label %for.body.i.us.us.7.1 - -for.body.i.us.us.7.1: ; preds = %for.body.i.us.us.7.1, %if.then.i.us.us.7.1 - %indvars.iv.next.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.us.us.7.1, %for.body.i.us.us.7.1 ], [ 0, %if.then.i.us.us.7.1 ] - %132 = phi float [ %138, %for.body.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.us.us.7.1 ] - %133 = add nsw i64 %indvars.iv.next.i3.us.us.7.1, %121 - %arrayidx11.i.us.us.7.1 = getelementptr inbounds float, float* %0, i64 %133 - %134 = load float, float* %arrayidx11.i.us.us.7.1, align 4, !tbaa !12 - %135 = mul nsw i64 %indvars.iv.next.i3.us.us.7.1, %11 - %136 = add nsw i64 %135, %131 - %arrayidx15.i.us.us.7.1 = getelementptr inbounds float, float* %1, i64 %136 - %137 = load float, float* %arrayidx15.i.us.us.7.1, align 4, !tbaa !12 - %138 = tail call float @llvm.fmuladd.f32(float %134, float %137, float %132) #2 - store float %138, float* %arrayidx.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7.1, 1 - %exitcond.not.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.us.us.7.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.7.1, label %if.end.i.us.us.7.1.loopexit, label %for.body.i.us.us.7.1, !llvm.loop !19 - -if.end.i.us.us.7.1.loopexit: ; preds = %for.body.i.us.us.7.1 - br label %if.end.i.us.us.7.1 - -if.end.i.us.us.7.1: ; preds = %if.end.i.us.us.7.1.loopexit, %if.end.i.us.us.7 - %139 = add nuw nsw i64 %_local_id_x.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %139, 32 - br i1 %exitcond.7.not.1, label %mm3_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.us.us.7, !llvm.loop !23 - -if.then.i.us.us.6.1: ; preds = %if.end.i.us.us.6 - %add.i.us.us.6.1 = add nsw i32 %mul.i.us.6, %conv.i.us.us.6.1 - %idxprom.i.us.us.6.1 = sext i32 %add.i.us.us.6.1 to i64 - %arrayidx.i.us.us.6.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.6.1 = shl i64 %add1.i.i.us.us.6.1, 32 - %140 = ashr exact i64 %sext.i.us.us.6.1, 32 - br label %for.body.i.us.us.6.1 - -for.body.i.us.us.6.1: ; preds = %for.body.i.us.us.6.1, %if.then.i.us.us.6.1 - %indvars.iv.next.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.us.us.6.1, %for.body.i.us.us.6.1 ], [ 0, %if.then.i.us.us.6.1 ] - %141 = phi float [ %147, %for.body.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.us.us.6.1 ] - %142 = add nsw i64 %indvars.iv.next.i3.us.us.6.1, %110 - %arrayidx11.i.us.us.6.1 = getelementptr inbounds float, float* %0, i64 %142 - %143 = load float, float* %arrayidx11.i.us.us.6.1, align 4, !tbaa !12 - %144 = mul nsw i64 %indvars.iv.next.i3.us.us.6.1, %11 - %145 = add nsw i64 %144, %140 - %arrayidx15.i.us.us.6.1 = getelementptr inbounds float, float* %1, i64 %145 - %146 = load float, float* %arrayidx15.i.us.us.6.1, align 4, !tbaa !12 - %147 = tail call float @llvm.fmuladd.f32(float %143, float %146, float %141) #2 - store float %147, float* %arrayidx.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6.1, 1 - %exitcond.not.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.us.us.6.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.6.1, label %if.end.i.us.us.6.1.loopexit, label %for.body.i.us.us.6.1, !llvm.loop !19 - -if.end.i.us.us.6.1.loopexit: ; preds = %for.body.i.us.us.6.1 - br label %if.end.i.us.us.6.1 - -if.end.i.us.us.6.1: ; preds = %if.end.i.us.us.6.1.loopexit, %if.end.i.us.us.6 - %148 = add nuw nsw i64 %_local_id_x.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %148, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.us.6.loopexit, label %pregion_for_entry.entry.i.us.us.6, !llvm.loop !23 - -if.then.i.us.us.5.1: ; preds = %if.end.i.us.us.5 - %add.i.us.us.5.1 = add nsw i32 %mul.i.us.5, %conv.i.us.us.5.1 - %idxprom.i.us.us.5.1 = sext i32 %add.i.us.us.5.1 to i64 - %arrayidx.i.us.us.5.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.5.1 = shl i64 %add1.i.i.us.us.5.1, 32 - %149 = ashr exact i64 %sext.i.us.us.5.1, 32 - br label %for.body.i.us.us.5.1 - -for.body.i.us.us.5.1: ; preds = %for.body.i.us.us.5.1, %if.then.i.us.us.5.1 - %indvars.iv.next.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.us.us.5.1, %for.body.i.us.us.5.1 ], [ 0, %if.then.i.us.us.5.1 ] - %150 = phi float [ %156, %for.body.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.us.us.5.1 ] - %151 = add nsw i64 %indvars.iv.next.i3.us.us.5.1, %99 - %arrayidx11.i.us.us.5.1 = getelementptr inbounds float, float* %0, i64 %151 - %152 = load float, float* %arrayidx11.i.us.us.5.1, align 4, !tbaa !12 - %153 = mul nsw i64 %indvars.iv.next.i3.us.us.5.1, %11 - %154 = add nsw i64 %153, %149 - %arrayidx15.i.us.us.5.1 = getelementptr inbounds float, float* %1, i64 %154 - %155 = load float, float* %arrayidx15.i.us.us.5.1, align 4, !tbaa !12 - %156 = tail call float @llvm.fmuladd.f32(float %152, float %155, float %150) #2 - store float %156, float* %arrayidx.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5.1, 1 - %exitcond.not.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.us.us.5.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.5.1, label %if.end.i.us.us.5.1.loopexit, label %for.body.i.us.us.5.1, !llvm.loop !19 - -if.end.i.us.us.5.1.loopexit: ; preds = %for.body.i.us.us.5.1 - br label %if.end.i.us.us.5.1 - -if.end.i.us.us.5.1: ; preds = %if.end.i.us.us.5.1.loopexit, %if.end.i.us.us.5 - %157 = add nuw nsw i64 %_local_id_x.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %157, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.us.5.loopexit, label %pregion_for_entry.entry.i.us.us.5, !llvm.loop !23 - -if.then.i.us.us.4.1: ; preds = %if.end.i.us.us.4 - %add.i.us.us.4.1 = add nsw i32 %mul.i.us.4, %conv.i.us.us.4.1 - %idxprom.i.us.us.4.1 = sext i32 %add.i.us.us.4.1 to i64 - %arrayidx.i.us.us.4.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.4.1 = shl i64 %add1.i.i.us.us.4.1, 32 - %158 = ashr exact i64 %sext.i.us.us.4.1, 32 - br label %for.body.i.us.us.4.1 - -for.body.i.us.us.4.1: ; preds = %for.body.i.us.us.4.1, %if.then.i.us.us.4.1 - %indvars.iv.next.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.us.us.4.1, %for.body.i.us.us.4.1 ], [ 0, %if.then.i.us.us.4.1 ] - %159 = phi float [ %165, %for.body.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.us.us.4.1 ] - %160 = add nsw i64 %indvars.iv.next.i3.us.us.4.1, %88 - %arrayidx11.i.us.us.4.1 = getelementptr inbounds float, float* %0, i64 %160 - %161 = load float, float* %arrayidx11.i.us.us.4.1, align 4, !tbaa !12 - %162 = mul nsw i64 %indvars.iv.next.i3.us.us.4.1, %11 - %163 = add nsw i64 %162, %158 - %arrayidx15.i.us.us.4.1 = getelementptr inbounds float, float* %1, i64 %163 - %164 = load float, float* %arrayidx15.i.us.us.4.1, align 4, !tbaa !12 - %165 = tail call float @llvm.fmuladd.f32(float %161, float %164, float %159) #2 - store float %165, float* %arrayidx.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4.1, 1 - %exitcond.not.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.us.us.4.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.4.1, label %if.end.i.us.us.4.1.loopexit, label %for.body.i.us.us.4.1, !llvm.loop !19 - -if.end.i.us.us.4.1.loopexit: ; preds = %for.body.i.us.us.4.1 - br label %if.end.i.us.us.4.1 - -if.end.i.us.us.4.1: ; preds = %if.end.i.us.us.4.1.loopexit, %if.end.i.us.us.4 - %166 = add nuw nsw i64 %_local_id_x.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %166, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.us.4.loopexit, label %pregion_for_entry.entry.i.us.us.4, !llvm.loop !23 - -if.then.i.us.us.3.1: ; preds = %if.end.i.us.us.3 - %add.i.us.us.3.1 = add nsw i32 %mul.i.us.3, %conv.i.us.us.3.1 - %idxprom.i.us.us.3.1 = sext i32 %add.i.us.us.3.1 to i64 - %arrayidx.i.us.us.3.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.3.1 = shl i64 %add1.i.i.us.us.3.1, 32 - %167 = ashr exact i64 %sext.i.us.us.3.1, 32 - br label %for.body.i.us.us.3.1 - -for.body.i.us.us.3.1: ; preds = %for.body.i.us.us.3.1, %if.then.i.us.us.3.1 - %indvars.iv.next.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.us.us.3.1, %for.body.i.us.us.3.1 ], [ 0, %if.then.i.us.us.3.1 ] - %168 = phi float [ %174, %for.body.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.us.us.3.1 ] - %169 = add nsw i64 %indvars.iv.next.i3.us.us.3.1, %77 - %arrayidx11.i.us.us.3.1 = getelementptr inbounds float, float* %0, i64 %169 - %170 = load float, float* %arrayidx11.i.us.us.3.1, align 4, !tbaa !12 - %171 = mul nsw i64 %indvars.iv.next.i3.us.us.3.1, %11 - %172 = add nsw i64 %171, %167 - %arrayidx15.i.us.us.3.1 = getelementptr inbounds float, float* %1, i64 %172 - %173 = load float, float* %arrayidx15.i.us.us.3.1, align 4, !tbaa !12 - %174 = tail call float @llvm.fmuladd.f32(float %170, float %173, float %168) #2 - store float %174, float* %arrayidx.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3.1, 1 - %exitcond.not.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.us.us.3.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.3.1, label %if.end.i.us.us.3.1.loopexit, label %for.body.i.us.us.3.1, !llvm.loop !19 - -if.end.i.us.us.3.1.loopexit: ; preds = %for.body.i.us.us.3.1 - br label %if.end.i.us.us.3.1 - -if.end.i.us.us.3.1: ; preds = %if.end.i.us.us.3.1.loopexit, %if.end.i.us.us.3 - %175 = add nuw nsw i64 %_local_id_x.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %175, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.us.3.loopexit, label %pregion_for_entry.entry.i.us.us.3, !llvm.loop !23 - -if.then.i.us.us.2.1: ; preds = %if.end.i.us.us.2 - %add.i.us.us.2.1 = add nsw i32 %mul.i.us.2, %conv.i.us.us.2.1 - %idxprom.i.us.us.2.1 = sext i32 %add.i.us.us.2.1 to i64 - %arrayidx.i.us.us.2.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.2.1 = shl i64 %add1.i.i.us.us.2.1, 32 - %176 = ashr exact i64 %sext.i.us.us.2.1, 32 - br label %for.body.i.us.us.2.1 - -for.body.i.us.us.2.1: ; preds = %for.body.i.us.us.2.1, %if.then.i.us.us.2.1 - %indvars.iv.next.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.us.us.2.1, %for.body.i.us.us.2.1 ], [ 0, %if.then.i.us.us.2.1 ] - %177 = phi float [ %183, %for.body.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.us.us.2.1 ] - %178 = add nsw i64 %indvars.iv.next.i3.us.us.2.1, %66 - %arrayidx11.i.us.us.2.1 = getelementptr inbounds float, float* %0, i64 %178 - %179 = load float, float* %arrayidx11.i.us.us.2.1, align 4, !tbaa !12 - %180 = mul nsw i64 %indvars.iv.next.i3.us.us.2.1, %11 - %181 = add nsw i64 %180, %176 - %arrayidx15.i.us.us.2.1 = getelementptr inbounds float, float* %1, i64 %181 - %182 = load float, float* %arrayidx15.i.us.us.2.1, align 4, !tbaa !12 - %183 = tail call float @llvm.fmuladd.f32(float %179, float %182, float %177) #2 - store float %183, float* %arrayidx.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2.1, 1 - %exitcond.not.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.us.us.2.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.2.1, label %if.end.i.us.us.2.1.loopexit, label %for.body.i.us.us.2.1, !llvm.loop !19 - -if.end.i.us.us.2.1.loopexit: ; preds = %for.body.i.us.us.2.1 - br label %if.end.i.us.us.2.1 - -if.end.i.us.us.2.1: ; preds = %if.end.i.us.us.2.1.loopexit, %if.end.i.us.us.2 - %184 = add nuw nsw i64 %_local_id_x.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %184, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.us.2.loopexit, label %pregion_for_entry.entry.i.us.us.2, !llvm.loop !23 - -if.then.i.us.us.1.1: ; preds = %if.end.i.us.us.1 - %add.i.us.us.1.1 = add nsw i32 %mul.i.us.1, %conv.i.us.us.1.1 - %idxprom.i.us.us.1.1 = sext i32 %add.i.us.us.1.1 to i64 - %arrayidx.i.us.us.1.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.1.1 = shl i64 %add1.i.i.us.us.1.1, 32 - %185 = ashr exact i64 %sext.i.us.us.1.1, 32 - br label %for.body.i.us.us.1.1 - -for.body.i.us.us.1.1: ; preds = %for.body.i.us.us.1.1, %if.then.i.us.us.1.1 - %indvars.iv.next.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.us.us.1.1, %for.body.i.us.us.1.1 ], [ 0, %if.then.i.us.us.1.1 ] - %186 = phi float [ %192, %for.body.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.us.us.1.1 ] - %187 = add nsw i64 %indvars.iv.next.i3.us.us.1.1, %45 - %arrayidx11.i.us.us.1.1 = getelementptr inbounds float, float* %0, i64 %187 - %188 = load float, float* %arrayidx11.i.us.us.1.1, align 4, !tbaa !12 - %189 = mul nsw i64 %indvars.iv.next.i3.us.us.1.1, %11 - %190 = add nsw i64 %189, %185 - %arrayidx15.i.us.us.1.1 = getelementptr inbounds float, float* %1, i64 %190 - %191 = load float, float* %arrayidx15.i.us.us.1.1, align 4, !tbaa !12 - %192 = tail call float @llvm.fmuladd.f32(float %188, float %191, float %186) #2 - store float %192, float* %arrayidx.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1.1, 1 - %exitcond.not.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.us.us.1.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.1.1, label %if.end.i.us.us.1.1.loopexit, label %for.body.i.us.us.1.1, !llvm.loop !19 - -if.end.i.us.us.1.1.loopexit: ; preds = %for.body.i.us.us.1.1 - br label %if.end.i.us.us.1.1 - -if.end.i.us.us.1.1: ; preds = %if.end.i.us.us.1.1.loopexit, %if.end.i.us.us.1 - %193 = add nuw nsw i64 %_local_id_x.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %193, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.us.1.loopexit, label %pregion_for_entry.entry.i.us.us.1, !llvm.loop !23 - -if.then.i.us.us.146: ; preds = %if.end.i.us.us - %add.i.us.us.142 = add nsw i32 %mul.i.us, %conv.i.us.us.139 - %idxprom.i.us.us.143 = sext i32 %add.i.us.us.142 to i64 - %arrayidx.i.us.us.144 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.143 - store float 0.000000e+00, float* %arrayidx.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.145 = shl i64 %add1.i.i.us.us.138, 32 - %194 = ashr exact i64 %sext.i.us.us.145, 32 - br label %for.body.i.us.us.152 - -for.body.i.us.us.152: ; preds = %for.body.i.us.us.152, %if.then.i.us.us.146 - %indvars.iv.next.i3.us.us.147 = phi i64 [ %indvars.iv.next.i.us.us.150, %for.body.i.us.us.152 ], [ 0, %if.then.i.us.us.146 ] - %195 = phi float [ %201, %for.body.i.us.us.152 ], [ 0.000000e+00, %if.then.i.us.us.146 ] - %196 = add nsw i64 %indvars.iv.next.i3.us.us.147, %43 - %arrayidx11.i.us.us.148 = getelementptr inbounds float, float* %0, i64 %196 - %197 = load float, float* %arrayidx11.i.us.us.148, align 4, !tbaa !12 - %198 = mul nsw i64 %indvars.iv.next.i3.us.us.147, %11 - %199 = add nsw i64 %198, %194 - %arrayidx15.i.us.us.149 = getelementptr inbounds float, float* %1, i64 %199 - %200 = load float, float* %arrayidx15.i.us.us.149, align 4, !tbaa !12 - %201 = tail call float @llvm.fmuladd.f32(float %197, float %200, float %195) #2 - store float %201, float* %arrayidx.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.150 = add nuw nsw i64 %indvars.iv.next.i3.us.us.147, 1 - %exitcond.not.i.us.us.151 = icmp eq i64 %indvars.iv.next.i.us.us.150, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.151, label %if.end.i.us.us.153.loopexit, label %for.body.i.us.us.152, !llvm.loop !19 - -if.end.i.us.us.153.loopexit: ; preds = %for.body.i.us.us.152 - br label %if.end.i.us.us.153 - -if.end.i.us.us.153: ; preds = %if.end.i.us.us.153.loopexit, %if.end.i.us.us - %202 = add nuw nsw i64 %_local_id_x.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %202, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.us.loopexit, label %pregion_for_entry.entry.i.us.us, !llvm.loop !23 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mm3_kernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 5 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp638.i.i = icmp sgt i32 %28, 0 - %29 = sext i32 %24 to i64 - %wide.trip.count.i.i = zext i32 %28 to i64 - br i1 %cmp638.i.i, label %pregion_for_entry.pregion_for_init.i.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - %conv.i.i.us = trunc i64 %mul.i.i.i to i32 - %cmp4.i.i.us = icmp sgt i32 %24, %conv.i.i.us - %30 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.1 = or i32 %30, 1 - %cmp4.i.i.us.1 = icmp sgt i32 %24, %conv.i.i.us.1 - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.2 = or i32 %31, 2 - %cmp4.i.i.us.2 = icmp sgt i32 %24, %conv.i.i.us.2 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.3 = or i32 %32, 3 - %cmp4.i.i.us.3 = icmp sgt i32 %24, %conv.i.i.us.3 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.4 = or i32 %33, 4 - %cmp4.i.i.us.4 = icmp sgt i32 %24, %conv.i.i.us.4 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.5 = or i32 %34, 5 - %cmp4.i.i.us.5 = icmp sgt i32 %24, %conv.i.i.us.5 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.6 = or i32 %35, 6 - %cmp4.i.i.us.6 = icmp sgt i32 %24, %conv.i.i.us.6 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.7 = or i32 %36, 7 - %cmp4.i.i.us.7 = icmp sgt i32 %24, %conv.i.i.us.7 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.8 = or i32 %37, 8 - %cmp4.i.i.us.8 = icmp sgt i32 %24, %conv.i.i.us.8 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.9 = or i32 %38, 9 - %cmp4.i.i.us.9 = icmp sgt i32 %24, %conv.i.i.us.9 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.10 = or i32 %39, 10 - %cmp4.i.i.us.10 = icmp sgt i32 %24, %conv.i.i.us.10 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.11 = or i32 %40, 11 - %cmp4.i.i.us.11 = icmp sgt i32 %24, %conv.i.i.us.11 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.12 = or i32 %41, 12 - %cmp4.i.i.us.12 = icmp sgt i32 %24, %conv.i.i.us.12 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.13 = or i32 %42, 13 - %cmp4.i.i.us.13 = icmp sgt i32 %24, %conv.i.i.us.13 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.14 = or i32 %43, 14 - %cmp4.i.i.us.14 = icmp sgt i32 %24, %conv.i.i.us.14 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.15 = or i32 %44, 15 - %cmp4.i.i.us.15 = icmp sgt i32 %24, %conv.i.i.us.15 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.16 = or i32 %45, 16 - %cmp4.i.i.us.16 = icmp sgt i32 %24, %conv.i.i.us.16 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.17 = or i32 %46, 17 - %cmp4.i.i.us.17 = icmp sgt i32 %24, %conv.i.i.us.17 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.18 = or i32 %47, 18 - %cmp4.i.i.us.18 = icmp sgt i32 %24, %conv.i.i.us.18 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.19 = or i32 %48, 19 - %cmp4.i.i.us.19 = icmp sgt i32 %24, %conv.i.i.us.19 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.20 = or i32 %49, 20 - %cmp4.i.i.us.20 = icmp sgt i32 %24, %conv.i.i.us.20 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.21 = or i32 %50, 21 - %cmp4.i.i.us.21 = icmp sgt i32 %24, %conv.i.i.us.21 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.22 = or i32 %51, 22 - %cmp4.i.i.us.22 = icmp sgt i32 %24, %conv.i.i.us.22 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.23 = or i32 %52, 23 - %cmp4.i.i.us.23 = icmp sgt i32 %24, %conv.i.i.us.23 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.24 = or i32 %53, 24 - %cmp4.i.i.us.24 = icmp sgt i32 %24, %conv.i.i.us.24 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.25 = or i32 %54, 25 - %cmp4.i.i.us.25 = icmp sgt i32 %24, %conv.i.i.us.25 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.26 = or i32 %55, 26 - %cmp4.i.i.us.26 = icmp sgt i32 %24, %conv.i.i.us.26 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.27 = or i32 %56, 27 - %cmp4.i.i.us.27 = icmp sgt i32 %24, %conv.i.i.us.27 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.28 = or i32 %57, 28 - %cmp4.i.i.us.28 = icmp sgt i32 %24, %conv.i.i.us.28 - %58 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.29 = or i32 %58, 29 - %cmp4.i.i.us.29 = icmp sgt i32 %24, %conv.i.i.us.29 - %59 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.30 = or i32 %59, 30 - %cmp4.i.i.us.30 = icmp sgt i32 %24, %conv.i.i.us.30 - %60 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.31 = or i32 %60, 31 - %cmp4.i.i.us.31 = icmp sgt i32 %24, %conv.i.i.us.31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i.us.preheader: ; preds = %5 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %20, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %24, %conv2.i.i.us - %mul8.i.i.us = mul nsw i32 %28, %conv2.i.i.us - %61 = sext i32 %mul8.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us.preheader - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us.153 - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us.preheader - %62 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %62, 1 - %cmp.i.i.us.1 = icmp sgt i32 %20, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %24, %conv2.i.i.us.1 - %mul8.i.i.us.1 = mul nsw i32 %28, %conv2.i.i.us.1 - %63 = sext i32 %mul8.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us.153, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %220, %if.end.i.i.us.us.153 ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %24, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us - store float 0.000000e+00, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %64 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %65 = or i64 %_local_id_x.i.0.us.us, 1 - %add1.i.i.i.us.us.138 = add nuw nsw i64 %65, %mul.i.i.i - %conv.i.i.us.us.139 = trunc i64 %add1.i.i.i.us.us.138 to i32 - %cmp4.i.i.us.us.140 = icmp sgt i32 %24, %conv.i.i.us.us.139 - br i1 %cmp4.i.i.us.us.140, label %if.then.i.i.us.us.146, label %if.end.i.i.us.us.153 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %66 = phi float [ %72, %for.body.i.i.us.us ], [ 0.000000e+00, %if.then.i.i.us.us ] - %67 = add nsw i64 %indvars.iv.next.i.i3.us.us, %61 - %arrayidx11.i.i.us.us = getelementptr inbounds float, float* %8, i64 %67 - %68 = load float, float* %arrayidx11.i.i.us.us, align 4, !tbaa !12 - %69 = mul nsw i64 %indvars.iv.next.i.i3.us.us, %29 - %70 = add nsw i64 %69, %64 - %arrayidx15.i.i.us.us = getelementptr inbounds float, float* %12, i64 %70 - %71 = load float, float* %arrayidx15.i.i.us.us, align 4, !tbaa !12 - %72 = tail call float @llvm.fmuladd.f32(float %68, float %71, float %66) #2 - store float %72, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %pregion_for_entry.pregion_for_init.i.i.preheader - %_local_id_y.i.0 = phi i64 [ %73, %pregion_for_end.i.i ], [ 0, %pregion_for_entry.pregion_for_init.i.i.preheader ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %20, %conv2.i.i - %mul.i.i = mul nsw i32 %24, %conv2.i.i - br i1 %cmp.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us.preheader - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -pregion_for_end.i.i: ; preds = %if.then.i.i.us.31, %if.end.i.i.us.30, %pregion_for_entry.pregion_for_init.i.i - %73 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond33.not = icmp eq i64 %73, 8 - br i1 %exitcond33.not, label %_pocl_kernel_mm3_kernel2.exit.loopexit54, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_mm3_kernel2.exit.loopexit: ; preds = %if.end.i.i.us.us.7.1 - br label %_pocl_kernel_mm3_kernel2.exit - -_pocl_kernel_mm3_kernel2.exit.loopexit54: ; preds = %pregion_for_end.i.i - br label %_pocl_kernel_mm3_kernel2.exit - -_pocl_kernel_mm3_kernel2.exit: ; preds = %pregion_for_end.i.i.us.6, %_pocl_kernel_mm3_kernel2.exit.loopexit54, %_pocl_kernel_mm3_kernel2.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %211, %if.end.i.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %24, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %74 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %75 = phi float [ %81, %for.body.i.i.us.us.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1 ] - %76 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %63 - %arrayidx11.i.i.us.us.1 = getelementptr inbounds float, float* %8, i64 %76 - %77 = load float, float* %arrayidx11.i.i.us.us.1, align 4, !tbaa !12 - %78 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1, %29 - %79 = add nsw i64 %78, %74 - %arrayidx15.i.i.us.us.1 = getelementptr inbounds float, float* %12, i64 %79 - %80 = load float, float* %arrayidx15.i.i.us.us.1, align 4, !tbaa !12 - %81 = tail call float @llvm.fmuladd.f32(float %77, float %80, float %75) #2 - store float %81, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !19 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %82 = or i64 %_local_id_x.i.0.us.us.1, 1 - %add1.i.i.i.us.us.1.1 = add nuw nsw i64 %82, %mul.i.i.i - %conv.i.i.us.us.1.1 = trunc i64 %add1.i.i.i.us.us.1.1 to i32 - %cmp4.i.i.us.us.1.1 = icmp sgt i32 %24, %conv.i.i.us.us.1.1 - br i1 %cmp4.i.i.us.us.1.1, label %if.then.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %83 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %83, 2 - %cmp.i.i.us.2 = icmp sgt i32 %20, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %24, %conv2.i.i.us.2 - %mul8.i.i.us.2 = mul nsw i32 %28, %conv2.i.i.us.2 - %84 = sext i32 %mul8.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.1, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %202, %if.end.i.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %24, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %85 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %86 = phi float [ %92, %for.body.i.i.us.us.2 ], [ 0.000000e+00, %if.then.i.i.us.us.2 ] - %87 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %84 - %arrayidx11.i.i.us.us.2 = getelementptr inbounds float, float* %8, i64 %87 - %88 = load float, float* %arrayidx11.i.i.us.us.2, align 4, !tbaa !12 - %89 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2, %29 - %90 = add nsw i64 %89, %85 - %arrayidx15.i.i.us.us.2 = getelementptr inbounds float, float* %12, i64 %90 - %91 = load float, float* %arrayidx15.i.i.us.us.2, align 4, !tbaa !12 - %92 = tail call float @llvm.fmuladd.f32(float %88, float %91, float %86) #2 - store float %92, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !19 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %93 = or i64 %_local_id_x.i.0.us.us.2, 1 - %add1.i.i.i.us.us.2.1 = add nuw nsw i64 %93, %mul.i.i.i - %conv.i.i.us.us.2.1 = trunc i64 %add1.i.i.i.us.us.2.1 to i32 - %cmp4.i.i.us.us.2.1 = icmp sgt i32 %24, %conv.i.i.us.us.2.1 - br i1 %cmp4.i.i.us.us.2.1, label %if.then.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2.1 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %94 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %94, 3 - %cmp.i.i.us.3 = icmp sgt i32 %20, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %24, %conv2.i.i.us.3 - %mul8.i.i.us.3 = mul nsw i32 %28, %conv2.i.i.us.3 - %95 = sext i32 %mul8.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.1, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %193, %if.end.i.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %24, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %96 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %97 = phi float [ %103, %for.body.i.i.us.us.3 ], [ 0.000000e+00, %if.then.i.i.us.us.3 ] - %98 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %95 - %arrayidx11.i.i.us.us.3 = getelementptr inbounds float, float* %8, i64 %98 - %99 = load float, float* %arrayidx11.i.i.us.us.3, align 4, !tbaa !12 - %100 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3, %29 - %101 = add nsw i64 %100, %96 - %arrayidx15.i.i.us.us.3 = getelementptr inbounds float, float* %12, i64 %101 - %102 = load float, float* %arrayidx15.i.i.us.us.3, align 4, !tbaa !12 - %103 = tail call float @llvm.fmuladd.f32(float %99, float %102, float %97) #2 - store float %103, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !19 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %104 = or i64 %_local_id_x.i.0.us.us.3, 1 - %add1.i.i.i.us.us.3.1 = add nuw nsw i64 %104, %mul.i.i.i - %conv.i.i.us.us.3.1 = trunc i64 %add1.i.i.i.us.us.3.1 to i32 - %cmp4.i.i.us.us.3.1 = icmp sgt i32 %24, %conv.i.i.us.us.3.1 - br i1 %cmp4.i.i.us.us.3.1, label %if.then.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3.1 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %105 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %105, 4 - %cmp.i.i.us.4 = icmp sgt i32 %20, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %24, %conv2.i.i.us.4 - %mul8.i.i.us.4 = mul nsw i32 %28, %conv2.i.i.us.4 - %106 = sext i32 %mul8.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.1, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %184, %if.end.i.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %24, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %107 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %108 = phi float [ %114, %for.body.i.i.us.us.4 ], [ 0.000000e+00, %if.then.i.i.us.us.4 ] - %109 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %106 - %arrayidx11.i.i.us.us.4 = getelementptr inbounds float, float* %8, i64 %109 - %110 = load float, float* %arrayidx11.i.i.us.us.4, align 4, !tbaa !12 - %111 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4, %29 - %112 = add nsw i64 %111, %107 - %arrayidx15.i.i.us.us.4 = getelementptr inbounds float, float* %12, i64 %112 - %113 = load float, float* %arrayidx15.i.i.us.us.4, align 4, !tbaa !12 - %114 = tail call float @llvm.fmuladd.f32(float %110, float %113, float %108) #2 - store float %114, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !19 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %115 = or i64 %_local_id_x.i.0.us.us.4, 1 - %add1.i.i.i.us.us.4.1 = add nuw nsw i64 %115, %mul.i.i.i - %conv.i.i.us.us.4.1 = trunc i64 %add1.i.i.i.us.us.4.1 to i32 - %cmp4.i.i.us.us.4.1 = icmp sgt i32 %24, %conv.i.i.us.us.4.1 - br i1 %cmp4.i.i.us.us.4.1, label %if.then.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4.1 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %116 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %116, 5 - %cmp.i.i.us.5 = icmp sgt i32 %20, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %24, %conv2.i.i.us.5 - %mul8.i.i.us.5 = mul nsw i32 %28, %conv2.i.i.us.5 - %117 = sext i32 %mul8.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.1, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %175, %if.end.i.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %24, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %118 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %119 = phi float [ %125, %for.body.i.i.us.us.5 ], [ 0.000000e+00, %if.then.i.i.us.us.5 ] - %120 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %117 - %arrayidx11.i.i.us.us.5 = getelementptr inbounds float, float* %8, i64 %120 - %121 = load float, float* %arrayidx11.i.i.us.us.5, align 4, !tbaa !12 - %122 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5, %29 - %123 = add nsw i64 %122, %118 - %arrayidx15.i.i.us.us.5 = getelementptr inbounds float, float* %12, i64 %123 - %124 = load float, float* %arrayidx15.i.i.us.us.5, align 4, !tbaa !12 - %125 = tail call float @llvm.fmuladd.f32(float %121, float %124, float %119) #2 - store float %125, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !19 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %126 = or i64 %_local_id_x.i.0.us.us.5, 1 - %add1.i.i.i.us.us.5.1 = add nuw nsw i64 %126, %mul.i.i.i - %conv.i.i.us.us.5.1 = trunc i64 %add1.i.i.i.us.us.5.1 to i32 - %cmp4.i.i.us.us.5.1 = icmp sgt i32 %24, %conv.i.i.us.us.5.1 - br i1 %cmp4.i.i.us.us.5.1, label %if.then.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5.1 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %127 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %127, 6 - %cmp.i.i.us.6 = icmp sgt i32 %20, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %24, %conv2.i.i.us.6 - %mul8.i.i.us.6 = mul nsw i32 %28, %conv2.i.i.us.6 - %128 = sext i32 %mul8.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.1, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %166, %if.end.i.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %24, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %129 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %130 = phi float [ %136, %for.body.i.i.us.us.6 ], [ 0.000000e+00, %if.then.i.i.us.us.6 ] - %131 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %128 - %arrayidx11.i.i.us.us.6 = getelementptr inbounds float, float* %8, i64 %131 - %132 = load float, float* %arrayidx11.i.i.us.us.6, align 4, !tbaa !12 - %133 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6, %29 - %134 = add nsw i64 %133, %129 - %arrayidx15.i.i.us.us.6 = getelementptr inbounds float, float* %12, i64 %134 - %135 = load float, float* %arrayidx15.i.i.us.us.6, align 4, !tbaa !12 - %136 = tail call float @llvm.fmuladd.f32(float %132, float %135, float %130) #2 - store float %136, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !19 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %137 = or i64 %_local_id_x.i.0.us.us.6, 1 - %add1.i.i.i.us.us.6.1 = add nuw nsw i64 %137, %mul.i.i.i - %conv.i.i.us.us.6.1 = trunc i64 %add1.i.i.i.us.us.6.1 to i32 - %cmp4.i.i.us.us.6.1 = icmp sgt i32 %24, %conv.i.i.us.us.6.1 - br i1 %cmp4.i.i.us.us.6.1, label %if.then.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6.1 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %138 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %138, 7 - %cmp.i.i.us.7 = icmp sgt i32 %20, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %24, %conv2.i.i.us.7 - %mul8.i.i.us.7 = mul nsw i32 %28, %conv2.i.i.us.7 - %139 = sext i32 %mul8.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_mm3_kernel2.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.1, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %157, %if.end.i.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %24, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %140 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %141 = phi float [ %147, %for.body.i.i.us.us.7 ], [ 0.000000e+00, %if.then.i.i.us.us.7 ] - %142 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %139 - %arrayidx11.i.i.us.us.7 = getelementptr inbounds float, float* %8, i64 %142 - %143 = load float, float* %arrayidx11.i.i.us.us.7, align 4, !tbaa !12 - %144 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7, %29 - %145 = add nsw i64 %144, %140 - %arrayidx15.i.i.us.us.7 = getelementptr inbounds float, float* %12, i64 %145 - %146 = load float, float* %arrayidx15.i.i.us.us.7, align 4, !tbaa !12 - %147 = tail call float @llvm.fmuladd.f32(float %143, float %146, float %141) #2 - store float %147, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !19 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %148 = or i64 %_local_id_x.i.0.us.us.7, 1 - %add1.i.i.i.us.us.7.1 = add nuw nsw i64 %148, %mul.i.i.i - %conv.i.i.us.us.7.1 = trunc i64 %add1.i.i.i.us.us.7.1 to i32 - %cmp4.i.i.us.us.7.1 = icmp sgt i32 %24, %conv.i.i.us.us.7.1 - br i1 %cmp4.i.i.us.us.7.1, label %if.then.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1 - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %add.i.i.us.1 = add nsw i32 %mul.i.i, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %if.end.i.i.us - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %if.end.i.i.us.1 - %add.i.i.us.2 = add nsw i32 %mul.i.i, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %if.end.i.i.us.1 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %if.end.i.i.us.2 - %add.i.i.us.3 = add nsw i32 %mul.i.i, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %if.end.i.i.us.2 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %if.end.i.i.us.3 - %add.i.i.us.4 = add nsw i32 %mul.i.i, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %if.end.i.i.us.3 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %if.end.i.i.us.4 - %add.i.i.us.5 = add nsw i32 %mul.i.i, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %if.end.i.i.us.4 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %if.end.i.i.us.5 - %add.i.i.us.6 = add nsw i32 %mul.i.i, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %if.end.i.i.us.5 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %if.end.i.i.us.6 - %add.i.i.us.7 = add nsw i32 %mul.i.i, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %if.end.i.i.us.6 - br i1 %cmp4.i.i.us.8, label %if.then.i.i.us.8, label %if.end.i.i.us.8 - -if.then.i.i.us.8: ; preds = %if.end.i.i.us.7 - %add.i.i.us.8 = add nsw i32 %mul.i.i, %conv.i.i.us.8 - %idxprom.i.i.us.8 = sext i32 %add.i.i.us.8 to i64 - %arrayidx.i.i.us.8 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.8 - -if.end.i.i.us.8: ; preds = %if.then.i.i.us.8, %if.end.i.i.us.7 - br i1 %cmp4.i.i.us.9, label %if.then.i.i.us.9, label %if.end.i.i.us.9 - -if.then.i.i.us.9: ; preds = %if.end.i.i.us.8 - %add.i.i.us.9 = add nsw i32 %mul.i.i, %conv.i.i.us.9 - %idxprom.i.i.us.9 = sext i32 %add.i.i.us.9 to i64 - %arrayidx.i.i.us.9 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.9 - -if.end.i.i.us.9: ; preds = %if.then.i.i.us.9, %if.end.i.i.us.8 - br i1 %cmp4.i.i.us.10, label %if.then.i.i.us.10, label %if.end.i.i.us.10 - -if.then.i.i.us.10: ; preds = %if.end.i.i.us.9 - %add.i.i.us.10 = add nsw i32 %mul.i.i, %conv.i.i.us.10 - %idxprom.i.i.us.10 = sext i32 %add.i.i.us.10 to i64 - %arrayidx.i.i.us.10 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.10 - -if.end.i.i.us.10: ; preds = %if.then.i.i.us.10, %if.end.i.i.us.9 - br i1 %cmp4.i.i.us.11, label %if.then.i.i.us.11, label %if.end.i.i.us.11 - -if.then.i.i.us.11: ; preds = %if.end.i.i.us.10 - %add.i.i.us.11 = add nsw i32 %mul.i.i, %conv.i.i.us.11 - %idxprom.i.i.us.11 = sext i32 %add.i.i.us.11 to i64 - %arrayidx.i.i.us.11 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.11 - -if.end.i.i.us.11: ; preds = %if.then.i.i.us.11, %if.end.i.i.us.10 - br i1 %cmp4.i.i.us.12, label %if.then.i.i.us.12, label %if.end.i.i.us.12 - -if.then.i.i.us.12: ; preds = %if.end.i.i.us.11 - %add.i.i.us.12 = add nsw i32 %mul.i.i, %conv.i.i.us.12 - %idxprom.i.i.us.12 = sext i32 %add.i.i.us.12 to i64 - %arrayidx.i.i.us.12 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.12 - -if.end.i.i.us.12: ; preds = %if.then.i.i.us.12, %if.end.i.i.us.11 - br i1 %cmp4.i.i.us.13, label %if.then.i.i.us.13, label %if.end.i.i.us.13 - -if.then.i.i.us.13: ; preds = %if.end.i.i.us.12 - %add.i.i.us.13 = add nsw i32 %mul.i.i, %conv.i.i.us.13 - %idxprom.i.i.us.13 = sext i32 %add.i.i.us.13 to i64 - %arrayidx.i.i.us.13 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.13 - -if.end.i.i.us.13: ; preds = %if.then.i.i.us.13, %if.end.i.i.us.12 - br i1 %cmp4.i.i.us.14, label %if.then.i.i.us.14, label %if.end.i.i.us.14 - -if.then.i.i.us.14: ; preds = %if.end.i.i.us.13 - %add.i.i.us.14 = add nsw i32 %mul.i.i, %conv.i.i.us.14 - %idxprom.i.i.us.14 = sext i32 %add.i.i.us.14 to i64 - %arrayidx.i.i.us.14 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.14 - -if.end.i.i.us.14: ; preds = %if.then.i.i.us.14, %if.end.i.i.us.13 - br i1 %cmp4.i.i.us.15, label %if.then.i.i.us.15, label %if.end.i.i.us.15 - -if.then.i.i.us.15: ; preds = %if.end.i.i.us.14 - %add.i.i.us.15 = add nsw i32 %mul.i.i, %conv.i.i.us.15 - %idxprom.i.i.us.15 = sext i32 %add.i.i.us.15 to i64 - %arrayidx.i.i.us.15 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.15 - -if.end.i.i.us.15: ; preds = %if.then.i.i.us.15, %if.end.i.i.us.14 - br i1 %cmp4.i.i.us.16, label %if.then.i.i.us.16, label %if.end.i.i.us.16 - -if.then.i.i.us.16: ; preds = %if.end.i.i.us.15 - %add.i.i.us.16 = add nsw i32 %mul.i.i, %conv.i.i.us.16 - %idxprom.i.i.us.16 = sext i32 %add.i.i.us.16 to i64 - %arrayidx.i.i.us.16 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.16 - -if.end.i.i.us.16: ; preds = %if.then.i.i.us.16, %if.end.i.i.us.15 - br i1 %cmp4.i.i.us.17, label %if.then.i.i.us.17, label %if.end.i.i.us.17 - -if.then.i.i.us.17: ; preds = %if.end.i.i.us.16 - %add.i.i.us.17 = add nsw i32 %mul.i.i, %conv.i.i.us.17 - %idxprom.i.i.us.17 = sext i32 %add.i.i.us.17 to i64 - %arrayidx.i.i.us.17 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.17 - -if.end.i.i.us.17: ; preds = %if.then.i.i.us.17, %if.end.i.i.us.16 - br i1 %cmp4.i.i.us.18, label %if.then.i.i.us.18, label %if.end.i.i.us.18 - -if.then.i.i.us.18: ; preds = %if.end.i.i.us.17 - %add.i.i.us.18 = add nsw i32 %mul.i.i, %conv.i.i.us.18 - %idxprom.i.i.us.18 = sext i32 %add.i.i.us.18 to i64 - %arrayidx.i.i.us.18 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.18 - -if.end.i.i.us.18: ; preds = %if.then.i.i.us.18, %if.end.i.i.us.17 - br i1 %cmp4.i.i.us.19, label %if.then.i.i.us.19, label %if.end.i.i.us.19 - -if.then.i.i.us.19: ; preds = %if.end.i.i.us.18 - %add.i.i.us.19 = add nsw i32 %mul.i.i, %conv.i.i.us.19 - %idxprom.i.i.us.19 = sext i32 %add.i.i.us.19 to i64 - %arrayidx.i.i.us.19 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.19 - -if.end.i.i.us.19: ; preds = %if.then.i.i.us.19, %if.end.i.i.us.18 - br i1 %cmp4.i.i.us.20, label %if.then.i.i.us.20, label %if.end.i.i.us.20 - -if.then.i.i.us.20: ; preds = %if.end.i.i.us.19 - %add.i.i.us.20 = add nsw i32 %mul.i.i, %conv.i.i.us.20 - %idxprom.i.i.us.20 = sext i32 %add.i.i.us.20 to i64 - %arrayidx.i.i.us.20 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.20 - -if.end.i.i.us.20: ; preds = %if.then.i.i.us.20, %if.end.i.i.us.19 - br i1 %cmp4.i.i.us.21, label %if.then.i.i.us.21, label %if.end.i.i.us.21 - -if.then.i.i.us.21: ; preds = %if.end.i.i.us.20 - %add.i.i.us.21 = add nsw i32 %mul.i.i, %conv.i.i.us.21 - %idxprom.i.i.us.21 = sext i32 %add.i.i.us.21 to i64 - %arrayidx.i.i.us.21 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.21 - -if.end.i.i.us.21: ; preds = %if.then.i.i.us.21, %if.end.i.i.us.20 - br i1 %cmp4.i.i.us.22, label %if.then.i.i.us.22, label %if.end.i.i.us.22 - -if.then.i.i.us.22: ; preds = %if.end.i.i.us.21 - %add.i.i.us.22 = add nsw i32 %mul.i.i, %conv.i.i.us.22 - %idxprom.i.i.us.22 = sext i32 %add.i.i.us.22 to i64 - %arrayidx.i.i.us.22 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.22 - -if.end.i.i.us.22: ; preds = %if.then.i.i.us.22, %if.end.i.i.us.21 - br i1 %cmp4.i.i.us.23, label %if.then.i.i.us.23, label %if.end.i.i.us.23 - -if.then.i.i.us.23: ; preds = %if.end.i.i.us.22 - %add.i.i.us.23 = add nsw i32 %mul.i.i, %conv.i.i.us.23 - %idxprom.i.i.us.23 = sext i32 %add.i.i.us.23 to i64 - %arrayidx.i.i.us.23 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.23 - -if.end.i.i.us.23: ; preds = %if.then.i.i.us.23, %if.end.i.i.us.22 - br i1 %cmp4.i.i.us.24, label %if.then.i.i.us.24, label %if.end.i.i.us.24 - -if.then.i.i.us.24: ; preds = %if.end.i.i.us.23 - %add.i.i.us.24 = add nsw i32 %mul.i.i, %conv.i.i.us.24 - %idxprom.i.i.us.24 = sext i32 %add.i.i.us.24 to i64 - %arrayidx.i.i.us.24 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.24 - -if.end.i.i.us.24: ; preds = %if.then.i.i.us.24, %if.end.i.i.us.23 - br i1 %cmp4.i.i.us.25, label %if.then.i.i.us.25, label %if.end.i.i.us.25 - -if.then.i.i.us.25: ; preds = %if.end.i.i.us.24 - %add.i.i.us.25 = add nsw i32 %mul.i.i, %conv.i.i.us.25 - %idxprom.i.i.us.25 = sext i32 %add.i.i.us.25 to i64 - %arrayidx.i.i.us.25 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.25 - -if.end.i.i.us.25: ; preds = %if.then.i.i.us.25, %if.end.i.i.us.24 - br i1 %cmp4.i.i.us.26, label %if.then.i.i.us.26, label %if.end.i.i.us.26 - -if.then.i.i.us.26: ; preds = %if.end.i.i.us.25 - %add.i.i.us.26 = add nsw i32 %mul.i.i, %conv.i.i.us.26 - %idxprom.i.i.us.26 = sext i32 %add.i.i.us.26 to i64 - %arrayidx.i.i.us.26 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.26 - -if.end.i.i.us.26: ; preds = %if.then.i.i.us.26, %if.end.i.i.us.25 - br i1 %cmp4.i.i.us.27, label %if.then.i.i.us.27, label %if.end.i.i.us.27 - -if.then.i.i.us.27: ; preds = %if.end.i.i.us.26 - %add.i.i.us.27 = add nsw i32 %mul.i.i, %conv.i.i.us.27 - %idxprom.i.i.us.27 = sext i32 %add.i.i.us.27 to i64 - %arrayidx.i.i.us.27 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.27 - -if.end.i.i.us.27: ; preds = %if.then.i.i.us.27, %if.end.i.i.us.26 - br i1 %cmp4.i.i.us.28, label %if.then.i.i.us.28, label %if.end.i.i.us.28 - -if.then.i.i.us.28: ; preds = %if.end.i.i.us.27 - %add.i.i.us.28 = add nsw i32 %mul.i.i, %conv.i.i.us.28 - %idxprom.i.i.us.28 = sext i32 %add.i.i.us.28 to i64 - %arrayidx.i.i.us.28 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.28 - -if.end.i.i.us.28: ; preds = %if.then.i.i.us.28, %if.end.i.i.us.27 - br i1 %cmp4.i.i.us.29, label %if.then.i.i.us.29, label %if.end.i.i.us.29 - -if.then.i.i.us.29: ; preds = %if.end.i.i.us.28 - %add.i.i.us.29 = add nsw i32 %mul.i.i, %conv.i.i.us.29 - %idxprom.i.i.us.29 = sext i32 %add.i.i.us.29 to i64 - %arrayidx.i.i.us.29 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.29 - -if.end.i.i.us.29: ; preds = %if.then.i.i.us.29, %if.end.i.i.us.28 - br i1 %cmp4.i.i.us.30, label %if.then.i.i.us.30, label %if.end.i.i.us.30 - -if.then.i.i.us.30: ; preds = %if.end.i.i.us.29 - %add.i.i.us.30 = add nsw i32 %mul.i.i, %conv.i.i.us.30 - %idxprom.i.i.us.30 = sext i32 %add.i.i.us.30 to i64 - %arrayidx.i.i.us.30 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.30 - -if.end.i.i.us.30: ; preds = %if.then.i.i.us.30, %if.end.i.i.us.29 - br i1 %cmp4.i.i.us.31, label %if.then.i.i.us.31, label %pregion_for_end.i.i - -if.then.i.i.us.31: ; preds = %if.end.i.i.us.30 - %add.i.i.us.31 = add nsw i32 %mul.i.i, %conv.i.i.us.31 - %idxprom.i.i.us.31 = sext i32 %add.i.i.us.31 to i64 - %arrayidx.i.i.us.31 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -if.then.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7 - %add.i.i.us.us.7.1 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7.1 - %idxprom.i.i.us.us.7.1 = sext i32 %add.i.i.us.us.7.1 to i64 - %arrayidx.i.i.us.us.7.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7.1 = shl i64 %add1.i.i.i.us.us.7.1, 32 - %149 = ashr exact i64 %sext.i.i.us.us.7.1, 32 - br label %for.body.i.i.us.us.7.1 - -for.body.i.i.us.us.7.1: ; preds = %for.body.i.i.us.us.7.1, %if.then.i.i.us.us.7.1 - %indvars.iv.next.i.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.i.us.us.7.1, %for.body.i.i.us.us.7.1 ], [ 0, %if.then.i.i.us.us.7.1 ] - %150 = phi float [ %156, %for.body.i.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.i.us.us.7.1 ] - %151 = add nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %139 - %arrayidx11.i.i.us.us.7.1 = getelementptr inbounds float, float* %8, i64 %151 - %152 = load float, float* %arrayidx11.i.i.us.us.7.1, align 4, !tbaa !12 - %153 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %29 - %154 = add nsw i64 %153, %149 - %arrayidx15.i.i.us.us.7.1 = getelementptr inbounds float, float* %12, i64 %154 - %155 = load float, float* %arrayidx15.i.i.us.us.7.1, align 4, !tbaa !12 - %156 = tail call float @llvm.fmuladd.f32(float %152, float %155, float %150) #2 - store float %156, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7.1, 1 - %exitcond.not.i.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.7.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1.loopexit, label %for.body.i.i.us.us.7.1, !llvm.loop !19 - -if.end.i.i.us.us.7.1.loopexit: ; preds = %for.body.i.i.us.us.7.1 - br label %if.end.i.i.us.us.7.1 - -if.end.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7.1.loopexit, %if.end.i.i.us.us.7 - %157 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %157, 32 - br i1 %exitcond.7.not.1, label %_pocl_kernel_mm3_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !23 - -if.then.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6 - %add.i.i.us.us.6.1 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6.1 - %idxprom.i.i.us.us.6.1 = sext i32 %add.i.i.us.us.6.1 to i64 - %arrayidx.i.i.us.us.6.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6.1 = shl i64 %add1.i.i.i.us.us.6.1, 32 - %158 = ashr exact i64 %sext.i.i.us.us.6.1, 32 - br label %for.body.i.i.us.us.6.1 - -for.body.i.i.us.us.6.1: ; preds = %for.body.i.i.us.us.6.1, %if.then.i.i.us.us.6.1 - %indvars.iv.next.i.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.i.us.us.6.1, %for.body.i.i.us.us.6.1 ], [ 0, %if.then.i.i.us.us.6.1 ] - %159 = phi float [ %165, %for.body.i.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.i.us.us.6.1 ] - %160 = add nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %128 - %arrayidx11.i.i.us.us.6.1 = getelementptr inbounds float, float* %8, i64 %160 - %161 = load float, float* %arrayidx11.i.i.us.us.6.1, align 4, !tbaa !12 - %162 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %29 - %163 = add nsw i64 %162, %158 - %arrayidx15.i.i.us.us.6.1 = getelementptr inbounds float, float* %12, i64 %163 - %164 = load float, float* %arrayidx15.i.i.us.us.6.1, align 4, !tbaa !12 - %165 = tail call float @llvm.fmuladd.f32(float %161, float %164, float %159) #2 - store float %165, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6.1, 1 - %exitcond.not.i.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.6.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1.loopexit, label %for.body.i.i.us.us.6.1, !llvm.loop !19 - -if.end.i.i.us.us.6.1.loopexit: ; preds = %for.body.i.i.us.us.6.1 - br label %if.end.i.i.us.us.6.1 - -if.end.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6.1.loopexit, %if.end.i.i.us.us.6 - %166 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %166, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !23 - -if.then.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5 - %add.i.i.us.us.5.1 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5.1 - %idxprom.i.i.us.us.5.1 = sext i32 %add.i.i.us.us.5.1 to i64 - %arrayidx.i.i.us.us.5.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5.1 = shl i64 %add1.i.i.i.us.us.5.1, 32 - %167 = ashr exact i64 %sext.i.i.us.us.5.1, 32 - br label %for.body.i.i.us.us.5.1 - -for.body.i.i.us.us.5.1: ; preds = %for.body.i.i.us.us.5.1, %if.then.i.i.us.us.5.1 - %indvars.iv.next.i.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.i.us.us.5.1, %for.body.i.i.us.us.5.1 ], [ 0, %if.then.i.i.us.us.5.1 ] - %168 = phi float [ %174, %for.body.i.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.i.us.us.5.1 ] - %169 = add nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %117 - %arrayidx11.i.i.us.us.5.1 = getelementptr inbounds float, float* %8, i64 %169 - %170 = load float, float* %arrayidx11.i.i.us.us.5.1, align 4, !tbaa !12 - %171 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %29 - %172 = add nsw i64 %171, %167 - %arrayidx15.i.i.us.us.5.1 = getelementptr inbounds float, float* %12, i64 %172 - %173 = load float, float* %arrayidx15.i.i.us.us.5.1, align 4, !tbaa !12 - %174 = tail call float @llvm.fmuladd.f32(float %170, float %173, float %168) #2 - store float %174, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5.1, 1 - %exitcond.not.i.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.5.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1.loopexit, label %for.body.i.i.us.us.5.1, !llvm.loop !19 - -if.end.i.i.us.us.5.1.loopexit: ; preds = %for.body.i.i.us.us.5.1 - br label %if.end.i.i.us.us.5.1 - -if.end.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5.1.loopexit, %if.end.i.i.us.us.5 - %175 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %175, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !23 - -if.then.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4 - %add.i.i.us.us.4.1 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4.1 - %idxprom.i.i.us.us.4.1 = sext i32 %add.i.i.us.us.4.1 to i64 - %arrayidx.i.i.us.us.4.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4.1 = shl i64 %add1.i.i.i.us.us.4.1, 32 - %176 = ashr exact i64 %sext.i.i.us.us.4.1, 32 - br label %for.body.i.i.us.us.4.1 - -for.body.i.i.us.us.4.1: ; preds = %for.body.i.i.us.us.4.1, %if.then.i.i.us.us.4.1 - %indvars.iv.next.i.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.i.us.us.4.1, %for.body.i.i.us.us.4.1 ], [ 0, %if.then.i.i.us.us.4.1 ] - %177 = phi float [ %183, %for.body.i.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.i.us.us.4.1 ] - %178 = add nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %106 - %arrayidx11.i.i.us.us.4.1 = getelementptr inbounds float, float* %8, i64 %178 - %179 = load float, float* %arrayidx11.i.i.us.us.4.1, align 4, !tbaa !12 - %180 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %29 - %181 = add nsw i64 %180, %176 - %arrayidx15.i.i.us.us.4.1 = getelementptr inbounds float, float* %12, i64 %181 - %182 = load float, float* %arrayidx15.i.i.us.us.4.1, align 4, !tbaa !12 - %183 = tail call float @llvm.fmuladd.f32(float %179, float %182, float %177) #2 - store float %183, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4.1, 1 - %exitcond.not.i.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.4.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1.loopexit, label %for.body.i.i.us.us.4.1, !llvm.loop !19 - -if.end.i.i.us.us.4.1.loopexit: ; preds = %for.body.i.i.us.us.4.1 - br label %if.end.i.i.us.us.4.1 - -if.end.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4.1.loopexit, %if.end.i.i.us.us.4 - %184 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %184, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !23 - -if.then.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3 - %add.i.i.us.us.3.1 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3.1 - %idxprom.i.i.us.us.3.1 = sext i32 %add.i.i.us.us.3.1 to i64 - %arrayidx.i.i.us.us.3.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3.1 = shl i64 %add1.i.i.i.us.us.3.1, 32 - %185 = ashr exact i64 %sext.i.i.us.us.3.1, 32 - br label %for.body.i.i.us.us.3.1 - -for.body.i.i.us.us.3.1: ; preds = %for.body.i.i.us.us.3.1, %if.then.i.i.us.us.3.1 - %indvars.iv.next.i.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.i.us.us.3.1, %for.body.i.i.us.us.3.1 ], [ 0, %if.then.i.i.us.us.3.1 ] - %186 = phi float [ %192, %for.body.i.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.i.us.us.3.1 ] - %187 = add nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %95 - %arrayidx11.i.i.us.us.3.1 = getelementptr inbounds float, float* %8, i64 %187 - %188 = load float, float* %arrayidx11.i.i.us.us.3.1, align 4, !tbaa !12 - %189 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %29 - %190 = add nsw i64 %189, %185 - %arrayidx15.i.i.us.us.3.1 = getelementptr inbounds float, float* %12, i64 %190 - %191 = load float, float* %arrayidx15.i.i.us.us.3.1, align 4, !tbaa !12 - %192 = tail call float @llvm.fmuladd.f32(float %188, float %191, float %186) #2 - store float %192, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3.1, 1 - %exitcond.not.i.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.3.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1.loopexit, label %for.body.i.i.us.us.3.1, !llvm.loop !19 - -if.end.i.i.us.us.3.1.loopexit: ; preds = %for.body.i.i.us.us.3.1 - br label %if.end.i.i.us.us.3.1 - -if.end.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3.1.loopexit, %if.end.i.i.us.us.3 - %193 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %193, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !23 - -if.then.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2 - %add.i.i.us.us.2.1 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2.1 - %idxprom.i.i.us.us.2.1 = sext i32 %add.i.i.us.us.2.1 to i64 - %arrayidx.i.i.us.us.2.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2.1 = shl i64 %add1.i.i.i.us.us.2.1, 32 - %194 = ashr exact i64 %sext.i.i.us.us.2.1, 32 - br label %for.body.i.i.us.us.2.1 - -for.body.i.i.us.us.2.1: ; preds = %for.body.i.i.us.us.2.1, %if.then.i.i.us.us.2.1 - %indvars.iv.next.i.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.i.us.us.2.1, %for.body.i.i.us.us.2.1 ], [ 0, %if.then.i.i.us.us.2.1 ] - %195 = phi float [ %201, %for.body.i.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.i.us.us.2.1 ] - %196 = add nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %84 - %arrayidx11.i.i.us.us.2.1 = getelementptr inbounds float, float* %8, i64 %196 - %197 = load float, float* %arrayidx11.i.i.us.us.2.1, align 4, !tbaa !12 - %198 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %29 - %199 = add nsw i64 %198, %194 - %arrayidx15.i.i.us.us.2.1 = getelementptr inbounds float, float* %12, i64 %199 - %200 = load float, float* %arrayidx15.i.i.us.us.2.1, align 4, !tbaa !12 - %201 = tail call float @llvm.fmuladd.f32(float %197, float %200, float %195) #2 - store float %201, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2.1, 1 - %exitcond.not.i.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.2.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1.loopexit, label %for.body.i.i.us.us.2.1, !llvm.loop !19 - -if.end.i.i.us.us.2.1.loopexit: ; preds = %for.body.i.i.us.us.2.1 - br label %if.end.i.i.us.us.2.1 - -if.end.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2.1.loopexit, %if.end.i.i.us.us.2 - %202 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %202, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !23 - -if.then.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1 - %add.i.i.us.us.1.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1.1 - %idxprom.i.i.us.us.1.1 = sext i32 %add.i.i.us.us.1.1 to i64 - %arrayidx.i.i.us.us.1.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1.1 = shl i64 %add1.i.i.i.us.us.1.1, 32 - %203 = ashr exact i64 %sext.i.i.us.us.1.1, 32 - br label %for.body.i.i.us.us.1.1 - -for.body.i.i.us.us.1.1: ; preds = %for.body.i.i.us.us.1.1, %if.then.i.i.us.us.1.1 - %indvars.iv.next.i.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1.1, %for.body.i.i.us.us.1.1 ], [ 0, %if.then.i.i.us.us.1.1 ] - %204 = phi float [ %210, %for.body.i.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1.1 ] - %205 = add nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %63 - %arrayidx11.i.i.us.us.1.1 = getelementptr inbounds float, float* %8, i64 %205 - %206 = load float, float* %arrayidx11.i.i.us.us.1.1, align 4, !tbaa !12 - %207 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %29 - %208 = add nsw i64 %207, %203 - %arrayidx15.i.i.us.us.1.1 = getelementptr inbounds float, float* %12, i64 %208 - %209 = load float, float* %arrayidx15.i.i.us.us.1.1, align 4, !tbaa !12 - %210 = tail call float @llvm.fmuladd.f32(float %206, float %209, float %204) #2 - store float %210, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1.1, 1 - %exitcond.not.i.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1.loopexit, label %for.body.i.i.us.us.1.1, !llvm.loop !19 - -if.end.i.i.us.us.1.1.loopexit: ; preds = %for.body.i.i.us.us.1.1 - br label %if.end.i.i.us.us.1.1 - -if.end.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1.1.loopexit, %if.end.i.i.us.us.1 - %211 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %211, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !23 - -if.then.i.i.us.us.146: ; preds = %if.end.i.i.us.us - %add.i.i.us.us.142 = add nsw i32 %mul.i.i.us, %conv.i.i.us.us.139 - %idxprom.i.i.us.us.143 = sext i32 %add.i.i.us.us.142 to i64 - %arrayidx.i.i.us.us.144 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.143 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.145 = shl i64 %add1.i.i.i.us.us.138, 32 - %212 = ashr exact i64 %sext.i.i.us.us.145, 32 - br label %for.body.i.i.us.us.152 - -for.body.i.i.us.us.152: ; preds = %for.body.i.i.us.us.152, %if.then.i.i.us.us.146 - %indvars.iv.next.i.i3.us.us.147 = phi i64 [ %indvars.iv.next.i.i.us.us.150, %for.body.i.i.us.us.152 ], [ 0, %if.then.i.i.us.us.146 ] - %213 = phi float [ %219, %for.body.i.i.us.us.152 ], [ 0.000000e+00, %if.then.i.i.us.us.146 ] - %214 = add nsw i64 %indvars.iv.next.i.i3.us.us.147, %61 - %arrayidx11.i.i.us.us.148 = getelementptr inbounds float, float* %8, i64 %214 - %215 = load float, float* %arrayidx11.i.i.us.us.148, align 4, !tbaa !12 - %216 = mul nsw i64 %indvars.iv.next.i.i3.us.us.147, %29 - %217 = add nsw i64 %216, %212 - %arrayidx15.i.i.us.us.149 = getelementptr inbounds float, float* %12, i64 %217 - %218 = load float, float* %arrayidx15.i.i.us.us.149, align 4, !tbaa !12 - %219 = tail call float @llvm.fmuladd.f32(float %215, float %218, float %213) #2 - store float %219, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.150 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.147, 1 - %exitcond.not.i.i.us.us.151 = icmp eq i64 %indvars.iv.next.i.i.us.us.150, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.151, label %if.end.i.i.us.us.153.loopexit, label %for.body.i.i.us.us.152, !llvm.loop !19 - -if.end.i.i.us.us.153.loopexit: ; preds = %for.body.i.i.us.us.152 - br label %if.end.i.i.us.us.153 - -if.end.i.i.us.us.153: ; preds = %if.end.i.i.us.us.153.loopexit, %if.end.i.i.us.us - %220 = add nuw nsw i64 %_local_id_x.i.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %220, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !23 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mm3_kernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %22 = getelementptr i8*, i8** %0, i64 5 - %23 = bitcast i8** %22 to i32** - %24 = load i32*, i32** %23, align 8 - %25 = load i32, i32* %24, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp638.i.i = icmp sgt i32 %25, 0 - %26 = sext i32 %21 to i64 - %wide.trip.count.i.i = zext i32 %25 to i64 - br i1 %cmp638.i.i, label %pregion_for_entry.pregion_for_init.i.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - %conv.i.i.us = trunc i64 %mul.i.i.i to i32 - %cmp4.i.i.us = icmp sgt i32 %21, %conv.i.i.us - %27 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.1 = or i32 %27, 1 - %cmp4.i.i.us.1 = icmp sgt i32 %21, %conv.i.i.us.1 - %28 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.2 = or i32 %28, 2 - %cmp4.i.i.us.2 = icmp sgt i32 %21, %conv.i.i.us.2 - %29 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.3 = or i32 %29, 3 - %cmp4.i.i.us.3 = icmp sgt i32 %21, %conv.i.i.us.3 - %30 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.4 = or i32 %30, 4 - %cmp4.i.i.us.4 = icmp sgt i32 %21, %conv.i.i.us.4 - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.5 = or i32 %31, 5 - %cmp4.i.i.us.5 = icmp sgt i32 %21, %conv.i.i.us.5 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.6 = or i32 %32, 6 - %cmp4.i.i.us.6 = icmp sgt i32 %21, %conv.i.i.us.6 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.7 = or i32 %33, 7 - %cmp4.i.i.us.7 = icmp sgt i32 %21, %conv.i.i.us.7 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.8 = or i32 %34, 8 - %cmp4.i.i.us.8 = icmp sgt i32 %21, %conv.i.i.us.8 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.9 = or i32 %35, 9 - %cmp4.i.i.us.9 = icmp sgt i32 %21, %conv.i.i.us.9 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.10 = or i32 %36, 10 - %cmp4.i.i.us.10 = icmp sgt i32 %21, %conv.i.i.us.10 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.11 = or i32 %37, 11 - %cmp4.i.i.us.11 = icmp sgt i32 %21, %conv.i.i.us.11 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.12 = or i32 %38, 12 - %cmp4.i.i.us.12 = icmp sgt i32 %21, %conv.i.i.us.12 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.13 = or i32 %39, 13 - %cmp4.i.i.us.13 = icmp sgt i32 %21, %conv.i.i.us.13 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.14 = or i32 %40, 14 - %cmp4.i.i.us.14 = icmp sgt i32 %21, %conv.i.i.us.14 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.15 = or i32 %41, 15 - %cmp4.i.i.us.15 = icmp sgt i32 %21, %conv.i.i.us.15 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.16 = or i32 %42, 16 - %cmp4.i.i.us.16 = icmp sgt i32 %21, %conv.i.i.us.16 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.17 = or i32 %43, 17 - %cmp4.i.i.us.17 = icmp sgt i32 %21, %conv.i.i.us.17 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.18 = or i32 %44, 18 - %cmp4.i.i.us.18 = icmp sgt i32 %21, %conv.i.i.us.18 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.19 = or i32 %45, 19 - %cmp4.i.i.us.19 = icmp sgt i32 %21, %conv.i.i.us.19 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.20 = or i32 %46, 20 - %cmp4.i.i.us.20 = icmp sgt i32 %21, %conv.i.i.us.20 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.21 = or i32 %47, 21 - %cmp4.i.i.us.21 = icmp sgt i32 %21, %conv.i.i.us.21 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.22 = or i32 %48, 22 - %cmp4.i.i.us.22 = icmp sgt i32 %21, %conv.i.i.us.22 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.23 = or i32 %49, 23 - %cmp4.i.i.us.23 = icmp sgt i32 %21, %conv.i.i.us.23 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.24 = or i32 %50, 24 - %cmp4.i.i.us.24 = icmp sgt i32 %21, %conv.i.i.us.24 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.25 = or i32 %51, 25 - %cmp4.i.i.us.25 = icmp sgt i32 %21, %conv.i.i.us.25 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.26 = or i32 %52, 26 - %cmp4.i.i.us.26 = icmp sgt i32 %21, %conv.i.i.us.26 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.27 = or i32 %53, 27 - %cmp4.i.i.us.27 = icmp sgt i32 %21, %conv.i.i.us.27 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.28 = or i32 %54, 28 - %cmp4.i.i.us.28 = icmp sgt i32 %21, %conv.i.i.us.28 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.29 = or i32 %55, 29 - %cmp4.i.i.us.29 = icmp sgt i32 %21, %conv.i.i.us.29 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.30 = or i32 %56, 30 - %cmp4.i.i.us.30 = icmp sgt i32 %21, %conv.i.i.us.30 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.31 = or i32 %57, 31 - %cmp4.i.i.us.31 = icmp sgt i32 %21, %conv.i.i.us.31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i.us.preheader: ; preds = %5 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %17, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %21, %conv2.i.i.us - %mul8.i.i.us = mul nsw i32 %25, %conv2.i.i.us - %58 = sext i32 %mul8.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us.preheader - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us.153 - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us.preheader - %59 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %59, 1 - %cmp.i.i.us.1 = icmp sgt i32 %17, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %21, %conv2.i.i.us.1 - %mul8.i.i.us.1 = mul nsw i32 %25, %conv2.i.i.us.1 - %60 = sext i32 %mul8.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us.153, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %217, %if.end.i.i.us.us.153 ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %21, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us - store float 0.000000e+00, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %61 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %62 = or i64 %_local_id_x.i.0.us.us, 1 - %add1.i.i.i.us.us.138 = add nuw nsw i64 %62, %mul.i.i.i - %conv.i.i.us.us.139 = trunc i64 %add1.i.i.i.us.us.138 to i32 - %cmp4.i.i.us.us.140 = icmp sgt i32 %21, %conv.i.i.us.us.139 - br i1 %cmp4.i.i.us.us.140, label %if.then.i.i.us.us.146, label %if.end.i.i.us.us.153 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %63 = phi float [ %69, %for.body.i.i.us.us ], [ 0.000000e+00, %if.then.i.i.us.us ] - %64 = add nsw i64 %indvars.iv.next.i.i3.us.us, %58 - %arrayidx11.i.i.us.us = getelementptr inbounds float, float* %7, i64 %64 - %65 = load float, float* %arrayidx11.i.i.us.us, align 4, !tbaa !12 - %66 = mul nsw i64 %indvars.iv.next.i.i3.us.us, %26 - %67 = add nsw i64 %66, %61 - %arrayidx15.i.i.us.us = getelementptr inbounds float, float* %10, i64 %67 - %68 = load float, float* %arrayidx15.i.i.us.us, align 4, !tbaa !12 - %69 = tail call float @llvm.fmuladd.f32(float %65, float %68, float %63) #2 - store float %69, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %pregion_for_entry.pregion_for_init.i.i.preheader - %_local_id_y.i.0 = phi i64 [ %70, %pregion_for_end.i.i ], [ 0, %pregion_for_entry.pregion_for_init.i.i.preheader ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %17, %conv2.i.i - %mul.i.i = mul nsw i32 %21, %conv2.i.i - br i1 %cmp.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us.preheader - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -pregion_for_end.i.i: ; preds = %if.then.i.i.us.31, %if.end.i.i.us.30, %pregion_for_entry.pregion_for_init.i.i - %70 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond33.not = icmp eq i64 %70, 8 - br i1 %exitcond33.not, label %_pocl_kernel_mm3_kernel2.exit.loopexit54, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_mm3_kernel2.exit.loopexit: ; preds = %if.end.i.i.us.us.7.1 - br label %_pocl_kernel_mm3_kernel2.exit - -_pocl_kernel_mm3_kernel2.exit.loopexit54: ; preds = %pregion_for_end.i.i - br label %_pocl_kernel_mm3_kernel2.exit - -_pocl_kernel_mm3_kernel2.exit: ; preds = %pregion_for_end.i.i.us.6, %_pocl_kernel_mm3_kernel2.exit.loopexit54, %_pocl_kernel_mm3_kernel2.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %208, %if.end.i.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %21, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %71 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %72 = phi float [ %78, %for.body.i.i.us.us.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1 ] - %73 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %60 - %arrayidx11.i.i.us.us.1 = getelementptr inbounds float, float* %7, i64 %73 - %74 = load float, float* %arrayidx11.i.i.us.us.1, align 4, !tbaa !12 - %75 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1, %26 - %76 = add nsw i64 %75, %71 - %arrayidx15.i.i.us.us.1 = getelementptr inbounds float, float* %10, i64 %76 - %77 = load float, float* %arrayidx15.i.i.us.us.1, align 4, !tbaa !12 - %78 = tail call float @llvm.fmuladd.f32(float %74, float %77, float %72) #2 - store float %78, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !19 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %79 = or i64 %_local_id_x.i.0.us.us.1, 1 - %add1.i.i.i.us.us.1.1 = add nuw nsw i64 %79, %mul.i.i.i - %conv.i.i.us.us.1.1 = trunc i64 %add1.i.i.i.us.us.1.1 to i32 - %cmp4.i.i.us.us.1.1 = icmp sgt i32 %21, %conv.i.i.us.us.1.1 - br i1 %cmp4.i.i.us.us.1.1, label %if.then.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %80 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %80, 2 - %cmp.i.i.us.2 = icmp sgt i32 %17, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %21, %conv2.i.i.us.2 - %mul8.i.i.us.2 = mul nsw i32 %25, %conv2.i.i.us.2 - %81 = sext i32 %mul8.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.1, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %199, %if.end.i.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %21, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %82 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %83 = phi float [ %89, %for.body.i.i.us.us.2 ], [ 0.000000e+00, %if.then.i.i.us.us.2 ] - %84 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %81 - %arrayidx11.i.i.us.us.2 = getelementptr inbounds float, float* %7, i64 %84 - %85 = load float, float* %arrayidx11.i.i.us.us.2, align 4, !tbaa !12 - %86 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2, %26 - %87 = add nsw i64 %86, %82 - %arrayidx15.i.i.us.us.2 = getelementptr inbounds float, float* %10, i64 %87 - %88 = load float, float* %arrayidx15.i.i.us.us.2, align 4, !tbaa !12 - %89 = tail call float @llvm.fmuladd.f32(float %85, float %88, float %83) #2 - store float %89, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !19 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %90 = or i64 %_local_id_x.i.0.us.us.2, 1 - %add1.i.i.i.us.us.2.1 = add nuw nsw i64 %90, %mul.i.i.i - %conv.i.i.us.us.2.1 = trunc i64 %add1.i.i.i.us.us.2.1 to i32 - %cmp4.i.i.us.us.2.1 = icmp sgt i32 %21, %conv.i.i.us.us.2.1 - br i1 %cmp4.i.i.us.us.2.1, label %if.then.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2.1 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %91 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %91, 3 - %cmp.i.i.us.3 = icmp sgt i32 %17, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %21, %conv2.i.i.us.3 - %mul8.i.i.us.3 = mul nsw i32 %25, %conv2.i.i.us.3 - %92 = sext i32 %mul8.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.1, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %190, %if.end.i.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %21, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %93 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %94 = phi float [ %100, %for.body.i.i.us.us.3 ], [ 0.000000e+00, %if.then.i.i.us.us.3 ] - %95 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %92 - %arrayidx11.i.i.us.us.3 = getelementptr inbounds float, float* %7, i64 %95 - %96 = load float, float* %arrayidx11.i.i.us.us.3, align 4, !tbaa !12 - %97 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3, %26 - %98 = add nsw i64 %97, %93 - %arrayidx15.i.i.us.us.3 = getelementptr inbounds float, float* %10, i64 %98 - %99 = load float, float* %arrayidx15.i.i.us.us.3, align 4, !tbaa !12 - %100 = tail call float @llvm.fmuladd.f32(float %96, float %99, float %94) #2 - store float %100, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !19 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %101 = or i64 %_local_id_x.i.0.us.us.3, 1 - %add1.i.i.i.us.us.3.1 = add nuw nsw i64 %101, %mul.i.i.i - %conv.i.i.us.us.3.1 = trunc i64 %add1.i.i.i.us.us.3.1 to i32 - %cmp4.i.i.us.us.3.1 = icmp sgt i32 %21, %conv.i.i.us.us.3.1 - br i1 %cmp4.i.i.us.us.3.1, label %if.then.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3.1 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %102 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %102, 4 - %cmp.i.i.us.4 = icmp sgt i32 %17, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %21, %conv2.i.i.us.4 - %mul8.i.i.us.4 = mul nsw i32 %25, %conv2.i.i.us.4 - %103 = sext i32 %mul8.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.1, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %181, %if.end.i.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %21, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %104 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %105 = phi float [ %111, %for.body.i.i.us.us.4 ], [ 0.000000e+00, %if.then.i.i.us.us.4 ] - %106 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %103 - %arrayidx11.i.i.us.us.4 = getelementptr inbounds float, float* %7, i64 %106 - %107 = load float, float* %arrayidx11.i.i.us.us.4, align 4, !tbaa !12 - %108 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4, %26 - %109 = add nsw i64 %108, %104 - %arrayidx15.i.i.us.us.4 = getelementptr inbounds float, float* %10, i64 %109 - %110 = load float, float* %arrayidx15.i.i.us.us.4, align 4, !tbaa !12 - %111 = tail call float @llvm.fmuladd.f32(float %107, float %110, float %105) #2 - store float %111, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !19 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %112 = or i64 %_local_id_x.i.0.us.us.4, 1 - %add1.i.i.i.us.us.4.1 = add nuw nsw i64 %112, %mul.i.i.i - %conv.i.i.us.us.4.1 = trunc i64 %add1.i.i.i.us.us.4.1 to i32 - %cmp4.i.i.us.us.4.1 = icmp sgt i32 %21, %conv.i.i.us.us.4.1 - br i1 %cmp4.i.i.us.us.4.1, label %if.then.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4.1 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %113 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %113, 5 - %cmp.i.i.us.5 = icmp sgt i32 %17, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %21, %conv2.i.i.us.5 - %mul8.i.i.us.5 = mul nsw i32 %25, %conv2.i.i.us.5 - %114 = sext i32 %mul8.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.1, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %172, %if.end.i.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %21, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %115 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %116 = phi float [ %122, %for.body.i.i.us.us.5 ], [ 0.000000e+00, %if.then.i.i.us.us.5 ] - %117 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %114 - %arrayidx11.i.i.us.us.5 = getelementptr inbounds float, float* %7, i64 %117 - %118 = load float, float* %arrayidx11.i.i.us.us.5, align 4, !tbaa !12 - %119 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5, %26 - %120 = add nsw i64 %119, %115 - %arrayidx15.i.i.us.us.5 = getelementptr inbounds float, float* %10, i64 %120 - %121 = load float, float* %arrayidx15.i.i.us.us.5, align 4, !tbaa !12 - %122 = tail call float @llvm.fmuladd.f32(float %118, float %121, float %116) #2 - store float %122, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !19 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %123 = or i64 %_local_id_x.i.0.us.us.5, 1 - %add1.i.i.i.us.us.5.1 = add nuw nsw i64 %123, %mul.i.i.i - %conv.i.i.us.us.5.1 = trunc i64 %add1.i.i.i.us.us.5.1 to i32 - %cmp4.i.i.us.us.5.1 = icmp sgt i32 %21, %conv.i.i.us.us.5.1 - br i1 %cmp4.i.i.us.us.5.1, label %if.then.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5.1 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %124 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %124, 6 - %cmp.i.i.us.6 = icmp sgt i32 %17, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %21, %conv2.i.i.us.6 - %mul8.i.i.us.6 = mul nsw i32 %25, %conv2.i.i.us.6 - %125 = sext i32 %mul8.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.1, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %163, %if.end.i.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %21, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %126 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %127 = phi float [ %133, %for.body.i.i.us.us.6 ], [ 0.000000e+00, %if.then.i.i.us.us.6 ] - %128 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %125 - %arrayidx11.i.i.us.us.6 = getelementptr inbounds float, float* %7, i64 %128 - %129 = load float, float* %arrayidx11.i.i.us.us.6, align 4, !tbaa !12 - %130 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6, %26 - %131 = add nsw i64 %130, %126 - %arrayidx15.i.i.us.us.6 = getelementptr inbounds float, float* %10, i64 %131 - %132 = load float, float* %arrayidx15.i.i.us.us.6, align 4, !tbaa !12 - %133 = tail call float @llvm.fmuladd.f32(float %129, float %132, float %127) #2 - store float %133, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !19 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %134 = or i64 %_local_id_x.i.0.us.us.6, 1 - %add1.i.i.i.us.us.6.1 = add nuw nsw i64 %134, %mul.i.i.i - %conv.i.i.us.us.6.1 = trunc i64 %add1.i.i.i.us.us.6.1 to i32 - %cmp4.i.i.us.us.6.1 = icmp sgt i32 %21, %conv.i.i.us.us.6.1 - br i1 %cmp4.i.i.us.us.6.1, label %if.then.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6.1 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %135 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %135, 7 - %cmp.i.i.us.7 = icmp sgt i32 %17, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %21, %conv2.i.i.us.7 - %mul8.i.i.us.7 = mul nsw i32 %25, %conv2.i.i.us.7 - %136 = sext i32 %mul8.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_mm3_kernel2.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.1, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %154, %if.end.i.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %21, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %137 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %138 = phi float [ %144, %for.body.i.i.us.us.7 ], [ 0.000000e+00, %if.then.i.i.us.us.7 ] - %139 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %136 - %arrayidx11.i.i.us.us.7 = getelementptr inbounds float, float* %7, i64 %139 - %140 = load float, float* %arrayidx11.i.i.us.us.7, align 4, !tbaa !12 - %141 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7, %26 - %142 = add nsw i64 %141, %137 - %arrayidx15.i.i.us.us.7 = getelementptr inbounds float, float* %10, i64 %142 - %143 = load float, float* %arrayidx15.i.i.us.us.7, align 4, !tbaa !12 - %144 = tail call float @llvm.fmuladd.f32(float %140, float %143, float %138) #2 - store float %144, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !19 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %145 = or i64 %_local_id_x.i.0.us.us.7, 1 - %add1.i.i.i.us.us.7.1 = add nuw nsw i64 %145, %mul.i.i.i - %conv.i.i.us.us.7.1 = trunc i64 %add1.i.i.i.us.us.7.1 to i32 - %cmp4.i.i.us.us.7.1 = icmp sgt i32 %21, %conv.i.i.us.us.7.1 - br i1 %cmp4.i.i.us.us.7.1, label %if.then.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1 - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %add.i.i.us.1 = add nsw i32 %mul.i.i, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %if.end.i.i.us - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %if.end.i.i.us.1 - %add.i.i.us.2 = add nsw i32 %mul.i.i, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %if.end.i.i.us.1 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %if.end.i.i.us.2 - %add.i.i.us.3 = add nsw i32 %mul.i.i, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %if.end.i.i.us.2 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %if.end.i.i.us.3 - %add.i.i.us.4 = add nsw i32 %mul.i.i, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %if.end.i.i.us.3 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %if.end.i.i.us.4 - %add.i.i.us.5 = add nsw i32 %mul.i.i, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %if.end.i.i.us.4 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %if.end.i.i.us.5 - %add.i.i.us.6 = add nsw i32 %mul.i.i, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %if.end.i.i.us.5 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %if.end.i.i.us.6 - %add.i.i.us.7 = add nsw i32 %mul.i.i, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %if.end.i.i.us.6 - br i1 %cmp4.i.i.us.8, label %if.then.i.i.us.8, label %if.end.i.i.us.8 - -if.then.i.i.us.8: ; preds = %if.end.i.i.us.7 - %add.i.i.us.8 = add nsw i32 %mul.i.i, %conv.i.i.us.8 - %idxprom.i.i.us.8 = sext i32 %add.i.i.us.8 to i64 - %arrayidx.i.i.us.8 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.8 - -if.end.i.i.us.8: ; preds = %if.then.i.i.us.8, %if.end.i.i.us.7 - br i1 %cmp4.i.i.us.9, label %if.then.i.i.us.9, label %if.end.i.i.us.9 - -if.then.i.i.us.9: ; preds = %if.end.i.i.us.8 - %add.i.i.us.9 = add nsw i32 %mul.i.i, %conv.i.i.us.9 - %idxprom.i.i.us.9 = sext i32 %add.i.i.us.9 to i64 - %arrayidx.i.i.us.9 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.9 - -if.end.i.i.us.9: ; preds = %if.then.i.i.us.9, %if.end.i.i.us.8 - br i1 %cmp4.i.i.us.10, label %if.then.i.i.us.10, label %if.end.i.i.us.10 - -if.then.i.i.us.10: ; preds = %if.end.i.i.us.9 - %add.i.i.us.10 = add nsw i32 %mul.i.i, %conv.i.i.us.10 - %idxprom.i.i.us.10 = sext i32 %add.i.i.us.10 to i64 - %arrayidx.i.i.us.10 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.10 - -if.end.i.i.us.10: ; preds = %if.then.i.i.us.10, %if.end.i.i.us.9 - br i1 %cmp4.i.i.us.11, label %if.then.i.i.us.11, label %if.end.i.i.us.11 - -if.then.i.i.us.11: ; preds = %if.end.i.i.us.10 - %add.i.i.us.11 = add nsw i32 %mul.i.i, %conv.i.i.us.11 - %idxprom.i.i.us.11 = sext i32 %add.i.i.us.11 to i64 - %arrayidx.i.i.us.11 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.11 - -if.end.i.i.us.11: ; preds = %if.then.i.i.us.11, %if.end.i.i.us.10 - br i1 %cmp4.i.i.us.12, label %if.then.i.i.us.12, label %if.end.i.i.us.12 - -if.then.i.i.us.12: ; preds = %if.end.i.i.us.11 - %add.i.i.us.12 = add nsw i32 %mul.i.i, %conv.i.i.us.12 - %idxprom.i.i.us.12 = sext i32 %add.i.i.us.12 to i64 - %arrayidx.i.i.us.12 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.12 - -if.end.i.i.us.12: ; preds = %if.then.i.i.us.12, %if.end.i.i.us.11 - br i1 %cmp4.i.i.us.13, label %if.then.i.i.us.13, label %if.end.i.i.us.13 - -if.then.i.i.us.13: ; preds = %if.end.i.i.us.12 - %add.i.i.us.13 = add nsw i32 %mul.i.i, %conv.i.i.us.13 - %idxprom.i.i.us.13 = sext i32 %add.i.i.us.13 to i64 - %arrayidx.i.i.us.13 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.13 - -if.end.i.i.us.13: ; preds = %if.then.i.i.us.13, %if.end.i.i.us.12 - br i1 %cmp4.i.i.us.14, label %if.then.i.i.us.14, label %if.end.i.i.us.14 - -if.then.i.i.us.14: ; preds = %if.end.i.i.us.13 - %add.i.i.us.14 = add nsw i32 %mul.i.i, %conv.i.i.us.14 - %idxprom.i.i.us.14 = sext i32 %add.i.i.us.14 to i64 - %arrayidx.i.i.us.14 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.14 - -if.end.i.i.us.14: ; preds = %if.then.i.i.us.14, %if.end.i.i.us.13 - br i1 %cmp4.i.i.us.15, label %if.then.i.i.us.15, label %if.end.i.i.us.15 - -if.then.i.i.us.15: ; preds = %if.end.i.i.us.14 - %add.i.i.us.15 = add nsw i32 %mul.i.i, %conv.i.i.us.15 - %idxprom.i.i.us.15 = sext i32 %add.i.i.us.15 to i64 - %arrayidx.i.i.us.15 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.15 - -if.end.i.i.us.15: ; preds = %if.then.i.i.us.15, %if.end.i.i.us.14 - br i1 %cmp4.i.i.us.16, label %if.then.i.i.us.16, label %if.end.i.i.us.16 - -if.then.i.i.us.16: ; preds = %if.end.i.i.us.15 - %add.i.i.us.16 = add nsw i32 %mul.i.i, %conv.i.i.us.16 - %idxprom.i.i.us.16 = sext i32 %add.i.i.us.16 to i64 - %arrayidx.i.i.us.16 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.16 - -if.end.i.i.us.16: ; preds = %if.then.i.i.us.16, %if.end.i.i.us.15 - br i1 %cmp4.i.i.us.17, label %if.then.i.i.us.17, label %if.end.i.i.us.17 - -if.then.i.i.us.17: ; preds = %if.end.i.i.us.16 - %add.i.i.us.17 = add nsw i32 %mul.i.i, %conv.i.i.us.17 - %idxprom.i.i.us.17 = sext i32 %add.i.i.us.17 to i64 - %arrayidx.i.i.us.17 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.17 - -if.end.i.i.us.17: ; preds = %if.then.i.i.us.17, %if.end.i.i.us.16 - br i1 %cmp4.i.i.us.18, label %if.then.i.i.us.18, label %if.end.i.i.us.18 - -if.then.i.i.us.18: ; preds = %if.end.i.i.us.17 - %add.i.i.us.18 = add nsw i32 %mul.i.i, %conv.i.i.us.18 - %idxprom.i.i.us.18 = sext i32 %add.i.i.us.18 to i64 - %arrayidx.i.i.us.18 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.18 - -if.end.i.i.us.18: ; preds = %if.then.i.i.us.18, %if.end.i.i.us.17 - br i1 %cmp4.i.i.us.19, label %if.then.i.i.us.19, label %if.end.i.i.us.19 - -if.then.i.i.us.19: ; preds = %if.end.i.i.us.18 - %add.i.i.us.19 = add nsw i32 %mul.i.i, %conv.i.i.us.19 - %idxprom.i.i.us.19 = sext i32 %add.i.i.us.19 to i64 - %arrayidx.i.i.us.19 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.19 - -if.end.i.i.us.19: ; preds = %if.then.i.i.us.19, %if.end.i.i.us.18 - br i1 %cmp4.i.i.us.20, label %if.then.i.i.us.20, label %if.end.i.i.us.20 - -if.then.i.i.us.20: ; preds = %if.end.i.i.us.19 - %add.i.i.us.20 = add nsw i32 %mul.i.i, %conv.i.i.us.20 - %idxprom.i.i.us.20 = sext i32 %add.i.i.us.20 to i64 - %arrayidx.i.i.us.20 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.20 - -if.end.i.i.us.20: ; preds = %if.then.i.i.us.20, %if.end.i.i.us.19 - br i1 %cmp4.i.i.us.21, label %if.then.i.i.us.21, label %if.end.i.i.us.21 - -if.then.i.i.us.21: ; preds = %if.end.i.i.us.20 - %add.i.i.us.21 = add nsw i32 %mul.i.i, %conv.i.i.us.21 - %idxprom.i.i.us.21 = sext i32 %add.i.i.us.21 to i64 - %arrayidx.i.i.us.21 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.21 - -if.end.i.i.us.21: ; preds = %if.then.i.i.us.21, %if.end.i.i.us.20 - br i1 %cmp4.i.i.us.22, label %if.then.i.i.us.22, label %if.end.i.i.us.22 - -if.then.i.i.us.22: ; preds = %if.end.i.i.us.21 - %add.i.i.us.22 = add nsw i32 %mul.i.i, %conv.i.i.us.22 - %idxprom.i.i.us.22 = sext i32 %add.i.i.us.22 to i64 - %arrayidx.i.i.us.22 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.22 - -if.end.i.i.us.22: ; preds = %if.then.i.i.us.22, %if.end.i.i.us.21 - br i1 %cmp4.i.i.us.23, label %if.then.i.i.us.23, label %if.end.i.i.us.23 - -if.then.i.i.us.23: ; preds = %if.end.i.i.us.22 - %add.i.i.us.23 = add nsw i32 %mul.i.i, %conv.i.i.us.23 - %idxprom.i.i.us.23 = sext i32 %add.i.i.us.23 to i64 - %arrayidx.i.i.us.23 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.23 - -if.end.i.i.us.23: ; preds = %if.then.i.i.us.23, %if.end.i.i.us.22 - br i1 %cmp4.i.i.us.24, label %if.then.i.i.us.24, label %if.end.i.i.us.24 - -if.then.i.i.us.24: ; preds = %if.end.i.i.us.23 - %add.i.i.us.24 = add nsw i32 %mul.i.i, %conv.i.i.us.24 - %idxprom.i.i.us.24 = sext i32 %add.i.i.us.24 to i64 - %arrayidx.i.i.us.24 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.24 - -if.end.i.i.us.24: ; preds = %if.then.i.i.us.24, %if.end.i.i.us.23 - br i1 %cmp4.i.i.us.25, label %if.then.i.i.us.25, label %if.end.i.i.us.25 - -if.then.i.i.us.25: ; preds = %if.end.i.i.us.24 - %add.i.i.us.25 = add nsw i32 %mul.i.i, %conv.i.i.us.25 - %idxprom.i.i.us.25 = sext i32 %add.i.i.us.25 to i64 - %arrayidx.i.i.us.25 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.25 - -if.end.i.i.us.25: ; preds = %if.then.i.i.us.25, %if.end.i.i.us.24 - br i1 %cmp4.i.i.us.26, label %if.then.i.i.us.26, label %if.end.i.i.us.26 - -if.then.i.i.us.26: ; preds = %if.end.i.i.us.25 - %add.i.i.us.26 = add nsw i32 %mul.i.i, %conv.i.i.us.26 - %idxprom.i.i.us.26 = sext i32 %add.i.i.us.26 to i64 - %arrayidx.i.i.us.26 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.26 - -if.end.i.i.us.26: ; preds = %if.then.i.i.us.26, %if.end.i.i.us.25 - br i1 %cmp4.i.i.us.27, label %if.then.i.i.us.27, label %if.end.i.i.us.27 - -if.then.i.i.us.27: ; preds = %if.end.i.i.us.26 - %add.i.i.us.27 = add nsw i32 %mul.i.i, %conv.i.i.us.27 - %idxprom.i.i.us.27 = sext i32 %add.i.i.us.27 to i64 - %arrayidx.i.i.us.27 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.27 - -if.end.i.i.us.27: ; preds = %if.then.i.i.us.27, %if.end.i.i.us.26 - br i1 %cmp4.i.i.us.28, label %if.then.i.i.us.28, label %if.end.i.i.us.28 - -if.then.i.i.us.28: ; preds = %if.end.i.i.us.27 - %add.i.i.us.28 = add nsw i32 %mul.i.i, %conv.i.i.us.28 - %idxprom.i.i.us.28 = sext i32 %add.i.i.us.28 to i64 - %arrayidx.i.i.us.28 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.28 - -if.end.i.i.us.28: ; preds = %if.then.i.i.us.28, %if.end.i.i.us.27 - br i1 %cmp4.i.i.us.29, label %if.then.i.i.us.29, label %if.end.i.i.us.29 - -if.then.i.i.us.29: ; preds = %if.end.i.i.us.28 - %add.i.i.us.29 = add nsw i32 %mul.i.i, %conv.i.i.us.29 - %idxprom.i.i.us.29 = sext i32 %add.i.i.us.29 to i64 - %arrayidx.i.i.us.29 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.29 - -if.end.i.i.us.29: ; preds = %if.then.i.i.us.29, %if.end.i.i.us.28 - br i1 %cmp4.i.i.us.30, label %if.then.i.i.us.30, label %if.end.i.i.us.30 - -if.then.i.i.us.30: ; preds = %if.end.i.i.us.29 - %add.i.i.us.30 = add nsw i32 %mul.i.i, %conv.i.i.us.30 - %idxprom.i.i.us.30 = sext i32 %add.i.i.us.30 to i64 - %arrayidx.i.i.us.30 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.30 - -if.end.i.i.us.30: ; preds = %if.then.i.i.us.30, %if.end.i.i.us.29 - br i1 %cmp4.i.i.us.31, label %if.then.i.i.us.31, label %pregion_for_end.i.i - -if.then.i.i.us.31: ; preds = %if.end.i.i.us.30 - %add.i.i.us.31 = add nsw i32 %mul.i.i, %conv.i.i.us.31 - %idxprom.i.i.us.31 = sext i32 %add.i.i.us.31 to i64 - %arrayidx.i.i.us.31 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -if.then.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7 - %add.i.i.us.us.7.1 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7.1 - %idxprom.i.i.us.us.7.1 = sext i32 %add.i.i.us.us.7.1 to i64 - %arrayidx.i.i.us.us.7.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7.1 = shl i64 %add1.i.i.i.us.us.7.1, 32 - %146 = ashr exact i64 %sext.i.i.us.us.7.1, 32 - br label %for.body.i.i.us.us.7.1 - -for.body.i.i.us.us.7.1: ; preds = %for.body.i.i.us.us.7.1, %if.then.i.i.us.us.7.1 - %indvars.iv.next.i.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.i.us.us.7.1, %for.body.i.i.us.us.7.1 ], [ 0, %if.then.i.i.us.us.7.1 ] - %147 = phi float [ %153, %for.body.i.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.i.us.us.7.1 ] - %148 = add nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %136 - %arrayidx11.i.i.us.us.7.1 = getelementptr inbounds float, float* %7, i64 %148 - %149 = load float, float* %arrayidx11.i.i.us.us.7.1, align 4, !tbaa !12 - %150 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %26 - %151 = add nsw i64 %150, %146 - %arrayidx15.i.i.us.us.7.1 = getelementptr inbounds float, float* %10, i64 %151 - %152 = load float, float* %arrayidx15.i.i.us.us.7.1, align 4, !tbaa !12 - %153 = tail call float @llvm.fmuladd.f32(float %149, float %152, float %147) #2 - store float %153, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7.1, 1 - %exitcond.not.i.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.7.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1.loopexit, label %for.body.i.i.us.us.7.1, !llvm.loop !19 - -if.end.i.i.us.us.7.1.loopexit: ; preds = %for.body.i.i.us.us.7.1 - br label %if.end.i.i.us.us.7.1 - -if.end.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7.1.loopexit, %if.end.i.i.us.us.7 - %154 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %154, 32 - br i1 %exitcond.7.not.1, label %_pocl_kernel_mm3_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !23 - -if.then.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6 - %add.i.i.us.us.6.1 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6.1 - %idxprom.i.i.us.us.6.1 = sext i32 %add.i.i.us.us.6.1 to i64 - %arrayidx.i.i.us.us.6.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6.1 = shl i64 %add1.i.i.i.us.us.6.1, 32 - %155 = ashr exact i64 %sext.i.i.us.us.6.1, 32 - br label %for.body.i.i.us.us.6.1 - -for.body.i.i.us.us.6.1: ; preds = %for.body.i.i.us.us.6.1, %if.then.i.i.us.us.6.1 - %indvars.iv.next.i.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.i.us.us.6.1, %for.body.i.i.us.us.6.1 ], [ 0, %if.then.i.i.us.us.6.1 ] - %156 = phi float [ %162, %for.body.i.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.i.us.us.6.1 ] - %157 = add nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %125 - %arrayidx11.i.i.us.us.6.1 = getelementptr inbounds float, float* %7, i64 %157 - %158 = load float, float* %arrayidx11.i.i.us.us.6.1, align 4, !tbaa !12 - %159 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %26 - %160 = add nsw i64 %159, %155 - %arrayidx15.i.i.us.us.6.1 = getelementptr inbounds float, float* %10, i64 %160 - %161 = load float, float* %arrayidx15.i.i.us.us.6.1, align 4, !tbaa !12 - %162 = tail call float @llvm.fmuladd.f32(float %158, float %161, float %156) #2 - store float %162, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6.1, 1 - %exitcond.not.i.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.6.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1.loopexit, label %for.body.i.i.us.us.6.1, !llvm.loop !19 - -if.end.i.i.us.us.6.1.loopexit: ; preds = %for.body.i.i.us.us.6.1 - br label %if.end.i.i.us.us.6.1 - -if.end.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6.1.loopexit, %if.end.i.i.us.us.6 - %163 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %163, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !23 - -if.then.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5 - %add.i.i.us.us.5.1 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5.1 - %idxprom.i.i.us.us.5.1 = sext i32 %add.i.i.us.us.5.1 to i64 - %arrayidx.i.i.us.us.5.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5.1 = shl i64 %add1.i.i.i.us.us.5.1, 32 - %164 = ashr exact i64 %sext.i.i.us.us.5.1, 32 - br label %for.body.i.i.us.us.5.1 - -for.body.i.i.us.us.5.1: ; preds = %for.body.i.i.us.us.5.1, %if.then.i.i.us.us.5.1 - %indvars.iv.next.i.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.i.us.us.5.1, %for.body.i.i.us.us.5.1 ], [ 0, %if.then.i.i.us.us.5.1 ] - %165 = phi float [ %171, %for.body.i.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.i.us.us.5.1 ] - %166 = add nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %114 - %arrayidx11.i.i.us.us.5.1 = getelementptr inbounds float, float* %7, i64 %166 - %167 = load float, float* %arrayidx11.i.i.us.us.5.1, align 4, !tbaa !12 - %168 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %26 - %169 = add nsw i64 %168, %164 - %arrayidx15.i.i.us.us.5.1 = getelementptr inbounds float, float* %10, i64 %169 - %170 = load float, float* %arrayidx15.i.i.us.us.5.1, align 4, !tbaa !12 - %171 = tail call float @llvm.fmuladd.f32(float %167, float %170, float %165) #2 - store float %171, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5.1, 1 - %exitcond.not.i.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.5.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1.loopexit, label %for.body.i.i.us.us.5.1, !llvm.loop !19 - -if.end.i.i.us.us.5.1.loopexit: ; preds = %for.body.i.i.us.us.5.1 - br label %if.end.i.i.us.us.5.1 - -if.end.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5.1.loopexit, %if.end.i.i.us.us.5 - %172 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %172, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !23 - -if.then.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4 - %add.i.i.us.us.4.1 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4.1 - %idxprom.i.i.us.us.4.1 = sext i32 %add.i.i.us.us.4.1 to i64 - %arrayidx.i.i.us.us.4.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4.1 = shl i64 %add1.i.i.i.us.us.4.1, 32 - %173 = ashr exact i64 %sext.i.i.us.us.4.1, 32 - br label %for.body.i.i.us.us.4.1 - -for.body.i.i.us.us.4.1: ; preds = %for.body.i.i.us.us.4.1, %if.then.i.i.us.us.4.1 - %indvars.iv.next.i.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.i.us.us.4.1, %for.body.i.i.us.us.4.1 ], [ 0, %if.then.i.i.us.us.4.1 ] - %174 = phi float [ %180, %for.body.i.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.i.us.us.4.1 ] - %175 = add nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %103 - %arrayidx11.i.i.us.us.4.1 = getelementptr inbounds float, float* %7, i64 %175 - %176 = load float, float* %arrayidx11.i.i.us.us.4.1, align 4, !tbaa !12 - %177 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %26 - %178 = add nsw i64 %177, %173 - %arrayidx15.i.i.us.us.4.1 = getelementptr inbounds float, float* %10, i64 %178 - %179 = load float, float* %arrayidx15.i.i.us.us.4.1, align 4, !tbaa !12 - %180 = tail call float @llvm.fmuladd.f32(float %176, float %179, float %174) #2 - store float %180, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4.1, 1 - %exitcond.not.i.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.4.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1.loopexit, label %for.body.i.i.us.us.4.1, !llvm.loop !19 - -if.end.i.i.us.us.4.1.loopexit: ; preds = %for.body.i.i.us.us.4.1 - br label %if.end.i.i.us.us.4.1 - -if.end.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4.1.loopexit, %if.end.i.i.us.us.4 - %181 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %181, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !23 - -if.then.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3 - %add.i.i.us.us.3.1 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3.1 - %idxprom.i.i.us.us.3.1 = sext i32 %add.i.i.us.us.3.1 to i64 - %arrayidx.i.i.us.us.3.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3.1 = shl i64 %add1.i.i.i.us.us.3.1, 32 - %182 = ashr exact i64 %sext.i.i.us.us.3.1, 32 - br label %for.body.i.i.us.us.3.1 - -for.body.i.i.us.us.3.1: ; preds = %for.body.i.i.us.us.3.1, %if.then.i.i.us.us.3.1 - %indvars.iv.next.i.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.i.us.us.3.1, %for.body.i.i.us.us.3.1 ], [ 0, %if.then.i.i.us.us.3.1 ] - %183 = phi float [ %189, %for.body.i.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.i.us.us.3.1 ] - %184 = add nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %92 - %arrayidx11.i.i.us.us.3.1 = getelementptr inbounds float, float* %7, i64 %184 - %185 = load float, float* %arrayidx11.i.i.us.us.3.1, align 4, !tbaa !12 - %186 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %26 - %187 = add nsw i64 %186, %182 - %arrayidx15.i.i.us.us.3.1 = getelementptr inbounds float, float* %10, i64 %187 - %188 = load float, float* %arrayidx15.i.i.us.us.3.1, align 4, !tbaa !12 - %189 = tail call float @llvm.fmuladd.f32(float %185, float %188, float %183) #2 - store float %189, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3.1, 1 - %exitcond.not.i.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.3.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1.loopexit, label %for.body.i.i.us.us.3.1, !llvm.loop !19 - -if.end.i.i.us.us.3.1.loopexit: ; preds = %for.body.i.i.us.us.3.1 - br label %if.end.i.i.us.us.3.1 - -if.end.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3.1.loopexit, %if.end.i.i.us.us.3 - %190 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %190, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !23 - -if.then.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2 - %add.i.i.us.us.2.1 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2.1 - %idxprom.i.i.us.us.2.1 = sext i32 %add.i.i.us.us.2.1 to i64 - %arrayidx.i.i.us.us.2.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2.1 = shl i64 %add1.i.i.i.us.us.2.1, 32 - %191 = ashr exact i64 %sext.i.i.us.us.2.1, 32 - br label %for.body.i.i.us.us.2.1 - -for.body.i.i.us.us.2.1: ; preds = %for.body.i.i.us.us.2.1, %if.then.i.i.us.us.2.1 - %indvars.iv.next.i.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.i.us.us.2.1, %for.body.i.i.us.us.2.1 ], [ 0, %if.then.i.i.us.us.2.1 ] - %192 = phi float [ %198, %for.body.i.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.i.us.us.2.1 ] - %193 = add nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %81 - %arrayidx11.i.i.us.us.2.1 = getelementptr inbounds float, float* %7, i64 %193 - %194 = load float, float* %arrayidx11.i.i.us.us.2.1, align 4, !tbaa !12 - %195 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %26 - %196 = add nsw i64 %195, %191 - %arrayidx15.i.i.us.us.2.1 = getelementptr inbounds float, float* %10, i64 %196 - %197 = load float, float* %arrayidx15.i.i.us.us.2.1, align 4, !tbaa !12 - %198 = tail call float @llvm.fmuladd.f32(float %194, float %197, float %192) #2 - store float %198, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2.1, 1 - %exitcond.not.i.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.2.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1.loopexit, label %for.body.i.i.us.us.2.1, !llvm.loop !19 - -if.end.i.i.us.us.2.1.loopexit: ; preds = %for.body.i.i.us.us.2.1 - br label %if.end.i.i.us.us.2.1 - -if.end.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2.1.loopexit, %if.end.i.i.us.us.2 - %199 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %199, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !23 - -if.then.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1 - %add.i.i.us.us.1.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1.1 - %idxprom.i.i.us.us.1.1 = sext i32 %add.i.i.us.us.1.1 to i64 - %arrayidx.i.i.us.us.1.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1.1 = shl i64 %add1.i.i.i.us.us.1.1, 32 - %200 = ashr exact i64 %sext.i.i.us.us.1.1, 32 - br label %for.body.i.i.us.us.1.1 - -for.body.i.i.us.us.1.1: ; preds = %for.body.i.i.us.us.1.1, %if.then.i.i.us.us.1.1 - %indvars.iv.next.i.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1.1, %for.body.i.i.us.us.1.1 ], [ 0, %if.then.i.i.us.us.1.1 ] - %201 = phi float [ %207, %for.body.i.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1.1 ] - %202 = add nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %60 - %arrayidx11.i.i.us.us.1.1 = getelementptr inbounds float, float* %7, i64 %202 - %203 = load float, float* %arrayidx11.i.i.us.us.1.1, align 4, !tbaa !12 - %204 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %26 - %205 = add nsw i64 %204, %200 - %arrayidx15.i.i.us.us.1.1 = getelementptr inbounds float, float* %10, i64 %205 - %206 = load float, float* %arrayidx15.i.i.us.us.1.1, align 4, !tbaa !12 - %207 = tail call float @llvm.fmuladd.f32(float %203, float %206, float %201) #2 - store float %207, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1.1, 1 - %exitcond.not.i.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1.loopexit, label %for.body.i.i.us.us.1.1, !llvm.loop !19 - -if.end.i.i.us.us.1.1.loopexit: ; preds = %for.body.i.i.us.us.1.1 - br label %if.end.i.i.us.us.1.1 - -if.end.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1.1.loopexit, %if.end.i.i.us.us.1 - %208 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %208, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !23 - -if.then.i.i.us.us.146: ; preds = %if.end.i.i.us.us - %add.i.i.us.us.142 = add nsw i32 %mul.i.i.us, %conv.i.i.us.us.139 - %idxprom.i.i.us.us.143 = sext i32 %add.i.i.us.us.142 to i64 - %arrayidx.i.i.us.us.144 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.143 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.145 = shl i64 %add1.i.i.i.us.us.138, 32 - %209 = ashr exact i64 %sext.i.i.us.us.145, 32 - br label %for.body.i.i.us.us.152 - -for.body.i.i.us.us.152: ; preds = %for.body.i.i.us.us.152, %if.then.i.i.us.us.146 - %indvars.iv.next.i.i3.us.us.147 = phi i64 [ %indvars.iv.next.i.i.us.us.150, %for.body.i.i.us.us.152 ], [ 0, %if.then.i.i.us.us.146 ] - %210 = phi float [ %216, %for.body.i.i.us.us.152 ], [ 0.000000e+00, %if.then.i.i.us.us.146 ] - %211 = add nsw i64 %indvars.iv.next.i.i3.us.us.147, %58 - %arrayidx11.i.i.us.us.148 = getelementptr inbounds float, float* %7, i64 %211 - %212 = load float, float* %arrayidx11.i.i.us.us.148, align 4, !tbaa !12 - %213 = mul nsw i64 %indvars.iv.next.i.i3.us.us.147, %26 - %214 = add nsw i64 %213, %209 - %arrayidx15.i.i.us.us.149 = getelementptr inbounds float, float* %10, i64 %214 - %215 = load float, float* %arrayidx15.i.i.us.us.149, align 4, !tbaa !12 - %216 = tail call float @llvm.fmuladd.f32(float %212, float %215, float %210) #2 - store float %216, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.150 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.147, 1 - %exitcond.not.i.i.us.us.151 = icmp eq i64 %indvars.iv.next.i.i.us.us.150, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.151, label %if.end.i.i.us.us.153.loopexit, label %for.body.i.i.us.us.152, !llvm.loop !19 - -if.end.i.i.us.us.153.loopexit: ; preds = %for.body.i.i.us.us.152 - br label %if.end.i.i.us.us.153 - -if.end.i.i.us.us.153: ; preds = %if.end.i.i.us.us.153.loopexit, %if.end.i.i.us.us - %217 = add nuw nsw i64 %_local_id_x.i.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %217, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !23 -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !""} -!10 = !{!"C", !"D", !"F", !"nj", !"nl", !"nm"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17, !18} -!17 = distinct !{} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.unroll.disable"} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !18} -!23 = distinct !{!23, !24} -!24 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/3mm_kernel3.ll b/pocl_irs/3mm_kernel3.ll deleted file mode 100644 index 047c80d..0000000 --- a/pocl_irs/3mm_kernel3.ll +++ /dev/null @@ -1,3583 +0,0 @@ -; ModuleID = './CF/DAJJACGEKBAPHIEIKFDOEEKGMOBCBEEPDHBEI/mm3_kernel3/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_mm3_kernel3(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, i32 %4, i32 %5, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %6, i64 %7, i64 %8, i64 %9) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %7, 5 - %mul3.i.i = shl i64 %8, 3 - %cmp638.i = icmp sgt i32 %5, 0 - %11 = sext i32 %4 to i64 - %wide.trip.count.i = zext i32 %5 to i64 - br i1 %cmp638.i, label %pregion_for_entry.pregion_for_init.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.preheader - -pregion_for_entry.pregion_for_init.i.preheader: ; preds = %10 - %conv.i.us = trunc i64 %mul.i.i to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %4 - %12 = trunc i64 %mul.i.i to i32 - %conv.i.us.1 = or i32 %12, 1 - %cmp4.i.us.1 = icmp slt i32 %conv.i.us.1, %4 - %13 = trunc i64 %mul.i.i to i32 - %conv.i.us.2 = or i32 %13, 2 - %cmp4.i.us.2 = icmp slt i32 %conv.i.us.2, %4 - %14 = trunc i64 %mul.i.i to i32 - %conv.i.us.3 = or i32 %14, 3 - %cmp4.i.us.3 = icmp slt i32 %conv.i.us.3, %4 - %15 = trunc i64 %mul.i.i to i32 - %conv.i.us.4 = or i32 %15, 4 - %cmp4.i.us.4 = icmp slt i32 %conv.i.us.4, %4 - %16 = trunc i64 %mul.i.i to i32 - %conv.i.us.5 = or i32 %16, 5 - %cmp4.i.us.5 = icmp slt i32 %conv.i.us.5, %4 - %17 = trunc i64 %mul.i.i to i32 - %conv.i.us.6 = or i32 %17, 6 - %cmp4.i.us.6 = icmp slt i32 %conv.i.us.6, %4 - %18 = trunc i64 %mul.i.i to i32 - %conv.i.us.7 = or i32 %18, 7 - %cmp4.i.us.7 = icmp slt i32 %conv.i.us.7, %4 - %19 = trunc i64 %mul.i.i to i32 - %conv.i.us.8 = or i32 %19, 8 - %cmp4.i.us.8 = icmp slt i32 %conv.i.us.8, %4 - %20 = trunc i64 %mul.i.i to i32 - %conv.i.us.9 = or i32 %20, 9 - %cmp4.i.us.9 = icmp slt i32 %conv.i.us.9, %4 - %21 = trunc i64 %mul.i.i to i32 - %conv.i.us.10 = or i32 %21, 10 - %cmp4.i.us.10 = icmp slt i32 %conv.i.us.10, %4 - %22 = trunc i64 %mul.i.i to i32 - %conv.i.us.11 = or i32 %22, 11 - %cmp4.i.us.11 = icmp slt i32 %conv.i.us.11, %4 - %23 = trunc i64 %mul.i.i to i32 - %conv.i.us.12 = or i32 %23, 12 - %cmp4.i.us.12 = icmp slt i32 %conv.i.us.12, %4 - %24 = trunc i64 %mul.i.i to i32 - %conv.i.us.13 = or i32 %24, 13 - %cmp4.i.us.13 = icmp slt i32 %conv.i.us.13, %4 - %25 = trunc i64 %mul.i.i to i32 - %conv.i.us.14 = or i32 %25, 14 - %cmp4.i.us.14 = icmp slt i32 %conv.i.us.14, %4 - %26 = trunc i64 %mul.i.i to i32 - %conv.i.us.15 = or i32 %26, 15 - %cmp4.i.us.15 = icmp slt i32 %conv.i.us.15, %4 - %27 = trunc i64 %mul.i.i to i32 - %conv.i.us.16 = or i32 %27, 16 - %cmp4.i.us.16 = icmp slt i32 %conv.i.us.16, %4 - %28 = trunc i64 %mul.i.i to i32 - %conv.i.us.17 = or i32 %28, 17 - %cmp4.i.us.17 = icmp slt i32 %conv.i.us.17, %4 - %29 = trunc i64 %mul.i.i to i32 - %conv.i.us.18 = or i32 %29, 18 - %cmp4.i.us.18 = icmp slt i32 %conv.i.us.18, %4 - %30 = trunc i64 %mul.i.i to i32 - %conv.i.us.19 = or i32 %30, 19 - %cmp4.i.us.19 = icmp slt i32 %conv.i.us.19, %4 - %31 = trunc i64 %mul.i.i to i32 - %conv.i.us.20 = or i32 %31, 20 - %cmp4.i.us.20 = icmp slt i32 %conv.i.us.20, %4 - %32 = trunc i64 %mul.i.i to i32 - %conv.i.us.21 = or i32 %32, 21 - %cmp4.i.us.21 = icmp slt i32 %conv.i.us.21, %4 - %33 = trunc i64 %mul.i.i to i32 - %conv.i.us.22 = or i32 %33, 22 - %cmp4.i.us.22 = icmp slt i32 %conv.i.us.22, %4 - %34 = trunc i64 %mul.i.i to i32 - %conv.i.us.23 = or i32 %34, 23 - %cmp4.i.us.23 = icmp slt i32 %conv.i.us.23, %4 - %35 = trunc i64 %mul.i.i to i32 - %conv.i.us.24 = or i32 %35, 24 - %cmp4.i.us.24 = icmp slt i32 %conv.i.us.24, %4 - %36 = trunc i64 %mul.i.i to i32 - %conv.i.us.25 = or i32 %36, 25 - %cmp4.i.us.25 = icmp slt i32 %conv.i.us.25, %4 - %37 = trunc i64 %mul.i.i to i32 - %conv.i.us.26 = or i32 %37, 26 - %cmp4.i.us.26 = icmp slt i32 %conv.i.us.26, %4 - %38 = trunc i64 %mul.i.i to i32 - %conv.i.us.27 = or i32 %38, 27 - %cmp4.i.us.27 = icmp slt i32 %conv.i.us.27, %4 - %39 = trunc i64 %mul.i.i to i32 - %conv.i.us.28 = or i32 %39, 28 - %cmp4.i.us.28 = icmp slt i32 %conv.i.us.28, %4 - %40 = trunc i64 %mul.i.i to i32 - %conv.i.us.29 = or i32 %40, 29 - %cmp4.i.us.29 = icmp slt i32 %conv.i.us.29, %4 - %41 = trunc i64 %mul.i.i to i32 - %conv.i.us.30 = or i32 %41, 30 - %cmp4.i.us.30 = icmp slt i32 %conv.i.us.30, %4 - %42 = trunc i64 %mul.i.i to i32 - %conv.i.us.31 = or i32 %42, 31 - %cmp4.i.us.31 = icmp slt i32 %conv.i.us.31, %4 - br label %pregion_for_entry.pregion_for_init.i - -pregion_for_entry.pregion_for_init.i.us.preheader: ; preds = %10 - %conv2.i.us = trunc i64 %mul3.i.i to i32 - %cmp.i.us = icmp slt i32 %conv2.i.us, %3 - %mul.i.us = mul nsw i32 %conv2.i.us, %4 - %mul8.i.us = mul nsw i32 %conv2.i.us, %5 - %43 = sext i32 %mul8.i.us to i64 - br i1 %cmp.i.us, label %pregion_for_entry.entry.i.us.us.preheader, label %pregion_for_end.i.us - -pregion_for_entry.entry.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.us.preheader - br label %pregion_for_entry.entry.i.us.us - -pregion_for_end.i.us.loopexit: ; preds = %if.end.i.us.us.153 - br label %pregion_for_end.i.us - -pregion_for_end.i.us: ; preds = %pregion_for_end.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.us.preheader - %44 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.1 = or i32 %44, 1 - %cmp.i.us.1 = icmp slt i32 %conv2.i.us.1, %3 - %mul.i.us.1 = mul nsw i32 %conv2.i.us.1, %4 - %mul8.i.us.1 = mul nsw i32 %conv2.i.us.1, %5 - %45 = sext i32 %mul8.i.us.1 to i64 - br i1 %cmp.i.us.1, label %pregion_for_entry.entry.i.us.us.1.preheader, label %pregion_for_end.i.us.1 - -pregion_for_entry.entry.i.us.us.1.preheader: ; preds = %pregion_for_end.i.us - br label %pregion_for_entry.entry.i.us.us.1 - -pregion_for_entry.entry.i.us.us: ; preds = %if.end.i.us.us.153, %pregion_for_entry.entry.i.us.us.preheader - %_local_id_x.0.us.us = phi i64 [ %202, %if.end.i.us.us.153 ], [ 0, %pregion_for_entry.entry.i.us.us.preheader ] - %add1.i.i.us.us = add nuw nsw i64 %_local_id_x.0.us.us, %mul.i.i - %conv.i.us.us = trunc i64 %add1.i.i.us.us to i32 - %cmp4.i.us.us = icmp slt i32 %conv.i.us.us, %4 - br i1 %cmp4.i.us.us, label %if.then.i.us.us, label %if.end.i.us.us - -if.then.i.us.us: ; preds = %pregion_for_entry.entry.i.us.us - %add.i.us.us = add nsw i32 %mul.i.us, %conv.i.us.us - %idxprom.i.us.us = sext i32 %add.i.us.us to i64 - %arrayidx.i.us.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us - store float 0.000000e+00, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us = shl i64 %add1.i.i.us.us, 32 - %46 = ashr exact i64 %sext.i.us.us, 32 - br label %for.body.i.us.us - -if.end.i.us.us.loopexit: ; preds = %for.body.i.us.us - br label %if.end.i.us.us - -if.end.i.us.us: ; preds = %if.end.i.us.us.loopexit, %pregion_for_entry.entry.i.us.us - %47 = or i64 %_local_id_x.0.us.us, 1 - %add1.i.i.us.us.138 = add nuw nsw i64 %47, %mul.i.i - %conv.i.us.us.139 = trunc i64 %add1.i.i.us.us.138 to i32 - %cmp4.i.us.us.140 = icmp slt i32 %conv.i.us.us.139, %4 - br i1 %cmp4.i.us.us.140, label %if.then.i.us.us.146, label %if.end.i.us.us.153 - -for.body.i.us.us: ; preds = %for.body.i.us.us, %if.then.i.us.us - %indvars.iv.next.i3.us.us = phi i64 [ %indvars.iv.next.i.us.us, %for.body.i.us.us ], [ 0, %if.then.i.us.us ] - %48 = phi float [ %54, %for.body.i.us.us ], [ 0.000000e+00, %if.then.i.us.us ] - %49 = add nsw i64 %indvars.iv.next.i3.us.us, %43 - %arrayidx11.i.us.us = getelementptr inbounds float, float* %0, i64 %49 - %50 = load float, float* %arrayidx11.i.us.us, align 4, !tbaa !12 - %51 = mul nsw i64 %indvars.iv.next.i3.us.us, %11 - %52 = add nsw i64 %51, %46 - %arrayidx15.i.us.us = getelementptr inbounds float, float* %1, i64 %52 - %53 = load float, float* %arrayidx15.i.us.us, align 4, !tbaa !12 - %54 = tail call float @llvm.fmuladd.f32(float %50, float %53, float %48) #2 - store float %54, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us = add nuw nsw i64 %indvars.iv.next.i3.us.us, 1 - %exitcond.not.i.us.us = icmp eq i64 %indvars.iv.next.i.us.us, %wide.trip.count.i - br i1 %exitcond.not.i.us.us, label %if.end.i.us.us.loopexit, label %for.body.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i: ; preds = %pregion_for_end.i, %pregion_for_entry.pregion_for_init.i.preheader - %_local_id_y.0 = phi i64 [ %55, %pregion_for_end.i ], [ 0, %pregion_for_entry.pregion_for_init.i.preheader ] - %add6.i.i = add nuw nsw i64 %_local_id_y.0, %mul3.i.i - %conv2.i = trunc i64 %add6.i.i to i32 - %cmp.i = icmp slt i32 %conv2.i, %3 - %mul.i = mul nsw i32 %conv2.i, %4 - br i1 %cmp.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_end.i - -pregion_for_entry.entry.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i - br i1 %cmp4.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us.preheader - %add.i.us = add nsw i32 %mul.i, %conv.i.us - %idxprom.i.us = sext i32 %add.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us - store float 0.000000e+00, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us - -if.end.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us.preheader - br i1 %cmp4.i.us.1, label %if.then.i.us.1, label %if.end.i.us.1 - -pregion_for_end.i: ; preds = %if.then.i.us.31, %if.end.i.us.30, %pregion_for_entry.pregion_for_init.i - %55 = add nuw nsw i64 %_local_id_y.0, 1 - %exitcond33.not = icmp eq i64 %55, 8 - br i1 %exitcond33.not, label %mm3_kernel3.exit.loopexit54, label %pregion_for_entry.pregion_for_init.i, !llvm.loop !21 - -mm3_kernel3.exit.loopexit: ; preds = %if.end.i.us.us.7.1 - br label %mm3_kernel3.exit - -mm3_kernel3.exit.loopexit54: ; preds = %pregion_for_end.i - br label %mm3_kernel3.exit - -mm3_kernel3.exit: ; preds = %pregion_for_end.i.us.6, %mm3_kernel3.exit.loopexit54, %mm3_kernel3.exit.loopexit - ret void - -pregion_for_entry.entry.i.us.us.1: ; preds = %if.end.i.us.us.1.1, %pregion_for_entry.entry.i.us.us.1.preheader - %_local_id_x.0.us.us.1 = phi i64 [ %193, %if.end.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.us.us.1.preheader ] - %add1.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.0.us.us.1, %mul.i.i - %conv.i.us.us.1 = trunc i64 %add1.i.i.us.us.1 to i32 - %cmp4.i.us.us.1 = icmp slt i32 %conv.i.us.us.1, %4 - br i1 %cmp4.i.us.us.1, label %if.then.i.us.us.1, label %if.end.i.us.us.1 - -if.then.i.us.us.1: ; preds = %pregion_for_entry.entry.i.us.us.1 - %add.i.us.us.1 = add nsw i32 %mul.i.us.1, %conv.i.us.us.1 - %idxprom.i.us.us.1 = sext i32 %add.i.us.us.1 to i64 - %arrayidx.i.us.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.1 = shl i64 %add1.i.i.us.us.1, 32 - %56 = ashr exact i64 %sext.i.us.us.1, 32 - br label %for.body.i.us.us.1 - -for.body.i.us.us.1: ; preds = %for.body.i.us.us.1, %if.then.i.us.us.1 - %indvars.iv.next.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.us.us.1, %for.body.i.us.us.1 ], [ 0, %if.then.i.us.us.1 ] - %57 = phi float [ %63, %for.body.i.us.us.1 ], [ 0.000000e+00, %if.then.i.us.us.1 ] - %58 = add nsw i64 %indvars.iv.next.i3.us.us.1, %45 - %arrayidx11.i.us.us.1 = getelementptr inbounds float, float* %0, i64 %58 - %59 = load float, float* %arrayidx11.i.us.us.1, align 4, !tbaa !12 - %60 = mul nsw i64 %indvars.iv.next.i3.us.us.1, %11 - %61 = add nsw i64 %60, %56 - %arrayidx15.i.us.us.1 = getelementptr inbounds float, float* %1, i64 %61 - %62 = load float, float* %arrayidx15.i.us.us.1, align 4, !tbaa !12 - %63 = tail call float @llvm.fmuladd.f32(float %59, float %62, float %57) #2 - store float %63, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1, 1 - %exitcond.not.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.us.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.1, label %if.end.i.us.us.1.loopexit, label %for.body.i.us.us.1, !llvm.loop !19 - -if.end.i.us.us.1.loopexit: ; preds = %for.body.i.us.us.1 - br label %if.end.i.us.us.1 - -if.end.i.us.us.1: ; preds = %if.end.i.us.us.1.loopexit, %pregion_for_entry.entry.i.us.us.1 - %64 = or i64 %_local_id_x.0.us.us.1, 1 - %add1.i.i.us.us.1.1 = add nuw nsw i64 %64, %mul.i.i - %conv.i.us.us.1.1 = trunc i64 %add1.i.i.us.us.1.1 to i32 - %cmp4.i.us.us.1.1 = icmp slt i32 %conv.i.us.us.1.1, %4 - br i1 %cmp4.i.us.us.1.1, label %if.then.i.us.us.1.1, label %if.end.i.us.us.1.1 - -pregion_for_end.i.us.1.loopexit: ; preds = %if.end.i.us.us.1.1 - br label %pregion_for_end.i.us.1 - -pregion_for_end.i.us.1: ; preds = %pregion_for_end.i.us.1.loopexit, %pregion_for_end.i.us - %65 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.2 = or i32 %65, 2 - %cmp.i.us.2 = icmp slt i32 %conv2.i.us.2, %3 - %mul.i.us.2 = mul nsw i32 %conv2.i.us.2, %4 - %mul8.i.us.2 = mul nsw i32 %conv2.i.us.2, %5 - %66 = sext i32 %mul8.i.us.2 to i64 - br i1 %cmp.i.us.2, label %pregion_for_entry.entry.i.us.us.2.preheader, label %pregion_for_end.i.us.2 - -pregion_for_entry.entry.i.us.us.2.preheader: ; preds = %pregion_for_end.i.us.1 - br label %pregion_for_entry.entry.i.us.us.2 - -pregion_for_entry.entry.i.us.us.2: ; preds = %if.end.i.us.us.2.1, %pregion_for_entry.entry.i.us.us.2.preheader - %_local_id_x.0.us.us.2 = phi i64 [ %184, %if.end.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.us.us.2.preheader ] - %add1.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.0.us.us.2, %mul.i.i - %conv.i.us.us.2 = trunc i64 %add1.i.i.us.us.2 to i32 - %cmp4.i.us.us.2 = icmp slt i32 %conv.i.us.us.2, %4 - br i1 %cmp4.i.us.us.2, label %if.then.i.us.us.2, label %if.end.i.us.us.2 - -if.then.i.us.us.2: ; preds = %pregion_for_entry.entry.i.us.us.2 - %add.i.us.us.2 = add nsw i32 %mul.i.us.2, %conv.i.us.us.2 - %idxprom.i.us.us.2 = sext i32 %add.i.us.us.2 to i64 - %arrayidx.i.us.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.2 = shl i64 %add1.i.i.us.us.2, 32 - %67 = ashr exact i64 %sext.i.us.us.2, 32 - br label %for.body.i.us.us.2 - -for.body.i.us.us.2: ; preds = %for.body.i.us.us.2, %if.then.i.us.us.2 - %indvars.iv.next.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.us.us.2, %for.body.i.us.us.2 ], [ 0, %if.then.i.us.us.2 ] - %68 = phi float [ %74, %for.body.i.us.us.2 ], [ 0.000000e+00, %if.then.i.us.us.2 ] - %69 = add nsw i64 %indvars.iv.next.i3.us.us.2, %66 - %arrayidx11.i.us.us.2 = getelementptr inbounds float, float* %0, i64 %69 - %70 = load float, float* %arrayidx11.i.us.us.2, align 4, !tbaa !12 - %71 = mul nsw i64 %indvars.iv.next.i3.us.us.2, %11 - %72 = add nsw i64 %71, %67 - %arrayidx15.i.us.us.2 = getelementptr inbounds float, float* %1, i64 %72 - %73 = load float, float* %arrayidx15.i.us.us.2, align 4, !tbaa !12 - %74 = tail call float @llvm.fmuladd.f32(float %70, float %73, float %68) #2 - store float %74, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2, 1 - %exitcond.not.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.us.us.2, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.2, label %if.end.i.us.us.2.loopexit, label %for.body.i.us.us.2, !llvm.loop !19 - -if.end.i.us.us.2.loopexit: ; preds = %for.body.i.us.us.2 - br label %if.end.i.us.us.2 - -if.end.i.us.us.2: ; preds = %if.end.i.us.us.2.loopexit, %pregion_for_entry.entry.i.us.us.2 - %75 = or i64 %_local_id_x.0.us.us.2, 1 - %add1.i.i.us.us.2.1 = add nuw nsw i64 %75, %mul.i.i - %conv.i.us.us.2.1 = trunc i64 %add1.i.i.us.us.2.1 to i32 - %cmp4.i.us.us.2.1 = icmp slt i32 %conv.i.us.us.2.1, %4 - br i1 %cmp4.i.us.us.2.1, label %if.then.i.us.us.2.1, label %if.end.i.us.us.2.1 - -pregion_for_end.i.us.2.loopexit: ; preds = %if.end.i.us.us.2.1 - br label %pregion_for_end.i.us.2 - -pregion_for_end.i.us.2: ; preds = %pregion_for_end.i.us.2.loopexit, %pregion_for_end.i.us.1 - %76 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.3 = or i32 %76, 3 - %cmp.i.us.3 = icmp slt i32 %conv2.i.us.3, %3 - %mul.i.us.3 = mul nsw i32 %conv2.i.us.3, %4 - %mul8.i.us.3 = mul nsw i32 %conv2.i.us.3, %5 - %77 = sext i32 %mul8.i.us.3 to i64 - br i1 %cmp.i.us.3, label %pregion_for_entry.entry.i.us.us.3.preheader, label %pregion_for_end.i.us.3 - -pregion_for_entry.entry.i.us.us.3.preheader: ; preds = %pregion_for_end.i.us.2 - br label %pregion_for_entry.entry.i.us.us.3 - -pregion_for_entry.entry.i.us.us.3: ; preds = %if.end.i.us.us.3.1, %pregion_for_entry.entry.i.us.us.3.preheader - %_local_id_x.0.us.us.3 = phi i64 [ %175, %if.end.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.us.us.3.preheader ] - %add1.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.0.us.us.3, %mul.i.i - %conv.i.us.us.3 = trunc i64 %add1.i.i.us.us.3 to i32 - %cmp4.i.us.us.3 = icmp slt i32 %conv.i.us.us.3, %4 - br i1 %cmp4.i.us.us.3, label %if.then.i.us.us.3, label %if.end.i.us.us.3 - -if.then.i.us.us.3: ; preds = %pregion_for_entry.entry.i.us.us.3 - %add.i.us.us.3 = add nsw i32 %mul.i.us.3, %conv.i.us.us.3 - %idxprom.i.us.us.3 = sext i32 %add.i.us.us.3 to i64 - %arrayidx.i.us.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.3 = shl i64 %add1.i.i.us.us.3, 32 - %78 = ashr exact i64 %sext.i.us.us.3, 32 - br label %for.body.i.us.us.3 - -for.body.i.us.us.3: ; preds = %for.body.i.us.us.3, %if.then.i.us.us.3 - %indvars.iv.next.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.us.us.3, %for.body.i.us.us.3 ], [ 0, %if.then.i.us.us.3 ] - %79 = phi float [ %85, %for.body.i.us.us.3 ], [ 0.000000e+00, %if.then.i.us.us.3 ] - %80 = add nsw i64 %indvars.iv.next.i3.us.us.3, %77 - %arrayidx11.i.us.us.3 = getelementptr inbounds float, float* %0, i64 %80 - %81 = load float, float* %arrayidx11.i.us.us.3, align 4, !tbaa !12 - %82 = mul nsw i64 %indvars.iv.next.i3.us.us.3, %11 - %83 = add nsw i64 %82, %78 - %arrayidx15.i.us.us.3 = getelementptr inbounds float, float* %1, i64 %83 - %84 = load float, float* %arrayidx15.i.us.us.3, align 4, !tbaa !12 - %85 = tail call float @llvm.fmuladd.f32(float %81, float %84, float %79) #2 - store float %85, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3, 1 - %exitcond.not.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.us.us.3, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.3, label %if.end.i.us.us.3.loopexit, label %for.body.i.us.us.3, !llvm.loop !19 - -if.end.i.us.us.3.loopexit: ; preds = %for.body.i.us.us.3 - br label %if.end.i.us.us.3 - -if.end.i.us.us.3: ; preds = %if.end.i.us.us.3.loopexit, %pregion_for_entry.entry.i.us.us.3 - %86 = or i64 %_local_id_x.0.us.us.3, 1 - %add1.i.i.us.us.3.1 = add nuw nsw i64 %86, %mul.i.i - %conv.i.us.us.3.1 = trunc i64 %add1.i.i.us.us.3.1 to i32 - %cmp4.i.us.us.3.1 = icmp slt i32 %conv.i.us.us.3.1, %4 - br i1 %cmp4.i.us.us.3.1, label %if.then.i.us.us.3.1, label %if.end.i.us.us.3.1 - -pregion_for_end.i.us.3.loopexit: ; preds = %if.end.i.us.us.3.1 - br label %pregion_for_end.i.us.3 - -pregion_for_end.i.us.3: ; preds = %pregion_for_end.i.us.3.loopexit, %pregion_for_end.i.us.2 - %87 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.4 = or i32 %87, 4 - %cmp.i.us.4 = icmp slt i32 %conv2.i.us.4, %3 - %mul.i.us.4 = mul nsw i32 %conv2.i.us.4, %4 - %mul8.i.us.4 = mul nsw i32 %conv2.i.us.4, %5 - %88 = sext i32 %mul8.i.us.4 to i64 - br i1 %cmp.i.us.4, label %pregion_for_entry.entry.i.us.us.4.preheader, label %pregion_for_end.i.us.4 - -pregion_for_entry.entry.i.us.us.4.preheader: ; preds = %pregion_for_end.i.us.3 - br label %pregion_for_entry.entry.i.us.us.4 - -pregion_for_entry.entry.i.us.us.4: ; preds = %if.end.i.us.us.4.1, %pregion_for_entry.entry.i.us.us.4.preheader - %_local_id_x.0.us.us.4 = phi i64 [ %166, %if.end.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.us.us.4.preheader ] - %add1.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.0.us.us.4, %mul.i.i - %conv.i.us.us.4 = trunc i64 %add1.i.i.us.us.4 to i32 - %cmp4.i.us.us.4 = icmp slt i32 %conv.i.us.us.4, %4 - br i1 %cmp4.i.us.us.4, label %if.then.i.us.us.4, label %if.end.i.us.us.4 - -if.then.i.us.us.4: ; preds = %pregion_for_entry.entry.i.us.us.4 - %add.i.us.us.4 = add nsw i32 %mul.i.us.4, %conv.i.us.us.4 - %idxprom.i.us.us.4 = sext i32 %add.i.us.us.4 to i64 - %arrayidx.i.us.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.4 = shl i64 %add1.i.i.us.us.4, 32 - %89 = ashr exact i64 %sext.i.us.us.4, 32 - br label %for.body.i.us.us.4 - -for.body.i.us.us.4: ; preds = %for.body.i.us.us.4, %if.then.i.us.us.4 - %indvars.iv.next.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.us.us.4, %for.body.i.us.us.4 ], [ 0, %if.then.i.us.us.4 ] - %90 = phi float [ %96, %for.body.i.us.us.4 ], [ 0.000000e+00, %if.then.i.us.us.4 ] - %91 = add nsw i64 %indvars.iv.next.i3.us.us.4, %88 - %arrayidx11.i.us.us.4 = getelementptr inbounds float, float* %0, i64 %91 - %92 = load float, float* %arrayidx11.i.us.us.4, align 4, !tbaa !12 - %93 = mul nsw i64 %indvars.iv.next.i3.us.us.4, %11 - %94 = add nsw i64 %93, %89 - %arrayidx15.i.us.us.4 = getelementptr inbounds float, float* %1, i64 %94 - %95 = load float, float* %arrayidx15.i.us.us.4, align 4, !tbaa !12 - %96 = tail call float @llvm.fmuladd.f32(float %92, float %95, float %90) #2 - store float %96, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4, 1 - %exitcond.not.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.us.us.4, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.4, label %if.end.i.us.us.4.loopexit, label %for.body.i.us.us.4, !llvm.loop !19 - -if.end.i.us.us.4.loopexit: ; preds = %for.body.i.us.us.4 - br label %if.end.i.us.us.4 - -if.end.i.us.us.4: ; preds = %if.end.i.us.us.4.loopexit, %pregion_for_entry.entry.i.us.us.4 - %97 = or i64 %_local_id_x.0.us.us.4, 1 - %add1.i.i.us.us.4.1 = add nuw nsw i64 %97, %mul.i.i - %conv.i.us.us.4.1 = trunc i64 %add1.i.i.us.us.4.1 to i32 - %cmp4.i.us.us.4.1 = icmp slt i32 %conv.i.us.us.4.1, %4 - br i1 %cmp4.i.us.us.4.1, label %if.then.i.us.us.4.1, label %if.end.i.us.us.4.1 - -pregion_for_end.i.us.4.loopexit: ; preds = %if.end.i.us.us.4.1 - br label %pregion_for_end.i.us.4 - -pregion_for_end.i.us.4: ; preds = %pregion_for_end.i.us.4.loopexit, %pregion_for_end.i.us.3 - %98 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.5 = or i32 %98, 5 - %cmp.i.us.5 = icmp slt i32 %conv2.i.us.5, %3 - %mul.i.us.5 = mul nsw i32 %conv2.i.us.5, %4 - %mul8.i.us.5 = mul nsw i32 %conv2.i.us.5, %5 - %99 = sext i32 %mul8.i.us.5 to i64 - br i1 %cmp.i.us.5, label %pregion_for_entry.entry.i.us.us.5.preheader, label %pregion_for_end.i.us.5 - -pregion_for_entry.entry.i.us.us.5.preheader: ; preds = %pregion_for_end.i.us.4 - br label %pregion_for_entry.entry.i.us.us.5 - -pregion_for_entry.entry.i.us.us.5: ; preds = %if.end.i.us.us.5.1, %pregion_for_entry.entry.i.us.us.5.preheader - %_local_id_x.0.us.us.5 = phi i64 [ %157, %if.end.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.us.us.5.preheader ] - %add1.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.0.us.us.5, %mul.i.i - %conv.i.us.us.5 = trunc i64 %add1.i.i.us.us.5 to i32 - %cmp4.i.us.us.5 = icmp slt i32 %conv.i.us.us.5, %4 - br i1 %cmp4.i.us.us.5, label %if.then.i.us.us.5, label %if.end.i.us.us.5 - -if.then.i.us.us.5: ; preds = %pregion_for_entry.entry.i.us.us.5 - %add.i.us.us.5 = add nsw i32 %mul.i.us.5, %conv.i.us.us.5 - %idxprom.i.us.us.5 = sext i32 %add.i.us.us.5 to i64 - %arrayidx.i.us.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.5 = shl i64 %add1.i.i.us.us.5, 32 - %100 = ashr exact i64 %sext.i.us.us.5, 32 - br label %for.body.i.us.us.5 - -for.body.i.us.us.5: ; preds = %for.body.i.us.us.5, %if.then.i.us.us.5 - %indvars.iv.next.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.us.us.5, %for.body.i.us.us.5 ], [ 0, %if.then.i.us.us.5 ] - %101 = phi float [ %107, %for.body.i.us.us.5 ], [ 0.000000e+00, %if.then.i.us.us.5 ] - %102 = add nsw i64 %indvars.iv.next.i3.us.us.5, %99 - %arrayidx11.i.us.us.5 = getelementptr inbounds float, float* %0, i64 %102 - %103 = load float, float* %arrayidx11.i.us.us.5, align 4, !tbaa !12 - %104 = mul nsw i64 %indvars.iv.next.i3.us.us.5, %11 - %105 = add nsw i64 %104, %100 - %arrayidx15.i.us.us.5 = getelementptr inbounds float, float* %1, i64 %105 - %106 = load float, float* %arrayidx15.i.us.us.5, align 4, !tbaa !12 - %107 = tail call float @llvm.fmuladd.f32(float %103, float %106, float %101) #2 - store float %107, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5, 1 - %exitcond.not.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.us.us.5, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.5, label %if.end.i.us.us.5.loopexit, label %for.body.i.us.us.5, !llvm.loop !19 - -if.end.i.us.us.5.loopexit: ; preds = %for.body.i.us.us.5 - br label %if.end.i.us.us.5 - -if.end.i.us.us.5: ; preds = %if.end.i.us.us.5.loopexit, %pregion_for_entry.entry.i.us.us.5 - %108 = or i64 %_local_id_x.0.us.us.5, 1 - %add1.i.i.us.us.5.1 = add nuw nsw i64 %108, %mul.i.i - %conv.i.us.us.5.1 = trunc i64 %add1.i.i.us.us.5.1 to i32 - %cmp4.i.us.us.5.1 = icmp slt i32 %conv.i.us.us.5.1, %4 - br i1 %cmp4.i.us.us.5.1, label %if.then.i.us.us.5.1, label %if.end.i.us.us.5.1 - -pregion_for_end.i.us.5.loopexit: ; preds = %if.end.i.us.us.5.1 - br label %pregion_for_end.i.us.5 - -pregion_for_end.i.us.5: ; preds = %pregion_for_end.i.us.5.loopexit, %pregion_for_end.i.us.4 - %109 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.6 = or i32 %109, 6 - %cmp.i.us.6 = icmp slt i32 %conv2.i.us.6, %3 - %mul.i.us.6 = mul nsw i32 %conv2.i.us.6, %4 - %mul8.i.us.6 = mul nsw i32 %conv2.i.us.6, %5 - %110 = sext i32 %mul8.i.us.6 to i64 - br i1 %cmp.i.us.6, label %pregion_for_entry.entry.i.us.us.6.preheader, label %pregion_for_end.i.us.6 - -pregion_for_entry.entry.i.us.us.6.preheader: ; preds = %pregion_for_end.i.us.5 - br label %pregion_for_entry.entry.i.us.us.6 - -pregion_for_entry.entry.i.us.us.6: ; preds = %if.end.i.us.us.6.1, %pregion_for_entry.entry.i.us.us.6.preheader - %_local_id_x.0.us.us.6 = phi i64 [ %148, %if.end.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.us.us.6.preheader ] - %add1.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.0.us.us.6, %mul.i.i - %conv.i.us.us.6 = trunc i64 %add1.i.i.us.us.6 to i32 - %cmp4.i.us.us.6 = icmp slt i32 %conv.i.us.us.6, %4 - br i1 %cmp4.i.us.us.6, label %if.then.i.us.us.6, label %if.end.i.us.us.6 - -if.then.i.us.us.6: ; preds = %pregion_for_entry.entry.i.us.us.6 - %add.i.us.us.6 = add nsw i32 %mul.i.us.6, %conv.i.us.us.6 - %idxprom.i.us.us.6 = sext i32 %add.i.us.us.6 to i64 - %arrayidx.i.us.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.6 = shl i64 %add1.i.i.us.us.6, 32 - %111 = ashr exact i64 %sext.i.us.us.6, 32 - br label %for.body.i.us.us.6 - -for.body.i.us.us.6: ; preds = %for.body.i.us.us.6, %if.then.i.us.us.6 - %indvars.iv.next.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.us.us.6, %for.body.i.us.us.6 ], [ 0, %if.then.i.us.us.6 ] - %112 = phi float [ %118, %for.body.i.us.us.6 ], [ 0.000000e+00, %if.then.i.us.us.6 ] - %113 = add nsw i64 %indvars.iv.next.i3.us.us.6, %110 - %arrayidx11.i.us.us.6 = getelementptr inbounds float, float* %0, i64 %113 - %114 = load float, float* %arrayidx11.i.us.us.6, align 4, !tbaa !12 - %115 = mul nsw i64 %indvars.iv.next.i3.us.us.6, %11 - %116 = add nsw i64 %115, %111 - %arrayidx15.i.us.us.6 = getelementptr inbounds float, float* %1, i64 %116 - %117 = load float, float* %arrayidx15.i.us.us.6, align 4, !tbaa !12 - %118 = tail call float @llvm.fmuladd.f32(float %114, float %117, float %112) #2 - store float %118, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6, 1 - %exitcond.not.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.us.us.6, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.6, label %if.end.i.us.us.6.loopexit, label %for.body.i.us.us.6, !llvm.loop !19 - -if.end.i.us.us.6.loopexit: ; preds = %for.body.i.us.us.6 - br label %if.end.i.us.us.6 - -if.end.i.us.us.6: ; preds = %if.end.i.us.us.6.loopexit, %pregion_for_entry.entry.i.us.us.6 - %119 = or i64 %_local_id_x.0.us.us.6, 1 - %add1.i.i.us.us.6.1 = add nuw nsw i64 %119, %mul.i.i - %conv.i.us.us.6.1 = trunc i64 %add1.i.i.us.us.6.1 to i32 - %cmp4.i.us.us.6.1 = icmp slt i32 %conv.i.us.us.6.1, %4 - br i1 %cmp4.i.us.us.6.1, label %if.then.i.us.us.6.1, label %if.end.i.us.us.6.1 - -pregion_for_end.i.us.6.loopexit: ; preds = %if.end.i.us.us.6.1 - br label %pregion_for_end.i.us.6 - -pregion_for_end.i.us.6: ; preds = %pregion_for_end.i.us.6.loopexit, %pregion_for_end.i.us.5 - %120 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.7 = or i32 %120, 7 - %cmp.i.us.7 = icmp slt i32 %conv2.i.us.7, %3 - %mul.i.us.7 = mul nsw i32 %conv2.i.us.7, %4 - %mul8.i.us.7 = mul nsw i32 %conv2.i.us.7, %5 - %121 = sext i32 %mul8.i.us.7 to i64 - br i1 %cmp.i.us.7, label %pregion_for_entry.entry.i.us.us.7.preheader, label %mm3_kernel3.exit - -pregion_for_entry.entry.i.us.us.7.preheader: ; preds = %pregion_for_end.i.us.6 - br label %pregion_for_entry.entry.i.us.us.7 - -pregion_for_entry.entry.i.us.us.7: ; preds = %if.end.i.us.us.7.1, %pregion_for_entry.entry.i.us.us.7.preheader - %_local_id_x.0.us.us.7 = phi i64 [ %139, %if.end.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.us.us.7.preheader ] - %add1.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.0.us.us.7, %mul.i.i - %conv.i.us.us.7 = trunc i64 %add1.i.i.us.us.7 to i32 - %cmp4.i.us.us.7 = icmp slt i32 %conv.i.us.us.7, %4 - br i1 %cmp4.i.us.us.7, label %if.then.i.us.us.7, label %if.end.i.us.us.7 - -if.then.i.us.us.7: ; preds = %pregion_for_entry.entry.i.us.us.7 - %add.i.us.us.7 = add nsw i32 %mul.i.us.7, %conv.i.us.us.7 - %idxprom.i.us.us.7 = sext i32 %add.i.us.us.7 to i64 - %arrayidx.i.us.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.7 = shl i64 %add1.i.i.us.us.7, 32 - %122 = ashr exact i64 %sext.i.us.us.7, 32 - br label %for.body.i.us.us.7 - -for.body.i.us.us.7: ; preds = %for.body.i.us.us.7, %if.then.i.us.us.7 - %indvars.iv.next.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.us.us.7, %for.body.i.us.us.7 ], [ 0, %if.then.i.us.us.7 ] - %123 = phi float [ %129, %for.body.i.us.us.7 ], [ 0.000000e+00, %if.then.i.us.us.7 ] - %124 = add nsw i64 %indvars.iv.next.i3.us.us.7, %121 - %arrayidx11.i.us.us.7 = getelementptr inbounds float, float* %0, i64 %124 - %125 = load float, float* %arrayidx11.i.us.us.7, align 4, !tbaa !12 - %126 = mul nsw i64 %indvars.iv.next.i3.us.us.7, %11 - %127 = add nsw i64 %126, %122 - %arrayidx15.i.us.us.7 = getelementptr inbounds float, float* %1, i64 %127 - %128 = load float, float* %arrayidx15.i.us.us.7, align 4, !tbaa !12 - %129 = tail call float @llvm.fmuladd.f32(float %125, float %128, float %123) #2 - store float %129, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7, 1 - %exitcond.not.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.us.us.7, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.7, label %if.end.i.us.us.7.loopexit, label %for.body.i.us.us.7, !llvm.loop !19 - -if.end.i.us.us.7.loopexit: ; preds = %for.body.i.us.us.7 - br label %if.end.i.us.us.7 - -if.end.i.us.us.7: ; preds = %if.end.i.us.us.7.loopexit, %pregion_for_entry.entry.i.us.us.7 - %130 = or i64 %_local_id_x.0.us.us.7, 1 - %add1.i.i.us.us.7.1 = add nuw nsw i64 %130, %mul.i.i - %conv.i.us.us.7.1 = trunc i64 %add1.i.i.us.us.7.1 to i32 - %cmp4.i.us.us.7.1 = icmp slt i32 %conv.i.us.us.7.1, %4 - br i1 %cmp4.i.us.us.7.1, label %if.then.i.us.us.7.1, label %if.end.i.us.us.7.1 - -if.then.i.us.1: ; preds = %if.end.i.us - %add.i.us.1 = add nsw i32 %mul.i, %conv.i.us.1 - %idxprom.i.us.1 = sext i32 %add.i.us.1 to i64 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1 - -if.end.i.us.1: ; preds = %if.then.i.us.1, %if.end.i.us - br i1 %cmp4.i.us.2, label %if.then.i.us.2, label %if.end.i.us.2 - -if.then.i.us.2: ; preds = %if.end.i.us.1 - %add.i.us.2 = add nsw i32 %mul.i, %conv.i.us.2 - %idxprom.i.us.2 = sext i32 %add.i.us.2 to i64 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2 - -if.end.i.us.2: ; preds = %if.then.i.us.2, %if.end.i.us.1 - br i1 %cmp4.i.us.3, label %if.then.i.us.3, label %if.end.i.us.3 - -if.then.i.us.3: ; preds = %if.end.i.us.2 - %add.i.us.3 = add nsw i32 %mul.i, %conv.i.us.3 - %idxprom.i.us.3 = sext i32 %add.i.us.3 to i64 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3 - -if.end.i.us.3: ; preds = %if.then.i.us.3, %if.end.i.us.2 - br i1 %cmp4.i.us.4, label %if.then.i.us.4, label %if.end.i.us.4 - -if.then.i.us.4: ; preds = %if.end.i.us.3 - %add.i.us.4 = add nsw i32 %mul.i, %conv.i.us.4 - %idxprom.i.us.4 = sext i32 %add.i.us.4 to i64 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4 - -if.end.i.us.4: ; preds = %if.then.i.us.4, %if.end.i.us.3 - br i1 %cmp4.i.us.5, label %if.then.i.us.5, label %if.end.i.us.5 - -if.then.i.us.5: ; preds = %if.end.i.us.4 - %add.i.us.5 = add nsw i32 %mul.i, %conv.i.us.5 - %idxprom.i.us.5 = sext i32 %add.i.us.5 to i64 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5 - -if.end.i.us.5: ; preds = %if.then.i.us.5, %if.end.i.us.4 - br i1 %cmp4.i.us.6, label %if.then.i.us.6, label %if.end.i.us.6 - -if.then.i.us.6: ; preds = %if.end.i.us.5 - %add.i.us.6 = add nsw i32 %mul.i, %conv.i.us.6 - %idxprom.i.us.6 = sext i32 %add.i.us.6 to i64 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6 - -if.end.i.us.6: ; preds = %if.then.i.us.6, %if.end.i.us.5 - br i1 %cmp4.i.us.7, label %if.then.i.us.7, label %if.end.i.us.7 - -if.then.i.us.7: ; preds = %if.end.i.us.6 - %add.i.us.7 = add nsw i32 %mul.i, %conv.i.us.7 - %idxprom.i.us.7 = sext i32 %add.i.us.7 to i64 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7 - -if.end.i.us.7: ; preds = %if.then.i.us.7, %if.end.i.us.6 - br i1 %cmp4.i.us.8, label %if.then.i.us.8, label %if.end.i.us.8 - -if.then.i.us.8: ; preds = %if.end.i.us.7 - %add.i.us.8 = add nsw i32 %mul.i, %conv.i.us.8 - %idxprom.i.us.8 = sext i32 %add.i.us.8 to i64 - %arrayidx.i.us.8 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.8 - -if.end.i.us.8: ; preds = %if.then.i.us.8, %if.end.i.us.7 - br i1 %cmp4.i.us.9, label %if.then.i.us.9, label %if.end.i.us.9 - -if.then.i.us.9: ; preds = %if.end.i.us.8 - %add.i.us.9 = add nsw i32 %mul.i, %conv.i.us.9 - %idxprom.i.us.9 = sext i32 %add.i.us.9 to i64 - %arrayidx.i.us.9 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.9 - -if.end.i.us.9: ; preds = %if.then.i.us.9, %if.end.i.us.8 - br i1 %cmp4.i.us.10, label %if.then.i.us.10, label %if.end.i.us.10 - -if.then.i.us.10: ; preds = %if.end.i.us.9 - %add.i.us.10 = add nsw i32 %mul.i, %conv.i.us.10 - %idxprom.i.us.10 = sext i32 %add.i.us.10 to i64 - %arrayidx.i.us.10 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.10 - -if.end.i.us.10: ; preds = %if.then.i.us.10, %if.end.i.us.9 - br i1 %cmp4.i.us.11, label %if.then.i.us.11, label %if.end.i.us.11 - -if.then.i.us.11: ; preds = %if.end.i.us.10 - %add.i.us.11 = add nsw i32 %mul.i, %conv.i.us.11 - %idxprom.i.us.11 = sext i32 %add.i.us.11 to i64 - %arrayidx.i.us.11 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.11 - -if.end.i.us.11: ; preds = %if.then.i.us.11, %if.end.i.us.10 - br i1 %cmp4.i.us.12, label %if.then.i.us.12, label %if.end.i.us.12 - -if.then.i.us.12: ; preds = %if.end.i.us.11 - %add.i.us.12 = add nsw i32 %mul.i, %conv.i.us.12 - %idxprom.i.us.12 = sext i32 %add.i.us.12 to i64 - %arrayidx.i.us.12 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.12 - -if.end.i.us.12: ; preds = %if.then.i.us.12, %if.end.i.us.11 - br i1 %cmp4.i.us.13, label %if.then.i.us.13, label %if.end.i.us.13 - -if.then.i.us.13: ; preds = %if.end.i.us.12 - %add.i.us.13 = add nsw i32 %mul.i, %conv.i.us.13 - %idxprom.i.us.13 = sext i32 %add.i.us.13 to i64 - %arrayidx.i.us.13 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.13 - -if.end.i.us.13: ; preds = %if.then.i.us.13, %if.end.i.us.12 - br i1 %cmp4.i.us.14, label %if.then.i.us.14, label %if.end.i.us.14 - -if.then.i.us.14: ; preds = %if.end.i.us.13 - %add.i.us.14 = add nsw i32 %mul.i, %conv.i.us.14 - %idxprom.i.us.14 = sext i32 %add.i.us.14 to i64 - %arrayidx.i.us.14 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.14 - -if.end.i.us.14: ; preds = %if.then.i.us.14, %if.end.i.us.13 - br i1 %cmp4.i.us.15, label %if.then.i.us.15, label %if.end.i.us.15 - -if.then.i.us.15: ; preds = %if.end.i.us.14 - %add.i.us.15 = add nsw i32 %mul.i, %conv.i.us.15 - %idxprom.i.us.15 = sext i32 %add.i.us.15 to i64 - %arrayidx.i.us.15 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.15 - -if.end.i.us.15: ; preds = %if.then.i.us.15, %if.end.i.us.14 - br i1 %cmp4.i.us.16, label %if.then.i.us.16, label %if.end.i.us.16 - -if.then.i.us.16: ; preds = %if.end.i.us.15 - %add.i.us.16 = add nsw i32 %mul.i, %conv.i.us.16 - %idxprom.i.us.16 = sext i32 %add.i.us.16 to i64 - %arrayidx.i.us.16 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.16 - -if.end.i.us.16: ; preds = %if.then.i.us.16, %if.end.i.us.15 - br i1 %cmp4.i.us.17, label %if.then.i.us.17, label %if.end.i.us.17 - -if.then.i.us.17: ; preds = %if.end.i.us.16 - %add.i.us.17 = add nsw i32 %mul.i, %conv.i.us.17 - %idxprom.i.us.17 = sext i32 %add.i.us.17 to i64 - %arrayidx.i.us.17 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.17 - -if.end.i.us.17: ; preds = %if.then.i.us.17, %if.end.i.us.16 - br i1 %cmp4.i.us.18, label %if.then.i.us.18, label %if.end.i.us.18 - -if.then.i.us.18: ; preds = %if.end.i.us.17 - %add.i.us.18 = add nsw i32 %mul.i, %conv.i.us.18 - %idxprom.i.us.18 = sext i32 %add.i.us.18 to i64 - %arrayidx.i.us.18 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.18 - -if.end.i.us.18: ; preds = %if.then.i.us.18, %if.end.i.us.17 - br i1 %cmp4.i.us.19, label %if.then.i.us.19, label %if.end.i.us.19 - -if.then.i.us.19: ; preds = %if.end.i.us.18 - %add.i.us.19 = add nsw i32 %mul.i, %conv.i.us.19 - %idxprom.i.us.19 = sext i32 %add.i.us.19 to i64 - %arrayidx.i.us.19 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.19 - -if.end.i.us.19: ; preds = %if.then.i.us.19, %if.end.i.us.18 - br i1 %cmp4.i.us.20, label %if.then.i.us.20, label %if.end.i.us.20 - -if.then.i.us.20: ; preds = %if.end.i.us.19 - %add.i.us.20 = add nsw i32 %mul.i, %conv.i.us.20 - %idxprom.i.us.20 = sext i32 %add.i.us.20 to i64 - %arrayidx.i.us.20 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.20 - -if.end.i.us.20: ; preds = %if.then.i.us.20, %if.end.i.us.19 - br i1 %cmp4.i.us.21, label %if.then.i.us.21, label %if.end.i.us.21 - -if.then.i.us.21: ; preds = %if.end.i.us.20 - %add.i.us.21 = add nsw i32 %mul.i, %conv.i.us.21 - %idxprom.i.us.21 = sext i32 %add.i.us.21 to i64 - %arrayidx.i.us.21 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.21 - -if.end.i.us.21: ; preds = %if.then.i.us.21, %if.end.i.us.20 - br i1 %cmp4.i.us.22, label %if.then.i.us.22, label %if.end.i.us.22 - -if.then.i.us.22: ; preds = %if.end.i.us.21 - %add.i.us.22 = add nsw i32 %mul.i, %conv.i.us.22 - %idxprom.i.us.22 = sext i32 %add.i.us.22 to i64 - %arrayidx.i.us.22 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.22 - -if.end.i.us.22: ; preds = %if.then.i.us.22, %if.end.i.us.21 - br i1 %cmp4.i.us.23, label %if.then.i.us.23, label %if.end.i.us.23 - -if.then.i.us.23: ; preds = %if.end.i.us.22 - %add.i.us.23 = add nsw i32 %mul.i, %conv.i.us.23 - %idxprom.i.us.23 = sext i32 %add.i.us.23 to i64 - %arrayidx.i.us.23 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.23 - -if.end.i.us.23: ; preds = %if.then.i.us.23, %if.end.i.us.22 - br i1 %cmp4.i.us.24, label %if.then.i.us.24, label %if.end.i.us.24 - -if.then.i.us.24: ; preds = %if.end.i.us.23 - %add.i.us.24 = add nsw i32 %mul.i, %conv.i.us.24 - %idxprom.i.us.24 = sext i32 %add.i.us.24 to i64 - %arrayidx.i.us.24 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.24 - -if.end.i.us.24: ; preds = %if.then.i.us.24, %if.end.i.us.23 - br i1 %cmp4.i.us.25, label %if.then.i.us.25, label %if.end.i.us.25 - -if.then.i.us.25: ; preds = %if.end.i.us.24 - %add.i.us.25 = add nsw i32 %mul.i, %conv.i.us.25 - %idxprom.i.us.25 = sext i32 %add.i.us.25 to i64 - %arrayidx.i.us.25 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.25 - -if.end.i.us.25: ; preds = %if.then.i.us.25, %if.end.i.us.24 - br i1 %cmp4.i.us.26, label %if.then.i.us.26, label %if.end.i.us.26 - -if.then.i.us.26: ; preds = %if.end.i.us.25 - %add.i.us.26 = add nsw i32 %mul.i, %conv.i.us.26 - %idxprom.i.us.26 = sext i32 %add.i.us.26 to i64 - %arrayidx.i.us.26 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.26 - -if.end.i.us.26: ; preds = %if.then.i.us.26, %if.end.i.us.25 - br i1 %cmp4.i.us.27, label %if.then.i.us.27, label %if.end.i.us.27 - -if.then.i.us.27: ; preds = %if.end.i.us.26 - %add.i.us.27 = add nsw i32 %mul.i, %conv.i.us.27 - %idxprom.i.us.27 = sext i32 %add.i.us.27 to i64 - %arrayidx.i.us.27 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.27 - -if.end.i.us.27: ; preds = %if.then.i.us.27, %if.end.i.us.26 - br i1 %cmp4.i.us.28, label %if.then.i.us.28, label %if.end.i.us.28 - -if.then.i.us.28: ; preds = %if.end.i.us.27 - %add.i.us.28 = add nsw i32 %mul.i, %conv.i.us.28 - %idxprom.i.us.28 = sext i32 %add.i.us.28 to i64 - %arrayidx.i.us.28 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.28 - -if.end.i.us.28: ; preds = %if.then.i.us.28, %if.end.i.us.27 - br i1 %cmp4.i.us.29, label %if.then.i.us.29, label %if.end.i.us.29 - -if.then.i.us.29: ; preds = %if.end.i.us.28 - %add.i.us.29 = add nsw i32 %mul.i, %conv.i.us.29 - %idxprom.i.us.29 = sext i32 %add.i.us.29 to i64 - %arrayidx.i.us.29 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.29 - -if.end.i.us.29: ; preds = %if.then.i.us.29, %if.end.i.us.28 - br i1 %cmp4.i.us.30, label %if.then.i.us.30, label %if.end.i.us.30 - -if.then.i.us.30: ; preds = %if.end.i.us.29 - %add.i.us.30 = add nsw i32 %mul.i, %conv.i.us.30 - %idxprom.i.us.30 = sext i32 %add.i.us.30 to i64 - %arrayidx.i.us.30 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.30 - -if.end.i.us.30: ; preds = %if.then.i.us.30, %if.end.i.us.29 - br i1 %cmp4.i.us.31, label %if.then.i.us.31, label %pregion_for_end.i - -if.then.i.us.31: ; preds = %if.end.i.us.30 - %add.i.us.31 = add nsw i32 %mul.i, %conv.i.us.31 - %idxprom.i.us.31 = sext i32 %add.i.us.31 to i64 - %arrayidx.i.us.31 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i - -if.then.i.us.us.7.1: ; preds = %if.end.i.us.us.7 - %add.i.us.us.7.1 = add nsw i32 %mul.i.us.7, %conv.i.us.us.7.1 - %idxprom.i.us.us.7.1 = sext i32 %add.i.us.us.7.1 to i64 - %arrayidx.i.us.us.7.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.7.1 = shl i64 %add1.i.i.us.us.7.1, 32 - %131 = ashr exact i64 %sext.i.us.us.7.1, 32 - br label %for.body.i.us.us.7.1 - -for.body.i.us.us.7.1: ; preds = %for.body.i.us.us.7.1, %if.then.i.us.us.7.1 - %indvars.iv.next.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.us.us.7.1, %for.body.i.us.us.7.1 ], [ 0, %if.then.i.us.us.7.1 ] - %132 = phi float [ %138, %for.body.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.us.us.7.1 ] - %133 = add nsw i64 %indvars.iv.next.i3.us.us.7.1, %121 - %arrayidx11.i.us.us.7.1 = getelementptr inbounds float, float* %0, i64 %133 - %134 = load float, float* %arrayidx11.i.us.us.7.1, align 4, !tbaa !12 - %135 = mul nsw i64 %indvars.iv.next.i3.us.us.7.1, %11 - %136 = add nsw i64 %135, %131 - %arrayidx15.i.us.us.7.1 = getelementptr inbounds float, float* %1, i64 %136 - %137 = load float, float* %arrayidx15.i.us.us.7.1, align 4, !tbaa !12 - %138 = tail call float @llvm.fmuladd.f32(float %134, float %137, float %132) #2 - store float %138, float* %arrayidx.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7.1, 1 - %exitcond.not.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.us.us.7.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.7.1, label %if.end.i.us.us.7.1.loopexit, label %for.body.i.us.us.7.1, !llvm.loop !19 - -if.end.i.us.us.7.1.loopexit: ; preds = %for.body.i.us.us.7.1 - br label %if.end.i.us.us.7.1 - -if.end.i.us.us.7.1: ; preds = %if.end.i.us.us.7.1.loopexit, %if.end.i.us.us.7 - %139 = add nuw nsw i64 %_local_id_x.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %139, 32 - br i1 %exitcond.7.not.1, label %mm3_kernel3.exit.loopexit, label %pregion_for_entry.entry.i.us.us.7, !llvm.loop !23 - -if.then.i.us.us.6.1: ; preds = %if.end.i.us.us.6 - %add.i.us.us.6.1 = add nsw i32 %mul.i.us.6, %conv.i.us.us.6.1 - %idxprom.i.us.us.6.1 = sext i32 %add.i.us.us.6.1 to i64 - %arrayidx.i.us.us.6.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.6.1 = shl i64 %add1.i.i.us.us.6.1, 32 - %140 = ashr exact i64 %sext.i.us.us.6.1, 32 - br label %for.body.i.us.us.6.1 - -for.body.i.us.us.6.1: ; preds = %for.body.i.us.us.6.1, %if.then.i.us.us.6.1 - %indvars.iv.next.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.us.us.6.1, %for.body.i.us.us.6.1 ], [ 0, %if.then.i.us.us.6.1 ] - %141 = phi float [ %147, %for.body.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.us.us.6.1 ] - %142 = add nsw i64 %indvars.iv.next.i3.us.us.6.1, %110 - %arrayidx11.i.us.us.6.1 = getelementptr inbounds float, float* %0, i64 %142 - %143 = load float, float* %arrayidx11.i.us.us.6.1, align 4, !tbaa !12 - %144 = mul nsw i64 %indvars.iv.next.i3.us.us.6.1, %11 - %145 = add nsw i64 %144, %140 - %arrayidx15.i.us.us.6.1 = getelementptr inbounds float, float* %1, i64 %145 - %146 = load float, float* %arrayidx15.i.us.us.6.1, align 4, !tbaa !12 - %147 = tail call float @llvm.fmuladd.f32(float %143, float %146, float %141) #2 - store float %147, float* %arrayidx.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6.1, 1 - %exitcond.not.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.us.us.6.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.6.1, label %if.end.i.us.us.6.1.loopexit, label %for.body.i.us.us.6.1, !llvm.loop !19 - -if.end.i.us.us.6.1.loopexit: ; preds = %for.body.i.us.us.6.1 - br label %if.end.i.us.us.6.1 - -if.end.i.us.us.6.1: ; preds = %if.end.i.us.us.6.1.loopexit, %if.end.i.us.us.6 - %148 = add nuw nsw i64 %_local_id_x.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %148, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.us.6.loopexit, label %pregion_for_entry.entry.i.us.us.6, !llvm.loop !23 - -if.then.i.us.us.5.1: ; preds = %if.end.i.us.us.5 - %add.i.us.us.5.1 = add nsw i32 %mul.i.us.5, %conv.i.us.us.5.1 - %idxprom.i.us.us.5.1 = sext i32 %add.i.us.us.5.1 to i64 - %arrayidx.i.us.us.5.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.5.1 = shl i64 %add1.i.i.us.us.5.1, 32 - %149 = ashr exact i64 %sext.i.us.us.5.1, 32 - br label %for.body.i.us.us.5.1 - -for.body.i.us.us.5.1: ; preds = %for.body.i.us.us.5.1, %if.then.i.us.us.5.1 - %indvars.iv.next.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.us.us.5.1, %for.body.i.us.us.5.1 ], [ 0, %if.then.i.us.us.5.1 ] - %150 = phi float [ %156, %for.body.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.us.us.5.1 ] - %151 = add nsw i64 %indvars.iv.next.i3.us.us.5.1, %99 - %arrayidx11.i.us.us.5.1 = getelementptr inbounds float, float* %0, i64 %151 - %152 = load float, float* %arrayidx11.i.us.us.5.1, align 4, !tbaa !12 - %153 = mul nsw i64 %indvars.iv.next.i3.us.us.5.1, %11 - %154 = add nsw i64 %153, %149 - %arrayidx15.i.us.us.5.1 = getelementptr inbounds float, float* %1, i64 %154 - %155 = load float, float* %arrayidx15.i.us.us.5.1, align 4, !tbaa !12 - %156 = tail call float @llvm.fmuladd.f32(float %152, float %155, float %150) #2 - store float %156, float* %arrayidx.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5.1, 1 - %exitcond.not.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.us.us.5.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.5.1, label %if.end.i.us.us.5.1.loopexit, label %for.body.i.us.us.5.1, !llvm.loop !19 - -if.end.i.us.us.5.1.loopexit: ; preds = %for.body.i.us.us.5.1 - br label %if.end.i.us.us.5.1 - -if.end.i.us.us.5.1: ; preds = %if.end.i.us.us.5.1.loopexit, %if.end.i.us.us.5 - %157 = add nuw nsw i64 %_local_id_x.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %157, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.us.5.loopexit, label %pregion_for_entry.entry.i.us.us.5, !llvm.loop !23 - -if.then.i.us.us.4.1: ; preds = %if.end.i.us.us.4 - %add.i.us.us.4.1 = add nsw i32 %mul.i.us.4, %conv.i.us.us.4.1 - %idxprom.i.us.us.4.1 = sext i32 %add.i.us.us.4.1 to i64 - %arrayidx.i.us.us.4.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.4.1 = shl i64 %add1.i.i.us.us.4.1, 32 - %158 = ashr exact i64 %sext.i.us.us.4.1, 32 - br label %for.body.i.us.us.4.1 - -for.body.i.us.us.4.1: ; preds = %for.body.i.us.us.4.1, %if.then.i.us.us.4.1 - %indvars.iv.next.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.us.us.4.1, %for.body.i.us.us.4.1 ], [ 0, %if.then.i.us.us.4.1 ] - %159 = phi float [ %165, %for.body.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.us.us.4.1 ] - %160 = add nsw i64 %indvars.iv.next.i3.us.us.4.1, %88 - %arrayidx11.i.us.us.4.1 = getelementptr inbounds float, float* %0, i64 %160 - %161 = load float, float* %arrayidx11.i.us.us.4.1, align 4, !tbaa !12 - %162 = mul nsw i64 %indvars.iv.next.i3.us.us.4.1, %11 - %163 = add nsw i64 %162, %158 - %arrayidx15.i.us.us.4.1 = getelementptr inbounds float, float* %1, i64 %163 - %164 = load float, float* %arrayidx15.i.us.us.4.1, align 4, !tbaa !12 - %165 = tail call float @llvm.fmuladd.f32(float %161, float %164, float %159) #2 - store float %165, float* %arrayidx.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4.1, 1 - %exitcond.not.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.us.us.4.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.4.1, label %if.end.i.us.us.4.1.loopexit, label %for.body.i.us.us.4.1, !llvm.loop !19 - -if.end.i.us.us.4.1.loopexit: ; preds = %for.body.i.us.us.4.1 - br label %if.end.i.us.us.4.1 - -if.end.i.us.us.4.1: ; preds = %if.end.i.us.us.4.1.loopexit, %if.end.i.us.us.4 - %166 = add nuw nsw i64 %_local_id_x.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %166, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.us.4.loopexit, label %pregion_for_entry.entry.i.us.us.4, !llvm.loop !23 - -if.then.i.us.us.3.1: ; preds = %if.end.i.us.us.3 - %add.i.us.us.3.1 = add nsw i32 %mul.i.us.3, %conv.i.us.us.3.1 - %idxprom.i.us.us.3.1 = sext i32 %add.i.us.us.3.1 to i64 - %arrayidx.i.us.us.3.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.3.1 = shl i64 %add1.i.i.us.us.3.1, 32 - %167 = ashr exact i64 %sext.i.us.us.3.1, 32 - br label %for.body.i.us.us.3.1 - -for.body.i.us.us.3.1: ; preds = %for.body.i.us.us.3.1, %if.then.i.us.us.3.1 - %indvars.iv.next.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.us.us.3.1, %for.body.i.us.us.3.1 ], [ 0, %if.then.i.us.us.3.1 ] - %168 = phi float [ %174, %for.body.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.us.us.3.1 ] - %169 = add nsw i64 %indvars.iv.next.i3.us.us.3.1, %77 - %arrayidx11.i.us.us.3.1 = getelementptr inbounds float, float* %0, i64 %169 - %170 = load float, float* %arrayidx11.i.us.us.3.1, align 4, !tbaa !12 - %171 = mul nsw i64 %indvars.iv.next.i3.us.us.3.1, %11 - %172 = add nsw i64 %171, %167 - %arrayidx15.i.us.us.3.1 = getelementptr inbounds float, float* %1, i64 %172 - %173 = load float, float* %arrayidx15.i.us.us.3.1, align 4, !tbaa !12 - %174 = tail call float @llvm.fmuladd.f32(float %170, float %173, float %168) #2 - store float %174, float* %arrayidx.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3.1, 1 - %exitcond.not.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.us.us.3.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.3.1, label %if.end.i.us.us.3.1.loopexit, label %for.body.i.us.us.3.1, !llvm.loop !19 - -if.end.i.us.us.3.1.loopexit: ; preds = %for.body.i.us.us.3.1 - br label %if.end.i.us.us.3.1 - -if.end.i.us.us.3.1: ; preds = %if.end.i.us.us.3.1.loopexit, %if.end.i.us.us.3 - %175 = add nuw nsw i64 %_local_id_x.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %175, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.us.3.loopexit, label %pregion_for_entry.entry.i.us.us.3, !llvm.loop !23 - -if.then.i.us.us.2.1: ; preds = %if.end.i.us.us.2 - %add.i.us.us.2.1 = add nsw i32 %mul.i.us.2, %conv.i.us.us.2.1 - %idxprom.i.us.us.2.1 = sext i32 %add.i.us.us.2.1 to i64 - %arrayidx.i.us.us.2.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.2.1 = shl i64 %add1.i.i.us.us.2.1, 32 - %176 = ashr exact i64 %sext.i.us.us.2.1, 32 - br label %for.body.i.us.us.2.1 - -for.body.i.us.us.2.1: ; preds = %for.body.i.us.us.2.1, %if.then.i.us.us.2.1 - %indvars.iv.next.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.us.us.2.1, %for.body.i.us.us.2.1 ], [ 0, %if.then.i.us.us.2.1 ] - %177 = phi float [ %183, %for.body.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.us.us.2.1 ] - %178 = add nsw i64 %indvars.iv.next.i3.us.us.2.1, %66 - %arrayidx11.i.us.us.2.1 = getelementptr inbounds float, float* %0, i64 %178 - %179 = load float, float* %arrayidx11.i.us.us.2.1, align 4, !tbaa !12 - %180 = mul nsw i64 %indvars.iv.next.i3.us.us.2.1, %11 - %181 = add nsw i64 %180, %176 - %arrayidx15.i.us.us.2.1 = getelementptr inbounds float, float* %1, i64 %181 - %182 = load float, float* %arrayidx15.i.us.us.2.1, align 4, !tbaa !12 - %183 = tail call float @llvm.fmuladd.f32(float %179, float %182, float %177) #2 - store float %183, float* %arrayidx.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2.1, 1 - %exitcond.not.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.us.us.2.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.2.1, label %if.end.i.us.us.2.1.loopexit, label %for.body.i.us.us.2.1, !llvm.loop !19 - -if.end.i.us.us.2.1.loopexit: ; preds = %for.body.i.us.us.2.1 - br label %if.end.i.us.us.2.1 - -if.end.i.us.us.2.1: ; preds = %if.end.i.us.us.2.1.loopexit, %if.end.i.us.us.2 - %184 = add nuw nsw i64 %_local_id_x.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %184, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.us.2.loopexit, label %pregion_for_entry.entry.i.us.us.2, !llvm.loop !23 - -if.then.i.us.us.1.1: ; preds = %if.end.i.us.us.1 - %add.i.us.us.1.1 = add nsw i32 %mul.i.us.1, %conv.i.us.us.1.1 - %idxprom.i.us.us.1.1 = sext i32 %add.i.us.us.1.1 to i64 - %arrayidx.i.us.us.1.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.1.1 = shl i64 %add1.i.i.us.us.1.1, 32 - %185 = ashr exact i64 %sext.i.us.us.1.1, 32 - br label %for.body.i.us.us.1.1 - -for.body.i.us.us.1.1: ; preds = %for.body.i.us.us.1.1, %if.then.i.us.us.1.1 - %indvars.iv.next.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.us.us.1.1, %for.body.i.us.us.1.1 ], [ 0, %if.then.i.us.us.1.1 ] - %186 = phi float [ %192, %for.body.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.us.us.1.1 ] - %187 = add nsw i64 %indvars.iv.next.i3.us.us.1.1, %45 - %arrayidx11.i.us.us.1.1 = getelementptr inbounds float, float* %0, i64 %187 - %188 = load float, float* %arrayidx11.i.us.us.1.1, align 4, !tbaa !12 - %189 = mul nsw i64 %indvars.iv.next.i3.us.us.1.1, %11 - %190 = add nsw i64 %189, %185 - %arrayidx15.i.us.us.1.1 = getelementptr inbounds float, float* %1, i64 %190 - %191 = load float, float* %arrayidx15.i.us.us.1.1, align 4, !tbaa !12 - %192 = tail call float @llvm.fmuladd.f32(float %188, float %191, float %186) #2 - store float %192, float* %arrayidx.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1.1, 1 - %exitcond.not.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.us.us.1.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.1.1, label %if.end.i.us.us.1.1.loopexit, label %for.body.i.us.us.1.1, !llvm.loop !19 - -if.end.i.us.us.1.1.loopexit: ; preds = %for.body.i.us.us.1.1 - br label %if.end.i.us.us.1.1 - -if.end.i.us.us.1.1: ; preds = %if.end.i.us.us.1.1.loopexit, %if.end.i.us.us.1 - %193 = add nuw nsw i64 %_local_id_x.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %193, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.us.1.loopexit, label %pregion_for_entry.entry.i.us.us.1, !llvm.loop !23 - -if.then.i.us.us.146: ; preds = %if.end.i.us.us - %add.i.us.us.142 = add nsw i32 %mul.i.us, %conv.i.us.us.139 - %idxprom.i.us.us.143 = sext i32 %add.i.us.us.142 to i64 - %arrayidx.i.us.us.144 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.143 - store float 0.000000e+00, float* %arrayidx.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.145 = shl i64 %add1.i.i.us.us.138, 32 - %194 = ashr exact i64 %sext.i.us.us.145, 32 - br label %for.body.i.us.us.152 - -for.body.i.us.us.152: ; preds = %for.body.i.us.us.152, %if.then.i.us.us.146 - %indvars.iv.next.i3.us.us.147 = phi i64 [ %indvars.iv.next.i.us.us.150, %for.body.i.us.us.152 ], [ 0, %if.then.i.us.us.146 ] - %195 = phi float [ %201, %for.body.i.us.us.152 ], [ 0.000000e+00, %if.then.i.us.us.146 ] - %196 = add nsw i64 %indvars.iv.next.i3.us.us.147, %43 - %arrayidx11.i.us.us.148 = getelementptr inbounds float, float* %0, i64 %196 - %197 = load float, float* %arrayidx11.i.us.us.148, align 4, !tbaa !12 - %198 = mul nsw i64 %indvars.iv.next.i3.us.us.147, %11 - %199 = add nsw i64 %198, %194 - %arrayidx15.i.us.us.149 = getelementptr inbounds float, float* %1, i64 %199 - %200 = load float, float* %arrayidx15.i.us.us.149, align 4, !tbaa !12 - %201 = tail call float @llvm.fmuladd.f32(float %197, float %200, float %195) #2 - store float %201, float* %arrayidx.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.150 = add nuw nsw i64 %indvars.iv.next.i3.us.us.147, 1 - %exitcond.not.i.us.us.151 = icmp eq i64 %indvars.iv.next.i.us.us.150, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.151, label %if.end.i.us.us.153.loopexit, label %for.body.i.us.us.152, !llvm.loop !19 - -if.end.i.us.us.153.loopexit: ; preds = %for.body.i.us.us.152 - br label %if.end.i.us.us.153 - -if.end.i.us.us.153: ; preds = %if.end.i.us.us.153.loopexit, %if.end.i.us.us - %202 = add nuw nsw i64 %_local_id_x.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %202, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.us.loopexit, label %pregion_for_entry.entry.i.us.us, !llvm.loop !23 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mm3_kernel3_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 5 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp638.i.i = icmp sgt i32 %28, 0 - %29 = sext i32 %24 to i64 - %wide.trip.count.i.i = zext i32 %28 to i64 - br i1 %cmp638.i.i, label %pregion_for_entry.pregion_for_init.i.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - %conv.i.i.us = trunc i64 %mul.i.i.i to i32 - %cmp4.i.i.us = icmp sgt i32 %24, %conv.i.i.us - %30 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.1 = or i32 %30, 1 - %cmp4.i.i.us.1 = icmp sgt i32 %24, %conv.i.i.us.1 - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.2 = or i32 %31, 2 - %cmp4.i.i.us.2 = icmp sgt i32 %24, %conv.i.i.us.2 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.3 = or i32 %32, 3 - %cmp4.i.i.us.3 = icmp sgt i32 %24, %conv.i.i.us.3 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.4 = or i32 %33, 4 - %cmp4.i.i.us.4 = icmp sgt i32 %24, %conv.i.i.us.4 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.5 = or i32 %34, 5 - %cmp4.i.i.us.5 = icmp sgt i32 %24, %conv.i.i.us.5 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.6 = or i32 %35, 6 - %cmp4.i.i.us.6 = icmp sgt i32 %24, %conv.i.i.us.6 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.7 = or i32 %36, 7 - %cmp4.i.i.us.7 = icmp sgt i32 %24, %conv.i.i.us.7 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.8 = or i32 %37, 8 - %cmp4.i.i.us.8 = icmp sgt i32 %24, %conv.i.i.us.8 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.9 = or i32 %38, 9 - %cmp4.i.i.us.9 = icmp sgt i32 %24, %conv.i.i.us.9 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.10 = or i32 %39, 10 - %cmp4.i.i.us.10 = icmp sgt i32 %24, %conv.i.i.us.10 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.11 = or i32 %40, 11 - %cmp4.i.i.us.11 = icmp sgt i32 %24, %conv.i.i.us.11 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.12 = or i32 %41, 12 - %cmp4.i.i.us.12 = icmp sgt i32 %24, %conv.i.i.us.12 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.13 = or i32 %42, 13 - %cmp4.i.i.us.13 = icmp sgt i32 %24, %conv.i.i.us.13 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.14 = or i32 %43, 14 - %cmp4.i.i.us.14 = icmp sgt i32 %24, %conv.i.i.us.14 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.15 = or i32 %44, 15 - %cmp4.i.i.us.15 = icmp sgt i32 %24, %conv.i.i.us.15 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.16 = or i32 %45, 16 - %cmp4.i.i.us.16 = icmp sgt i32 %24, %conv.i.i.us.16 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.17 = or i32 %46, 17 - %cmp4.i.i.us.17 = icmp sgt i32 %24, %conv.i.i.us.17 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.18 = or i32 %47, 18 - %cmp4.i.i.us.18 = icmp sgt i32 %24, %conv.i.i.us.18 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.19 = or i32 %48, 19 - %cmp4.i.i.us.19 = icmp sgt i32 %24, %conv.i.i.us.19 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.20 = or i32 %49, 20 - %cmp4.i.i.us.20 = icmp sgt i32 %24, %conv.i.i.us.20 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.21 = or i32 %50, 21 - %cmp4.i.i.us.21 = icmp sgt i32 %24, %conv.i.i.us.21 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.22 = or i32 %51, 22 - %cmp4.i.i.us.22 = icmp sgt i32 %24, %conv.i.i.us.22 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.23 = or i32 %52, 23 - %cmp4.i.i.us.23 = icmp sgt i32 %24, %conv.i.i.us.23 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.24 = or i32 %53, 24 - %cmp4.i.i.us.24 = icmp sgt i32 %24, %conv.i.i.us.24 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.25 = or i32 %54, 25 - %cmp4.i.i.us.25 = icmp sgt i32 %24, %conv.i.i.us.25 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.26 = or i32 %55, 26 - %cmp4.i.i.us.26 = icmp sgt i32 %24, %conv.i.i.us.26 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.27 = or i32 %56, 27 - %cmp4.i.i.us.27 = icmp sgt i32 %24, %conv.i.i.us.27 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.28 = or i32 %57, 28 - %cmp4.i.i.us.28 = icmp sgt i32 %24, %conv.i.i.us.28 - %58 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.29 = or i32 %58, 29 - %cmp4.i.i.us.29 = icmp sgt i32 %24, %conv.i.i.us.29 - %59 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.30 = or i32 %59, 30 - %cmp4.i.i.us.30 = icmp sgt i32 %24, %conv.i.i.us.30 - %60 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.31 = or i32 %60, 31 - %cmp4.i.i.us.31 = icmp sgt i32 %24, %conv.i.i.us.31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i.us.preheader: ; preds = %5 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %20, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %24, %conv2.i.i.us - %mul8.i.i.us = mul nsw i32 %28, %conv2.i.i.us - %61 = sext i32 %mul8.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us.preheader - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us.153 - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us.preheader - %62 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %62, 1 - %cmp.i.i.us.1 = icmp sgt i32 %20, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %24, %conv2.i.i.us.1 - %mul8.i.i.us.1 = mul nsw i32 %28, %conv2.i.i.us.1 - %63 = sext i32 %mul8.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us.153, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %220, %if.end.i.i.us.us.153 ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %24, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us - store float 0.000000e+00, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %64 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %65 = or i64 %_local_id_x.i.0.us.us, 1 - %add1.i.i.i.us.us.138 = add nuw nsw i64 %65, %mul.i.i.i - %conv.i.i.us.us.139 = trunc i64 %add1.i.i.i.us.us.138 to i32 - %cmp4.i.i.us.us.140 = icmp sgt i32 %24, %conv.i.i.us.us.139 - br i1 %cmp4.i.i.us.us.140, label %if.then.i.i.us.us.146, label %if.end.i.i.us.us.153 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %66 = phi float [ %72, %for.body.i.i.us.us ], [ 0.000000e+00, %if.then.i.i.us.us ] - %67 = add nsw i64 %indvars.iv.next.i.i3.us.us, %61 - %arrayidx11.i.i.us.us = getelementptr inbounds float, float* %8, i64 %67 - %68 = load float, float* %arrayidx11.i.i.us.us, align 4, !tbaa !12 - %69 = mul nsw i64 %indvars.iv.next.i.i3.us.us, %29 - %70 = add nsw i64 %69, %64 - %arrayidx15.i.i.us.us = getelementptr inbounds float, float* %12, i64 %70 - %71 = load float, float* %arrayidx15.i.i.us.us, align 4, !tbaa !12 - %72 = tail call float @llvm.fmuladd.f32(float %68, float %71, float %66) #2 - store float %72, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %pregion_for_entry.pregion_for_init.i.i.preheader - %_local_id_y.i.0 = phi i64 [ %73, %pregion_for_end.i.i ], [ 0, %pregion_for_entry.pregion_for_init.i.i.preheader ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %20, %conv2.i.i - %mul.i.i = mul nsw i32 %24, %conv2.i.i - br i1 %cmp.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us.preheader - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -pregion_for_end.i.i: ; preds = %if.then.i.i.us.31, %if.end.i.i.us.30, %pregion_for_entry.pregion_for_init.i.i - %73 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond33.not = icmp eq i64 %73, 8 - br i1 %exitcond33.not, label %_pocl_kernel_mm3_kernel3.exit.loopexit54, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_mm3_kernel3.exit.loopexit: ; preds = %if.end.i.i.us.us.7.1 - br label %_pocl_kernel_mm3_kernel3.exit - -_pocl_kernel_mm3_kernel3.exit.loopexit54: ; preds = %pregion_for_end.i.i - br label %_pocl_kernel_mm3_kernel3.exit - -_pocl_kernel_mm3_kernel3.exit: ; preds = %pregion_for_end.i.i.us.6, %_pocl_kernel_mm3_kernel3.exit.loopexit54, %_pocl_kernel_mm3_kernel3.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %211, %if.end.i.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %24, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %74 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %75 = phi float [ %81, %for.body.i.i.us.us.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1 ] - %76 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %63 - %arrayidx11.i.i.us.us.1 = getelementptr inbounds float, float* %8, i64 %76 - %77 = load float, float* %arrayidx11.i.i.us.us.1, align 4, !tbaa !12 - %78 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1, %29 - %79 = add nsw i64 %78, %74 - %arrayidx15.i.i.us.us.1 = getelementptr inbounds float, float* %12, i64 %79 - %80 = load float, float* %arrayidx15.i.i.us.us.1, align 4, !tbaa !12 - %81 = tail call float @llvm.fmuladd.f32(float %77, float %80, float %75) #2 - store float %81, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !19 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %82 = or i64 %_local_id_x.i.0.us.us.1, 1 - %add1.i.i.i.us.us.1.1 = add nuw nsw i64 %82, %mul.i.i.i - %conv.i.i.us.us.1.1 = trunc i64 %add1.i.i.i.us.us.1.1 to i32 - %cmp4.i.i.us.us.1.1 = icmp sgt i32 %24, %conv.i.i.us.us.1.1 - br i1 %cmp4.i.i.us.us.1.1, label %if.then.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %83 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %83, 2 - %cmp.i.i.us.2 = icmp sgt i32 %20, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %24, %conv2.i.i.us.2 - %mul8.i.i.us.2 = mul nsw i32 %28, %conv2.i.i.us.2 - %84 = sext i32 %mul8.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.1, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %202, %if.end.i.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %24, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %85 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %86 = phi float [ %92, %for.body.i.i.us.us.2 ], [ 0.000000e+00, %if.then.i.i.us.us.2 ] - %87 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %84 - %arrayidx11.i.i.us.us.2 = getelementptr inbounds float, float* %8, i64 %87 - %88 = load float, float* %arrayidx11.i.i.us.us.2, align 4, !tbaa !12 - %89 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2, %29 - %90 = add nsw i64 %89, %85 - %arrayidx15.i.i.us.us.2 = getelementptr inbounds float, float* %12, i64 %90 - %91 = load float, float* %arrayidx15.i.i.us.us.2, align 4, !tbaa !12 - %92 = tail call float @llvm.fmuladd.f32(float %88, float %91, float %86) #2 - store float %92, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !19 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %93 = or i64 %_local_id_x.i.0.us.us.2, 1 - %add1.i.i.i.us.us.2.1 = add nuw nsw i64 %93, %mul.i.i.i - %conv.i.i.us.us.2.1 = trunc i64 %add1.i.i.i.us.us.2.1 to i32 - %cmp4.i.i.us.us.2.1 = icmp sgt i32 %24, %conv.i.i.us.us.2.1 - br i1 %cmp4.i.i.us.us.2.1, label %if.then.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2.1 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %94 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %94, 3 - %cmp.i.i.us.3 = icmp sgt i32 %20, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %24, %conv2.i.i.us.3 - %mul8.i.i.us.3 = mul nsw i32 %28, %conv2.i.i.us.3 - %95 = sext i32 %mul8.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.1, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %193, %if.end.i.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %24, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %96 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %97 = phi float [ %103, %for.body.i.i.us.us.3 ], [ 0.000000e+00, %if.then.i.i.us.us.3 ] - %98 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %95 - %arrayidx11.i.i.us.us.3 = getelementptr inbounds float, float* %8, i64 %98 - %99 = load float, float* %arrayidx11.i.i.us.us.3, align 4, !tbaa !12 - %100 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3, %29 - %101 = add nsw i64 %100, %96 - %arrayidx15.i.i.us.us.3 = getelementptr inbounds float, float* %12, i64 %101 - %102 = load float, float* %arrayidx15.i.i.us.us.3, align 4, !tbaa !12 - %103 = tail call float @llvm.fmuladd.f32(float %99, float %102, float %97) #2 - store float %103, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !19 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %104 = or i64 %_local_id_x.i.0.us.us.3, 1 - %add1.i.i.i.us.us.3.1 = add nuw nsw i64 %104, %mul.i.i.i - %conv.i.i.us.us.3.1 = trunc i64 %add1.i.i.i.us.us.3.1 to i32 - %cmp4.i.i.us.us.3.1 = icmp sgt i32 %24, %conv.i.i.us.us.3.1 - br i1 %cmp4.i.i.us.us.3.1, label %if.then.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3.1 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %105 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %105, 4 - %cmp.i.i.us.4 = icmp sgt i32 %20, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %24, %conv2.i.i.us.4 - %mul8.i.i.us.4 = mul nsw i32 %28, %conv2.i.i.us.4 - %106 = sext i32 %mul8.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.1, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %184, %if.end.i.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %24, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %107 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %108 = phi float [ %114, %for.body.i.i.us.us.4 ], [ 0.000000e+00, %if.then.i.i.us.us.4 ] - %109 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %106 - %arrayidx11.i.i.us.us.4 = getelementptr inbounds float, float* %8, i64 %109 - %110 = load float, float* %arrayidx11.i.i.us.us.4, align 4, !tbaa !12 - %111 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4, %29 - %112 = add nsw i64 %111, %107 - %arrayidx15.i.i.us.us.4 = getelementptr inbounds float, float* %12, i64 %112 - %113 = load float, float* %arrayidx15.i.i.us.us.4, align 4, !tbaa !12 - %114 = tail call float @llvm.fmuladd.f32(float %110, float %113, float %108) #2 - store float %114, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !19 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %115 = or i64 %_local_id_x.i.0.us.us.4, 1 - %add1.i.i.i.us.us.4.1 = add nuw nsw i64 %115, %mul.i.i.i - %conv.i.i.us.us.4.1 = trunc i64 %add1.i.i.i.us.us.4.1 to i32 - %cmp4.i.i.us.us.4.1 = icmp sgt i32 %24, %conv.i.i.us.us.4.1 - br i1 %cmp4.i.i.us.us.4.1, label %if.then.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4.1 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %116 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %116, 5 - %cmp.i.i.us.5 = icmp sgt i32 %20, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %24, %conv2.i.i.us.5 - %mul8.i.i.us.5 = mul nsw i32 %28, %conv2.i.i.us.5 - %117 = sext i32 %mul8.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.1, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %175, %if.end.i.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %24, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %118 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %119 = phi float [ %125, %for.body.i.i.us.us.5 ], [ 0.000000e+00, %if.then.i.i.us.us.5 ] - %120 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %117 - %arrayidx11.i.i.us.us.5 = getelementptr inbounds float, float* %8, i64 %120 - %121 = load float, float* %arrayidx11.i.i.us.us.5, align 4, !tbaa !12 - %122 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5, %29 - %123 = add nsw i64 %122, %118 - %arrayidx15.i.i.us.us.5 = getelementptr inbounds float, float* %12, i64 %123 - %124 = load float, float* %arrayidx15.i.i.us.us.5, align 4, !tbaa !12 - %125 = tail call float @llvm.fmuladd.f32(float %121, float %124, float %119) #2 - store float %125, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !19 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %126 = or i64 %_local_id_x.i.0.us.us.5, 1 - %add1.i.i.i.us.us.5.1 = add nuw nsw i64 %126, %mul.i.i.i - %conv.i.i.us.us.5.1 = trunc i64 %add1.i.i.i.us.us.5.1 to i32 - %cmp4.i.i.us.us.5.1 = icmp sgt i32 %24, %conv.i.i.us.us.5.1 - br i1 %cmp4.i.i.us.us.5.1, label %if.then.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5.1 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %127 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %127, 6 - %cmp.i.i.us.6 = icmp sgt i32 %20, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %24, %conv2.i.i.us.6 - %mul8.i.i.us.6 = mul nsw i32 %28, %conv2.i.i.us.6 - %128 = sext i32 %mul8.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.1, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %166, %if.end.i.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %24, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %129 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %130 = phi float [ %136, %for.body.i.i.us.us.6 ], [ 0.000000e+00, %if.then.i.i.us.us.6 ] - %131 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %128 - %arrayidx11.i.i.us.us.6 = getelementptr inbounds float, float* %8, i64 %131 - %132 = load float, float* %arrayidx11.i.i.us.us.6, align 4, !tbaa !12 - %133 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6, %29 - %134 = add nsw i64 %133, %129 - %arrayidx15.i.i.us.us.6 = getelementptr inbounds float, float* %12, i64 %134 - %135 = load float, float* %arrayidx15.i.i.us.us.6, align 4, !tbaa !12 - %136 = tail call float @llvm.fmuladd.f32(float %132, float %135, float %130) #2 - store float %136, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !19 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %137 = or i64 %_local_id_x.i.0.us.us.6, 1 - %add1.i.i.i.us.us.6.1 = add nuw nsw i64 %137, %mul.i.i.i - %conv.i.i.us.us.6.1 = trunc i64 %add1.i.i.i.us.us.6.1 to i32 - %cmp4.i.i.us.us.6.1 = icmp sgt i32 %24, %conv.i.i.us.us.6.1 - br i1 %cmp4.i.i.us.us.6.1, label %if.then.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6.1 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %138 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %138, 7 - %cmp.i.i.us.7 = icmp sgt i32 %20, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %24, %conv2.i.i.us.7 - %mul8.i.i.us.7 = mul nsw i32 %28, %conv2.i.i.us.7 - %139 = sext i32 %mul8.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_mm3_kernel3.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.1, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %157, %if.end.i.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %24, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %140 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %141 = phi float [ %147, %for.body.i.i.us.us.7 ], [ 0.000000e+00, %if.then.i.i.us.us.7 ] - %142 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %139 - %arrayidx11.i.i.us.us.7 = getelementptr inbounds float, float* %8, i64 %142 - %143 = load float, float* %arrayidx11.i.i.us.us.7, align 4, !tbaa !12 - %144 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7, %29 - %145 = add nsw i64 %144, %140 - %arrayidx15.i.i.us.us.7 = getelementptr inbounds float, float* %12, i64 %145 - %146 = load float, float* %arrayidx15.i.i.us.us.7, align 4, !tbaa !12 - %147 = tail call float @llvm.fmuladd.f32(float %143, float %146, float %141) #2 - store float %147, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !19 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %148 = or i64 %_local_id_x.i.0.us.us.7, 1 - %add1.i.i.i.us.us.7.1 = add nuw nsw i64 %148, %mul.i.i.i - %conv.i.i.us.us.7.1 = trunc i64 %add1.i.i.i.us.us.7.1 to i32 - %cmp4.i.i.us.us.7.1 = icmp sgt i32 %24, %conv.i.i.us.us.7.1 - br i1 %cmp4.i.i.us.us.7.1, label %if.then.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1 - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %add.i.i.us.1 = add nsw i32 %mul.i.i, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %if.end.i.i.us - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %if.end.i.i.us.1 - %add.i.i.us.2 = add nsw i32 %mul.i.i, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %if.end.i.i.us.1 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %if.end.i.i.us.2 - %add.i.i.us.3 = add nsw i32 %mul.i.i, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %if.end.i.i.us.2 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %if.end.i.i.us.3 - %add.i.i.us.4 = add nsw i32 %mul.i.i, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %if.end.i.i.us.3 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %if.end.i.i.us.4 - %add.i.i.us.5 = add nsw i32 %mul.i.i, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %if.end.i.i.us.4 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %if.end.i.i.us.5 - %add.i.i.us.6 = add nsw i32 %mul.i.i, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %if.end.i.i.us.5 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %if.end.i.i.us.6 - %add.i.i.us.7 = add nsw i32 %mul.i.i, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %if.end.i.i.us.6 - br i1 %cmp4.i.i.us.8, label %if.then.i.i.us.8, label %if.end.i.i.us.8 - -if.then.i.i.us.8: ; preds = %if.end.i.i.us.7 - %add.i.i.us.8 = add nsw i32 %mul.i.i, %conv.i.i.us.8 - %idxprom.i.i.us.8 = sext i32 %add.i.i.us.8 to i64 - %arrayidx.i.i.us.8 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.8 - -if.end.i.i.us.8: ; preds = %if.then.i.i.us.8, %if.end.i.i.us.7 - br i1 %cmp4.i.i.us.9, label %if.then.i.i.us.9, label %if.end.i.i.us.9 - -if.then.i.i.us.9: ; preds = %if.end.i.i.us.8 - %add.i.i.us.9 = add nsw i32 %mul.i.i, %conv.i.i.us.9 - %idxprom.i.i.us.9 = sext i32 %add.i.i.us.9 to i64 - %arrayidx.i.i.us.9 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.9 - -if.end.i.i.us.9: ; preds = %if.then.i.i.us.9, %if.end.i.i.us.8 - br i1 %cmp4.i.i.us.10, label %if.then.i.i.us.10, label %if.end.i.i.us.10 - -if.then.i.i.us.10: ; preds = %if.end.i.i.us.9 - %add.i.i.us.10 = add nsw i32 %mul.i.i, %conv.i.i.us.10 - %idxprom.i.i.us.10 = sext i32 %add.i.i.us.10 to i64 - %arrayidx.i.i.us.10 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.10 - -if.end.i.i.us.10: ; preds = %if.then.i.i.us.10, %if.end.i.i.us.9 - br i1 %cmp4.i.i.us.11, label %if.then.i.i.us.11, label %if.end.i.i.us.11 - -if.then.i.i.us.11: ; preds = %if.end.i.i.us.10 - %add.i.i.us.11 = add nsw i32 %mul.i.i, %conv.i.i.us.11 - %idxprom.i.i.us.11 = sext i32 %add.i.i.us.11 to i64 - %arrayidx.i.i.us.11 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.11 - -if.end.i.i.us.11: ; preds = %if.then.i.i.us.11, %if.end.i.i.us.10 - br i1 %cmp4.i.i.us.12, label %if.then.i.i.us.12, label %if.end.i.i.us.12 - -if.then.i.i.us.12: ; preds = %if.end.i.i.us.11 - %add.i.i.us.12 = add nsw i32 %mul.i.i, %conv.i.i.us.12 - %idxprom.i.i.us.12 = sext i32 %add.i.i.us.12 to i64 - %arrayidx.i.i.us.12 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.12 - -if.end.i.i.us.12: ; preds = %if.then.i.i.us.12, %if.end.i.i.us.11 - br i1 %cmp4.i.i.us.13, label %if.then.i.i.us.13, label %if.end.i.i.us.13 - -if.then.i.i.us.13: ; preds = %if.end.i.i.us.12 - %add.i.i.us.13 = add nsw i32 %mul.i.i, %conv.i.i.us.13 - %idxprom.i.i.us.13 = sext i32 %add.i.i.us.13 to i64 - %arrayidx.i.i.us.13 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.13 - -if.end.i.i.us.13: ; preds = %if.then.i.i.us.13, %if.end.i.i.us.12 - br i1 %cmp4.i.i.us.14, label %if.then.i.i.us.14, label %if.end.i.i.us.14 - -if.then.i.i.us.14: ; preds = %if.end.i.i.us.13 - %add.i.i.us.14 = add nsw i32 %mul.i.i, %conv.i.i.us.14 - %idxprom.i.i.us.14 = sext i32 %add.i.i.us.14 to i64 - %arrayidx.i.i.us.14 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.14 - -if.end.i.i.us.14: ; preds = %if.then.i.i.us.14, %if.end.i.i.us.13 - br i1 %cmp4.i.i.us.15, label %if.then.i.i.us.15, label %if.end.i.i.us.15 - -if.then.i.i.us.15: ; preds = %if.end.i.i.us.14 - %add.i.i.us.15 = add nsw i32 %mul.i.i, %conv.i.i.us.15 - %idxprom.i.i.us.15 = sext i32 %add.i.i.us.15 to i64 - %arrayidx.i.i.us.15 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.15 - -if.end.i.i.us.15: ; preds = %if.then.i.i.us.15, %if.end.i.i.us.14 - br i1 %cmp4.i.i.us.16, label %if.then.i.i.us.16, label %if.end.i.i.us.16 - -if.then.i.i.us.16: ; preds = %if.end.i.i.us.15 - %add.i.i.us.16 = add nsw i32 %mul.i.i, %conv.i.i.us.16 - %idxprom.i.i.us.16 = sext i32 %add.i.i.us.16 to i64 - %arrayidx.i.i.us.16 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.16 - -if.end.i.i.us.16: ; preds = %if.then.i.i.us.16, %if.end.i.i.us.15 - br i1 %cmp4.i.i.us.17, label %if.then.i.i.us.17, label %if.end.i.i.us.17 - -if.then.i.i.us.17: ; preds = %if.end.i.i.us.16 - %add.i.i.us.17 = add nsw i32 %mul.i.i, %conv.i.i.us.17 - %idxprom.i.i.us.17 = sext i32 %add.i.i.us.17 to i64 - %arrayidx.i.i.us.17 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.17 - -if.end.i.i.us.17: ; preds = %if.then.i.i.us.17, %if.end.i.i.us.16 - br i1 %cmp4.i.i.us.18, label %if.then.i.i.us.18, label %if.end.i.i.us.18 - -if.then.i.i.us.18: ; preds = %if.end.i.i.us.17 - %add.i.i.us.18 = add nsw i32 %mul.i.i, %conv.i.i.us.18 - %idxprom.i.i.us.18 = sext i32 %add.i.i.us.18 to i64 - %arrayidx.i.i.us.18 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.18 - -if.end.i.i.us.18: ; preds = %if.then.i.i.us.18, %if.end.i.i.us.17 - br i1 %cmp4.i.i.us.19, label %if.then.i.i.us.19, label %if.end.i.i.us.19 - -if.then.i.i.us.19: ; preds = %if.end.i.i.us.18 - %add.i.i.us.19 = add nsw i32 %mul.i.i, %conv.i.i.us.19 - %idxprom.i.i.us.19 = sext i32 %add.i.i.us.19 to i64 - %arrayidx.i.i.us.19 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.19 - -if.end.i.i.us.19: ; preds = %if.then.i.i.us.19, %if.end.i.i.us.18 - br i1 %cmp4.i.i.us.20, label %if.then.i.i.us.20, label %if.end.i.i.us.20 - -if.then.i.i.us.20: ; preds = %if.end.i.i.us.19 - %add.i.i.us.20 = add nsw i32 %mul.i.i, %conv.i.i.us.20 - %idxprom.i.i.us.20 = sext i32 %add.i.i.us.20 to i64 - %arrayidx.i.i.us.20 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.20 - -if.end.i.i.us.20: ; preds = %if.then.i.i.us.20, %if.end.i.i.us.19 - br i1 %cmp4.i.i.us.21, label %if.then.i.i.us.21, label %if.end.i.i.us.21 - -if.then.i.i.us.21: ; preds = %if.end.i.i.us.20 - %add.i.i.us.21 = add nsw i32 %mul.i.i, %conv.i.i.us.21 - %idxprom.i.i.us.21 = sext i32 %add.i.i.us.21 to i64 - %arrayidx.i.i.us.21 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.21 - -if.end.i.i.us.21: ; preds = %if.then.i.i.us.21, %if.end.i.i.us.20 - br i1 %cmp4.i.i.us.22, label %if.then.i.i.us.22, label %if.end.i.i.us.22 - -if.then.i.i.us.22: ; preds = %if.end.i.i.us.21 - %add.i.i.us.22 = add nsw i32 %mul.i.i, %conv.i.i.us.22 - %idxprom.i.i.us.22 = sext i32 %add.i.i.us.22 to i64 - %arrayidx.i.i.us.22 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.22 - -if.end.i.i.us.22: ; preds = %if.then.i.i.us.22, %if.end.i.i.us.21 - br i1 %cmp4.i.i.us.23, label %if.then.i.i.us.23, label %if.end.i.i.us.23 - -if.then.i.i.us.23: ; preds = %if.end.i.i.us.22 - %add.i.i.us.23 = add nsw i32 %mul.i.i, %conv.i.i.us.23 - %idxprom.i.i.us.23 = sext i32 %add.i.i.us.23 to i64 - %arrayidx.i.i.us.23 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.23 - -if.end.i.i.us.23: ; preds = %if.then.i.i.us.23, %if.end.i.i.us.22 - br i1 %cmp4.i.i.us.24, label %if.then.i.i.us.24, label %if.end.i.i.us.24 - -if.then.i.i.us.24: ; preds = %if.end.i.i.us.23 - %add.i.i.us.24 = add nsw i32 %mul.i.i, %conv.i.i.us.24 - %idxprom.i.i.us.24 = sext i32 %add.i.i.us.24 to i64 - %arrayidx.i.i.us.24 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.24 - -if.end.i.i.us.24: ; preds = %if.then.i.i.us.24, %if.end.i.i.us.23 - br i1 %cmp4.i.i.us.25, label %if.then.i.i.us.25, label %if.end.i.i.us.25 - -if.then.i.i.us.25: ; preds = %if.end.i.i.us.24 - %add.i.i.us.25 = add nsw i32 %mul.i.i, %conv.i.i.us.25 - %idxprom.i.i.us.25 = sext i32 %add.i.i.us.25 to i64 - %arrayidx.i.i.us.25 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.25 - -if.end.i.i.us.25: ; preds = %if.then.i.i.us.25, %if.end.i.i.us.24 - br i1 %cmp4.i.i.us.26, label %if.then.i.i.us.26, label %if.end.i.i.us.26 - -if.then.i.i.us.26: ; preds = %if.end.i.i.us.25 - %add.i.i.us.26 = add nsw i32 %mul.i.i, %conv.i.i.us.26 - %idxprom.i.i.us.26 = sext i32 %add.i.i.us.26 to i64 - %arrayidx.i.i.us.26 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.26 - -if.end.i.i.us.26: ; preds = %if.then.i.i.us.26, %if.end.i.i.us.25 - br i1 %cmp4.i.i.us.27, label %if.then.i.i.us.27, label %if.end.i.i.us.27 - -if.then.i.i.us.27: ; preds = %if.end.i.i.us.26 - %add.i.i.us.27 = add nsw i32 %mul.i.i, %conv.i.i.us.27 - %idxprom.i.i.us.27 = sext i32 %add.i.i.us.27 to i64 - %arrayidx.i.i.us.27 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.27 - -if.end.i.i.us.27: ; preds = %if.then.i.i.us.27, %if.end.i.i.us.26 - br i1 %cmp4.i.i.us.28, label %if.then.i.i.us.28, label %if.end.i.i.us.28 - -if.then.i.i.us.28: ; preds = %if.end.i.i.us.27 - %add.i.i.us.28 = add nsw i32 %mul.i.i, %conv.i.i.us.28 - %idxprom.i.i.us.28 = sext i32 %add.i.i.us.28 to i64 - %arrayidx.i.i.us.28 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.28 - -if.end.i.i.us.28: ; preds = %if.then.i.i.us.28, %if.end.i.i.us.27 - br i1 %cmp4.i.i.us.29, label %if.then.i.i.us.29, label %if.end.i.i.us.29 - -if.then.i.i.us.29: ; preds = %if.end.i.i.us.28 - %add.i.i.us.29 = add nsw i32 %mul.i.i, %conv.i.i.us.29 - %idxprom.i.i.us.29 = sext i32 %add.i.i.us.29 to i64 - %arrayidx.i.i.us.29 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.29 - -if.end.i.i.us.29: ; preds = %if.then.i.i.us.29, %if.end.i.i.us.28 - br i1 %cmp4.i.i.us.30, label %if.then.i.i.us.30, label %if.end.i.i.us.30 - -if.then.i.i.us.30: ; preds = %if.end.i.i.us.29 - %add.i.i.us.30 = add nsw i32 %mul.i.i, %conv.i.i.us.30 - %idxprom.i.i.us.30 = sext i32 %add.i.i.us.30 to i64 - %arrayidx.i.i.us.30 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.30 - -if.end.i.i.us.30: ; preds = %if.then.i.i.us.30, %if.end.i.i.us.29 - br i1 %cmp4.i.i.us.31, label %if.then.i.i.us.31, label %pregion_for_end.i.i - -if.then.i.i.us.31: ; preds = %if.end.i.i.us.30 - %add.i.i.us.31 = add nsw i32 %mul.i.i, %conv.i.i.us.31 - %idxprom.i.i.us.31 = sext i32 %add.i.i.us.31 to i64 - %arrayidx.i.i.us.31 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -if.then.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7 - %add.i.i.us.us.7.1 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7.1 - %idxprom.i.i.us.us.7.1 = sext i32 %add.i.i.us.us.7.1 to i64 - %arrayidx.i.i.us.us.7.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7.1 = shl i64 %add1.i.i.i.us.us.7.1, 32 - %149 = ashr exact i64 %sext.i.i.us.us.7.1, 32 - br label %for.body.i.i.us.us.7.1 - -for.body.i.i.us.us.7.1: ; preds = %for.body.i.i.us.us.7.1, %if.then.i.i.us.us.7.1 - %indvars.iv.next.i.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.i.us.us.7.1, %for.body.i.i.us.us.7.1 ], [ 0, %if.then.i.i.us.us.7.1 ] - %150 = phi float [ %156, %for.body.i.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.i.us.us.7.1 ] - %151 = add nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %139 - %arrayidx11.i.i.us.us.7.1 = getelementptr inbounds float, float* %8, i64 %151 - %152 = load float, float* %arrayidx11.i.i.us.us.7.1, align 4, !tbaa !12 - %153 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %29 - %154 = add nsw i64 %153, %149 - %arrayidx15.i.i.us.us.7.1 = getelementptr inbounds float, float* %12, i64 %154 - %155 = load float, float* %arrayidx15.i.i.us.us.7.1, align 4, !tbaa !12 - %156 = tail call float @llvm.fmuladd.f32(float %152, float %155, float %150) #2 - store float %156, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7.1, 1 - %exitcond.not.i.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.7.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1.loopexit, label %for.body.i.i.us.us.7.1, !llvm.loop !19 - -if.end.i.i.us.us.7.1.loopexit: ; preds = %for.body.i.i.us.us.7.1 - br label %if.end.i.i.us.us.7.1 - -if.end.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7.1.loopexit, %if.end.i.i.us.us.7 - %157 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %157, 32 - br i1 %exitcond.7.not.1, label %_pocl_kernel_mm3_kernel3.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !23 - -if.then.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6 - %add.i.i.us.us.6.1 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6.1 - %idxprom.i.i.us.us.6.1 = sext i32 %add.i.i.us.us.6.1 to i64 - %arrayidx.i.i.us.us.6.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6.1 = shl i64 %add1.i.i.i.us.us.6.1, 32 - %158 = ashr exact i64 %sext.i.i.us.us.6.1, 32 - br label %for.body.i.i.us.us.6.1 - -for.body.i.i.us.us.6.1: ; preds = %for.body.i.i.us.us.6.1, %if.then.i.i.us.us.6.1 - %indvars.iv.next.i.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.i.us.us.6.1, %for.body.i.i.us.us.6.1 ], [ 0, %if.then.i.i.us.us.6.1 ] - %159 = phi float [ %165, %for.body.i.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.i.us.us.6.1 ] - %160 = add nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %128 - %arrayidx11.i.i.us.us.6.1 = getelementptr inbounds float, float* %8, i64 %160 - %161 = load float, float* %arrayidx11.i.i.us.us.6.1, align 4, !tbaa !12 - %162 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %29 - %163 = add nsw i64 %162, %158 - %arrayidx15.i.i.us.us.6.1 = getelementptr inbounds float, float* %12, i64 %163 - %164 = load float, float* %arrayidx15.i.i.us.us.6.1, align 4, !tbaa !12 - %165 = tail call float @llvm.fmuladd.f32(float %161, float %164, float %159) #2 - store float %165, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6.1, 1 - %exitcond.not.i.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.6.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1.loopexit, label %for.body.i.i.us.us.6.1, !llvm.loop !19 - -if.end.i.i.us.us.6.1.loopexit: ; preds = %for.body.i.i.us.us.6.1 - br label %if.end.i.i.us.us.6.1 - -if.end.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6.1.loopexit, %if.end.i.i.us.us.6 - %166 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %166, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !23 - -if.then.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5 - %add.i.i.us.us.5.1 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5.1 - %idxprom.i.i.us.us.5.1 = sext i32 %add.i.i.us.us.5.1 to i64 - %arrayidx.i.i.us.us.5.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5.1 = shl i64 %add1.i.i.i.us.us.5.1, 32 - %167 = ashr exact i64 %sext.i.i.us.us.5.1, 32 - br label %for.body.i.i.us.us.5.1 - -for.body.i.i.us.us.5.1: ; preds = %for.body.i.i.us.us.5.1, %if.then.i.i.us.us.5.1 - %indvars.iv.next.i.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.i.us.us.5.1, %for.body.i.i.us.us.5.1 ], [ 0, %if.then.i.i.us.us.5.1 ] - %168 = phi float [ %174, %for.body.i.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.i.us.us.5.1 ] - %169 = add nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %117 - %arrayidx11.i.i.us.us.5.1 = getelementptr inbounds float, float* %8, i64 %169 - %170 = load float, float* %arrayidx11.i.i.us.us.5.1, align 4, !tbaa !12 - %171 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %29 - %172 = add nsw i64 %171, %167 - %arrayidx15.i.i.us.us.5.1 = getelementptr inbounds float, float* %12, i64 %172 - %173 = load float, float* %arrayidx15.i.i.us.us.5.1, align 4, !tbaa !12 - %174 = tail call float @llvm.fmuladd.f32(float %170, float %173, float %168) #2 - store float %174, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5.1, 1 - %exitcond.not.i.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.5.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1.loopexit, label %for.body.i.i.us.us.5.1, !llvm.loop !19 - -if.end.i.i.us.us.5.1.loopexit: ; preds = %for.body.i.i.us.us.5.1 - br label %if.end.i.i.us.us.5.1 - -if.end.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5.1.loopexit, %if.end.i.i.us.us.5 - %175 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %175, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !23 - -if.then.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4 - %add.i.i.us.us.4.1 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4.1 - %idxprom.i.i.us.us.4.1 = sext i32 %add.i.i.us.us.4.1 to i64 - %arrayidx.i.i.us.us.4.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4.1 = shl i64 %add1.i.i.i.us.us.4.1, 32 - %176 = ashr exact i64 %sext.i.i.us.us.4.1, 32 - br label %for.body.i.i.us.us.4.1 - -for.body.i.i.us.us.4.1: ; preds = %for.body.i.i.us.us.4.1, %if.then.i.i.us.us.4.1 - %indvars.iv.next.i.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.i.us.us.4.1, %for.body.i.i.us.us.4.1 ], [ 0, %if.then.i.i.us.us.4.1 ] - %177 = phi float [ %183, %for.body.i.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.i.us.us.4.1 ] - %178 = add nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %106 - %arrayidx11.i.i.us.us.4.1 = getelementptr inbounds float, float* %8, i64 %178 - %179 = load float, float* %arrayidx11.i.i.us.us.4.1, align 4, !tbaa !12 - %180 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %29 - %181 = add nsw i64 %180, %176 - %arrayidx15.i.i.us.us.4.1 = getelementptr inbounds float, float* %12, i64 %181 - %182 = load float, float* %arrayidx15.i.i.us.us.4.1, align 4, !tbaa !12 - %183 = tail call float @llvm.fmuladd.f32(float %179, float %182, float %177) #2 - store float %183, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4.1, 1 - %exitcond.not.i.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.4.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1.loopexit, label %for.body.i.i.us.us.4.1, !llvm.loop !19 - -if.end.i.i.us.us.4.1.loopexit: ; preds = %for.body.i.i.us.us.4.1 - br label %if.end.i.i.us.us.4.1 - -if.end.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4.1.loopexit, %if.end.i.i.us.us.4 - %184 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %184, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !23 - -if.then.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3 - %add.i.i.us.us.3.1 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3.1 - %idxprom.i.i.us.us.3.1 = sext i32 %add.i.i.us.us.3.1 to i64 - %arrayidx.i.i.us.us.3.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3.1 = shl i64 %add1.i.i.i.us.us.3.1, 32 - %185 = ashr exact i64 %sext.i.i.us.us.3.1, 32 - br label %for.body.i.i.us.us.3.1 - -for.body.i.i.us.us.3.1: ; preds = %for.body.i.i.us.us.3.1, %if.then.i.i.us.us.3.1 - %indvars.iv.next.i.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.i.us.us.3.1, %for.body.i.i.us.us.3.1 ], [ 0, %if.then.i.i.us.us.3.1 ] - %186 = phi float [ %192, %for.body.i.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.i.us.us.3.1 ] - %187 = add nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %95 - %arrayidx11.i.i.us.us.3.1 = getelementptr inbounds float, float* %8, i64 %187 - %188 = load float, float* %arrayidx11.i.i.us.us.3.1, align 4, !tbaa !12 - %189 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %29 - %190 = add nsw i64 %189, %185 - %arrayidx15.i.i.us.us.3.1 = getelementptr inbounds float, float* %12, i64 %190 - %191 = load float, float* %arrayidx15.i.i.us.us.3.1, align 4, !tbaa !12 - %192 = tail call float @llvm.fmuladd.f32(float %188, float %191, float %186) #2 - store float %192, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3.1, 1 - %exitcond.not.i.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.3.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1.loopexit, label %for.body.i.i.us.us.3.1, !llvm.loop !19 - -if.end.i.i.us.us.3.1.loopexit: ; preds = %for.body.i.i.us.us.3.1 - br label %if.end.i.i.us.us.3.1 - -if.end.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3.1.loopexit, %if.end.i.i.us.us.3 - %193 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %193, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !23 - -if.then.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2 - %add.i.i.us.us.2.1 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2.1 - %idxprom.i.i.us.us.2.1 = sext i32 %add.i.i.us.us.2.1 to i64 - %arrayidx.i.i.us.us.2.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2.1 = shl i64 %add1.i.i.i.us.us.2.1, 32 - %194 = ashr exact i64 %sext.i.i.us.us.2.1, 32 - br label %for.body.i.i.us.us.2.1 - -for.body.i.i.us.us.2.1: ; preds = %for.body.i.i.us.us.2.1, %if.then.i.i.us.us.2.1 - %indvars.iv.next.i.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.i.us.us.2.1, %for.body.i.i.us.us.2.1 ], [ 0, %if.then.i.i.us.us.2.1 ] - %195 = phi float [ %201, %for.body.i.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.i.us.us.2.1 ] - %196 = add nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %84 - %arrayidx11.i.i.us.us.2.1 = getelementptr inbounds float, float* %8, i64 %196 - %197 = load float, float* %arrayidx11.i.i.us.us.2.1, align 4, !tbaa !12 - %198 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %29 - %199 = add nsw i64 %198, %194 - %arrayidx15.i.i.us.us.2.1 = getelementptr inbounds float, float* %12, i64 %199 - %200 = load float, float* %arrayidx15.i.i.us.us.2.1, align 4, !tbaa !12 - %201 = tail call float @llvm.fmuladd.f32(float %197, float %200, float %195) #2 - store float %201, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2.1, 1 - %exitcond.not.i.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.2.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1.loopexit, label %for.body.i.i.us.us.2.1, !llvm.loop !19 - -if.end.i.i.us.us.2.1.loopexit: ; preds = %for.body.i.i.us.us.2.1 - br label %if.end.i.i.us.us.2.1 - -if.end.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2.1.loopexit, %if.end.i.i.us.us.2 - %202 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %202, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !23 - -if.then.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1 - %add.i.i.us.us.1.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1.1 - %idxprom.i.i.us.us.1.1 = sext i32 %add.i.i.us.us.1.1 to i64 - %arrayidx.i.i.us.us.1.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1.1 = shl i64 %add1.i.i.i.us.us.1.1, 32 - %203 = ashr exact i64 %sext.i.i.us.us.1.1, 32 - br label %for.body.i.i.us.us.1.1 - -for.body.i.i.us.us.1.1: ; preds = %for.body.i.i.us.us.1.1, %if.then.i.i.us.us.1.1 - %indvars.iv.next.i.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1.1, %for.body.i.i.us.us.1.1 ], [ 0, %if.then.i.i.us.us.1.1 ] - %204 = phi float [ %210, %for.body.i.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1.1 ] - %205 = add nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %63 - %arrayidx11.i.i.us.us.1.1 = getelementptr inbounds float, float* %8, i64 %205 - %206 = load float, float* %arrayidx11.i.i.us.us.1.1, align 4, !tbaa !12 - %207 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %29 - %208 = add nsw i64 %207, %203 - %arrayidx15.i.i.us.us.1.1 = getelementptr inbounds float, float* %12, i64 %208 - %209 = load float, float* %arrayidx15.i.i.us.us.1.1, align 4, !tbaa !12 - %210 = tail call float @llvm.fmuladd.f32(float %206, float %209, float %204) #2 - store float %210, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1.1, 1 - %exitcond.not.i.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1.loopexit, label %for.body.i.i.us.us.1.1, !llvm.loop !19 - -if.end.i.i.us.us.1.1.loopexit: ; preds = %for.body.i.i.us.us.1.1 - br label %if.end.i.i.us.us.1.1 - -if.end.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1.1.loopexit, %if.end.i.i.us.us.1 - %211 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %211, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !23 - -if.then.i.i.us.us.146: ; preds = %if.end.i.i.us.us - %add.i.i.us.us.142 = add nsw i32 %mul.i.i.us, %conv.i.i.us.us.139 - %idxprom.i.i.us.us.143 = sext i32 %add.i.i.us.us.142 to i64 - %arrayidx.i.i.us.us.144 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.143 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.145 = shl i64 %add1.i.i.i.us.us.138, 32 - %212 = ashr exact i64 %sext.i.i.us.us.145, 32 - br label %for.body.i.i.us.us.152 - -for.body.i.i.us.us.152: ; preds = %for.body.i.i.us.us.152, %if.then.i.i.us.us.146 - %indvars.iv.next.i.i3.us.us.147 = phi i64 [ %indvars.iv.next.i.i.us.us.150, %for.body.i.i.us.us.152 ], [ 0, %if.then.i.i.us.us.146 ] - %213 = phi float [ %219, %for.body.i.i.us.us.152 ], [ 0.000000e+00, %if.then.i.i.us.us.146 ] - %214 = add nsw i64 %indvars.iv.next.i.i3.us.us.147, %61 - %arrayidx11.i.i.us.us.148 = getelementptr inbounds float, float* %8, i64 %214 - %215 = load float, float* %arrayidx11.i.i.us.us.148, align 4, !tbaa !12 - %216 = mul nsw i64 %indvars.iv.next.i.i3.us.us.147, %29 - %217 = add nsw i64 %216, %212 - %arrayidx15.i.i.us.us.149 = getelementptr inbounds float, float* %12, i64 %217 - %218 = load float, float* %arrayidx15.i.i.us.us.149, align 4, !tbaa !12 - %219 = tail call float @llvm.fmuladd.f32(float %215, float %218, float %213) #2 - store float %219, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.150 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.147, 1 - %exitcond.not.i.i.us.us.151 = icmp eq i64 %indvars.iv.next.i.i.us.us.150, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.151, label %if.end.i.i.us.us.153.loopexit, label %for.body.i.i.us.us.152, !llvm.loop !19 - -if.end.i.i.us.us.153.loopexit: ; preds = %for.body.i.i.us.us.152 - br label %if.end.i.i.us.us.153 - -if.end.i.i.us.us.153: ; preds = %if.end.i.i.us.us.153.loopexit, %if.end.i.i.us.us - %220 = add nuw nsw i64 %_local_id_x.i.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %220, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !23 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mm3_kernel3_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %22 = getelementptr i8*, i8** %0, i64 5 - %23 = bitcast i8** %22 to i32** - %24 = load i32*, i32** %23, align 8 - %25 = load i32, i32* %24, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp638.i.i = icmp sgt i32 %25, 0 - %26 = sext i32 %21 to i64 - %wide.trip.count.i.i = zext i32 %25 to i64 - br i1 %cmp638.i.i, label %pregion_for_entry.pregion_for_init.i.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - %conv.i.i.us = trunc i64 %mul.i.i.i to i32 - %cmp4.i.i.us = icmp sgt i32 %21, %conv.i.i.us - %27 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.1 = or i32 %27, 1 - %cmp4.i.i.us.1 = icmp sgt i32 %21, %conv.i.i.us.1 - %28 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.2 = or i32 %28, 2 - %cmp4.i.i.us.2 = icmp sgt i32 %21, %conv.i.i.us.2 - %29 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.3 = or i32 %29, 3 - %cmp4.i.i.us.3 = icmp sgt i32 %21, %conv.i.i.us.3 - %30 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.4 = or i32 %30, 4 - %cmp4.i.i.us.4 = icmp sgt i32 %21, %conv.i.i.us.4 - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.5 = or i32 %31, 5 - %cmp4.i.i.us.5 = icmp sgt i32 %21, %conv.i.i.us.5 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.6 = or i32 %32, 6 - %cmp4.i.i.us.6 = icmp sgt i32 %21, %conv.i.i.us.6 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.7 = or i32 %33, 7 - %cmp4.i.i.us.7 = icmp sgt i32 %21, %conv.i.i.us.7 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.8 = or i32 %34, 8 - %cmp4.i.i.us.8 = icmp sgt i32 %21, %conv.i.i.us.8 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.9 = or i32 %35, 9 - %cmp4.i.i.us.9 = icmp sgt i32 %21, %conv.i.i.us.9 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.10 = or i32 %36, 10 - %cmp4.i.i.us.10 = icmp sgt i32 %21, %conv.i.i.us.10 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.11 = or i32 %37, 11 - %cmp4.i.i.us.11 = icmp sgt i32 %21, %conv.i.i.us.11 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.12 = or i32 %38, 12 - %cmp4.i.i.us.12 = icmp sgt i32 %21, %conv.i.i.us.12 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.13 = or i32 %39, 13 - %cmp4.i.i.us.13 = icmp sgt i32 %21, %conv.i.i.us.13 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.14 = or i32 %40, 14 - %cmp4.i.i.us.14 = icmp sgt i32 %21, %conv.i.i.us.14 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.15 = or i32 %41, 15 - %cmp4.i.i.us.15 = icmp sgt i32 %21, %conv.i.i.us.15 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.16 = or i32 %42, 16 - %cmp4.i.i.us.16 = icmp sgt i32 %21, %conv.i.i.us.16 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.17 = or i32 %43, 17 - %cmp4.i.i.us.17 = icmp sgt i32 %21, %conv.i.i.us.17 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.18 = or i32 %44, 18 - %cmp4.i.i.us.18 = icmp sgt i32 %21, %conv.i.i.us.18 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.19 = or i32 %45, 19 - %cmp4.i.i.us.19 = icmp sgt i32 %21, %conv.i.i.us.19 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.20 = or i32 %46, 20 - %cmp4.i.i.us.20 = icmp sgt i32 %21, %conv.i.i.us.20 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.21 = or i32 %47, 21 - %cmp4.i.i.us.21 = icmp sgt i32 %21, %conv.i.i.us.21 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.22 = or i32 %48, 22 - %cmp4.i.i.us.22 = icmp sgt i32 %21, %conv.i.i.us.22 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.23 = or i32 %49, 23 - %cmp4.i.i.us.23 = icmp sgt i32 %21, %conv.i.i.us.23 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.24 = or i32 %50, 24 - %cmp4.i.i.us.24 = icmp sgt i32 %21, %conv.i.i.us.24 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.25 = or i32 %51, 25 - %cmp4.i.i.us.25 = icmp sgt i32 %21, %conv.i.i.us.25 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.26 = or i32 %52, 26 - %cmp4.i.i.us.26 = icmp sgt i32 %21, %conv.i.i.us.26 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.27 = or i32 %53, 27 - %cmp4.i.i.us.27 = icmp sgt i32 %21, %conv.i.i.us.27 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.28 = or i32 %54, 28 - %cmp4.i.i.us.28 = icmp sgt i32 %21, %conv.i.i.us.28 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.29 = or i32 %55, 29 - %cmp4.i.i.us.29 = icmp sgt i32 %21, %conv.i.i.us.29 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.30 = or i32 %56, 30 - %cmp4.i.i.us.30 = icmp sgt i32 %21, %conv.i.i.us.30 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.31 = or i32 %57, 31 - %cmp4.i.i.us.31 = icmp sgt i32 %21, %conv.i.i.us.31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i.us.preheader: ; preds = %5 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %17, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %21, %conv2.i.i.us - %mul8.i.i.us = mul nsw i32 %25, %conv2.i.i.us - %58 = sext i32 %mul8.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us.preheader - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us.153 - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us.preheader - %59 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %59, 1 - %cmp.i.i.us.1 = icmp sgt i32 %17, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %21, %conv2.i.i.us.1 - %mul8.i.i.us.1 = mul nsw i32 %25, %conv2.i.i.us.1 - %60 = sext i32 %mul8.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us.153, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %217, %if.end.i.i.us.us.153 ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %21, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us - store float 0.000000e+00, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %61 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %62 = or i64 %_local_id_x.i.0.us.us, 1 - %add1.i.i.i.us.us.138 = add nuw nsw i64 %62, %mul.i.i.i - %conv.i.i.us.us.139 = trunc i64 %add1.i.i.i.us.us.138 to i32 - %cmp4.i.i.us.us.140 = icmp sgt i32 %21, %conv.i.i.us.us.139 - br i1 %cmp4.i.i.us.us.140, label %if.then.i.i.us.us.146, label %if.end.i.i.us.us.153 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %63 = phi float [ %69, %for.body.i.i.us.us ], [ 0.000000e+00, %if.then.i.i.us.us ] - %64 = add nsw i64 %indvars.iv.next.i.i3.us.us, %58 - %arrayidx11.i.i.us.us = getelementptr inbounds float, float* %7, i64 %64 - %65 = load float, float* %arrayidx11.i.i.us.us, align 4, !tbaa !12 - %66 = mul nsw i64 %indvars.iv.next.i.i3.us.us, %26 - %67 = add nsw i64 %66, %61 - %arrayidx15.i.i.us.us = getelementptr inbounds float, float* %10, i64 %67 - %68 = load float, float* %arrayidx15.i.i.us.us, align 4, !tbaa !12 - %69 = tail call float @llvm.fmuladd.f32(float %65, float %68, float %63) #2 - store float %69, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %pregion_for_entry.pregion_for_init.i.i.preheader - %_local_id_y.i.0 = phi i64 [ %70, %pregion_for_end.i.i ], [ 0, %pregion_for_entry.pregion_for_init.i.i.preheader ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %17, %conv2.i.i - %mul.i.i = mul nsw i32 %21, %conv2.i.i - br i1 %cmp.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us.preheader - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -pregion_for_end.i.i: ; preds = %if.then.i.i.us.31, %if.end.i.i.us.30, %pregion_for_entry.pregion_for_init.i.i - %70 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond33.not = icmp eq i64 %70, 8 - br i1 %exitcond33.not, label %_pocl_kernel_mm3_kernel3.exit.loopexit54, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_mm3_kernel3.exit.loopexit: ; preds = %if.end.i.i.us.us.7.1 - br label %_pocl_kernel_mm3_kernel3.exit - -_pocl_kernel_mm3_kernel3.exit.loopexit54: ; preds = %pregion_for_end.i.i - br label %_pocl_kernel_mm3_kernel3.exit - -_pocl_kernel_mm3_kernel3.exit: ; preds = %pregion_for_end.i.i.us.6, %_pocl_kernel_mm3_kernel3.exit.loopexit54, %_pocl_kernel_mm3_kernel3.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %208, %if.end.i.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %21, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %71 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %72 = phi float [ %78, %for.body.i.i.us.us.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1 ] - %73 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %60 - %arrayidx11.i.i.us.us.1 = getelementptr inbounds float, float* %7, i64 %73 - %74 = load float, float* %arrayidx11.i.i.us.us.1, align 4, !tbaa !12 - %75 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1, %26 - %76 = add nsw i64 %75, %71 - %arrayidx15.i.i.us.us.1 = getelementptr inbounds float, float* %10, i64 %76 - %77 = load float, float* %arrayidx15.i.i.us.us.1, align 4, !tbaa !12 - %78 = tail call float @llvm.fmuladd.f32(float %74, float %77, float %72) #2 - store float %78, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !19 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %79 = or i64 %_local_id_x.i.0.us.us.1, 1 - %add1.i.i.i.us.us.1.1 = add nuw nsw i64 %79, %mul.i.i.i - %conv.i.i.us.us.1.1 = trunc i64 %add1.i.i.i.us.us.1.1 to i32 - %cmp4.i.i.us.us.1.1 = icmp sgt i32 %21, %conv.i.i.us.us.1.1 - br i1 %cmp4.i.i.us.us.1.1, label %if.then.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %80 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %80, 2 - %cmp.i.i.us.2 = icmp sgt i32 %17, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %21, %conv2.i.i.us.2 - %mul8.i.i.us.2 = mul nsw i32 %25, %conv2.i.i.us.2 - %81 = sext i32 %mul8.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.1, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %199, %if.end.i.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %21, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %82 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %83 = phi float [ %89, %for.body.i.i.us.us.2 ], [ 0.000000e+00, %if.then.i.i.us.us.2 ] - %84 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %81 - %arrayidx11.i.i.us.us.2 = getelementptr inbounds float, float* %7, i64 %84 - %85 = load float, float* %arrayidx11.i.i.us.us.2, align 4, !tbaa !12 - %86 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2, %26 - %87 = add nsw i64 %86, %82 - %arrayidx15.i.i.us.us.2 = getelementptr inbounds float, float* %10, i64 %87 - %88 = load float, float* %arrayidx15.i.i.us.us.2, align 4, !tbaa !12 - %89 = tail call float @llvm.fmuladd.f32(float %85, float %88, float %83) #2 - store float %89, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !19 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %90 = or i64 %_local_id_x.i.0.us.us.2, 1 - %add1.i.i.i.us.us.2.1 = add nuw nsw i64 %90, %mul.i.i.i - %conv.i.i.us.us.2.1 = trunc i64 %add1.i.i.i.us.us.2.1 to i32 - %cmp4.i.i.us.us.2.1 = icmp sgt i32 %21, %conv.i.i.us.us.2.1 - br i1 %cmp4.i.i.us.us.2.1, label %if.then.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2.1 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %91 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %91, 3 - %cmp.i.i.us.3 = icmp sgt i32 %17, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %21, %conv2.i.i.us.3 - %mul8.i.i.us.3 = mul nsw i32 %25, %conv2.i.i.us.3 - %92 = sext i32 %mul8.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.1, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %190, %if.end.i.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %21, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %93 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %94 = phi float [ %100, %for.body.i.i.us.us.3 ], [ 0.000000e+00, %if.then.i.i.us.us.3 ] - %95 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %92 - %arrayidx11.i.i.us.us.3 = getelementptr inbounds float, float* %7, i64 %95 - %96 = load float, float* %arrayidx11.i.i.us.us.3, align 4, !tbaa !12 - %97 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3, %26 - %98 = add nsw i64 %97, %93 - %arrayidx15.i.i.us.us.3 = getelementptr inbounds float, float* %10, i64 %98 - %99 = load float, float* %arrayidx15.i.i.us.us.3, align 4, !tbaa !12 - %100 = tail call float @llvm.fmuladd.f32(float %96, float %99, float %94) #2 - store float %100, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !19 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %101 = or i64 %_local_id_x.i.0.us.us.3, 1 - %add1.i.i.i.us.us.3.1 = add nuw nsw i64 %101, %mul.i.i.i - %conv.i.i.us.us.3.1 = trunc i64 %add1.i.i.i.us.us.3.1 to i32 - %cmp4.i.i.us.us.3.1 = icmp sgt i32 %21, %conv.i.i.us.us.3.1 - br i1 %cmp4.i.i.us.us.3.1, label %if.then.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3.1 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %102 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %102, 4 - %cmp.i.i.us.4 = icmp sgt i32 %17, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %21, %conv2.i.i.us.4 - %mul8.i.i.us.4 = mul nsw i32 %25, %conv2.i.i.us.4 - %103 = sext i32 %mul8.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.1, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %181, %if.end.i.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %21, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %104 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %105 = phi float [ %111, %for.body.i.i.us.us.4 ], [ 0.000000e+00, %if.then.i.i.us.us.4 ] - %106 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %103 - %arrayidx11.i.i.us.us.4 = getelementptr inbounds float, float* %7, i64 %106 - %107 = load float, float* %arrayidx11.i.i.us.us.4, align 4, !tbaa !12 - %108 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4, %26 - %109 = add nsw i64 %108, %104 - %arrayidx15.i.i.us.us.4 = getelementptr inbounds float, float* %10, i64 %109 - %110 = load float, float* %arrayidx15.i.i.us.us.4, align 4, !tbaa !12 - %111 = tail call float @llvm.fmuladd.f32(float %107, float %110, float %105) #2 - store float %111, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !19 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %112 = or i64 %_local_id_x.i.0.us.us.4, 1 - %add1.i.i.i.us.us.4.1 = add nuw nsw i64 %112, %mul.i.i.i - %conv.i.i.us.us.4.1 = trunc i64 %add1.i.i.i.us.us.4.1 to i32 - %cmp4.i.i.us.us.4.1 = icmp sgt i32 %21, %conv.i.i.us.us.4.1 - br i1 %cmp4.i.i.us.us.4.1, label %if.then.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4.1 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %113 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %113, 5 - %cmp.i.i.us.5 = icmp sgt i32 %17, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %21, %conv2.i.i.us.5 - %mul8.i.i.us.5 = mul nsw i32 %25, %conv2.i.i.us.5 - %114 = sext i32 %mul8.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.1, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %172, %if.end.i.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %21, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %115 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %116 = phi float [ %122, %for.body.i.i.us.us.5 ], [ 0.000000e+00, %if.then.i.i.us.us.5 ] - %117 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %114 - %arrayidx11.i.i.us.us.5 = getelementptr inbounds float, float* %7, i64 %117 - %118 = load float, float* %arrayidx11.i.i.us.us.5, align 4, !tbaa !12 - %119 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5, %26 - %120 = add nsw i64 %119, %115 - %arrayidx15.i.i.us.us.5 = getelementptr inbounds float, float* %10, i64 %120 - %121 = load float, float* %arrayidx15.i.i.us.us.5, align 4, !tbaa !12 - %122 = tail call float @llvm.fmuladd.f32(float %118, float %121, float %116) #2 - store float %122, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !19 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %123 = or i64 %_local_id_x.i.0.us.us.5, 1 - %add1.i.i.i.us.us.5.1 = add nuw nsw i64 %123, %mul.i.i.i - %conv.i.i.us.us.5.1 = trunc i64 %add1.i.i.i.us.us.5.1 to i32 - %cmp4.i.i.us.us.5.1 = icmp sgt i32 %21, %conv.i.i.us.us.5.1 - br i1 %cmp4.i.i.us.us.5.1, label %if.then.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5.1 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %124 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %124, 6 - %cmp.i.i.us.6 = icmp sgt i32 %17, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %21, %conv2.i.i.us.6 - %mul8.i.i.us.6 = mul nsw i32 %25, %conv2.i.i.us.6 - %125 = sext i32 %mul8.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.1, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %163, %if.end.i.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %21, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %126 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %127 = phi float [ %133, %for.body.i.i.us.us.6 ], [ 0.000000e+00, %if.then.i.i.us.us.6 ] - %128 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %125 - %arrayidx11.i.i.us.us.6 = getelementptr inbounds float, float* %7, i64 %128 - %129 = load float, float* %arrayidx11.i.i.us.us.6, align 4, !tbaa !12 - %130 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6, %26 - %131 = add nsw i64 %130, %126 - %arrayidx15.i.i.us.us.6 = getelementptr inbounds float, float* %10, i64 %131 - %132 = load float, float* %arrayidx15.i.i.us.us.6, align 4, !tbaa !12 - %133 = tail call float @llvm.fmuladd.f32(float %129, float %132, float %127) #2 - store float %133, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !19 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %134 = or i64 %_local_id_x.i.0.us.us.6, 1 - %add1.i.i.i.us.us.6.1 = add nuw nsw i64 %134, %mul.i.i.i - %conv.i.i.us.us.6.1 = trunc i64 %add1.i.i.i.us.us.6.1 to i32 - %cmp4.i.i.us.us.6.1 = icmp sgt i32 %21, %conv.i.i.us.us.6.1 - br i1 %cmp4.i.i.us.us.6.1, label %if.then.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6.1 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %135 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %135, 7 - %cmp.i.i.us.7 = icmp sgt i32 %17, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %21, %conv2.i.i.us.7 - %mul8.i.i.us.7 = mul nsw i32 %25, %conv2.i.i.us.7 - %136 = sext i32 %mul8.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_mm3_kernel3.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.1, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %154, %if.end.i.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %21, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %137 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %138 = phi float [ %144, %for.body.i.i.us.us.7 ], [ 0.000000e+00, %if.then.i.i.us.us.7 ] - %139 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %136 - %arrayidx11.i.i.us.us.7 = getelementptr inbounds float, float* %7, i64 %139 - %140 = load float, float* %arrayidx11.i.i.us.us.7, align 4, !tbaa !12 - %141 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7, %26 - %142 = add nsw i64 %141, %137 - %arrayidx15.i.i.us.us.7 = getelementptr inbounds float, float* %10, i64 %142 - %143 = load float, float* %arrayidx15.i.i.us.us.7, align 4, !tbaa !12 - %144 = tail call float @llvm.fmuladd.f32(float %140, float %143, float %138) #2 - store float %144, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !19 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %145 = or i64 %_local_id_x.i.0.us.us.7, 1 - %add1.i.i.i.us.us.7.1 = add nuw nsw i64 %145, %mul.i.i.i - %conv.i.i.us.us.7.1 = trunc i64 %add1.i.i.i.us.us.7.1 to i32 - %cmp4.i.i.us.us.7.1 = icmp sgt i32 %21, %conv.i.i.us.us.7.1 - br i1 %cmp4.i.i.us.us.7.1, label %if.then.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1 - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %add.i.i.us.1 = add nsw i32 %mul.i.i, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %if.end.i.i.us - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %if.end.i.i.us.1 - %add.i.i.us.2 = add nsw i32 %mul.i.i, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %if.end.i.i.us.1 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %if.end.i.i.us.2 - %add.i.i.us.3 = add nsw i32 %mul.i.i, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %if.end.i.i.us.2 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %if.end.i.i.us.3 - %add.i.i.us.4 = add nsw i32 %mul.i.i, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %if.end.i.i.us.3 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %if.end.i.i.us.4 - %add.i.i.us.5 = add nsw i32 %mul.i.i, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %if.end.i.i.us.4 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %if.end.i.i.us.5 - %add.i.i.us.6 = add nsw i32 %mul.i.i, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %if.end.i.i.us.5 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %if.end.i.i.us.6 - %add.i.i.us.7 = add nsw i32 %mul.i.i, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %if.end.i.i.us.6 - br i1 %cmp4.i.i.us.8, label %if.then.i.i.us.8, label %if.end.i.i.us.8 - -if.then.i.i.us.8: ; preds = %if.end.i.i.us.7 - %add.i.i.us.8 = add nsw i32 %mul.i.i, %conv.i.i.us.8 - %idxprom.i.i.us.8 = sext i32 %add.i.i.us.8 to i64 - %arrayidx.i.i.us.8 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.8 - -if.end.i.i.us.8: ; preds = %if.then.i.i.us.8, %if.end.i.i.us.7 - br i1 %cmp4.i.i.us.9, label %if.then.i.i.us.9, label %if.end.i.i.us.9 - -if.then.i.i.us.9: ; preds = %if.end.i.i.us.8 - %add.i.i.us.9 = add nsw i32 %mul.i.i, %conv.i.i.us.9 - %idxprom.i.i.us.9 = sext i32 %add.i.i.us.9 to i64 - %arrayidx.i.i.us.9 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.9 - -if.end.i.i.us.9: ; preds = %if.then.i.i.us.9, %if.end.i.i.us.8 - br i1 %cmp4.i.i.us.10, label %if.then.i.i.us.10, label %if.end.i.i.us.10 - -if.then.i.i.us.10: ; preds = %if.end.i.i.us.9 - %add.i.i.us.10 = add nsw i32 %mul.i.i, %conv.i.i.us.10 - %idxprom.i.i.us.10 = sext i32 %add.i.i.us.10 to i64 - %arrayidx.i.i.us.10 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.10 - -if.end.i.i.us.10: ; preds = %if.then.i.i.us.10, %if.end.i.i.us.9 - br i1 %cmp4.i.i.us.11, label %if.then.i.i.us.11, label %if.end.i.i.us.11 - -if.then.i.i.us.11: ; preds = %if.end.i.i.us.10 - %add.i.i.us.11 = add nsw i32 %mul.i.i, %conv.i.i.us.11 - %idxprom.i.i.us.11 = sext i32 %add.i.i.us.11 to i64 - %arrayidx.i.i.us.11 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.11 - -if.end.i.i.us.11: ; preds = %if.then.i.i.us.11, %if.end.i.i.us.10 - br i1 %cmp4.i.i.us.12, label %if.then.i.i.us.12, label %if.end.i.i.us.12 - -if.then.i.i.us.12: ; preds = %if.end.i.i.us.11 - %add.i.i.us.12 = add nsw i32 %mul.i.i, %conv.i.i.us.12 - %idxprom.i.i.us.12 = sext i32 %add.i.i.us.12 to i64 - %arrayidx.i.i.us.12 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.12 - -if.end.i.i.us.12: ; preds = %if.then.i.i.us.12, %if.end.i.i.us.11 - br i1 %cmp4.i.i.us.13, label %if.then.i.i.us.13, label %if.end.i.i.us.13 - -if.then.i.i.us.13: ; preds = %if.end.i.i.us.12 - %add.i.i.us.13 = add nsw i32 %mul.i.i, %conv.i.i.us.13 - %idxprom.i.i.us.13 = sext i32 %add.i.i.us.13 to i64 - %arrayidx.i.i.us.13 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.13 - -if.end.i.i.us.13: ; preds = %if.then.i.i.us.13, %if.end.i.i.us.12 - br i1 %cmp4.i.i.us.14, label %if.then.i.i.us.14, label %if.end.i.i.us.14 - -if.then.i.i.us.14: ; preds = %if.end.i.i.us.13 - %add.i.i.us.14 = add nsw i32 %mul.i.i, %conv.i.i.us.14 - %idxprom.i.i.us.14 = sext i32 %add.i.i.us.14 to i64 - %arrayidx.i.i.us.14 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.14 - -if.end.i.i.us.14: ; preds = %if.then.i.i.us.14, %if.end.i.i.us.13 - br i1 %cmp4.i.i.us.15, label %if.then.i.i.us.15, label %if.end.i.i.us.15 - -if.then.i.i.us.15: ; preds = %if.end.i.i.us.14 - %add.i.i.us.15 = add nsw i32 %mul.i.i, %conv.i.i.us.15 - %idxprom.i.i.us.15 = sext i32 %add.i.i.us.15 to i64 - %arrayidx.i.i.us.15 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.15 - -if.end.i.i.us.15: ; preds = %if.then.i.i.us.15, %if.end.i.i.us.14 - br i1 %cmp4.i.i.us.16, label %if.then.i.i.us.16, label %if.end.i.i.us.16 - -if.then.i.i.us.16: ; preds = %if.end.i.i.us.15 - %add.i.i.us.16 = add nsw i32 %mul.i.i, %conv.i.i.us.16 - %idxprom.i.i.us.16 = sext i32 %add.i.i.us.16 to i64 - %arrayidx.i.i.us.16 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.16 - -if.end.i.i.us.16: ; preds = %if.then.i.i.us.16, %if.end.i.i.us.15 - br i1 %cmp4.i.i.us.17, label %if.then.i.i.us.17, label %if.end.i.i.us.17 - -if.then.i.i.us.17: ; preds = %if.end.i.i.us.16 - %add.i.i.us.17 = add nsw i32 %mul.i.i, %conv.i.i.us.17 - %idxprom.i.i.us.17 = sext i32 %add.i.i.us.17 to i64 - %arrayidx.i.i.us.17 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.17 - -if.end.i.i.us.17: ; preds = %if.then.i.i.us.17, %if.end.i.i.us.16 - br i1 %cmp4.i.i.us.18, label %if.then.i.i.us.18, label %if.end.i.i.us.18 - -if.then.i.i.us.18: ; preds = %if.end.i.i.us.17 - %add.i.i.us.18 = add nsw i32 %mul.i.i, %conv.i.i.us.18 - %idxprom.i.i.us.18 = sext i32 %add.i.i.us.18 to i64 - %arrayidx.i.i.us.18 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.18 - -if.end.i.i.us.18: ; preds = %if.then.i.i.us.18, %if.end.i.i.us.17 - br i1 %cmp4.i.i.us.19, label %if.then.i.i.us.19, label %if.end.i.i.us.19 - -if.then.i.i.us.19: ; preds = %if.end.i.i.us.18 - %add.i.i.us.19 = add nsw i32 %mul.i.i, %conv.i.i.us.19 - %idxprom.i.i.us.19 = sext i32 %add.i.i.us.19 to i64 - %arrayidx.i.i.us.19 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.19 - -if.end.i.i.us.19: ; preds = %if.then.i.i.us.19, %if.end.i.i.us.18 - br i1 %cmp4.i.i.us.20, label %if.then.i.i.us.20, label %if.end.i.i.us.20 - -if.then.i.i.us.20: ; preds = %if.end.i.i.us.19 - %add.i.i.us.20 = add nsw i32 %mul.i.i, %conv.i.i.us.20 - %idxprom.i.i.us.20 = sext i32 %add.i.i.us.20 to i64 - %arrayidx.i.i.us.20 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.20 - -if.end.i.i.us.20: ; preds = %if.then.i.i.us.20, %if.end.i.i.us.19 - br i1 %cmp4.i.i.us.21, label %if.then.i.i.us.21, label %if.end.i.i.us.21 - -if.then.i.i.us.21: ; preds = %if.end.i.i.us.20 - %add.i.i.us.21 = add nsw i32 %mul.i.i, %conv.i.i.us.21 - %idxprom.i.i.us.21 = sext i32 %add.i.i.us.21 to i64 - %arrayidx.i.i.us.21 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.21 - -if.end.i.i.us.21: ; preds = %if.then.i.i.us.21, %if.end.i.i.us.20 - br i1 %cmp4.i.i.us.22, label %if.then.i.i.us.22, label %if.end.i.i.us.22 - -if.then.i.i.us.22: ; preds = %if.end.i.i.us.21 - %add.i.i.us.22 = add nsw i32 %mul.i.i, %conv.i.i.us.22 - %idxprom.i.i.us.22 = sext i32 %add.i.i.us.22 to i64 - %arrayidx.i.i.us.22 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.22 - -if.end.i.i.us.22: ; preds = %if.then.i.i.us.22, %if.end.i.i.us.21 - br i1 %cmp4.i.i.us.23, label %if.then.i.i.us.23, label %if.end.i.i.us.23 - -if.then.i.i.us.23: ; preds = %if.end.i.i.us.22 - %add.i.i.us.23 = add nsw i32 %mul.i.i, %conv.i.i.us.23 - %idxprom.i.i.us.23 = sext i32 %add.i.i.us.23 to i64 - %arrayidx.i.i.us.23 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.23 - -if.end.i.i.us.23: ; preds = %if.then.i.i.us.23, %if.end.i.i.us.22 - br i1 %cmp4.i.i.us.24, label %if.then.i.i.us.24, label %if.end.i.i.us.24 - -if.then.i.i.us.24: ; preds = %if.end.i.i.us.23 - %add.i.i.us.24 = add nsw i32 %mul.i.i, %conv.i.i.us.24 - %idxprom.i.i.us.24 = sext i32 %add.i.i.us.24 to i64 - %arrayidx.i.i.us.24 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.24 - -if.end.i.i.us.24: ; preds = %if.then.i.i.us.24, %if.end.i.i.us.23 - br i1 %cmp4.i.i.us.25, label %if.then.i.i.us.25, label %if.end.i.i.us.25 - -if.then.i.i.us.25: ; preds = %if.end.i.i.us.24 - %add.i.i.us.25 = add nsw i32 %mul.i.i, %conv.i.i.us.25 - %idxprom.i.i.us.25 = sext i32 %add.i.i.us.25 to i64 - %arrayidx.i.i.us.25 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.25 - -if.end.i.i.us.25: ; preds = %if.then.i.i.us.25, %if.end.i.i.us.24 - br i1 %cmp4.i.i.us.26, label %if.then.i.i.us.26, label %if.end.i.i.us.26 - -if.then.i.i.us.26: ; preds = %if.end.i.i.us.25 - %add.i.i.us.26 = add nsw i32 %mul.i.i, %conv.i.i.us.26 - %idxprom.i.i.us.26 = sext i32 %add.i.i.us.26 to i64 - %arrayidx.i.i.us.26 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.26 - -if.end.i.i.us.26: ; preds = %if.then.i.i.us.26, %if.end.i.i.us.25 - br i1 %cmp4.i.i.us.27, label %if.then.i.i.us.27, label %if.end.i.i.us.27 - -if.then.i.i.us.27: ; preds = %if.end.i.i.us.26 - %add.i.i.us.27 = add nsw i32 %mul.i.i, %conv.i.i.us.27 - %idxprom.i.i.us.27 = sext i32 %add.i.i.us.27 to i64 - %arrayidx.i.i.us.27 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.27 - -if.end.i.i.us.27: ; preds = %if.then.i.i.us.27, %if.end.i.i.us.26 - br i1 %cmp4.i.i.us.28, label %if.then.i.i.us.28, label %if.end.i.i.us.28 - -if.then.i.i.us.28: ; preds = %if.end.i.i.us.27 - %add.i.i.us.28 = add nsw i32 %mul.i.i, %conv.i.i.us.28 - %idxprom.i.i.us.28 = sext i32 %add.i.i.us.28 to i64 - %arrayidx.i.i.us.28 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.28 - -if.end.i.i.us.28: ; preds = %if.then.i.i.us.28, %if.end.i.i.us.27 - br i1 %cmp4.i.i.us.29, label %if.then.i.i.us.29, label %if.end.i.i.us.29 - -if.then.i.i.us.29: ; preds = %if.end.i.i.us.28 - %add.i.i.us.29 = add nsw i32 %mul.i.i, %conv.i.i.us.29 - %idxprom.i.i.us.29 = sext i32 %add.i.i.us.29 to i64 - %arrayidx.i.i.us.29 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.29 - -if.end.i.i.us.29: ; preds = %if.then.i.i.us.29, %if.end.i.i.us.28 - br i1 %cmp4.i.i.us.30, label %if.then.i.i.us.30, label %if.end.i.i.us.30 - -if.then.i.i.us.30: ; preds = %if.end.i.i.us.29 - %add.i.i.us.30 = add nsw i32 %mul.i.i, %conv.i.i.us.30 - %idxprom.i.i.us.30 = sext i32 %add.i.i.us.30 to i64 - %arrayidx.i.i.us.30 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.30 - -if.end.i.i.us.30: ; preds = %if.then.i.i.us.30, %if.end.i.i.us.29 - br i1 %cmp4.i.i.us.31, label %if.then.i.i.us.31, label %pregion_for_end.i.i - -if.then.i.i.us.31: ; preds = %if.end.i.i.us.30 - %add.i.i.us.31 = add nsw i32 %mul.i.i, %conv.i.i.us.31 - %idxprom.i.i.us.31 = sext i32 %add.i.i.us.31 to i64 - %arrayidx.i.i.us.31 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -if.then.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7 - %add.i.i.us.us.7.1 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7.1 - %idxprom.i.i.us.us.7.1 = sext i32 %add.i.i.us.us.7.1 to i64 - %arrayidx.i.i.us.us.7.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7.1 = shl i64 %add1.i.i.i.us.us.7.1, 32 - %146 = ashr exact i64 %sext.i.i.us.us.7.1, 32 - br label %for.body.i.i.us.us.7.1 - -for.body.i.i.us.us.7.1: ; preds = %for.body.i.i.us.us.7.1, %if.then.i.i.us.us.7.1 - %indvars.iv.next.i.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.i.us.us.7.1, %for.body.i.i.us.us.7.1 ], [ 0, %if.then.i.i.us.us.7.1 ] - %147 = phi float [ %153, %for.body.i.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.i.us.us.7.1 ] - %148 = add nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %136 - %arrayidx11.i.i.us.us.7.1 = getelementptr inbounds float, float* %7, i64 %148 - %149 = load float, float* %arrayidx11.i.i.us.us.7.1, align 4, !tbaa !12 - %150 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %26 - %151 = add nsw i64 %150, %146 - %arrayidx15.i.i.us.us.7.1 = getelementptr inbounds float, float* %10, i64 %151 - %152 = load float, float* %arrayidx15.i.i.us.us.7.1, align 4, !tbaa !12 - %153 = tail call float @llvm.fmuladd.f32(float %149, float %152, float %147) #2 - store float %153, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7.1, 1 - %exitcond.not.i.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.7.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1.loopexit, label %for.body.i.i.us.us.7.1, !llvm.loop !19 - -if.end.i.i.us.us.7.1.loopexit: ; preds = %for.body.i.i.us.us.7.1 - br label %if.end.i.i.us.us.7.1 - -if.end.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7.1.loopexit, %if.end.i.i.us.us.7 - %154 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %154, 32 - br i1 %exitcond.7.not.1, label %_pocl_kernel_mm3_kernel3.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !23 - -if.then.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6 - %add.i.i.us.us.6.1 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6.1 - %idxprom.i.i.us.us.6.1 = sext i32 %add.i.i.us.us.6.1 to i64 - %arrayidx.i.i.us.us.6.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6.1 = shl i64 %add1.i.i.i.us.us.6.1, 32 - %155 = ashr exact i64 %sext.i.i.us.us.6.1, 32 - br label %for.body.i.i.us.us.6.1 - -for.body.i.i.us.us.6.1: ; preds = %for.body.i.i.us.us.6.1, %if.then.i.i.us.us.6.1 - %indvars.iv.next.i.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.i.us.us.6.1, %for.body.i.i.us.us.6.1 ], [ 0, %if.then.i.i.us.us.6.1 ] - %156 = phi float [ %162, %for.body.i.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.i.us.us.6.1 ] - %157 = add nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %125 - %arrayidx11.i.i.us.us.6.1 = getelementptr inbounds float, float* %7, i64 %157 - %158 = load float, float* %arrayidx11.i.i.us.us.6.1, align 4, !tbaa !12 - %159 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %26 - %160 = add nsw i64 %159, %155 - %arrayidx15.i.i.us.us.6.1 = getelementptr inbounds float, float* %10, i64 %160 - %161 = load float, float* %arrayidx15.i.i.us.us.6.1, align 4, !tbaa !12 - %162 = tail call float @llvm.fmuladd.f32(float %158, float %161, float %156) #2 - store float %162, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6.1, 1 - %exitcond.not.i.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.6.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1.loopexit, label %for.body.i.i.us.us.6.1, !llvm.loop !19 - -if.end.i.i.us.us.6.1.loopexit: ; preds = %for.body.i.i.us.us.6.1 - br label %if.end.i.i.us.us.6.1 - -if.end.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6.1.loopexit, %if.end.i.i.us.us.6 - %163 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %163, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !23 - -if.then.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5 - %add.i.i.us.us.5.1 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5.1 - %idxprom.i.i.us.us.5.1 = sext i32 %add.i.i.us.us.5.1 to i64 - %arrayidx.i.i.us.us.5.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5.1 = shl i64 %add1.i.i.i.us.us.5.1, 32 - %164 = ashr exact i64 %sext.i.i.us.us.5.1, 32 - br label %for.body.i.i.us.us.5.1 - -for.body.i.i.us.us.5.1: ; preds = %for.body.i.i.us.us.5.1, %if.then.i.i.us.us.5.1 - %indvars.iv.next.i.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.i.us.us.5.1, %for.body.i.i.us.us.5.1 ], [ 0, %if.then.i.i.us.us.5.1 ] - %165 = phi float [ %171, %for.body.i.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.i.us.us.5.1 ] - %166 = add nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %114 - %arrayidx11.i.i.us.us.5.1 = getelementptr inbounds float, float* %7, i64 %166 - %167 = load float, float* %arrayidx11.i.i.us.us.5.1, align 4, !tbaa !12 - %168 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %26 - %169 = add nsw i64 %168, %164 - %arrayidx15.i.i.us.us.5.1 = getelementptr inbounds float, float* %10, i64 %169 - %170 = load float, float* %arrayidx15.i.i.us.us.5.1, align 4, !tbaa !12 - %171 = tail call float @llvm.fmuladd.f32(float %167, float %170, float %165) #2 - store float %171, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5.1, 1 - %exitcond.not.i.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.5.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1.loopexit, label %for.body.i.i.us.us.5.1, !llvm.loop !19 - -if.end.i.i.us.us.5.1.loopexit: ; preds = %for.body.i.i.us.us.5.1 - br label %if.end.i.i.us.us.5.1 - -if.end.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5.1.loopexit, %if.end.i.i.us.us.5 - %172 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %172, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !23 - -if.then.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4 - %add.i.i.us.us.4.1 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4.1 - %idxprom.i.i.us.us.4.1 = sext i32 %add.i.i.us.us.4.1 to i64 - %arrayidx.i.i.us.us.4.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4.1 = shl i64 %add1.i.i.i.us.us.4.1, 32 - %173 = ashr exact i64 %sext.i.i.us.us.4.1, 32 - br label %for.body.i.i.us.us.4.1 - -for.body.i.i.us.us.4.1: ; preds = %for.body.i.i.us.us.4.1, %if.then.i.i.us.us.4.1 - %indvars.iv.next.i.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.i.us.us.4.1, %for.body.i.i.us.us.4.1 ], [ 0, %if.then.i.i.us.us.4.1 ] - %174 = phi float [ %180, %for.body.i.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.i.us.us.4.1 ] - %175 = add nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %103 - %arrayidx11.i.i.us.us.4.1 = getelementptr inbounds float, float* %7, i64 %175 - %176 = load float, float* %arrayidx11.i.i.us.us.4.1, align 4, !tbaa !12 - %177 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %26 - %178 = add nsw i64 %177, %173 - %arrayidx15.i.i.us.us.4.1 = getelementptr inbounds float, float* %10, i64 %178 - %179 = load float, float* %arrayidx15.i.i.us.us.4.1, align 4, !tbaa !12 - %180 = tail call float @llvm.fmuladd.f32(float %176, float %179, float %174) #2 - store float %180, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4.1, 1 - %exitcond.not.i.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.4.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1.loopexit, label %for.body.i.i.us.us.4.1, !llvm.loop !19 - -if.end.i.i.us.us.4.1.loopexit: ; preds = %for.body.i.i.us.us.4.1 - br label %if.end.i.i.us.us.4.1 - -if.end.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4.1.loopexit, %if.end.i.i.us.us.4 - %181 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %181, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !23 - -if.then.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3 - %add.i.i.us.us.3.1 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3.1 - %idxprom.i.i.us.us.3.1 = sext i32 %add.i.i.us.us.3.1 to i64 - %arrayidx.i.i.us.us.3.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3.1 = shl i64 %add1.i.i.i.us.us.3.1, 32 - %182 = ashr exact i64 %sext.i.i.us.us.3.1, 32 - br label %for.body.i.i.us.us.3.1 - -for.body.i.i.us.us.3.1: ; preds = %for.body.i.i.us.us.3.1, %if.then.i.i.us.us.3.1 - %indvars.iv.next.i.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.i.us.us.3.1, %for.body.i.i.us.us.3.1 ], [ 0, %if.then.i.i.us.us.3.1 ] - %183 = phi float [ %189, %for.body.i.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.i.us.us.3.1 ] - %184 = add nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %92 - %arrayidx11.i.i.us.us.3.1 = getelementptr inbounds float, float* %7, i64 %184 - %185 = load float, float* %arrayidx11.i.i.us.us.3.1, align 4, !tbaa !12 - %186 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %26 - %187 = add nsw i64 %186, %182 - %arrayidx15.i.i.us.us.3.1 = getelementptr inbounds float, float* %10, i64 %187 - %188 = load float, float* %arrayidx15.i.i.us.us.3.1, align 4, !tbaa !12 - %189 = tail call float @llvm.fmuladd.f32(float %185, float %188, float %183) #2 - store float %189, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3.1, 1 - %exitcond.not.i.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.3.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1.loopexit, label %for.body.i.i.us.us.3.1, !llvm.loop !19 - -if.end.i.i.us.us.3.1.loopexit: ; preds = %for.body.i.i.us.us.3.1 - br label %if.end.i.i.us.us.3.1 - -if.end.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3.1.loopexit, %if.end.i.i.us.us.3 - %190 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %190, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !23 - -if.then.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2 - %add.i.i.us.us.2.1 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2.1 - %idxprom.i.i.us.us.2.1 = sext i32 %add.i.i.us.us.2.1 to i64 - %arrayidx.i.i.us.us.2.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2.1 = shl i64 %add1.i.i.i.us.us.2.1, 32 - %191 = ashr exact i64 %sext.i.i.us.us.2.1, 32 - br label %for.body.i.i.us.us.2.1 - -for.body.i.i.us.us.2.1: ; preds = %for.body.i.i.us.us.2.1, %if.then.i.i.us.us.2.1 - %indvars.iv.next.i.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.i.us.us.2.1, %for.body.i.i.us.us.2.1 ], [ 0, %if.then.i.i.us.us.2.1 ] - %192 = phi float [ %198, %for.body.i.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.i.us.us.2.1 ] - %193 = add nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %81 - %arrayidx11.i.i.us.us.2.1 = getelementptr inbounds float, float* %7, i64 %193 - %194 = load float, float* %arrayidx11.i.i.us.us.2.1, align 4, !tbaa !12 - %195 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %26 - %196 = add nsw i64 %195, %191 - %arrayidx15.i.i.us.us.2.1 = getelementptr inbounds float, float* %10, i64 %196 - %197 = load float, float* %arrayidx15.i.i.us.us.2.1, align 4, !tbaa !12 - %198 = tail call float @llvm.fmuladd.f32(float %194, float %197, float %192) #2 - store float %198, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2.1, 1 - %exitcond.not.i.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.2.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1.loopexit, label %for.body.i.i.us.us.2.1, !llvm.loop !19 - -if.end.i.i.us.us.2.1.loopexit: ; preds = %for.body.i.i.us.us.2.1 - br label %if.end.i.i.us.us.2.1 - -if.end.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2.1.loopexit, %if.end.i.i.us.us.2 - %199 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %199, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !23 - -if.then.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1 - %add.i.i.us.us.1.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1.1 - %idxprom.i.i.us.us.1.1 = sext i32 %add.i.i.us.us.1.1 to i64 - %arrayidx.i.i.us.us.1.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1.1 = shl i64 %add1.i.i.i.us.us.1.1, 32 - %200 = ashr exact i64 %sext.i.i.us.us.1.1, 32 - br label %for.body.i.i.us.us.1.1 - -for.body.i.i.us.us.1.1: ; preds = %for.body.i.i.us.us.1.1, %if.then.i.i.us.us.1.1 - %indvars.iv.next.i.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1.1, %for.body.i.i.us.us.1.1 ], [ 0, %if.then.i.i.us.us.1.1 ] - %201 = phi float [ %207, %for.body.i.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1.1 ] - %202 = add nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %60 - %arrayidx11.i.i.us.us.1.1 = getelementptr inbounds float, float* %7, i64 %202 - %203 = load float, float* %arrayidx11.i.i.us.us.1.1, align 4, !tbaa !12 - %204 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %26 - %205 = add nsw i64 %204, %200 - %arrayidx15.i.i.us.us.1.1 = getelementptr inbounds float, float* %10, i64 %205 - %206 = load float, float* %arrayidx15.i.i.us.us.1.1, align 4, !tbaa !12 - %207 = tail call float @llvm.fmuladd.f32(float %203, float %206, float %201) #2 - store float %207, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1.1, 1 - %exitcond.not.i.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1.loopexit, label %for.body.i.i.us.us.1.1, !llvm.loop !19 - -if.end.i.i.us.us.1.1.loopexit: ; preds = %for.body.i.i.us.us.1.1 - br label %if.end.i.i.us.us.1.1 - -if.end.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1.1.loopexit, %if.end.i.i.us.us.1 - %208 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %208, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !23 - -if.then.i.i.us.us.146: ; preds = %if.end.i.i.us.us - %add.i.i.us.us.142 = add nsw i32 %mul.i.i.us, %conv.i.i.us.us.139 - %idxprom.i.i.us.us.143 = sext i32 %add.i.i.us.us.142 to i64 - %arrayidx.i.i.us.us.144 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.143 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.145 = shl i64 %add1.i.i.i.us.us.138, 32 - %209 = ashr exact i64 %sext.i.i.us.us.145, 32 - br label %for.body.i.i.us.us.152 - -for.body.i.i.us.us.152: ; preds = %for.body.i.i.us.us.152, %if.then.i.i.us.us.146 - %indvars.iv.next.i.i3.us.us.147 = phi i64 [ %indvars.iv.next.i.i.us.us.150, %for.body.i.i.us.us.152 ], [ 0, %if.then.i.i.us.us.146 ] - %210 = phi float [ %216, %for.body.i.i.us.us.152 ], [ 0.000000e+00, %if.then.i.i.us.us.146 ] - %211 = add nsw i64 %indvars.iv.next.i.i3.us.us.147, %58 - %arrayidx11.i.i.us.us.148 = getelementptr inbounds float, float* %7, i64 %211 - %212 = load float, float* %arrayidx11.i.i.us.us.148, align 4, !tbaa !12 - %213 = mul nsw i64 %indvars.iv.next.i.i3.us.us.147, %26 - %214 = add nsw i64 %213, %209 - %arrayidx15.i.i.us.us.149 = getelementptr inbounds float, float* %10, i64 %214 - %215 = load float, float* %arrayidx15.i.i.us.us.149, align 4, !tbaa !12 - %216 = tail call float @llvm.fmuladd.f32(float %212, float %215, float %210) #2 - store float %216, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.150 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.147, 1 - %exitcond.not.i.i.us.us.151 = icmp eq i64 %indvars.iv.next.i.i.us.us.150, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.151, label %if.end.i.i.us.us.153.loopexit, label %for.body.i.i.us.us.152, !llvm.loop !19 - -if.end.i.i.us.us.153.loopexit: ; preds = %for.body.i.i.us.us.152 - br label %if.end.i.i.us.us.153 - -if.end.i.i.us.us.153: ; preds = %if.end.i.i.us.us.153.loopexit, %if.end.i.i.us.us - %217 = add nuw nsw i64 %_local_id_x.i.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %217, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !23 -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !""} -!10 = !{!"E", !"F", !"G", !"ni", !"nl", !"nj"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17, !18} -!17 = distinct !{} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.unroll.disable"} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !18} -!23 = distinct !{!23, !24} -!24 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/adi_kernel1.ll b/pocl_irs/adi_kernel1.ll deleted file mode 100644 index 1876915..0000000 --- a/pocl_irs/adi_kernel1.ll +++ /dev/null @@ -1,534 +0,0 @@ -; ModuleID = './BF/DFFLECFOLOBPKCKMNEPCKIANKJKKLLHBOGBCO/adi_kernel1/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_adi_kernel1(float* nocapture readonly %0, float* nocapture %1, float* nocapture %2, i32 %3, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %4, i64 %5, i64 %6, i64 %7) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %5, 8 - %cmp288.i = icmp sgt i32 %3, 1 - %wide.trip.count.i = zext i32 %3 to i64 - br i1 %cmp288.i, label %pregion_for_entry.entry.i.us.preheader, label %adi_kernel1.exit - -pregion_for_entry.entry.i.us.preheader: ; preds = %8 - %scevgep19 = getelementptr float, float* %2, i64 %wide.trip.count.i - %scevgep24 = getelementptr float, float* %1, i64 %wide.trip.count.i - %9 = trunc i64 %5 to i32 - %10 = mul i32 %9, %3 - %11 = shl i32 %10, 8 - %bound0 = icmp ugt float* %scevgep24, %2 - %bound1 = icmp ugt float* %scevgep19, %1 - %found.conflict = and i1 %bound0, %bound1 - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.us: ; preds = %if.end.i.us, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %37, %if.end.i.us ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %12 = trunc i64 %_local_id_x.0.us to i32 - %13 = mul i32 %12, %3 - %14 = add i32 %13, %11 - %15 = sext i32 %14 to i64 - %scevgep27 = getelementptr float, float* %2, i64 %15 - %scevgep28 = getelementptr float, float* %1, i64 %15 - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %3 - br i1 %cmp.i.us, label %for.body.lver.check.i.us, label %if.end.i.us - -for.body.lver.check.i.us: ; preds = %pregion_for_entry.entry.i.us - %mul.i.us = mul nsw i32 %conv.i.us, %3 - %16 = sext i32 %mul.i.us to i64 - %scevgep.i.us = getelementptr float, float* %2, i64 %16 - %17 = add nsw i64 %16, %wide.trip.count.i - %scevgep94.i.us = getelementptr float, float* %2, i64 %17 - %scevgep96.i.us = getelementptr float, float* %1, i64 %16 - %scevgep98.i.us = getelementptr float, float* %1, i64 %17 - %bound0.i.us = icmp ult float* %scevgep.i.us, %scevgep98.i.us - %bound1.i.us = icmp ult float* %scevgep96.i.us, %scevgep94.i.us - %found.conflict.i.us = and i1 %bound0.i.us, %bound1.i.us - br i1 %found.conflict.i.us, label %for.body.lver.orig.lver.orig.i.us.lver.check, label %for.body.ph.i.us - -for.body.lver.orig.lver.orig.i.us.lver.check: ; preds = %for.body.lver.check.i.us - br i1 %found.conflict, label %for.body.lver.orig.lver.orig.i.us.lver.orig.preheader, label %for.body.lver.orig.lver.orig.i.us.ph - -for.body.lver.orig.lver.orig.i.us.lver.orig.preheader: ; preds = %for.body.lver.orig.lver.orig.i.us.lver.check - br label %for.body.lver.orig.lver.orig.i.us.lver.orig - -for.body.lver.orig.lver.orig.i.us.lver.orig: ; preds = %for.body.lver.orig.lver.orig.i.us.lver.orig, %for.body.lver.orig.lver.orig.i.us.lver.orig.preheader - %indvars.iv.next.lver.orig.lver.orig.i12.us.lver.orig = phi i64 [ %indvars.iv.next.lver.orig.lver.orig.i.us.lver.orig, %for.body.lver.orig.lver.orig.i.us.lver.orig ], [ 1, %for.body.lver.orig.lver.orig.i.us.lver.orig.preheader ] - %18 = add nsw i64 %indvars.iv.next.lver.orig.lver.orig.i12.us.lver.orig, %16 - %arrayidx.lver.orig.lver.orig.i.us.lver.orig = getelementptr inbounds float, float* %2, i64 %18 - %19 = load float, float* %arrayidx.lver.orig.lver.orig.i.us.lver.orig, align 4, !tbaa !12 - %20 = add nsw i64 %18, -1 - %arrayidx7.lver.orig.lver.orig.i.us.lver.orig = getelementptr inbounds float, float* %2, i64 %20 - %21 = load float, float* %arrayidx7.lver.orig.lver.orig.i.us.lver.orig, align 4, !tbaa !12 - %arrayidx11.lver.orig.lver.orig.i.us.lver.orig = getelementptr inbounds float, float* %0, i64 %18 - %22 = load float, float* %arrayidx11.lver.orig.lver.orig.i.us.lver.orig, align 4, !tbaa !12 - %mul12.lver.orig.lver.orig.i.us.lver.orig = fmul float %21, %22 - %arrayidx17.lver.orig.lver.orig.i.us.lver.orig = getelementptr inbounds float, float* %1, i64 %20 - %23 = load float, float* %arrayidx17.lver.orig.lver.orig.i.us.lver.orig, align 4, !tbaa !12 - %div.lver.orig.lver.orig.i.us.lver.orig = fdiv float %mul12.lver.orig.lver.orig.i.us.lver.orig, %23, !fpmath !16 - %sub18.lver.orig.lver.orig.i.us.lver.orig = fsub float %19, %div.lver.orig.lver.orig.i.us.lver.orig - store float %sub18.lver.orig.lver.orig.i.us.lver.orig, float* %arrayidx.lver.orig.lver.orig.i.us.lver.orig, align 4, !tbaa !12, !llvm.access.group !17 - %arrayidx26.lver.orig.lver.orig.i.us.lver.orig = getelementptr inbounds float, float* %1, i64 %18 - %24 = load float, float* %arrayidx26.lver.orig.lver.orig.i.us.lver.orig, align 4, !tbaa !12 - %25 = load float, float* %arrayidx11.lver.orig.lver.orig.i.us.lver.orig, align 4, !tbaa !12 - %mul35.lver.orig.lver.orig.i.us.lver.orig = fmul float %25, %25 - %26 = load float, float* %arrayidx17.lver.orig.lver.orig.i.us.lver.orig, align 4, !tbaa !12 - %div41.lver.orig.lver.orig.i.us.lver.orig = fdiv float %mul35.lver.orig.lver.orig.i.us.lver.orig, %26, !fpmath !16 - %sub42.lver.orig.lver.orig.i.us.lver.orig = fsub float %24, %div41.lver.orig.lver.orig.i.us.lver.orig - store float %sub42.lver.orig.lver.orig.i.us.lver.orig, float* %arrayidx26.lver.orig.lver.orig.i.us.lver.orig, align 4, !tbaa !12, !llvm.access.group !17 - %indvars.iv.next.lver.orig.lver.orig.i.us.lver.orig = add nuw nsw i64 %indvars.iv.next.lver.orig.lver.orig.i12.us.lver.orig, 1 - %exitcond.not.lver.orig.lver.orig.i.us.lver.orig = icmp eq i64 %indvars.iv.next.lver.orig.lver.orig.i.us.lver.orig, %wide.trip.count.i - br i1 %exitcond.not.lver.orig.lver.orig.i.us.lver.orig, label %if.end.i.us.loopexit, label %for.body.lver.orig.lver.orig.i.us.lver.orig, !llvm.loop !19 - -for.body.lver.orig.lver.orig.i.us.ph: ; preds = %for.body.lver.orig.lver.orig.i.us.lver.check - %load_initial = load float, float* %scevgep27, align 4 - %load_initial29 = load float, float* %scevgep28, align 4 - br label %for.body.lver.orig.lver.orig.i.us - -for.body.ph.i.us: ; preds = %for.body.lver.check.i.us - %load_initial.i1.us13 = load float, float* %scevgep.i.us, align 4 - %load_initial102.i2.us14 = load float, float* %scevgep96.i.us, align 4 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %for.body.ph.i.us - %indvars.iv.next.i10.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 1, %for.body.ph.i.us ] - %sub18.i8.us = phi float [ %sub18.i.us, %for.body.i.us ], [ %load_initial.i1.us13, %for.body.ph.i.us ] - %sub42.i6.us = phi float [ %sub42.i.us, %for.body.i.us ], [ %load_initial102.i2.us14, %for.body.ph.i.us ] - %27 = add nsw i64 %indvars.iv.next.i10.us, %16 - %arrayidx.i.us = getelementptr inbounds float, float* %2, i64 %27 - %28 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %arrayidx11.i.us = getelementptr inbounds float, float* %0, i64 %27 - %29 = load float, float* %arrayidx11.i.us, align 4, !tbaa !12 - %mul12.i.us = fmul float %sub18.i8.us, %29 - %div.i.us = fdiv float %mul12.i.us, %sub42.i6.us, !fpmath !16 - %sub18.i.us = fsub float %28, %div.i.us - store float %sub18.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !17 - %arrayidx26.i.us = getelementptr inbounds float, float* %1, i64 %27 - %30 = load float, float* %arrayidx26.i.us, align 4, !tbaa !12 - %31 = load float, float* %arrayidx11.i.us, align 4, !tbaa !12 - %mul35.i.us = fmul float %31, %31 - %div41.i.us = fdiv float %mul35.i.us, %sub42.i6.us, !fpmath !16 - %sub42.i.us = fsub float %30, %div41.i.us - store float %sub42.i.us, float* %arrayidx26.i.us, align 4, !tbaa !12, !llvm.access.group !17 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i10.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %if.end.i.us.loopexit36, label %for.body.i.us, !llvm.loop !19 - -for.body.lver.orig.lver.orig.i.us: ; preds = %for.body.lver.orig.lver.orig.i.us, %for.body.lver.orig.lver.orig.i.us.ph - %store_forwarded32 = phi float [ %load_initial29, %for.body.lver.orig.lver.orig.i.us.ph ], [ %sub42.lver.orig.lver.orig.i.us, %for.body.lver.orig.lver.orig.i.us ] - %store_forwarded = phi float [ %load_initial, %for.body.lver.orig.lver.orig.i.us.ph ], [ %sub18.lver.orig.lver.orig.i.us, %for.body.lver.orig.lver.orig.i.us ] - %indvars.iv.next.lver.orig.lver.orig.i12.us = phi i64 [ 1, %for.body.lver.orig.lver.orig.i.us.ph ], [ %indvars.iv.next.lver.orig.lver.orig.i.us, %for.body.lver.orig.lver.orig.i.us ] - %32 = add nsw i64 %indvars.iv.next.lver.orig.lver.orig.i12.us, %16 - %arrayidx.lver.orig.lver.orig.i.us = getelementptr inbounds float, float* %2, i64 %32 - %33 = load float, float* %arrayidx.lver.orig.lver.orig.i.us, align 4, !tbaa !12 - %arrayidx11.lver.orig.lver.orig.i.us = getelementptr inbounds float, float* %0, i64 %32 - %34 = load float, float* %arrayidx11.lver.orig.lver.orig.i.us, align 4, !tbaa !12 - %mul12.lver.orig.lver.orig.i.us = fmul float %store_forwarded, %34 - %div.lver.orig.lver.orig.i.us = fdiv float %mul12.lver.orig.lver.orig.i.us, %store_forwarded32, !fpmath !16 - %sub18.lver.orig.lver.orig.i.us = fsub float %33, %div.lver.orig.lver.orig.i.us - store float %sub18.lver.orig.lver.orig.i.us, float* %arrayidx.lver.orig.lver.orig.i.us, align 4, !tbaa !12, !llvm.access.group !17 - %arrayidx26.lver.orig.lver.orig.i.us = getelementptr inbounds float, float* %1, i64 %32 - %35 = load float, float* %arrayidx26.lver.orig.lver.orig.i.us, align 4, !tbaa !12 - %36 = load float, float* %arrayidx11.lver.orig.lver.orig.i.us, align 4, !tbaa !12 - %mul35.lver.orig.lver.orig.i.us = fmul float %36, %36 - %div41.lver.orig.lver.orig.i.us = fdiv float %mul35.lver.orig.lver.orig.i.us, %store_forwarded32, !fpmath !16 - %sub42.lver.orig.lver.orig.i.us = fsub float %35, %div41.lver.orig.lver.orig.i.us - store float %sub42.lver.orig.lver.orig.i.us, float* %arrayidx26.lver.orig.lver.orig.i.us, align 4, !tbaa !12, !llvm.access.group !17 - %indvars.iv.next.lver.orig.lver.orig.i.us = add nuw nsw i64 %indvars.iv.next.lver.orig.lver.orig.i12.us, 1 - %exitcond.not.lver.orig.lver.orig.i.us = icmp eq i64 %indvars.iv.next.lver.orig.lver.orig.i.us, %wide.trip.count.i - br i1 %exitcond.not.lver.orig.lver.orig.i.us, label %if.end.i.us.loopexit35, label %for.body.lver.orig.lver.orig.i.us, !llvm.loop !19 - -if.end.i.us.loopexit: ; preds = %for.body.lver.orig.lver.orig.i.us.lver.orig - br label %if.end.i.us - -if.end.i.us.loopexit35: ; preds = %for.body.lver.orig.lver.orig.i.us - br label %if.end.i.us - -if.end.i.us.loopexit36: ; preds = %for.body.i.us - br label %if.end.i.us - -if.end.i.us: ; preds = %if.end.i.us.loopexit36, %if.end.i.us.loopexit35, %if.end.i.us.loopexit, %pregion_for_entry.entry.i.us - %37 = add nuw nsw i64 %_local_id_x.0.us, 1 - %exitcond.not = icmp eq i64 %37, 256 - br i1 %exitcond.not, label %adi_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !21 - -adi_kernel1.exit.loopexit: ; preds = %if.end.i.us - br label %adi_kernel1.exit - -adi_kernel1.exit: ; preds = %adi_kernel1.exit.loopexit, %8 - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_adi_kernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp288.i.i = icmp sgt i32 %20, 1 - %wide.trip.count.i.i = zext i32 %20 to i64 - br i1 %cmp288.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %_pocl_kernel_adi_kernel1.exit - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - %scevgep19 = getelementptr float, float* %16, i64 %wide.trip.count.i.i - %scevgep24 = getelementptr float, float* %12, i64 %wide.trip.count.i.i - %21 = trunc i64 %2 to i32 - %22 = mul i32 %20, %21 - %23 = shl i32 %22, 8 - %bound0 = icmp ult float* %16, %scevgep24 - %bound1 = icmp ult float* %12, %scevgep19 - %found.conflict = and i1 %bound0, %bound1 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %49, %if.end.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %24 = trunc i64 %_local_id_x.i.0.us to i32 - %25 = mul i32 %20, %24 - %26 = add i32 %25, %23 - %27 = sext i32 %26 to i64 - %scevgep27 = getelementptr float, float* %16, i64 %27 - %scevgep28 = getelementptr float, float* %12, i64 %27 - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %20, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lver.check.i.i.us, label %if.end.i.i.us - -for.body.lver.check.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %20, %conv.i.i.us - %28 = sext i32 %mul.i.i.us to i64 - %scevgep.i.i.us = getelementptr float, float* %16, i64 %28 - %29 = add nsw i64 %28, %wide.trip.count.i.i - %scevgep94.i.i.us = getelementptr float, float* %16, i64 %29 - %scevgep96.i.i.us = getelementptr float, float* %12, i64 %28 - %scevgep98.i.i.us = getelementptr float, float* %12, i64 %29 - %bound0.i.i.us = icmp ult float* %scevgep.i.i.us, %scevgep98.i.i.us - %bound1.i.i.us = icmp ult float* %scevgep96.i.i.us, %scevgep94.i.i.us - %found.conflict.i.i.us = and i1 %bound0.i.i.us, %bound1.i.i.us - br i1 %found.conflict.i.i.us, label %for.body.lver.orig.lver.orig.i.i.us.lver.check, label %for.body.ph.i.i.us - -for.body.lver.orig.lver.orig.i.i.us.lver.check: ; preds = %for.body.lver.check.i.i.us - br i1 %found.conflict, label %for.body.lver.orig.lver.orig.i.i.us.lver.orig.preheader, label %for.body.lver.orig.lver.orig.i.i.us.ph - -for.body.lver.orig.lver.orig.i.i.us.lver.orig.preheader: ; preds = %for.body.lver.orig.lver.orig.i.i.us.lver.check - br label %for.body.lver.orig.lver.orig.i.i.us.lver.orig - -for.body.lver.orig.lver.orig.i.i.us.lver.orig: ; preds = %for.body.lver.orig.lver.orig.i.i.us.lver.orig, %for.body.lver.orig.lver.orig.i.i.us.lver.orig.preheader - %indvars.iv.next.lver.orig.lver.orig.i.i12.us.lver.orig = phi i64 [ %indvars.iv.next.lver.orig.lver.orig.i.i.us.lver.orig, %for.body.lver.orig.lver.orig.i.i.us.lver.orig ], [ 1, %for.body.lver.orig.lver.orig.i.i.us.lver.orig.preheader ] - %30 = add nsw i64 %indvars.iv.next.lver.orig.lver.orig.i.i12.us.lver.orig, %28 - %arrayidx.lver.orig.lver.orig.i.i.us.lver.orig = getelementptr inbounds float, float* %16, i64 %30 - %31 = load float, float* %arrayidx.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %32 = add nsw i64 %30, -1 - %arrayidx7.lver.orig.lver.orig.i.i.us.lver.orig = getelementptr inbounds float, float* %16, i64 %32 - %33 = load float, float* %arrayidx7.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %arrayidx11.lver.orig.lver.orig.i.i.us.lver.orig = getelementptr inbounds float, float* %8, i64 %30 - %34 = load float, float* %arrayidx11.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %mul12.lver.orig.lver.orig.i.i.us.lver.orig = fmul float %33, %34 - %arrayidx17.lver.orig.lver.orig.i.i.us.lver.orig = getelementptr inbounds float, float* %12, i64 %32 - %35 = load float, float* %arrayidx17.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %div.lver.orig.lver.orig.i.i.us.lver.orig = fdiv float %mul12.lver.orig.lver.orig.i.i.us.lver.orig, %35, !fpmath !16 - %sub18.lver.orig.lver.orig.i.i.us.lver.orig = fsub float %31, %div.lver.orig.lver.orig.i.i.us.lver.orig - store float %sub18.lver.orig.lver.orig.i.i.us.lver.orig, float* %arrayidx.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12, !llvm.access.group !17 - %arrayidx26.lver.orig.lver.orig.i.i.us.lver.orig = getelementptr inbounds float, float* %12, i64 %30 - %36 = load float, float* %arrayidx26.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %37 = load float, float* %arrayidx11.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %mul35.lver.orig.lver.orig.i.i.us.lver.orig = fmul float %37, %37 - %38 = load float, float* %arrayidx17.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %div41.lver.orig.lver.orig.i.i.us.lver.orig = fdiv float %mul35.lver.orig.lver.orig.i.i.us.lver.orig, %38, !fpmath !16 - %sub42.lver.orig.lver.orig.i.i.us.lver.orig = fsub float %36, %div41.lver.orig.lver.orig.i.i.us.lver.orig - store float %sub42.lver.orig.lver.orig.i.i.us.lver.orig, float* %arrayidx26.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12, !llvm.access.group !17 - %indvars.iv.next.lver.orig.lver.orig.i.i.us.lver.orig = add nuw nsw i64 %indvars.iv.next.lver.orig.lver.orig.i.i12.us.lver.orig, 1 - %exitcond.not.lver.orig.lver.orig.i.i.us.lver.orig = icmp eq i64 %indvars.iv.next.lver.orig.lver.orig.i.i.us.lver.orig, %wide.trip.count.i.i - br i1 %exitcond.not.lver.orig.lver.orig.i.i.us.lver.orig, label %if.end.i.i.us.loopexit, label %for.body.lver.orig.lver.orig.i.i.us.lver.orig, !llvm.loop !19 - -for.body.lver.orig.lver.orig.i.i.us.ph: ; preds = %for.body.lver.orig.lver.orig.i.i.us.lver.check - %load_initial = load float, float* %scevgep27, align 4 - %load_initial29 = load float, float* %scevgep28, align 4 - br label %for.body.lver.orig.lver.orig.i.i.us - -for.body.ph.i.i.us: ; preds = %for.body.lver.check.i.i.us - %load_initial.i.i1.us13 = load float, float* %scevgep.i.i.us, align 4 - %load_initial102.i.i2.us14 = load float, float* %scevgep96.i.i.us, align 4 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.ph.i.i.us - %indvars.iv.next.i.i10.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 1, %for.body.ph.i.i.us ] - %sub18.i.i8.us = phi float [ %sub18.i.i.us, %for.body.i.i.us ], [ %load_initial.i.i1.us13, %for.body.ph.i.i.us ] - %sub42.i.i6.us = phi float [ %sub42.i.i.us, %for.body.i.i.us ], [ %load_initial102.i.i2.us14, %for.body.ph.i.i.us ] - %39 = add nsw i64 %indvars.iv.next.i.i10.us, %28 - %arrayidx.i.i.us = getelementptr inbounds float, float* %16, i64 %39 - %40 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %arrayidx11.i.i.us = getelementptr inbounds float, float* %8, i64 %39 - %41 = load float, float* %arrayidx11.i.i.us, align 4, !tbaa !12 - %mul12.i.i.us = fmul float %sub18.i.i8.us, %41 - %div.i.i.us = fdiv float %mul12.i.i.us, %sub42.i.i6.us, !fpmath !16 - %sub18.i.i.us = fsub float %40, %div.i.i.us - store float %sub18.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !17 - %arrayidx26.i.i.us = getelementptr inbounds float, float* %12, i64 %39 - %42 = load float, float* %arrayidx26.i.i.us, align 4, !tbaa !12 - %43 = load float, float* %arrayidx11.i.i.us, align 4, !tbaa !12 - %mul35.i.i.us = fmul float %43, %43 - %div41.i.i.us = fdiv float %mul35.i.i.us, %sub42.i.i6.us, !fpmath !16 - %sub42.i.i.us = fsub float %42, %div41.i.i.us - store float %sub42.i.i.us, float* %arrayidx26.i.i.us, align 4, !tbaa !12, !llvm.access.group !17 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i10.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.i.i.us.loopexit36, label %for.body.i.i.us, !llvm.loop !19 - -for.body.lver.orig.lver.orig.i.i.us: ; preds = %for.body.lver.orig.lver.orig.i.i.us, %for.body.lver.orig.lver.orig.i.i.us.ph - %store_forwarded32 = phi float [ %load_initial29, %for.body.lver.orig.lver.orig.i.i.us.ph ], [ %sub42.lver.orig.lver.orig.i.i.us, %for.body.lver.orig.lver.orig.i.i.us ] - %store_forwarded = phi float [ %load_initial, %for.body.lver.orig.lver.orig.i.i.us.ph ], [ %sub18.lver.orig.lver.orig.i.i.us, %for.body.lver.orig.lver.orig.i.i.us ] - %indvars.iv.next.lver.orig.lver.orig.i.i12.us = phi i64 [ 1, %for.body.lver.orig.lver.orig.i.i.us.ph ], [ %indvars.iv.next.lver.orig.lver.orig.i.i.us, %for.body.lver.orig.lver.orig.i.i.us ] - %44 = add nsw i64 %indvars.iv.next.lver.orig.lver.orig.i.i12.us, %28 - %arrayidx.lver.orig.lver.orig.i.i.us = getelementptr inbounds float, float* %16, i64 %44 - %45 = load float, float* %arrayidx.lver.orig.lver.orig.i.i.us, align 4, !tbaa !12 - %arrayidx11.lver.orig.lver.orig.i.i.us = getelementptr inbounds float, float* %8, i64 %44 - %46 = load float, float* %arrayidx11.lver.orig.lver.orig.i.i.us, align 4, !tbaa !12 - %mul12.lver.orig.lver.orig.i.i.us = fmul float %store_forwarded, %46 - %div.lver.orig.lver.orig.i.i.us = fdiv float %mul12.lver.orig.lver.orig.i.i.us, %store_forwarded32, !fpmath !16 - %sub18.lver.orig.lver.orig.i.i.us = fsub float %45, %div.lver.orig.lver.orig.i.i.us - store float %sub18.lver.orig.lver.orig.i.i.us, float* %arrayidx.lver.orig.lver.orig.i.i.us, align 4, !tbaa !12, !llvm.access.group !17 - %arrayidx26.lver.orig.lver.orig.i.i.us = getelementptr inbounds float, float* %12, i64 %44 - %47 = load float, float* %arrayidx26.lver.orig.lver.orig.i.i.us, align 4, !tbaa !12 - %48 = load float, float* %arrayidx11.lver.orig.lver.orig.i.i.us, align 4, !tbaa !12 - %mul35.lver.orig.lver.orig.i.i.us = fmul float %48, %48 - %div41.lver.orig.lver.orig.i.i.us = fdiv float %mul35.lver.orig.lver.orig.i.i.us, %store_forwarded32, !fpmath !16 - %sub42.lver.orig.lver.orig.i.i.us = fsub float %47, %div41.lver.orig.lver.orig.i.i.us - store float %sub42.lver.orig.lver.orig.i.i.us, float* %arrayidx26.lver.orig.lver.orig.i.i.us, align 4, !tbaa !12, !llvm.access.group !17 - %indvars.iv.next.lver.orig.lver.orig.i.i.us = add nuw nsw i64 %indvars.iv.next.lver.orig.lver.orig.i.i12.us, 1 - %exitcond.not.lver.orig.lver.orig.i.i.us = icmp eq i64 %indvars.iv.next.lver.orig.lver.orig.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.lver.orig.lver.orig.i.i.us, label %if.end.i.i.us.loopexit35, label %for.body.lver.orig.lver.orig.i.i.us, !llvm.loop !19 - -if.end.i.i.us.loopexit: ; preds = %for.body.lver.orig.lver.orig.i.i.us.lver.orig - br label %if.end.i.i.us - -if.end.i.i.us.loopexit35: ; preds = %for.body.lver.orig.lver.orig.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us.loopexit36: ; preds = %for.body.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.end.i.i.us.loopexit36, %if.end.i.i.us.loopexit35, %if.end.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %49 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %49, 256 - br i1 %exitcond.not, label %_pocl_kernel_adi_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -_pocl_kernel_adi_kernel1.exit.loopexit: ; preds = %if.end.i.i.us - br label %_pocl_kernel_adi_kernel1.exit - -_pocl_kernel_adi_kernel1.exit: ; preds = %_pocl_kernel_adi_kernel1.exit.loopexit, %5 - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_adi_kernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp288.i.i = icmp sgt i32 %17, 1 - %wide.trip.count.i.i = zext i32 %17 to i64 - br i1 %cmp288.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %_pocl_kernel_adi_kernel1.exit - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - %scevgep19 = getelementptr float, float* %13, i64 %wide.trip.count.i.i - %scevgep24 = getelementptr float, float* %10, i64 %wide.trip.count.i.i - %18 = trunc i64 %2 to i32 - %19 = mul i32 %17, %18 - %20 = shl i32 %19, 8 - %bound0 = icmp ult float* %13, %scevgep24 - %bound1 = icmp ult float* %10, %scevgep19 - %found.conflict = and i1 %bound0, %bound1 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %46, %if.end.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %21 = trunc i64 %_local_id_x.i.0.us to i32 - %22 = mul i32 %17, %21 - %23 = add i32 %22, %20 - %24 = sext i32 %23 to i64 - %scevgep27 = getelementptr float, float* %13, i64 %24 - %scevgep28 = getelementptr float, float* %10, i64 %24 - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %17, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lver.check.i.i.us, label %if.end.i.i.us - -for.body.lver.check.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %17, %conv.i.i.us - %25 = sext i32 %mul.i.i.us to i64 - %scevgep.i.i.us = getelementptr float, float* %13, i64 %25 - %26 = add nsw i64 %25, %wide.trip.count.i.i - %scevgep94.i.i.us = getelementptr float, float* %13, i64 %26 - %scevgep96.i.i.us = getelementptr float, float* %10, i64 %25 - %scevgep98.i.i.us = getelementptr float, float* %10, i64 %26 - %bound0.i.i.us = icmp ult float* %scevgep.i.i.us, %scevgep98.i.i.us - %bound1.i.i.us = icmp ult float* %scevgep96.i.i.us, %scevgep94.i.i.us - %found.conflict.i.i.us = and i1 %bound0.i.i.us, %bound1.i.i.us - br i1 %found.conflict.i.i.us, label %for.body.lver.orig.lver.orig.i.i.us.lver.check, label %for.body.ph.i.i.us - -for.body.lver.orig.lver.orig.i.i.us.lver.check: ; preds = %for.body.lver.check.i.i.us - br i1 %found.conflict, label %for.body.lver.orig.lver.orig.i.i.us.lver.orig.preheader, label %for.body.lver.orig.lver.orig.i.i.us.ph - -for.body.lver.orig.lver.orig.i.i.us.lver.orig.preheader: ; preds = %for.body.lver.orig.lver.orig.i.i.us.lver.check - br label %for.body.lver.orig.lver.orig.i.i.us.lver.orig - -for.body.lver.orig.lver.orig.i.i.us.lver.orig: ; preds = %for.body.lver.orig.lver.orig.i.i.us.lver.orig, %for.body.lver.orig.lver.orig.i.i.us.lver.orig.preheader - %indvars.iv.next.lver.orig.lver.orig.i.i12.us.lver.orig = phi i64 [ %indvars.iv.next.lver.orig.lver.orig.i.i.us.lver.orig, %for.body.lver.orig.lver.orig.i.i.us.lver.orig ], [ 1, %for.body.lver.orig.lver.orig.i.i.us.lver.orig.preheader ] - %27 = add nsw i64 %indvars.iv.next.lver.orig.lver.orig.i.i12.us.lver.orig, %25 - %arrayidx.lver.orig.lver.orig.i.i.us.lver.orig = getelementptr inbounds float, float* %13, i64 %27 - %28 = load float, float* %arrayidx.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %29 = add nsw i64 %27, -1 - %arrayidx7.lver.orig.lver.orig.i.i.us.lver.orig = getelementptr inbounds float, float* %13, i64 %29 - %30 = load float, float* %arrayidx7.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %arrayidx11.lver.orig.lver.orig.i.i.us.lver.orig = getelementptr inbounds float, float* %7, i64 %27 - %31 = load float, float* %arrayidx11.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %mul12.lver.orig.lver.orig.i.i.us.lver.orig = fmul float %30, %31 - %arrayidx17.lver.orig.lver.orig.i.i.us.lver.orig = getelementptr inbounds float, float* %10, i64 %29 - %32 = load float, float* %arrayidx17.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %div.lver.orig.lver.orig.i.i.us.lver.orig = fdiv float %mul12.lver.orig.lver.orig.i.i.us.lver.orig, %32, !fpmath !16 - %sub18.lver.orig.lver.orig.i.i.us.lver.orig = fsub float %28, %div.lver.orig.lver.orig.i.i.us.lver.orig - store float %sub18.lver.orig.lver.orig.i.i.us.lver.orig, float* %arrayidx.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12, !llvm.access.group !17 - %arrayidx26.lver.orig.lver.orig.i.i.us.lver.orig = getelementptr inbounds float, float* %10, i64 %27 - %33 = load float, float* %arrayidx26.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %34 = load float, float* %arrayidx11.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %mul35.lver.orig.lver.orig.i.i.us.lver.orig = fmul float %34, %34 - %35 = load float, float* %arrayidx17.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12 - %div41.lver.orig.lver.orig.i.i.us.lver.orig = fdiv float %mul35.lver.orig.lver.orig.i.i.us.lver.orig, %35, !fpmath !16 - %sub42.lver.orig.lver.orig.i.i.us.lver.orig = fsub float %33, %div41.lver.orig.lver.orig.i.i.us.lver.orig - store float %sub42.lver.orig.lver.orig.i.i.us.lver.orig, float* %arrayidx26.lver.orig.lver.orig.i.i.us.lver.orig, align 4, !tbaa !12, !llvm.access.group !17 - %indvars.iv.next.lver.orig.lver.orig.i.i.us.lver.orig = add nuw nsw i64 %indvars.iv.next.lver.orig.lver.orig.i.i12.us.lver.orig, 1 - %exitcond.not.lver.orig.lver.orig.i.i.us.lver.orig = icmp eq i64 %indvars.iv.next.lver.orig.lver.orig.i.i.us.lver.orig, %wide.trip.count.i.i - br i1 %exitcond.not.lver.orig.lver.orig.i.i.us.lver.orig, label %if.end.i.i.us.loopexit, label %for.body.lver.orig.lver.orig.i.i.us.lver.orig, !llvm.loop !19 - -for.body.lver.orig.lver.orig.i.i.us.ph: ; preds = %for.body.lver.orig.lver.orig.i.i.us.lver.check - %load_initial = load float, float* %scevgep27, align 4 - %load_initial29 = load float, float* %scevgep28, align 4 - br label %for.body.lver.orig.lver.orig.i.i.us - -for.body.ph.i.i.us: ; preds = %for.body.lver.check.i.i.us - %load_initial.i.i1.us13 = load float, float* %scevgep.i.i.us, align 4 - %load_initial102.i.i2.us14 = load float, float* %scevgep96.i.i.us, align 4 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.ph.i.i.us - %indvars.iv.next.i.i10.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 1, %for.body.ph.i.i.us ] - %sub18.i.i8.us = phi float [ %sub18.i.i.us, %for.body.i.i.us ], [ %load_initial.i.i1.us13, %for.body.ph.i.i.us ] - %sub42.i.i6.us = phi float [ %sub42.i.i.us, %for.body.i.i.us ], [ %load_initial102.i.i2.us14, %for.body.ph.i.i.us ] - %36 = add nsw i64 %indvars.iv.next.i.i10.us, %25 - %arrayidx.i.i.us = getelementptr inbounds float, float* %13, i64 %36 - %37 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %arrayidx11.i.i.us = getelementptr inbounds float, float* %7, i64 %36 - %38 = load float, float* %arrayidx11.i.i.us, align 4, !tbaa !12 - %mul12.i.i.us = fmul float %sub18.i.i8.us, %38 - %div.i.i.us = fdiv float %mul12.i.i.us, %sub42.i.i6.us, !fpmath !16 - %sub18.i.i.us = fsub float %37, %div.i.i.us - store float %sub18.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !17 - %arrayidx26.i.i.us = getelementptr inbounds float, float* %10, i64 %36 - %39 = load float, float* %arrayidx26.i.i.us, align 4, !tbaa !12 - %40 = load float, float* %arrayidx11.i.i.us, align 4, !tbaa !12 - %mul35.i.i.us = fmul float %40, %40 - %div41.i.i.us = fdiv float %mul35.i.i.us, %sub42.i.i6.us, !fpmath !16 - %sub42.i.i.us = fsub float %39, %div41.i.i.us - store float %sub42.i.i.us, float* %arrayidx26.i.i.us, align 4, !tbaa !12, !llvm.access.group !17 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i10.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.i.i.us.loopexit36, label %for.body.i.i.us, !llvm.loop !19 - -for.body.lver.orig.lver.orig.i.i.us: ; preds = %for.body.lver.orig.lver.orig.i.i.us, %for.body.lver.orig.lver.orig.i.i.us.ph - %store_forwarded32 = phi float [ %load_initial29, %for.body.lver.orig.lver.orig.i.i.us.ph ], [ %sub42.lver.orig.lver.orig.i.i.us, %for.body.lver.orig.lver.orig.i.i.us ] - %store_forwarded = phi float [ %load_initial, %for.body.lver.orig.lver.orig.i.i.us.ph ], [ %sub18.lver.orig.lver.orig.i.i.us, %for.body.lver.orig.lver.orig.i.i.us ] - %indvars.iv.next.lver.orig.lver.orig.i.i12.us = phi i64 [ 1, %for.body.lver.orig.lver.orig.i.i.us.ph ], [ %indvars.iv.next.lver.orig.lver.orig.i.i.us, %for.body.lver.orig.lver.orig.i.i.us ] - %41 = add nsw i64 %indvars.iv.next.lver.orig.lver.orig.i.i12.us, %25 - %arrayidx.lver.orig.lver.orig.i.i.us = getelementptr inbounds float, float* %13, i64 %41 - %42 = load float, float* %arrayidx.lver.orig.lver.orig.i.i.us, align 4, !tbaa !12 - %arrayidx11.lver.orig.lver.orig.i.i.us = getelementptr inbounds float, float* %7, i64 %41 - %43 = load float, float* %arrayidx11.lver.orig.lver.orig.i.i.us, align 4, !tbaa !12 - %mul12.lver.orig.lver.orig.i.i.us = fmul float %store_forwarded, %43 - %div.lver.orig.lver.orig.i.i.us = fdiv float %mul12.lver.orig.lver.orig.i.i.us, %store_forwarded32, !fpmath !16 - %sub18.lver.orig.lver.orig.i.i.us = fsub float %42, %div.lver.orig.lver.orig.i.i.us - store float %sub18.lver.orig.lver.orig.i.i.us, float* %arrayidx.lver.orig.lver.orig.i.i.us, align 4, !tbaa !12, !llvm.access.group !17 - %arrayidx26.lver.orig.lver.orig.i.i.us = getelementptr inbounds float, float* %10, i64 %41 - %44 = load float, float* %arrayidx26.lver.orig.lver.orig.i.i.us, align 4, !tbaa !12 - %45 = load float, float* %arrayidx11.lver.orig.lver.orig.i.i.us, align 4, !tbaa !12 - %mul35.lver.orig.lver.orig.i.i.us = fmul float %45, %45 - %div41.lver.orig.lver.orig.i.i.us = fdiv float %mul35.lver.orig.lver.orig.i.i.us, %store_forwarded32, !fpmath !16 - %sub42.lver.orig.lver.orig.i.i.us = fsub float %44, %div41.lver.orig.lver.orig.i.i.us - store float %sub42.lver.orig.lver.orig.i.i.us, float* %arrayidx26.lver.orig.lver.orig.i.i.us, align 4, !tbaa !12, !llvm.access.group !17 - %indvars.iv.next.lver.orig.lver.orig.i.i.us = add nuw nsw i64 %indvars.iv.next.lver.orig.lver.orig.i.i12.us, 1 - %exitcond.not.lver.orig.lver.orig.i.i.us = icmp eq i64 %indvars.iv.next.lver.orig.lver.orig.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.lver.orig.lver.orig.i.i.us, label %if.end.i.i.us.loopexit35, label %for.body.lver.orig.lver.orig.i.i.us, !llvm.loop !19 - -if.end.i.i.us.loopexit: ; preds = %for.body.lver.orig.lver.orig.i.i.us.lver.orig - br label %if.end.i.i.us - -if.end.i.i.us.loopexit35: ; preds = %for.body.lver.orig.lver.orig.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us.loopexit36: ; preds = %for.body.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.end.i.i.us.loopexit36, %if.end.i.i.us.loopexit35, %if.end.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %46 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %46, 256 - br i1 %exitcond.not, label %_pocl_kernel_adi_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -_pocl_kernel_adi_kernel1.exit.loopexit: ; preds = %if.end.i.i.us - br label %_pocl_kernel_adi_kernel1.exit - -_pocl_kernel_adi_kernel1.exit: ; preds = %_pocl_kernel_adi_kernel1.exit.loopexit, %5 - ret void -} - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int"} -!9 = !{!"", !"", !"", !""} -!10 = !{!"A", !"B", !"X", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{float 2.500000e+00} -!17 = !{!18} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.unroll.disable"} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !18} diff --git a/pocl_irs/adi_kernel2.ll b/pocl_irs/adi_kernel2.ll deleted file mode 100644 index 229288c..0000000 --- a/pocl_irs/adi_kernel2.ll +++ /dev/null @@ -1,457 +0,0 @@ -; ModuleID = './BF/DFFLECFOLOBPKCKMNEPCKIANKJKKLLHBOGBCO/adi_kernel2/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_adi_kernel2(float* nocapture readnone %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %4, i64 %5, i64 %6, i64 %7) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { -vector.scevcheck: - %mul.i.i = shl i64 %5, 8 - %sub.i = add nsw i32 %3, -1 - %ident.check.not = icmp eq i32 %3, 1 - br i1 %ident.check.not, label %vector.memcheck, label %pregion_for_entry.entry.i.preheader - -vector.memcheck: ; preds = %vector.scevcheck - %8 = trunc i64 %5 to i32 - %9 = shl i32 %8, 8 - %10 = sext i32 %9 to i64 - %scevgep = getelementptr float, float* %2, i64 %10 - %11 = add nsw i64 %10, 256 - %scevgep2 = getelementptr float, float* %2, i64 %11 - %scevgep4 = getelementptr float, float* %1, i64 %10 - %scevgep6 = getelementptr float, float* %1, i64 %11 - %bound0 = icmp ult float* %scevgep, %scevgep6 - %bound1 = icmp ult float* %scevgep4, %scevgep2 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.preheader, label %vector.ph - -pregion_for_entry.entry.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %12 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %13 = trunc <8 x i64> %12 to <8 x i32> - %14 = icmp slt <8 x i32> %13, - %15 = extractelement <8 x i32> %13, i32 0 - %16 = mul nsw i32 %15, %3 - %17 = add nsw i32 %sub.i, %16 - %18 = sext i32 %17 to i64 - %19 = getelementptr inbounds float, float* %2, i64 %18 - %20 = bitcast float* %19 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %20, i32 4, <8 x i1> %14, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %21 = getelementptr inbounds float, float* %1, i64 %18 - %22 = bitcast float* %21 to <8 x float>* - %wide.masked.load8 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %22, i32 4, <8 x i1> %14, <8 x float> undef), !tbaa !12, !alias.scope !19 - %23 = fdiv <8 x float> %wide.masked.load, %wide.masked.load8, !fpmath !21 - %24 = bitcast float* %19 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %23, <8 x float>* %24, i32 4, <8 x i1> %14), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !22 - %vec.ind.next = add <8 x i64> %vec.ind, - %25 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %26 = trunc <8 x i64> %25 to <8 x i32> - %27 = icmp slt <8 x i32> %26, - %28 = extractelement <8 x i32> %26, i32 0 - %29 = mul nsw i32 %28, %3 - %30 = add nsw i32 %sub.i, %29 - %31 = sext i32 %30 to i64 - %32 = getelementptr inbounds float, float* %2, i64 %31 - %33 = bitcast float* %32 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %33, i32 4, <8 x i1> %27, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %34 = getelementptr inbounds float, float* %1, i64 %31 - %35 = bitcast float* %34 to <8 x float>* - %wide.masked.load8.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %35, i32 4, <8 x i1> %27, <8 x float> undef), !tbaa !12, !alias.scope !19 - %36 = fdiv <8 x float> %wide.masked.load.1, %wide.masked.load8.1, !fpmath !21 - %37 = bitcast float* %32 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %36, <8 x float>* %37, i32 4, <8 x i1> %27), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !22 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %38 = icmp eq i64 %index.next.1, 256 - br i1 %38, label %adi_kernel2.exit.loopexit10, label %vector.body, !llvm.loop !24 - -pregion_for_entry.entry.i: ; preds = %if.end.r_exit.i.1, %pregion_for_entry.entry.i.preheader - %_local_id_x.0 = phi i64 [ 0, %pregion_for_entry.entry.i.preheader ], [ %44, %if.end.r_exit.i.1 ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %conv.i = trunc i64 %add1.i.i to i32 - %cmp.i = icmp slt i32 %conv.i, %3 - br i1 %cmp.i, label %if.then.i, label %if.end.r_exit.i - -if.then.i: ; preds = %pregion_for_entry.entry.i - %mul.i = mul nsw i32 %conv.i, %3 - %add.i = add nsw i32 %sub.i, %mul.i - %idxprom.i = sext i32 %add.i to i64 - %arrayidx.i = getelementptr inbounds float, float* %2, i64 %idxprom.i - %39 = load float, float* %arrayidx.i, align 4, !tbaa !12 - %arrayidx6.i = getelementptr inbounds float, float* %1, i64 %idxprom.i - %40 = load float, float* %arrayidx6.i, align 4, !tbaa !12 - %div.i = fdiv float %39, %40, !fpmath !21 - store float %div.i, float* %arrayidx.i, align 4, !tbaa !12, !llvm.access.group !22 - br label %if.end.r_exit.i - -if.end.r_exit.i: ; preds = %if.then.i, %pregion_for_entry.entry.i - %41 = or i64 %_local_id_x.0, 1 - %add1.i.i.1 = add nuw nsw i64 %41, %mul.i.i - %conv.i.1 = trunc i64 %add1.i.i.1 to i32 - %cmp.i.1 = icmp slt i32 %conv.i.1, %3 - br i1 %cmp.i.1, label %if.then.i.1, label %if.end.r_exit.i.1 - -adi_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.1 - br label %adi_kernel2.exit - -adi_kernel2.exit.loopexit10: ; preds = %vector.body - br label %adi_kernel2.exit - -adi_kernel2.exit: ; preds = %adi_kernel2.exit.loopexit10, %adi_kernel2.exit.loopexit - ret void - -if.then.i.1: ; preds = %if.end.r_exit.i - %mul.i.1 = mul nsw i32 %conv.i.1, %3 - %add.i.1 = add nsw i32 %sub.i, %mul.i.1 - %idxprom.i.1 = sext i32 %add.i.1 to i64 - %arrayidx.i.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.1 - %42 = load float, float* %arrayidx.i.1, align 4, !tbaa !12 - %arrayidx6.i.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.1 - %43 = load float, float* %arrayidx6.i.1, align 4, !tbaa !12 - %div.i.1 = fdiv float %42, %43, !fpmath !21 - store float %div.i.1, float* %arrayidx.i.1, align 4, !tbaa !12, !llvm.access.group !22 - br label %if.end.r_exit.i.1 - -if.end.r_exit.i.1: ; preds = %if.then.i.1, %if.end.r_exit.i - %44 = add nuw nsw i64 %_local_id_x.0, 2 - %exitcond.not.1 = icmp eq i64 %44, 256 - br i1 %exitcond.not.1, label %adi_kernel2.exit.loopexit, label %pregion_for_entry.entry.i, !llvm.loop !27 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_adi_kernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -vector.scevcheck: - %5 = getelementptr i8*, i8** %0, i64 1 - %6 = bitcast i8** %5 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 2 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 3 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = add nsw i32 %16, -1 - %ident.check.not = icmp eq i32 %16, 1 - br i1 %ident.check.not, label %vector.memcheck, label %pregion_for_entry.entry.i.i.preheader - -vector.memcheck: ; preds = %vector.scevcheck - %17 = trunc i64 %2 to i32 - %18 = shl i32 %17, 8 - %19 = sext i32 %18 to i64 - %scevgep = getelementptr float, float* %12, i64 %19 - %20 = add nsw i64 %19, 256 - %scevgep2 = getelementptr float, float* %12, i64 %20 - %scevgep4 = getelementptr float, float* %8, i64 %19 - %scevgep6 = getelementptr float, float* %8, i64 %20 - %bound0 = icmp ult float* %scevgep, %scevgep6 - %bound1 = icmp ult float* %scevgep4, %scevgep2 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %21 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %22 = trunc <8 x i64> %21 to <8 x i32> - %23 = icmp slt <8 x i32> %22, - %24 = extractelement <8 x i32> %22, i32 0 - %25 = mul nsw i32 %16, %24 - %26 = add nsw i32 %sub.i.i, %25 - %27 = sext i32 %26 to i64 - %28 = getelementptr inbounds float, float* %12, i64 %27 - %29 = bitcast float* %28 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %29, i32 4, <8 x i1> %23, <8 x float> undef), !tbaa !12, !alias.scope !28, !noalias !31 - %30 = getelementptr inbounds float, float* %8, i64 %27 - %31 = bitcast float* %30 to <8 x float>* - %wide.masked.load8 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %31, i32 4, <8 x i1> %23, <8 x float> undef), !tbaa !12, !alias.scope !31 - %32 = fdiv <8 x float> %wide.masked.load, %wide.masked.load8, !fpmath !21 - %33 = bitcast float* %28 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %32, <8 x float>* %33, i32 4, <8 x i1> %23), !tbaa !12, !alias.scope !28, !noalias !31, !llvm.access.group !22 - %vec.ind.next = add <8 x i64> %vec.ind, - %34 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %35 = trunc <8 x i64> %34 to <8 x i32> - %36 = icmp slt <8 x i32> %35, - %37 = extractelement <8 x i32> %35, i32 0 - %38 = mul nsw i32 %16, %37 - %39 = add nsw i32 %sub.i.i, %38 - %40 = sext i32 %39 to i64 - %41 = getelementptr inbounds float, float* %12, i64 %40 - %42 = bitcast float* %41 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %42, i32 4, <8 x i1> %36, <8 x float> undef), !tbaa !12, !alias.scope !28, !noalias !31 - %43 = getelementptr inbounds float, float* %8, i64 %40 - %44 = bitcast float* %43 to <8 x float>* - %wide.masked.load8.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %44, i32 4, <8 x i1> %36, <8 x float> undef), !tbaa !12, !alias.scope !31 - %45 = fdiv <8 x float> %wide.masked.load.1, %wide.masked.load8.1, !fpmath !21 - %46 = bitcast float* %41 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %45, <8 x float>* %46, i32 4, <8 x i1> %36), !tbaa !12, !alias.scope !28, !noalias !31, !llvm.access.group !22 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %47 = icmp eq i64 %index.next.1, 256 - br i1 %47, label %_pocl_kernel_adi_kernel2.exit.loopexit10, label %vector.body, !llvm.loop !33 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ 0, %pregion_for_entry.entry.i.i.preheader ], [ %53, %if.end.r_exit.i.i.1 ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %16, %conv.i.i - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %mul.i.i = mul nsw i32 %16, %conv.i.i - %add.i.i = add nsw i32 %sub.i.i, %mul.i.i - %idxprom.i.i = sext i32 %add.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %12, i64 %idxprom.i.i - %48 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %arrayidx6.i.i = getelementptr inbounds float, float* %8, i64 %idxprom.i.i - %49 = load float, float* %arrayidx6.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %48, %49, !fpmath !21 - store float %div.i.i, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !22 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %50 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %50, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %16, %conv.i.i.1 - br i1 %cmp.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_adi_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.i.1 - br label %_pocl_kernel_adi_kernel2.exit - -_pocl_kernel_adi_kernel2.exit.loopexit10: ; preds = %vector.body - br label %_pocl_kernel_adi_kernel2.exit - -_pocl_kernel_adi_kernel2.exit: ; preds = %_pocl_kernel_adi_kernel2.exit.loopexit10, %_pocl_kernel_adi_kernel2.exit.loopexit - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %mul.i.i.1 = mul nsw i32 %16, %conv.i.i.1 - %add.i.i.1 = add nsw i32 %sub.i.i, %mul.i.i.1 - %idxprom.i.i.1 = sext i32 %add.i.i.1 to i64 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.1 - %51 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %arrayidx6.i.i.1 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.1 - %52 = load float, float* %arrayidx6.i.i.1, align 4, !tbaa !12 - %div.i.i.1 = fdiv float %51, %52, !fpmath !21 - store float %div.i.i.1, float* %arrayidx.i.i.1, align 4, !tbaa !12, !llvm.access.group !22 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %53 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond.not.1 = icmp eq i64 %53, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_adi_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i, !llvm.loop !34 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_adi_kernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -vector.scevcheck: - %5 = getelementptr i8*, i8** %0, i64 1 - %6 = bitcast i8** %5 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 2 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 3 - %12 = bitcast i8** %11 to i32** - %13 = load i32*, i32** %12, align 8 - %14 = load i32, i32* %13, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = add nsw i32 %14, -1 - %ident.check.not = icmp eq i32 %14, 1 - br i1 %ident.check.not, label %vector.memcheck, label %pregion_for_entry.entry.i.i.preheader - -vector.memcheck: ; preds = %vector.scevcheck - %15 = trunc i64 %2 to i32 - %16 = shl i32 %15, 8 - %17 = sext i32 %16 to i64 - %scevgep = getelementptr float, float* %10, i64 %17 - %18 = add nsw i64 %17, 256 - %scevgep2 = getelementptr float, float* %10, i64 %18 - %scevgep4 = getelementptr float, float* %7, i64 %17 - %scevgep6 = getelementptr float, float* %7, i64 %18 - %bound0 = icmp ult float* %scevgep, %scevgep6 - %bound1 = icmp ult float* %scevgep4, %scevgep2 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %19 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %20 = trunc <8 x i64> %19 to <8 x i32> - %21 = icmp slt <8 x i32> %20, - %22 = extractelement <8 x i32> %20, i32 0 - %23 = mul nsw i32 %14, %22 - %24 = add nsw i32 %sub.i.i, %23 - %25 = sext i32 %24 to i64 - %26 = getelementptr inbounds float, float* %10, i64 %25 - %27 = bitcast float* %26 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %27, i32 4, <8 x i1> %21, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !38 - %28 = getelementptr inbounds float, float* %7, i64 %25 - %29 = bitcast float* %28 to <8 x float>* - %wide.masked.load8 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %29, i32 4, <8 x i1> %21, <8 x float> undef), !tbaa !12, !alias.scope !38 - %30 = fdiv <8 x float> %wide.masked.load, %wide.masked.load8, !fpmath !21 - %31 = bitcast float* %26 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %30, <8 x float>* %31, i32 4, <8 x i1> %21), !tbaa !12, !alias.scope !35, !noalias !38, !llvm.access.group !22 - %vec.ind.next = add <8 x i64> %vec.ind, - %32 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %33 = trunc <8 x i64> %32 to <8 x i32> - %34 = icmp slt <8 x i32> %33, - %35 = extractelement <8 x i32> %33, i32 0 - %36 = mul nsw i32 %14, %35 - %37 = add nsw i32 %sub.i.i, %36 - %38 = sext i32 %37 to i64 - %39 = getelementptr inbounds float, float* %10, i64 %38 - %40 = bitcast float* %39 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %40, i32 4, <8 x i1> %34, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !38 - %41 = getelementptr inbounds float, float* %7, i64 %38 - %42 = bitcast float* %41 to <8 x float>* - %wide.masked.load8.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %42, i32 4, <8 x i1> %34, <8 x float> undef), !tbaa !12, !alias.scope !38 - %43 = fdiv <8 x float> %wide.masked.load.1, %wide.masked.load8.1, !fpmath !21 - %44 = bitcast float* %39 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %43, <8 x float>* %44, i32 4, <8 x i1> %34), !tbaa !12, !alias.scope !35, !noalias !38, !llvm.access.group !22 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %45 = icmp eq i64 %index.next.1, 256 - br i1 %45, label %_pocl_kernel_adi_kernel2.exit.loopexit10, label %vector.body, !llvm.loop !40 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ 0, %pregion_for_entry.entry.i.i.preheader ], [ %51, %if.end.r_exit.i.i.1 ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %14, %conv.i.i - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %mul.i.i = mul nsw i32 %14, %conv.i.i - %add.i.i = add nsw i32 %sub.i.i, %mul.i.i - %idxprom.i.i = sext i32 %add.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %10, i64 %idxprom.i.i - %46 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %arrayidx6.i.i = getelementptr inbounds float, float* %7, i64 %idxprom.i.i - %47 = load float, float* %arrayidx6.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %46, %47, !fpmath !21 - store float %div.i.i, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !22 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %48 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %48, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %14, %conv.i.i.1 - br i1 %cmp.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_adi_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.i.1 - br label %_pocl_kernel_adi_kernel2.exit - -_pocl_kernel_adi_kernel2.exit.loopexit10: ; preds = %vector.body - br label %_pocl_kernel_adi_kernel2.exit - -_pocl_kernel_adi_kernel2.exit: ; preds = %_pocl_kernel_adi_kernel2.exit.loopexit10, %_pocl_kernel_adi_kernel2.exit.loopexit - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %mul.i.i.1 = mul nsw i32 %14, %conv.i.i.1 - %add.i.i.1 = add nsw i32 %sub.i.i, %mul.i.i.1 - %idxprom.i.i.1 = sext i32 %add.i.i.1 to i64 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.1 - %49 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %arrayidx6.i.i.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.1 - %50 = load float, float* %arrayidx6.i.i.1, align 4, !tbaa !12 - %div.i.i.1 = fdiv float %49, %50, !fpmath !21 - store float %div.i.i.1, float* %arrayidx.i.i.1, align 4, !tbaa !12, !llvm.access.group !22 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %51 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond.not.1 = icmp eq i64 %51, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_adi_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i, !llvm.loop !41 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #2 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #3 - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nofree norecurse nounwind } -attributes #2 = { argmemonly nounwind readonly willreturn } -attributes #3 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int"} -!9 = !{!"", !"", !"", !""} -!10 = !{!"A", !"B", !"X", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20} -!20 = distinct !{!20, !18} -!21 = !{float 2.500000e+00} -!22 = !{!23} -!23 = distinct !{} -!24 = distinct !{!24, !25, !26} -!25 = !{!"llvm.loop.parallel_accesses", !23} -!26 = !{!"llvm.loop.isvectorized", i32 1} -!27 = distinct !{!27, !25, !26} -!28 = !{!29} -!29 = distinct !{!29, !30} -!30 = distinct !{!30, !"LVerDomain"} -!31 = !{!32} -!32 = distinct !{!32, !30} -!33 = distinct !{!33, !25, !26} -!34 = distinct !{!34, !25, !26} -!35 = !{!36} -!36 = distinct !{!36, !37} -!37 = distinct !{!37, !"LVerDomain"} -!38 = !{!39} -!39 = distinct !{!39, !37} -!40 = distinct !{!40, !25, !26} -!41 = distinct !{!41, !25, !26} diff --git a/pocl_irs/adi_kernel3.ll b/pocl_irs/adi_kernel3.ll deleted file mode 100644 index e4a0072..0000000 --- a/pocl_irs/adi_kernel3.ll +++ /dev/null @@ -1,723 +0,0 @@ -; ModuleID = './BF/DFFLECFOLOBPKCKMNEPCKIANKJKKLLHBOGBCO/adi_kernel3/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: nounwind readnone speculatable willreturn -declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #0 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_adi_kernel3(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %4, i64 %5, i64 %6, i64 %7) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %5, 8 - %sub.i = add i32 %3, -2 - %cmp258.i = icmp slt i32 %3, 3 - %9 = sext i32 %sub.i to i64 - %wide.trip.count.i = zext i32 %sub.i to i64 - %min.iters.check.i = icmp ult i32 %sub.i, 8 - %10 = add nsw i64 %wide.trip.count.i, -1 - %11 = trunc i64 %10 to i32 - %12 = icmp ugt i64 %10, 4294967295 - %mul6.i = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %10, i64 4) #2 - %mul.result7.i = extractvalue { i64, i1 } %mul6.i, 0 - %mul.overflow8.i = extractvalue { i64, i1 } %mul6.i, 1 - %n.vec.i = and i64 %wide.trip.count.i, 4294967288 - %13 = getelementptr inbounds float, float* %2, i64 -7 - %14 = getelementptr inbounds float, float* %0, i64 -7 - %15 = getelementptr inbounds float, float* %1, i64 -7 - %cmp.n.i = icmp eq i64 %n.vec.i, %wide.trip.count.i - br label %pregion_for_entry.entry.i - -pregion_for_entry.entry.i: ; preds = %if.end.i, %8 - %_local_id_x.0 = phi i64 [ 0, %8 ], [ %83, %if.end.i ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %conv.i = trunc i64 %add1.i.i to i32 - %cmp.i = icmp sge i32 %conv.i, %3 - %brmerge = or i1 %cmp.i, %cmp258.i - br i1 %brmerge, label %if.end.i, label %for.body.lr.ph.i - -for.body.lr.ph.i: ; preds = %pregion_for_entry.entry.i - %mul.i = mul nsw i32 %conv.i, %3 - %sub21.i = add i32 %mul.i, %3 - %sub22.i = add i32 %sub21.i, -3 - %16 = sext i32 %mul.i to i64 - %sub9.i = add i32 %mul.i, -1 - br i1 %min.iters.check.i, label %for.body.i.preheader, label %vector.scevcheck.i - -vector.scevcheck.i: ; preds = %for.body.lr.ph.i - %17 = add i32 %sub.i, %mul.i - %18 = sub i32 %17, %11 - %19 = icmp sgt i32 %18, %17 - %20 = sub i32 %sub22.i, %11 - %21 = icmp sgt i32 %20, %sub22.i - %22 = or i1 %12, %21 - %23 = or i1 %22, %19 - %24 = sext i32 %17 to i64 - %scevgep.i = getelementptr float, float* %2, i64 %24 - %scevgep5.i = ptrtoint float* %scevgep.i to i64 - %25 = icmp ugt i64 %mul.result7.i, %scevgep5.i - %26 = or i1 %mul.overflow8.i, %25 - %27 = or i1 %23, %26 - %28 = add nsw i64 %16, %9 - %scevgep9.i = getelementptr float, float* %2, i64 %28 - %scevgep910.i = ptrtoint float* %scevgep9.i to i64 - %29 = icmp ugt i64 %mul.result7.i, %scevgep910.i - %30 = or i1 %29, %27 - %31 = sext i32 %sub22.i to i64 - %scevgep14.i = getelementptr float, float* %2, i64 %31 - %scevgep1415.i = ptrtoint float* %scevgep14.i to i64 - %32 = icmp ugt i64 %mul.result7.i, %scevgep1415.i - %33 = or i1 %32, %30 - %scevgep19.i = getelementptr float, float* %0, i64 %31 - %scevgep1920.i = ptrtoint float* %scevgep19.i to i64 - %34 = icmp ugt i64 %mul.result7.i, %scevgep1920.i - %35 = or i1 %mul.overflow8.i, %34 - %36 = or i1 %35, %33 - %scevgep24.i = getelementptr float, float* %1, i64 %31 - %scevgep2425.i = ptrtoint float* %scevgep24.i to i64 - %37 = icmp ugt i64 %mul.result7.i, %scevgep2425.i - %38 = or i1 %37, %36 - br i1 %38, label %for.body.i.preheader, label %vector.memcheck.i - -vector.memcheck.i: ; preds = %vector.scevcheck.i - %39 = add nsw i64 %24, 1 - %40 = sub nsw i64 %39, %wide.trip.count.i - %scevgep29.i = getelementptr float, float* %2, i64 %40 - %scevgep31.i = getelementptr float, float* %2, i64 %39 - %41 = add nsw i64 %28, 1 - %42 = sub nsw i64 %41, %wide.trip.count.i - %scevgep33.i = getelementptr float, float* %2, i64 %42 - %scevgep35.i = getelementptr float, float* %2, i64 %41 - %43 = add nsw i64 %31, 1 - %44 = sub nsw i64 %43, %wide.trip.count.i - %scevgep37.i = getelementptr float, float* %2, i64 %44 - %scevgep39.i = getelementptr float, float* %2, i64 %43 - %scevgep41.i = getelementptr float, float* %0, i64 %44 - %scevgep43.i = getelementptr float, float* %0, i64 %43 - %scevgep45.i = getelementptr float, float* %1, i64 %44 - %scevgep47.i = getelementptr float, float* %1, i64 %43 - %bound0.i = icmp ult float* %scevgep29.i, %scevgep35.i - %bound1.i = icmp ult float* %scevgep33.i, %scevgep31.i - %found.conflict.i = and i1 %bound0.i, %bound1.i - %bound049.i = icmp ult float* %scevgep29.i, %scevgep39.i - %bound150.i = icmp ult float* %scevgep37.i, %scevgep31.i - %found.conflict51.i = and i1 %bound150.i, %bound049.i - %conflict.rdx.i = or i1 %found.conflict.i, %found.conflict51.i - %bound052.i = icmp ult float* %scevgep29.i, %scevgep43.i - %bound153.i = icmp ult float* %scevgep41.i, %scevgep31.i - %found.conflict54.i = and i1 %bound153.i, %bound052.i - %conflict.rdx55.i = or i1 %found.conflict54.i, %conflict.rdx.i - %bound056.i = icmp ult float* %scevgep29.i, %scevgep47.i - %bound157.i = icmp ult float* %scevgep45.i, %scevgep31.i - %found.conflict58.i = and i1 %bound157.i, %bound056.i - %conflict.rdx59.i = or i1 %found.conflict58.i, %conflict.rdx55.i - br i1 %conflict.rdx59.i, label %for.body.i.preheader, label %vector.body.i.preheader - -vector.body.i.preheader: ; preds = %vector.memcheck.i - br label %vector.body.i - -vector.body.i: ; preds = %vector.body.i, %vector.body.i.preheader - %index.next.i1 = phi i64 [ %index.next.i, %vector.body.i ], [ 0, %vector.body.i.preheader ] - %45 = sub nsw i64 %9, %index.next.i1 - %46 = add nsw i64 %45, %16 - %47 = getelementptr inbounds float, float* %13, i64 %46 - %48 = bitcast float* %47 to <8 x float>* - %wide.load.i = load <8 x float>, <8 x float>* %48, align 4, !tbaa !12, !alias.scope !16 - %reverse.i = shufflevector <8 x float> %wide.load.i, <8 x float> undef, <8 x i32> - %49 = trunc i64 %45 to i32 - %50 = add i32 %sub9.i, %49 - %51 = sext i32 %50 to i64 - %52 = getelementptr inbounds float, float* %13, i64 %51 - %53 = bitcast float* %52 to <8 x float>* - %wide.load60.i = load <8 x float>, <8 x float>* %53, align 4, !tbaa !12, !alias.scope !19 - %reverse61.i = shufflevector <8 x float> %wide.load60.i, <8 x float> undef, <8 x i32> - %54 = trunc i64 %index.next.i1 to i32 - %55 = sub i32 %3, %54 - %56 = add i32 %55, %mul.i - %57 = add i32 %56, -3 - %58 = sext i32 %57 to i64 - %59 = getelementptr inbounds float, float* %14, i64 %58 - %60 = bitcast float* %59 to <8 x float>* - %wide.load62.i = load <8 x float>, <8 x float>* %60, align 4, !tbaa !12, !alias.scope !21 - %reverse63.i = shufflevector <8 x float> %wide.load62.i, <8 x float> undef, <8 x i32> - %61 = fneg <8 x float> %reverse61.i - %62 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %61, <8 x float> %reverse63.i, <8 x float> %reverse.i) #2 - %63 = sub i32 %sub22.i, %54 - %64 = sext i32 %63 to i64 - %65 = getelementptr inbounds float, float* %15, i64 %64 - %66 = bitcast float* %65 to <8 x float>* - %wide.load64.i = load <8 x float>, <8 x float>* %66, align 4, !tbaa !12, !alias.scope !23 - %reverse65.i = shufflevector <8 x float> %wide.load64.i, <8 x float> undef, <8 x i32> - %67 = fdiv <8 x float> %62, %reverse65.i, !fpmath !25 - %68 = add i32 %56, -2 - %69 = sext i32 %68 to i64 - %reverse66.i = shufflevector <8 x float> %67, <8 x float> undef, <8 x i32> - %70 = getelementptr inbounds float, float* %13, i64 %69 - %71 = bitcast float* %70 to <8 x float>* - store <8 x float> %reverse66.i, <8 x float>* %71, align 4, !tbaa !12, !alias.scope !26, !noalias !28, !llvm.access.group !29 - %index.next.i = add i64 %index.next.i1, 8 - %72 = icmp eq i64 %index.next.i, %n.vec.i - br i1 %72, label %middle.block.i, label %vector.body.i, !llvm.loop !31 - -middle.block.i: ; preds = %vector.body.i - br i1 %cmp.n.i, label %if.end.i, label %for.body.i.preheader - -for.body.i.preheader: ; preds = %middle.block.i, %vector.memcheck.i, %vector.scevcheck.i, %for.body.lr.ph.i - %indvars.iv.next.i3.ph = phi i64 [ 0, %for.body.lr.ph.i ], [ 0, %vector.scevcheck.i ], [ 0, %vector.memcheck.i ], [ %n.vec.i, %middle.block.i ] - br label %for.body.i - -for.body.i: ; preds = %for.body.i, %for.body.i.preheader - %indvars.iv.next.i3 = phi i64 [ %indvars.iv.next.i, %for.body.i ], [ %indvars.iv.next.i3.ph, %for.body.i.preheader ] - %73 = sub nsw i64 %9, %indvars.iv.next.i3 - %74 = add nsw i64 %73, %16 - %arrayidx.i = getelementptr inbounds float, float* %2, i64 %74 - %75 = load float, float* %arrayidx.i, align 4, !tbaa !12 - %76 = trunc i64 %73 to i32 - %add10.i = add i32 %sub9.i, %76 - %idxprom11.i = sext i32 %add10.i to i64 - %arrayidx12.i = getelementptr inbounds float, float* %2, i64 %idxprom11.i - %77 = load float, float* %arrayidx12.i, align 4, !tbaa !12 - %78 = trunc i64 %indvars.iv.next.i3 to i32 - %79 = sub i32 %3, %78 - %sub15.i = add i32 %79, %mul.i - %add16.i = add i32 %sub15.i, -3 - %idxprom17.i = sext i32 %add16.i to i64 - %arrayidx18.i = getelementptr inbounds float, float* %0, i64 %idxprom17.i - %80 = load float, float* %arrayidx18.i, align 4, !tbaa !12 - %neg.i = fneg float %77 - %81 = tail call float @llvm.fmuladd.f32(float %neg.i, float %80, float %75) #2 - %add23.i = sub i32 %sub22.i, %78 - %idxprom24.i = sext i32 %add23.i to i64 - %arrayidx25.i = getelementptr inbounds float, float* %1, i64 %idxprom24.i - %82 = load float, float* %arrayidx25.i, align 4, !tbaa !12 - %div.i = fdiv float %81, %82, !fpmath !25 - %add29.i = add i32 %sub15.i, -2 - %idxprom30.i = sext i32 %add29.i to i64 - %arrayidx31.i = getelementptr inbounds float, float* %2, i64 %idxprom30.i - store float %div.i, float* %arrayidx31.i, align 4, !tbaa !12, !llvm.access.group !29 - %indvars.iv.next.i = add nuw nsw i64 %indvars.iv.next.i3, 1 - %exitcond.not.i = icmp eq i64 %indvars.iv.next.i, %wide.trip.count.i - br i1 %exitcond.not.i, label %if.end.i.loopexit, label %for.body.i, !llvm.loop !34 - -if.end.i.loopexit: ; preds = %for.body.i - br label %if.end.i - -if.end.i: ; preds = %if.end.i.loopexit, %middle.block.i, %pregion_for_entry.entry.i - %83 = add nuw nsw i64 %_local_id_x.0, 1 - %exitcond.not = icmp eq i64 %83, 256 - br i1 %exitcond.not, label %adi_kernel3.exit, label %pregion_for_entry.entry.i, !llvm.loop !35 - -adi_kernel3.exit: ; preds = %if.end.i - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_adi_kernel3_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = add i32 %20, -2 - %cmp258.i.i = icmp slt i32 %20, 3 - %21 = sext i32 %sub.i.i to i64 - %wide.trip.count.i.i = zext i32 %sub.i.i to i64 - %min.iters.check.i.i = icmp ult i32 %sub.i.i, 8 - %22 = add nsw i64 %wide.trip.count.i.i, -1 - %23 = trunc i64 %22 to i32 - %24 = icmp ugt i64 %22, 4294967295 - %mul6.i.i = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %22, i64 4) #2 - %mul.result7.i.i = extractvalue { i64, i1 } %mul6.i.i, 0 - %mul.overflow8.i.i = extractvalue { i64, i1 } %mul6.i.i, 1 - %n.vec.i.i = and i64 %wide.trip.count.i.i, 4294967288 - %25 = getelementptr inbounds float, float* %16, i64 -7 - %26 = getelementptr inbounds float, float* %8, i64 -7 - %27 = getelementptr inbounds float, float* %12, i64 -7 - %cmp.n.i.i = icmp eq i64 %n.vec.i.i, %wide.trip.count.i.i - br label %pregion_for_entry.entry.i.i - -pregion_for_entry.entry.i.i: ; preds = %if.end.i.i, %5 - %_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %95, %if.end.i.i ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sle i32 %20, %conv.i.i - %brmerge = or i1 %cmp.i.i, %cmp258.i.i - br i1 %brmerge, label %if.end.i.i, label %for.body.lr.ph.i.i - -for.body.lr.ph.i.i: ; preds = %pregion_for_entry.entry.i.i - %mul.i.i = mul nsw i32 %20, %conv.i.i - %sub21.i.i = add i32 %mul.i.i, %20 - %sub22.i.i = add i32 %sub21.i.i, -3 - %28 = sext i32 %mul.i.i to i64 - %sub9.i.i = add i32 %mul.i.i, -1 - br i1 %min.iters.check.i.i, label %for.body.i.i.preheader, label %vector.scevcheck.i.i - -vector.scevcheck.i.i: ; preds = %for.body.lr.ph.i.i - %29 = add i32 %sub.i.i, %mul.i.i - %30 = sub i32 %29, %23 - %31 = icmp sgt i32 %30, %29 - %32 = sub i32 %sub22.i.i, %23 - %33 = icmp sgt i32 %32, %sub22.i.i - %34 = or i1 %24, %33 - %35 = or i1 %34, %31 - %36 = sext i32 %29 to i64 - %scevgep.i.i = getelementptr float, float* %16, i64 %36 - %scevgep5.i.i = ptrtoint float* %scevgep.i.i to i64 - %37 = icmp ugt i64 %mul.result7.i.i, %scevgep5.i.i - %38 = or i1 %mul.overflow8.i.i, %37 - %39 = or i1 %35, %38 - %40 = add nsw i64 %28, %21 - %scevgep9.i.i = getelementptr float, float* %16, i64 %40 - %scevgep910.i.i = ptrtoint float* %scevgep9.i.i to i64 - %41 = icmp ugt i64 %mul.result7.i.i, %scevgep910.i.i - %42 = or i1 %41, %39 - %43 = sext i32 %sub22.i.i to i64 - %scevgep14.i.i = getelementptr float, float* %16, i64 %43 - %scevgep1415.i.i = ptrtoint float* %scevgep14.i.i to i64 - %44 = icmp ugt i64 %mul.result7.i.i, %scevgep1415.i.i - %45 = or i1 %44, %42 - %scevgep19.i.i = getelementptr float, float* %8, i64 %43 - %scevgep1920.i.i = ptrtoint float* %scevgep19.i.i to i64 - %46 = icmp ugt i64 %mul.result7.i.i, %scevgep1920.i.i - %47 = or i1 %mul.overflow8.i.i, %46 - %48 = or i1 %47, %45 - %scevgep24.i.i = getelementptr float, float* %12, i64 %43 - %scevgep2425.i.i = ptrtoint float* %scevgep24.i.i to i64 - %49 = icmp ugt i64 %mul.result7.i.i, %scevgep2425.i.i - %50 = or i1 %49, %48 - br i1 %50, label %for.body.i.i.preheader, label %vector.memcheck.i.i - -vector.memcheck.i.i: ; preds = %vector.scevcheck.i.i - %51 = add nsw i64 %36, 1 - %52 = sub nsw i64 %51, %wide.trip.count.i.i - %scevgep29.i.i = getelementptr float, float* %16, i64 %52 - %scevgep31.i.i = getelementptr float, float* %16, i64 %51 - %53 = add nsw i64 %40, 1 - %54 = sub nsw i64 %53, %wide.trip.count.i.i - %scevgep33.i.i = getelementptr float, float* %16, i64 %54 - %scevgep35.i.i = getelementptr float, float* %16, i64 %53 - %55 = add nsw i64 %43, 1 - %56 = sub nsw i64 %55, %wide.trip.count.i.i - %scevgep37.i.i = getelementptr float, float* %16, i64 %56 - %scevgep39.i.i = getelementptr float, float* %16, i64 %55 - %scevgep41.i.i = getelementptr float, float* %8, i64 %56 - %scevgep43.i.i = getelementptr float, float* %8, i64 %55 - %scevgep45.i.i = getelementptr float, float* %12, i64 %56 - %scevgep47.i.i = getelementptr float, float* %12, i64 %55 - %bound0.i.i = icmp ult float* %scevgep29.i.i, %scevgep35.i.i - %bound1.i.i = icmp ult float* %scevgep33.i.i, %scevgep31.i.i - %found.conflict.i.i = and i1 %bound0.i.i, %bound1.i.i - %bound049.i.i = icmp ult float* %scevgep29.i.i, %scevgep39.i.i - %bound150.i.i = icmp ult float* %scevgep37.i.i, %scevgep31.i.i - %found.conflict51.i.i = and i1 %bound150.i.i, %bound049.i.i - %conflict.rdx.i.i = or i1 %found.conflict.i.i, %found.conflict51.i.i - %bound052.i.i = icmp ult float* %scevgep29.i.i, %scevgep43.i.i - %bound153.i.i = icmp ult float* %scevgep41.i.i, %scevgep31.i.i - %found.conflict54.i.i = and i1 %bound153.i.i, %bound052.i.i - %conflict.rdx55.i.i = or i1 %found.conflict54.i.i, %conflict.rdx.i.i - %bound056.i.i = icmp ult float* %scevgep29.i.i, %scevgep47.i.i - %bound157.i.i = icmp ult float* %scevgep45.i.i, %scevgep31.i.i - %found.conflict58.i.i = and i1 %bound157.i.i, %bound056.i.i - %conflict.rdx59.i.i = or i1 %found.conflict58.i.i, %conflict.rdx55.i.i - br i1 %conflict.rdx59.i.i, label %for.body.i.i.preheader, label %vector.body.i.i.preheader - -vector.body.i.i.preheader: ; preds = %vector.memcheck.i.i - br label %vector.body.i.i - -vector.body.i.i: ; preds = %vector.body.i.i, %vector.body.i.i.preheader - %index.next.i.i1 = phi i64 [ %index.next.i.i, %vector.body.i.i ], [ 0, %vector.body.i.i.preheader ] - %57 = sub nsw i64 %21, %index.next.i.i1 - %58 = add nsw i64 %57, %28 - %59 = getelementptr inbounds float, float* %25, i64 %58 - %60 = bitcast float* %59 to <8 x float>* - %wide.load.i.i = load <8 x float>, <8 x float>* %60, align 4, !tbaa !12, !alias.scope !37 - %reverse.i.i = shufflevector <8 x float> %wide.load.i.i, <8 x float> undef, <8 x i32> - %61 = trunc i64 %57 to i32 - %62 = add i32 %sub9.i.i, %61 - %63 = sext i32 %62 to i64 - %64 = getelementptr inbounds float, float* %25, i64 %63 - %65 = bitcast float* %64 to <8 x float>* - %wide.load60.i.i = load <8 x float>, <8 x float>* %65, align 4, !tbaa !12, !alias.scope !40 - %reverse61.i.i = shufflevector <8 x float> %wide.load60.i.i, <8 x float> undef, <8 x i32> - %66 = trunc i64 %index.next.i.i1 to i32 - %67 = sub i32 %20, %66 - %68 = add i32 %67, %mul.i.i - %69 = add i32 %68, -3 - %70 = sext i32 %69 to i64 - %71 = getelementptr inbounds float, float* %26, i64 %70 - %72 = bitcast float* %71 to <8 x float>* - %wide.load62.i.i = load <8 x float>, <8 x float>* %72, align 4, !tbaa !12, !alias.scope !42 - %reverse63.i.i = shufflevector <8 x float> %wide.load62.i.i, <8 x float> undef, <8 x i32> - %73 = fneg <8 x float> %reverse61.i.i - %74 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %73, <8 x float> %reverse63.i.i, <8 x float> %reverse.i.i) #2 - %75 = sub i32 %sub22.i.i, %66 - %76 = sext i32 %75 to i64 - %77 = getelementptr inbounds float, float* %27, i64 %76 - %78 = bitcast float* %77 to <8 x float>* - %wide.load64.i.i = load <8 x float>, <8 x float>* %78, align 4, !tbaa !12, !alias.scope !44 - %reverse65.i.i = shufflevector <8 x float> %wide.load64.i.i, <8 x float> undef, <8 x i32> - %79 = fdiv <8 x float> %74, %reverse65.i.i, !fpmath !25 - %80 = add i32 %68, -2 - %81 = sext i32 %80 to i64 - %reverse66.i.i = shufflevector <8 x float> %79, <8 x float> undef, <8 x i32> - %82 = getelementptr inbounds float, float* %25, i64 %81 - %83 = bitcast float* %82 to <8 x float>* - store <8 x float> %reverse66.i.i, <8 x float>* %83, align 4, !tbaa !12, !alias.scope !46, !noalias !48, !llvm.access.group !29 - %index.next.i.i = add i64 %index.next.i.i1, 8 - %84 = icmp eq i64 %index.next.i.i, %n.vec.i.i - br i1 %84, label %middle.block.i.i, label %vector.body.i.i, !llvm.loop !31 - -middle.block.i.i: ; preds = %vector.body.i.i - br i1 %cmp.n.i.i, label %if.end.i.i, label %for.body.i.i.preheader - -for.body.i.i.preheader: ; preds = %middle.block.i.i, %vector.memcheck.i.i, %vector.scevcheck.i.i, %for.body.lr.ph.i.i - %indvars.iv.next.i.i3.ph = phi i64 [ 0, %for.body.lr.ph.i.i ], [ 0, %vector.scevcheck.i.i ], [ 0, %vector.memcheck.i.i ], [ %n.vec.i.i, %middle.block.i.i ] - br label %for.body.i.i - -for.body.i.i: ; preds = %for.body.i.i, %for.body.i.i.preheader - %indvars.iv.next.i.i3 = phi i64 [ %indvars.iv.next.i.i, %for.body.i.i ], [ %indvars.iv.next.i.i3.ph, %for.body.i.i.preheader ] - %85 = sub nsw i64 %21, %indvars.iv.next.i.i3 - %86 = add nsw i64 %85, %28 - %arrayidx.i.i = getelementptr inbounds float, float* %16, i64 %86 - %87 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %88 = trunc i64 %85 to i32 - %add10.i.i = add i32 %sub9.i.i, %88 - %idxprom11.i.i = sext i32 %add10.i.i to i64 - %arrayidx12.i.i = getelementptr inbounds float, float* %16, i64 %idxprom11.i.i - %89 = load float, float* %arrayidx12.i.i, align 4, !tbaa !12 - %90 = trunc i64 %indvars.iv.next.i.i3 to i32 - %91 = sub i32 %20, %90 - %sub15.i.i = add i32 %91, %mul.i.i - %add16.i.i = add i32 %sub15.i.i, -3 - %idxprom17.i.i = sext i32 %add16.i.i to i64 - %arrayidx18.i.i = getelementptr inbounds float, float* %8, i64 %idxprom17.i.i - %92 = load float, float* %arrayidx18.i.i, align 4, !tbaa !12 - %neg.i.i = fneg float %89 - %93 = tail call float @llvm.fmuladd.f32(float %neg.i.i, float %92, float %87) #2 - %add23.i.i = sub i32 %sub22.i.i, %90 - %idxprom24.i.i = sext i32 %add23.i.i to i64 - %arrayidx25.i.i = getelementptr inbounds float, float* %12, i64 %idxprom24.i.i - %94 = load float, float* %arrayidx25.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %93, %94, !fpmath !25 - %add29.i.i = add i32 %sub15.i.i, -2 - %idxprom30.i.i = sext i32 %add29.i.i to i64 - %arrayidx31.i.i = getelementptr inbounds float, float* %16, i64 %idxprom30.i.i - store float %div.i.i, float* %arrayidx31.i.i, align 4, !tbaa !12, !llvm.access.group !29 - %indvars.iv.next.i.i = add nuw nsw i64 %indvars.iv.next.i.i3, 1 - %exitcond.not.i.i = icmp eq i64 %indvars.iv.next.i.i, %wide.trip.count.i.i - br i1 %exitcond.not.i.i, label %if.end.i.i.loopexit, label %for.body.i.i, !llvm.loop !34 - -if.end.i.i.loopexit: ; preds = %for.body.i.i - br label %if.end.i.i - -if.end.i.i: ; preds = %if.end.i.i.loopexit, %middle.block.i.i, %pregion_for_entry.entry.i.i - %95 = add nuw nsw i64 %_local_id_x.i.0, 1 - %exitcond.not = icmp eq i64 %95, 256 - br i1 %exitcond.not, label %_pocl_kernel_adi_kernel3.exit, label %pregion_for_entry.entry.i.i, !llvm.loop !35 - -_pocl_kernel_adi_kernel3.exit: ; preds = %if.end.i.i - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_adi_kernel3_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = add i32 %17, -2 - %cmp258.i.i = icmp slt i32 %17, 3 - %18 = sext i32 %sub.i.i to i64 - %wide.trip.count.i.i = zext i32 %sub.i.i to i64 - %min.iters.check.i.i = icmp ult i32 %sub.i.i, 8 - %19 = add nsw i64 %wide.trip.count.i.i, -1 - %20 = trunc i64 %19 to i32 - %21 = icmp ugt i64 %19, 4294967295 - %mul6.i.i = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %19, i64 4) #2 - %mul.result7.i.i = extractvalue { i64, i1 } %mul6.i.i, 0 - %mul.overflow8.i.i = extractvalue { i64, i1 } %mul6.i.i, 1 - %n.vec.i.i = and i64 %wide.trip.count.i.i, 4294967288 - %22 = getelementptr inbounds float, float* %13, i64 -7 - %23 = getelementptr inbounds float, float* %7, i64 -7 - %24 = getelementptr inbounds float, float* %10, i64 -7 - %cmp.n.i.i = icmp eq i64 %n.vec.i.i, %wide.trip.count.i.i - br label %pregion_for_entry.entry.i.i - -pregion_for_entry.entry.i.i: ; preds = %if.end.i.i, %5 - %_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %92, %if.end.i.i ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sle i32 %17, %conv.i.i - %brmerge = or i1 %cmp.i.i, %cmp258.i.i - br i1 %brmerge, label %if.end.i.i, label %for.body.lr.ph.i.i - -for.body.lr.ph.i.i: ; preds = %pregion_for_entry.entry.i.i - %mul.i.i = mul nsw i32 %17, %conv.i.i - %sub21.i.i = add i32 %mul.i.i, %17 - %sub22.i.i = add i32 %sub21.i.i, -3 - %25 = sext i32 %mul.i.i to i64 - %sub9.i.i = add i32 %mul.i.i, -1 - br i1 %min.iters.check.i.i, label %for.body.i.i.preheader, label %vector.scevcheck.i.i - -vector.scevcheck.i.i: ; preds = %for.body.lr.ph.i.i - %26 = add i32 %sub.i.i, %mul.i.i - %27 = sub i32 %26, %20 - %28 = icmp sgt i32 %27, %26 - %29 = sub i32 %sub22.i.i, %20 - %30 = icmp sgt i32 %29, %sub22.i.i - %31 = or i1 %21, %30 - %32 = or i1 %31, %28 - %33 = sext i32 %26 to i64 - %scevgep.i.i = getelementptr float, float* %13, i64 %33 - %scevgep5.i.i = ptrtoint float* %scevgep.i.i to i64 - %34 = icmp ugt i64 %mul.result7.i.i, %scevgep5.i.i - %35 = or i1 %mul.overflow8.i.i, %34 - %36 = or i1 %32, %35 - %37 = add nsw i64 %25, %18 - %scevgep9.i.i = getelementptr float, float* %13, i64 %37 - %scevgep910.i.i = ptrtoint float* %scevgep9.i.i to i64 - %38 = icmp ugt i64 %mul.result7.i.i, %scevgep910.i.i - %39 = or i1 %38, %36 - %40 = sext i32 %sub22.i.i to i64 - %scevgep14.i.i = getelementptr float, float* %13, i64 %40 - %scevgep1415.i.i = ptrtoint float* %scevgep14.i.i to i64 - %41 = icmp ugt i64 %mul.result7.i.i, %scevgep1415.i.i - %42 = or i1 %41, %39 - %scevgep19.i.i = getelementptr float, float* %7, i64 %40 - %scevgep1920.i.i = ptrtoint float* %scevgep19.i.i to i64 - %43 = icmp ugt i64 %mul.result7.i.i, %scevgep1920.i.i - %44 = or i1 %mul.overflow8.i.i, %43 - %45 = or i1 %44, %42 - %scevgep24.i.i = getelementptr float, float* %10, i64 %40 - %scevgep2425.i.i = ptrtoint float* %scevgep24.i.i to i64 - %46 = icmp ugt i64 %mul.result7.i.i, %scevgep2425.i.i - %47 = or i1 %46, %45 - br i1 %47, label %for.body.i.i.preheader, label %vector.memcheck.i.i - -vector.memcheck.i.i: ; preds = %vector.scevcheck.i.i - %48 = add nsw i64 %33, 1 - %49 = sub nsw i64 %48, %wide.trip.count.i.i - %scevgep29.i.i = getelementptr float, float* %13, i64 %49 - %scevgep31.i.i = getelementptr float, float* %13, i64 %48 - %50 = add nsw i64 %37, 1 - %51 = sub nsw i64 %50, %wide.trip.count.i.i - %scevgep33.i.i = getelementptr float, float* %13, i64 %51 - %scevgep35.i.i = getelementptr float, float* %13, i64 %50 - %52 = add nsw i64 %40, 1 - %53 = sub nsw i64 %52, %wide.trip.count.i.i - %scevgep37.i.i = getelementptr float, float* %13, i64 %53 - %scevgep39.i.i = getelementptr float, float* %13, i64 %52 - %scevgep41.i.i = getelementptr float, float* %7, i64 %53 - %scevgep43.i.i = getelementptr float, float* %7, i64 %52 - %scevgep45.i.i = getelementptr float, float* %10, i64 %53 - %scevgep47.i.i = getelementptr float, float* %10, i64 %52 - %bound0.i.i = icmp ult float* %scevgep29.i.i, %scevgep35.i.i - %bound1.i.i = icmp ult float* %scevgep33.i.i, %scevgep31.i.i - %found.conflict.i.i = and i1 %bound0.i.i, %bound1.i.i - %bound049.i.i = icmp ult float* %scevgep29.i.i, %scevgep39.i.i - %bound150.i.i = icmp ult float* %scevgep37.i.i, %scevgep31.i.i - %found.conflict51.i.i = and i1 %bound150.i.i, %bound049.i.i - %conflict.rdx.i.i = or i1 %found.conflict.i.i, %found.conflict51.i.i - %bound052.i.i = icmp ult float* %scevgep29.i.i, %scevgep43.i.i - %bound153.i.i = icmp ult float* %scevgep41.i.i, %scevgep31.i.i - %found.conflict54.i.i = and i1 %bound153.i.i, %bound052.i.i - %conflict.rdx55.i.i = or i1 %found.conflict54.i.i, %conflict.rdx.i.i - %bound056.i.i = icmp ult float* %scevgep29.i.i, %scevgep47.i.i - %bound157.i.i = icmp ult float* %scevgep45.i.i, %scevgep31.i.i - %found.conflict58.i.i = and i1 %bound157.i.i, %bound056.i.i - %conflict.rdx59.i.i = or i1 %found.conflict58.i.i, %conflict.rdx55.i.i - br i1 %conflict.rdx59.i.i, label %for.body.i.i.preheader, label %vector.body.i.i.preheader - -vector.body.i.i.preheader: ; preds = %vector.memcheck.i.i - br label %vector.body.i.i - -vector.body.i.i: ; preds = %vector.body.i.i, %vector.body.i.i.preheader - %index.next.i.i1 = phi i64 [ %index.next.i.i, %vector.body.i.i ], [ 0, %vector.body.i.i.preheader ] - %54 = sub nsw i64 %18, %index.next.i.i1 - %55 = add nsw i64 %54, %25 - %56 = getelementptr inbounds float, float* %22, i64 %55 - %57 = bitcast float* %56 to <8 x float>* - %wide.load.i.i = load <8 x float>, <8 x float>* %57, align 4, !tbaa !12, !alias.scope !49 - %reverse.i.i = shufflevector <8 x float> %wide.load.i.i, <8 x float> undef, <8 x i32> - %58 = trunc i64 %54 to i32 - %59 = add i32 %sub9.i.i, %58 - %60 = sext i32 %59 to i64 - %61 = getelementptr inbounds float, float* %22, i64 %60 - %62 = bitcast float* %61 to <8 x float>* - %wide.load60.i.i = load <8 x float>, <8 x float>* %62, align 4, !tbaa !12, !alias.scope !52 - %reverse61.i.i = shufflevector <8 x float> %wide.load60.i.i, <8 x float> undef, <8 x i32> - %63 = trunc i64 %index.next.i.i1 to i32 - %64 = sub i32 %17, %63 - %65 = add i32 %64, %mul.i.i - %66 = add i32 %65, -3 - %67 = sext i32 %66 to i64 - %68 = getelementptr inbounds float, float* %23, i64 %67 - %69 = bitcast float* %68 to <8 x float>* - %wide.load62.i.i = load <8 x float>, <8 x float>* %69, align 4, !tbaa !12, !alias.scope !54 - %reverse63.i.i = shufflevector <8 x float> %wide.load62.i.i, <8 x float> undef, <8 x i32> - %70 = fneg <8 x float> %reverse61.i.i - %71 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %70, <8 x float> %reverse63.i.i, <8 x float> %reverse.i.i) #2 - %72 = sub i32 %sub22.i.i, %63 - %73 = sext i32 %72 to i64 - %74 = getelementptr inbounds float, float* %24, i64 %73 - %75 = bitcast float* %74 to <8 x float>* - %wide.load64.i.i = load <8 x float>, <8 x float>* %75, align 4, !tbaa !12, !alias.scope !56 - %reverse65.i.i = shufflevector <8 x float> %wide.load64.i.i, <8 x float> undef, <8 x i32> - %76 = fdiv <8 x float> %71, %reverse65.i.i, !fpmath !25 - %77 = add i32 %65, -2 - %78 = sext i32 %77 to i64 - %reverse66.i.i = shufflevector <8 x float> %76, <8 x float> undef, <8 x i32> - %79 = getelementptr inbounds float, float* %22, i64 %78 - %80 = bitcast float* %79 to <8 x float>* - store <8 x float> %reverse66.i.i, <8 x float>* %80, align 4, !tbaa !12, !alias.scope !58, !noalias !60, !llvm.access.group !29 - %index.next.i.i = add i64 %index.next.i.i1, 8 - %81 = icmp eq i64 %index.next.i.i, %n.vec.i.i - br i1 %81, label %middle.block.i.i, label %vector.body.i.i, !llvm.loop !31 - -middle.block.i.i: ; preds = %vector.body.i.i - br i1 %cmp.n.i.i, label %if.end.i.i, label %for.body.i.i.preheader - -for.body.i.i.preheader: ; preds = %middle.block.i.i, %vector.memcheck.i.i, %vector.scevcheck.i.i, %for.body.lr.ph.i.i - %indvars.iv.next.i.i3.ph = phi i64 [ 0, %for.body.lr.ph.i.i ], [ 0, %vector.scevcheck.i.i ], [ 0, %vector.memcheck.i.i ], [ %n.vec.i.i, %middle.block.i.i ] - br label %for.body.i.i - -for.body.i.i: ; preds = %for.body.i.i, %for.body.i.i.preheader - %indvars.iv.next.i.i3 = phi i64 [ %indvars.iv.next.i.i, %for.body.i.i ], [ %indvars.iv.next.i.i3.ph, %for.body.i.i.preheader ] - %82 = sub nsw i64 %18, %indvars.iv.next.i.i3 - %83 = add nsw i64 %82, %25 - %arrayidx.i.i = getelementptr inbounds float, float* %13, i64 %83 - %84 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %85 = trunc i64 %82 to i32 - %add10.i.i = add i32 %sub9.i.i, %85 - %idxprom11.i.i = sext i32 %add10.i.i to i64 - %arrayidx12.i.i = getelementptr inbounds float, float* %13, i64 %idxprom11.i.i - %86 = load float, float* %arrayidx12.i.i, align 4, !tbaa !12 - %87 = trunc i64 %indvars.iv.next.i.i3 to i32 - %88 = sub i32 %17, %87 - %sub15.i.i = add i32 %88, %mul.i.i - %add16.i.i = add i32 %sub15.i.i, -3 - %idxprom17.i.i = sext i32 %add16.i.i to i64 - %arrayidx18.i.i = getelementptr inbounds float, float* %7, i64 %idxprom17.i.i - %89 = load float, float* %arrayidx18.i.i, align 4, !tbaa !12 - %neg.i.i = fneg float %86 - %90 = tail call float @llvm.fmuladd.f32(float %neg.i.i, float %89, float %84) #2 - %add23.i.i = sub i32 %sub22.i.i, %87 - %idxprom24.i.i = sext i32 %add23.i.i to i64 - %arrayidx25.i.i = getelementptr inbounds float, float* %10, i64 %idxprom24.i.i - %91 = load float, float* %arrayidx25.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %90, %91, !fpmath !25 - %add29.i.i = add i32 %sub15.i.i, -2 - %idxprom30.i.i = sext i32 %add29.i.i to i64 - %arrayidx31.i.i = getelementptr inbounds float, float* %13, i64 %idxprom30.i.i - store float %div.i.i, float* %arrayidx31.i.i, align 4, !tbaa !12, !llvm.access.group !29 - %indvars.iv.next.i.i = add nuw nsw i64 %indvars.iv.next.i.i3, 1 - %exitcond.not.i.i = icmp eq i64 %indvars.iv.next.i.i, %wide.trip.count.i.i - br i1 %exitcond.not.i.i, label %if.end.i.i.loopexit, label %for.body.i.i, !llvm.loop !34 - -if.end.i.i.loopexit: ; preds = %for.body.i.i - br label %if.end.i.i - -if.end.i.i: ; preds = %if.end.i.i.loopexit, %middle.block.i.i, %pregion_for_entry.entry.i.i - %92 = add nuw nsw i64 %_local_id_x.i.0, 1 - %exitcond.not = icmp eq i64 %92, 256 - br i1 %exitcond.not, label %_pocl_kernel_adi_kernel3.exit, label %pregion_for_entry.entry.i.i, !llvm.loop !35 - -_pocl_kernel_adi_kernel3.exit: ; preds = %if.end.i.i - ret void -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int"} -!9 = !{!"", !"", !"", !""} -!10 = !{!"A", !"B", !"X", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20} -!20 = distinct !{!20, !18} -!21 = !{!22} -!22 = distinct !{!22, !18} -!23 = !{!24} -!24 = distinct !{!24, !18} -!25 = !{float 2.500000e+00} -!26 = !{!27} -!27 = distinct !{!27, !18} -!28 = !{!17, !20, !22, !24} -!29 = !{!30} -!30 = distinct !{} -!31 = distinct !{!31, !32, !33} -!32 = !{!"llvm.loop.unroll.disable"} -!33 = !{!"llvm.loop.isvectorized", i32 1} -!34 = distinct !{!34, !32, !33} -!35 = distinct !{!35, !36} -!36 = !{!"llvm.loop.parallel_accesses", !30} -!37 = !{!38} -!38 = distinct !{!38, !39} -!39 = distinct !{!39, !"LVerDomain"} -!40 = !{!41} -!41 = distinct !{!41, !39} -!42 = !{!43} -!43 = distinct !{!43, !39} -!44 = !{!45} -!45 = distinct !{!45, !39} -!46 = !{!47} -!47 = distinct !{!47, !39} -!48 = !{!38, !41, !43, !45} -!49 = !{!50} -!50 = distinct !{!50, !51} -!51 = distinct !{!51, !"LVerDomain"} -!52 = !{!53} -!53 = distinct !{!53, !51} -!54 = !{!55} -!55 = distinct !{!55, !51} -!56 = !{!57} -!57 = distinct !{!57, !51} -!58 = !{!59} -!59 = distinct !{!59, !51} -!60 = !{!50, !53, !55, !57} diff --git a/pocl_irs/adi_kernel4.ll b/pocl_irs/adi_kernel4.ll deleted file mode 100644 index 4a06fb5..0000000 --- a/pocl_irs/adi_kernel4.ll +++ /dev/null @@ -1,644 +0,0 @@ -; ModuleID = './BF/DFFLECFOLOBPKCKMNEPCKIANKJKKLLHBOGBCO/adi_kernel4/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_adi_kernel4(float* nocapture readonly %0, float* nocapture %1, float* nocapture %2, i32 %3, i32 %4, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %5, i64 %6, i64 %7, i64 %8) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { -vector.scevcheck: - %mul.i.i = shl i64 %6, 8 - %mul.i = mul nsw i32 %4, %3 - %sub.i = add nsw i32 %3, -1 - %mul2.i = mul nsw i32 %sub.i, %4 - %9 = mul i32 %4, %3 - %10 = trunc i64 %6 to i32 - %11 = shl i32 %10, 8 - %12 = add i32 %9, %11 - %13 = icmp sgt i32 %12, 2147483392 - %14 = add i32 %3, -1 - %15 = mul i32 %14, %4 - %16 = add i32 %15, %11 - %17 = icmp sgt i32 %16, 2147483392 - %18 = or i1 %13, %17 - br i1 %18, label %pregion_for_entry.entry.i.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i - -vector.memcheck: ; preds = %vector.scevcheck - %19 = mul i32 %4, %3 - %20 = trunc i64 %6 to i32 - %21 = shl i32 %20, 8 - %22 = add i32 %19, %21 - %23 = sext i32 %22 to i64 - %scevgep = getelementptr float, float* %2, i64 %23 - %24 = add nsw i64 %23, 256 - %scevgep5 = getelementptr float, float* %2, i64 %24 - %scevgep7 = getelementptr float, float* %1, i64 %23 - %scevgep9 = getelementptr float, float* %1, i64 %24 - %25 = add i32 %3, -1 - %26 = mul i32 %25, %4 - %27 = add i32 %26, %21 - %28 = sext i32 %27 to i64 - %scevgep11 = getelementptr float, float* %2, i64 %28 - %29 = add nsw i64 %28, 256 - %scevgep13 = getelementptr float, float* %2, i64 %29 - %scevgep15 = getelementptr float, float* %0, i64 %23 - %scevgep17 = getelementptr float, float* %0, i64 %24 - %scevgep19 = getelementptr float, float* %1, i64 %28 - %scevgep21 = getelementptr float, float* %1, i64 %29 - %bound0 = icmp ult float* %scevgep, %scevgep9 - %bound1 = icmp ult float* %scevgep7, %scevgep5 - %found.conflict = and i1 %bound0, %bound1 - %bound023 = icmp ult float* %scevgep, %scevgep13 - %bound124 = icmp ult float* %scevgep11, %scevgep5 - %found.conflict25 = and i1 %bound023, %bound124 - %conflict.rdx = or i1 %found.conflict, %found.conflict25 - %bound026 = icmp ult float* %scevgep, %scevgep17 - %bound127 = icmp ult float* %scevgep15, %scevgep5 - %found.conflict28 = and i1 %bound026, %bound127 - %conflict.rdx29 = or i1 %conflict.rdx, %found.conflict28 - %bound030 = icmp ult float* %scevgep, %scevgep21 - %bound131 = icmp ult float* %scevgep19, %scevgep5 - %found.conflict32 = and i1 %bound030, %bound131 - %conflict.rdx33 = or i1 %conflict.rdx29, %found.conflict32 - %bound034 = icmp ult float* %scevgep7, %scevgep13 - %bound135 = icmp ult float* %scevgep11, %scevgep9 - %found.conflict36 = and i1 %bound034, %bound135 - %conflict.rdx37 = or i1 %conflict.rdx33, %found.conflict36 - %bound038 = icmp ult float* %scevgep7, %scevgep17 - %bound139 = icmp ult float* %scevgep15, %scevgep9 - %found.conflict40 = and i1 %bound038, %bound139 - %conflict.rdx41 = or i1 %conflict.rdx37, %found.conflict40 - %bound042 = icmp ult float* %scevgep7, %scevgep21 - %bound143 = icmp ult float* %scevgep19, %scevgep9 - %found.conflict44 = and i1 %bound042, %bound143 - %conflict.rdx45 = or i1 %conflict.rdx41, %found.conflict44 - br i1 %conflict.rdx45, label %pregion_for_entry.entry.i.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert46 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat47 = shufflevector <8 x i32> %broadcast.splatinsert46, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] - %30 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %31 = trunc <8 x i64> %30 to <8 x i32> - %32 = icmp sgt <8 x i32> %broadcast.splat47, %31 - %33 = extractelement <8 x i32> %31, i32 0 - %34 = add nsw i32 %mul.i, %33 - %35 = sext i32 %34 to i64 - %36 = getelementptr inbounds float, float* %2, i64 %35 - %37 = bitcast float* %36 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %37, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %38 = add nsw i32 %mul2.i, %33 - %39 = sext i32 %38 to i64 - %40 = getelementptr inbounds float, float* %2, i64 %39 - %41 = bitcast float* %40 to <8 x float>* - %wide.masked.load48 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %41, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !24 - %42 = getelementptr inbounds float, float* %0, i64 %35 - %43 = bitcast float* %42 to <8 x float>* - %wide.masked.load49 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %43, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !25 - %44 = fmul <8 x float> %wide.masked.load48, %wide.masked.load49 - %45 = getelementptr inbounds float, float* %1, i64 %39 - %46 = bitcast float* %45 to <8 x float>* - %wide.masked.load50 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %46, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !26 - %47 = fdiv <8 x float> %44, %wide.masked.load50, !fpmath !27 - %48 = fsub <8 x float> %wide.masked.load, %47 - %49 = bitcast float* %36 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %48, <8 x float>* %49, i32 4, <8 x i1> %32), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !28 - %50 = getelementptr inbounds float, float* %1, i64 %35 - %51 = bitcast float* %50 to <8 x float>* - %wide.masked.load51 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %51, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !30, !noalias !31 - %52 = bitcast float* %42 to <8 x float>* - %wide.masked.load52 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %52, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !25 - %53 = fmul <8 x float> %wide.masked.load52, %wide.masked.load52 - %54 = bitcast float* %45 to <8 x float>* - %wide.masked.load53 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %54, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !26 - %55 = fdiv <8 x float> %53, %wide.masked.load53, !fpmath !27 - %56 = fsub <8 x float> %wide.masked.load51, %55 - %57 = bitcast float* %50 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %56, <8 x float>* %57, i32 4, <8 x i1> %32), !tbaa !12, !alias.scope !30, !noalias !31, !llvm.access.group !28 - %index.next = add i64 %index, 8 - %vec.ind.next = add <8 x i64> %vec.ind, - %58 = icmp eq i64 %index.next, 256 - br i1 %58, label %adi_kernel4.exit.loopexit55, label %vector.body, !llvm.loop !32 - -pregion_for_entry.entry.i: ; preds = %if.end.r_exit.i, %pregion_for_entry.entry.i.preheader - %_local_id_x.0 = phi i64 [ %66, %if.end.r_exit.i ], [ 0, %pregion_for_entry.entry.i.preheader ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %conv.i = trunc i64 %add1.i.i to i32 - %cmp.i = icmp slt i32 %conv.i, %4 - br i1 %cmp.i, label %if.then.i, label %if.end.r_exit.i - -if.then.i: ; preds = %pregion_for_entry.entry.i - %add.i = add nsw i32 %mul.i, %conv.i - %idxprom.i = sext i32 %add.i to i64 - %arrayidx.i = getelementptr inbounds float, float* %2, i64 %idxprom.i - %59 = load float, float* %arrayidx.i, align 4, !tbaa !12 - %add3.i = add nsw i32 %mul2.i, %conv.i - %idxprom4.i = sext i32 %add3.i to i64 - %arrayidx5.i = getelementptr inbounds float, float* %2, i64 %idxprom4.i - %60 = load float, float* %arrayidx5.i, align 4, !tbaa !12 - %arrayidx9.i = getelementptr inbounds float, float* %0, i64 %idxprom.i - %61 = load float, float* %arrayidx9.i, align 4, !tbaa !12 - %mul10.i = fmul float %60, %61 - %arrayidx15.i = getelementptr inbounds float, float* %1, i64 %idxprom4.i - %62 = load float, float* %arrayidx15.i, align 4, !tbaa !12 - %div.i = fdiv float %mul10.i, %62, !fpmath !27 - %sub16.i = fsub float %59, %div.i - store float %sub16.i, float* %arrayidx.i, align 4, !tbaa !12, !llvm.access.group !28 - %arrayidx24.i = getelementptr inbounds float, float* %1, i64 %idxprom.i - %63 = load float, float* %arrayidx24.i, align 4, !tbaa !12 - %64 = load float, float* %arrayidx9.i, align 4, !tbaa !12 - %mul33.i = fmul float %64, %64 - %65 = load float, float* %arrayidx15.i, align 4, !tbaa !12 - %div39.i = fdiv float %mul33.i, %65, !fpmath !27 - %sub40.i = fsub float %63, %div39.i - store float %sub40.i, float* %arrayidx24.i, align 4, !tbaa !12, !llvm.access.group !28 - br label %if.end.r_exit.i - -if.end.r_exit.i: ; preds = %if.then.i, %pregion_for_entry.entry.i - %66 = add nuw nsw i64 %_local_id_x.0, 1 - %exitcond.not = icmp eq i64 %66, 256 - br i1 %exitcond.not, label %adi_kernel4.exit.loopexit, label %pregion_for_entry.entry.i, !llvm.loop !35 - -adi_kernel4.exit.loopexit: ; preds = %if.end.r_exit.i - br label %adi_kernel4.exit - -adi_kernel4.exit.loopexit55: ; preds = %vector.body - br label %adi_kernel4.exit - -adi_kernel4.exit: ; preds = %adi_kernel4.exit.loopexit55, %adi_kernel4.exit.loopexit - ret void -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_adi_kernel4_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -vector.scevcheck: - %5 = bitcast i8** %0 to float*** - %6 = load float**, float*** %5, align 8 - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float*** - %10 = load float**, float*** %9, align 8 - %11 = load float*, float** %10, align 8 - %12 = getelementptr i8*, i8** %0, i64 2 - %13 = bitcast i8** %12 to float*** - %14 = load float**, float*** %13, align 8 - %15 = load float*, float** %14, align 8 - %16 = getelementptr i8*, i8** %0, i64 3 - %17 = bitcast i8** %16 to i32** - %18 = load i32*, i32** %17, align 8 - %19 = load i32, i32* %18, align 4 - %20 = getelementptr i8*, i8** %0, i64 4 - %21 = bitcast i8** %20 to i32** - %22 = load i32*, i32** %21, align 8 - %23 = load i32, i32* %22, align 4 - %mul.i.i.i = shl i64 %2, 8 - %mul.i.i = mul nsw i32 %23, %19 - %sub.i.i = add nsw i32 %19, -1 - %mul2.i.i = mul nsw i32 %23, %sub.i.i - %24 = mul i32 %23, %19 - %25 = trunc i64 %2 to i32 - %26 = shl i32 %25, 8 - %27 = add i32 %24, %26 - %28 = icmp sgt i32 %27, 2147483392 - %29 = add i32 %19, -1 - %30 = mul i32 %23, %29 - %31 = add i32 %30, %26 - %32 = icmp sgt i32 %31, 2147483392 - %33 = or i1 %28, %32 - br i1 %33, label %pregion_for_entry.entry.i.i.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i - -vector.memcheck: ; preds = %vector.scevcheck - %34 = mul i32 %23, %19 - %35 = trunc i64 %2 to i32 - %36 = shl i32 %35, 8 - %37 = add i32 %34, %36 - %38 = sext i32 %37 to i64 - %scevgep = getelementptr float, float* %15, i64 %38 - %39 = add nsw i64 %38, 256 - %scevgep5 = getelementptr float, float* %15, i64 %39 - %scevgep7 = getelementptr float, float* %11, i64 %38 - %scevgep9 = getelementptr float, float* %11, i64 %39 - %40 = add i32 %19, -1 - %41 = mul i32 %23, %40 - %42 = add i32 %41, %36 - %43 = sext i32 %42 to i64 - %scevgep11 = getelementptr float, float* %15, i64 %43 - %44 = add nsw i64 %43, 256 - %scevgep13 = getelementptr float, float* %15, i64 %44 - %scevgep15 = getelementptr float, float* %7, i64 %38 - %scevgep17 = getelementptr float, float* %7, i64 %39 - %scevgep19 = getelementptr float, float* %11, i64 %43 - %scevgep21 = getelementptr float, float* %11, i64 %44 - %bound0 = icmp ult float* %scevgep, %scevgep9 - %bound1 = icmp ult float* %scevgep7, %scevgep5 - %found.conflict = and i1 %bound0, %bound1 - %bound023 = icmp ult float* %scevgep, %scevgep13 - %bound124 = icmp ult float* %scevgep11, %scevgep5 - %found.conflict25 = and i1 %bound023, %bound124 - %conflict.rdx = or i1 %found.conflict, %found.conflict25 - %bound026 = icmp ult float* %scevgep, %scevgep17 - %bound127 = icmp ult float* %scevgep15, %scevgep5 - %found.conflict28 = and i1 %bound026, %bound127 - %conflict.rdx29 = or i1 %conflict.rdx, %found.conflict28 - %bound030 = icmp ult float* %scevgep, %scevgep21 - %bound131 = icmp ult float* %scevgep19, %scevgep5 - %found.conflict32 = and i1 %bound030, %bound131 - %conflict.rdx33 = or i1 %conflict.rdx29, %found.conflict32 - %bound034 = icmp ult float* %scevgep7, %scevgep13 - %bound135 = icmp ult float* %scevgep11, %scevgep9 - %found.conflict36 = and i1 %bound034, %bound135 - %conflict.rdx37 = or i1 %conflict.rdx33, %found.conflict36 - %bound038 = icmp ult float* %scevgep7, %scevgep17 - %bound139 = icmp ult float* %scevgep15, %scevgep9 - %found.conflict40 = and i1 %bound038, %bound139 - %conflict.rdx41 = or i1 %conflict.rdx37, %found.conflict40 - %bound042 = icmp ult float* %scevgep7, %scevgep21 - %bound143 = icmp ult float* %scevgep19, %scevgep9 - %found.conflict44 = and i1 %bound042, %bound143 - %conflict.rdx45 = or i1 %conflict.rdx41, %found.conflict44 - br i1 %conflict.rdx45, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert46 = insertelement <8 x i32> undef, i32 %23, i32 0 - %broadcast.splat47 = shufflevector <8 x i32> %broadcast.splatinsert46, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] - %45 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %46 = trunc <8 x i64> %45 to <8 x i32> - %47 = icmp sgt <8 x i32> %broadcast.splat47, %46 - %48 = extractelement <8 x i32> %46, i32 0 - %49 = add nsw i32 %mul.i.i, %48 - %50 = sext i32 %49 to i64 - %51 = getelementptr inbounds float, float* %15, i64 %50 - %52 = bitcast float* %51 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %52, i32 4, <8 x i1> %47, <8 x float> undef), !tbaa !12, !alias.scope !36, !noalias !39 - %53 = add nsw i32 %mul2.i.i, %48 - %54 = sext i32 %53 to i64 - %55 = getelementptr inbounds float, float* %15, i64 %54 - %56 = bitcast float* %55 to <8 x float>* - %wide.masked.load48 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %56, i32 4, <8 x i1> %47, <8 x float> undef), !tbaa !12, !alias.scope !44 - %57 = getelementptr inbounds float, float* %7, i64 %50 - %58 = bitcast float* %57 to <8 x float>* - %wide.masked.load49 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %58, i32 4, <8 x i1> %47, <8 x float> undef), !tbaa !12, !alias.scope !45 - %59 = fmul <8 x float> %wide.masked.load48, %wide.masked.load49 - %60 = getelementptr inbounds float, float* %11, i64 %54 - %61 = bitcast float* %60 to <8 x float>* - %wide.masked.load50 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %61, i32 4, <8 x i1> %47, <8 x float> undef), !tbaa !12, !alias.scope !46 - %62 = fdiv <8 x float> %59, %wide.masked.load50, !fpmath !27 - %63 = fsub <8 x float> %wide.masked.load, %62 - %64 = bitcast float* %51 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %63, <8 x float>* %64, i32 4, <8 x i1> %47), !tbaa !12, !alias.scope !36, !noalias !39, !llvm.access.group !28 - %65 = getelementptr inbounds float, float* %11, i64 %50 - %66 = bitcast float* %65 to <8 x float>* - %wide.masked.load51 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %66, i32 4, <8 x i1> %47, <8 x float> undef), !tbaa !12, !alias.scope !47, !noalias !48 - %67 = bitcast float* %57 to <8 x float>* - %wide.masked.load52 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %67, i32 4, <8 x i1> %47, <8 x float> undef), !tbaa !12, !alias.scope !45 - %68 = fmul <8 x float> %wide.masked.load52, %wide.masked.load52 - %69 = bitcast float* %60 to <8 x float>* - %wide.masked.load53 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %69, i32 4, <8 x i1> %47, <8 x float> undef), !tbaa !12, !alias.scope !46 - %70 = fdiv <8 x float> %68, %wide.masked.load53, !fpmath !27 - %71 = fsub <8 x float> %wide.masked.load51, %70 - %72 = bitcast float* %65 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %71, <8 x float>* %72, i32 4, <8 x i1> %47), !tbaa !12, !alias.scope !47, !noalias !48, !llvm.access.group !28 - %index.next = add i64 %index, 8 - %vec.ind.next = add <8 x i64> %vec.ind, - %73 = icmp eq i64 %index.next, 256 - br i1 %73, label %_pocl_kernel_adi_kernel4.exit.loopexit55, label %vector.body, !llvm.loop !49 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ %81, %if.end.r_exit.i.i ], [ 0, %pregion_for_entry.entry.i.i.preheader ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %23, %conv.i.i - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %add.i.i = add nsw i32 %mul.i.i, %conv.i.i - %idxprom.i.i = sext i32 %add.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %15, i64 %idxprom.i.i - %74 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %add3.i.i = add nsw i32 %mul2.i.i, %conv.i.i - %idxprom4.i.i = sext i32 %add3.i.i to i64 - %arrayidx5.i.i = getelementptr inbounds float, float* %15, i64 %idxprom4.i.i - %75 = load float, float* %arrayidx5.i.i, align 4, !tbaa !12 - %arrayidx9.i.i = getelementptr inbounds float, float* %7, i64 %idxprom.i.i - %76 = load float, float* %arrayidx9.i.i, align 4, !tbaa !12 - %mul10.i.i = fmul float %75, %76 - %arrayidx15.i.i = getelementptr inbounds float, float* %11, i64 %idxprom4.i.i - %77 = load float, float* %arrayidx15.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %mul10.i.i, %77, !fpmath !27 - %sub16.i.i = fsub float %74, %div.i.i - store float %sub16.i.i, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !28 - %arrayidx24.i.i = getelementptr inbounds float, float* %11, i64 %idxprom.i.i - %78 = load float, float* %arrayidx24.i.i, align 4, !tbaa !12 - %79 = load float, float* %arrayidx9.i.i, align 4, !tbaa !12 - %mul33.i.i = fmul float %79, %79 - %80 = load float, float* %arrayidx15.i.i, align 4, !tbaa !12 - %div39.i.i = fdiv float %mul33.i.i, %80, !fpmath !27 - %sub40.i.i = fsub float %78, %div39.i.i - store float %sub40.i.i, float* %arrayidx24.i.i, align 4, !tbaa !12, !llvm.access.group !28 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %81 = add nuw nsw i64 %_local_id_x.i.0, 1 - %exitcond.not = icmp eq i64 %81, 256 - br i1 %exitcond.not, label %_pocl_kernel_adi_kernel4.exit.loopexit, label %pregion_for_entry.entry.i.i, !llvm.loop !50 - -_pocl_kernel_adi_kernel4.exit.loopexit: ; preds = %if.end.r_exit.i.i - br label %_pocl_kernel_adi_kernel4.exit - -_pocl_kernel_adi_kernel4.exit.loopexit55: ; preds = %vector.body - br label %_pocl_kernel_adi_kernel4.exit - -_pocl_kernel_adi_kernel4.exit: ; preds = %_pocl_kernel_adi_kernel4.exit.loopexit55, %_pocl_kernel_adi_kernel4.exit.loopexit - ret void -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_adi_kernel4_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -vector.scevcheck: - %5 = bitcast i8** %0 to float** - %6 = load float*, float** %5, align 8 - %7 = getelementptr i8*, i8** %0, i64 1 - %8 = bitcast i8** %7 to float** - %9 = load float*, float** %8, align 8 - %10 = getelementptr i8*, i8** %0, i64 2 - %11 = bitcast i8** %10 to float** - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 3 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %17 = getelementptr i8*, i8** %0, i64 4 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %mul.i.i.i = shl i64 %2, 8 - %mul.i.i = mul nsw i32 %20, %16 - %sub.i.i = add nsw i32 %16, -1 - %mul2.i.i = mul nsw i32 %20, %sub.i.i - %21 = mul i32 %20, %16 - %22 = trunc i64 %2 to i32 - %23 = shl i32 %22, 8 - %24 = add i32 %21, %23 - %25 = icmp sgt i32 %24, 2147483392 - %26 = add i32 %16, -1 - %27 = mul i32 %20, %26 - %28 = add i32 %27, %23 - %29 = icmp sgt i32 %28, 2147483392 - %30 = or i1 %25, %29 - br i1 %30, label %pregion_for_entry.entry.i.i.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i - -vector.memcheck: ; preds = %vector.scevcheck - %31 = mul i32 %20, %16 - %32 = trunc i64 %2 to i32 - %33 = shl i32 %32, 8 - %34 = add i32 %31, %33 - %35 = sext i32 %34 to i64 - %scevgep = getelementptr float, float* %12, i64 %35 - %36 = add nsw i64 %35, 256 - %scevgep5 = getelementptr float, float* %12, i64 %36 - %scevgep7 = getelementptr float, float* %9, i64 %35 - %scevgep9 = getelementptr float, float* %9, i64 %36 - %37 = add i32 %16, -1 - %38 = mul i32 %20, %37 - %39 = add i32 %38, %33 - %40 = sext i32 %39 to i64 - %scevgep11 = getelementptr float, float* %12, i64 %40 - %41 = add nsw i64 %40, 256 - %scevgep13 = getelementptr float, float* %12, i64 %41 - %scevgep15 = getelementptr float, float* %6, i64 %35 - %scevgep17 = getelementptr float, float* %6, i64 %36 - %scevgep19 = getelementptr float, float* %9, i64 %40 - %scevgep21 = getelementptr float, float* %9, i64 %41 - %bound0 = icmp ult float* %scevgep, %scevgep9 - %bound1 = icmp ult float* %scevgep7, %scevgep5 - %found.conflict = and i1 %bound0, %bound1 - %bound023 = icmp ult float* %scevgep, %scevgep13 - %bound124 = icmp ult float* %scevgep11, %scevgep5 - %found.conflict25 = and i1 %bound023, %bound124 - %conflict.rdx = or i1 %found.conflict, %found.conflict25 - %bound026 = icmp ult float* %scevgep, %scevgep17 - %bound127 = icmp ult float* %scevgep15, %scevgep5 - %found.conflict28 = and i1 %bound026, %bound127 - %conflict.rdx29 = or i1 %conflict.rdx, %found.conflict28 - %bound030 = icmp ult float* %scevgep, %scevgep21 - %bound131 = icmp ult float* %scevgep19, %scevgep5 - %found.conflict32 = and i1 %bound030, %bound131 - %conflict.rdx33 = or i1 %conflict.rdx29, %found.conflict32 - %bound034 = icmp ult float* %scevgep7, %scevgep13 - %bound135 = icmp ult float* %scevgep11, %scevgep9 - %found.conflict36 = and i1 %bound034, %bound135 - %conflict.rdx37 = or i1 %conflict.rdx33, %found.conflict36 - %bound038 = icmp ult float* %scevgep7, %scevgep17 - %bound139 = icmp ult float* %scevgep15, %scevgep9 - %found.conflict40 = and i1 %bound038, %bound139 - %conflict.rdx41 = or i1 %conflict.rdx37, %found.conflict40 - %bound042 = icmp ult float* %scevgep7, %scevgep21 - %bound143 = icmp ult float* %scevgep19, %scevgep9 - %found.conflict44 = and i1 %bound042, %bound143 - %conflict.rdx45 = or i1 %conflict.rdx41, %found.conflict44 - br i1 %conflict.rdx45, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert46 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat47 = shufflevector <8 x i32> %broadcast.splatinsert46, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] - %42 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %43 = trunc <8 x i64> %42 to <8 x i32> - %44 = icmp sgt <8 x i32> %broadcast.splat47, %43 - %45 = extractelement <8 x i32> %43, i32 0 - %46 = add nsw i32 %mul.i.i, %45 - %47 = sext i32 %46 to i64 - %48 = getelementptr inbounds float, float* %12, i64 %47 - %49 = bitcast float* %48 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %49, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12, !alias.scope !51, !noalias !54 - %50 = add nsw i32 %mul2.i.i, %45 - %51 = sext i32 %50 to i64 - %52 = getelementptr inbounds float, float* %12, i64 %51 - %53 = bitcast float* %52 to <8 x float>* - %wide.masked.load48 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %53, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12, !alias.scope !59 - %54 = getelementptr inbounds float, float* %6, i64 %47 - %55 = bitcast float* %54 to <8 x float>* - %wide.masked.load49 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %55, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12, !alias.scope !60 - %56 = fmul <8 x float> %wide.masked.load48, %wide.masked.load49 - %57 = getelementptr inbounds float, float* %9, i64 %51 - %58 = bitcast float* %57 to <8 x float>* - %wide.masked.load50 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %58, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12, !alias.scope !61 - %59 = fdiv <8 x float> %56, %wide.masked.load50, !fpmath !27 - %60 = fsub <8 x float> %wide.masked.load, %59 - %61 = bitcast float* %48 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %60, <8 x float>* %61, i32 4, <8 x i1> %44), !tbaa !12, !alias.scope !51, !noalias !54, !llvm.access.group !28 - %62 = getelementptr inbounds float, float* %9, i64 %47 - %63 = bitcast float* %62 to <8 x float>* - %wide.masked.load51 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %63, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12, !alias.scope !62, !noalias !63 - %64 = bitcast float* %54 to <8 x float>* - %wide.masked.load52 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %64, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12, !alias.scope !60 - %65 = fmul <8 x float> %wide.masked.load52, %wide.masked.load52 - %66 = bitcast float* %57 to <8 x float>* - %wide.masked.load53 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %66, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12, !alias.scope !61 - %67 = fdiv <8 x float> %65, %wide.masked.load53, !fpmath !27 - %68 = fsub <8 x float> %wide.masked.load51, %67 - %69 = bitcast float* %62 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %68, <8 x float>* %69, i32 4, <8 x i1> %44), !tbaa !12, !alias.scope !62, !noalias !63, !llvm.access.group !28 - %index.next = add i64 %index, 8 - %vec.ind.next = add <8 x i64> %vec.ind, - %70 = icmp eq i64 %index.next, 256 - br i1 %70, label %_pocl_kernel_adi_kernel4.exit.loopexit55, label %vector.body, !llvm.loop !64 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ %78, %if.end.r_exit.i.i ], [ 0, %pregion_for_entry.entry.i.i.preheader ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %20, %conv.i.i - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %add.i.i = add nsw i32 %mul.i.i, %conv.i.i - %idxprom.i.i = sext i32 %add.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %12, i64 %idxprom.i.i - %71 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %add3.i.i = add nsw i32 %mul2.i.i, %conv.i.i - %idxprom4.i.i = sext i32 %add3.i.i to i64 - %arrayidx5.i.i = getelementptr inbounds float, float* %12, i64 %idxprom4.i.i - %72 = load float, float* %arrayidx5.i.i, align 4, !tbaa !12 - %arrayidx9.i.i = getelementptr inbounds float, float* %6, i64 %idxprom.i.i - %73 = load float, float* %arrayidx9.i.i, align 4, !tbaa !12 - %mul10.i.i = fmul float %72, %73 - %arrayidx15.i.i = getelementptr inbounds float, float* %9, i64 %idxprom4.i.i - %74 = load float, float* %arrayidx15.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %mul10.i.i, %74, !fpmath !27 - %sub16.i.i = fsub float %71, %div.i.i - store float %sub16.i.i, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !28 - %arrayidx24.i.i = getelementptr inbounds float, float* %9, i64 %idxprom.i.i - %75 = load float, float* %arrayidx24.i.i, align 4, !tbaa !12 - %76 = load float, float* %arrayidx9.i.i, align 4, !tbaa !12 - %mul33.i.i = fmul float %76, %76 - %77 = load float, float* %arrayidx15.i.i, align 4, !tbaa !12 - %div39.i.i = fdiv float %mul33.i.i, %77, !fpmath !27 - %sub40.i.i = fsub float %75, %div39.i.i - store float %sub40.i.i, float* %arrayidx24.i.i, align 4, !tbaa !12, !llvm.access.group !28 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %78 = add nuw nsw i64 %_local_id_x.i.0, 1 - %exitcond.not = icmp eq i64 %78, 256 - br i1 %exitcond.not, label %_pocl_kernel_adi_kernel4.exit.loopexit, label %pregion_for_entry.entry.i.i, !llvm.loop !65 - -_pocl_kernel_adi_kernel4.exit.loopexit: ; preds = %if.end.r_exit.i.i - br label %_pocl_kernel_adi_kernel4.exit - -_pocl_kernel_adi_kernel4.exit.loopexit55: ; preds = %vector.body - br label %_pocl_kernel_adi_kernel4.exit - -_pocl_kernel_adi_kernel4.exit: ; preds = %_pocl_kernel_adi_kernel4.exit.loopexit55, %_pocl_kernel_adi_kernel4.exit.loopexit - ret void -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #2 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #3 - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nofree norecurse nounwind } -attributes #2 = { argmemonly nounwind readonly willreturn } -attributes #3 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !""} -!10 = !{!"A", !"B", !"X", !"i1", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20, !21, !22, !23} -!20 = distinct !{!20, !18} -!21 = distinct !{!21, !18} -!22 = distinct !{!22, !18} -!23 = distinct !{!23, !18} -!24 = !{!21} -!25 = !{!22} -!26 = !{!23} -!27 = !{float 2.500000e+00} -!28 = !{!29} -!29 = distinct !{} -!30 = !{!20} -!31 = !{!21, !22, !23} -!32 = distinct !{!32, !33, !34} -!33 = !{!"llvm.loop.parallel_accesses", !29} -!34 = !{!"llvm.loop.isvectorized", i32 1} -!35 = distinct !{!35, !33, !34} -!36 = !{!37} -!37 = distinct !{!37, !38} -!38 = distinct !{!38, !"LVerDomain"} -!39 = !{!40, !41, !42, !43} -!40 = distinct !{!40, !38} -!41 = distinct !{!41, !38} -!42 = distinct !{!42, !38} -!43 = distinct !{!43, !38} -!44 = !{!41} -!45 = !{!42} -!46 = !{!43} -!47 = !{!40} -!48 = !{!41, !42, !43} -!49 = distinct !{!49, !33, !34} -!50 = distinct !{!50, !33, !34} -!51 = !{!52} -!52 = distinct !{!52, !53} -!53 = distinct !{!53, !"LVerDomain"} -!54 = !{!55, !56, !57, !58} -!55 = distinct !{!55, !53} -!56 = distinct !{!56, !53} -!57 = distinct !{!57, !53} -!58 = distinct !{!58, !53} -!59 = !{!56} -!60 = !{!57} -!61 = !{!58} -!62 = !{!55} -!63 = !{!56, !57, !58} -!64 = distinct !{!64, !33, !34} -!65 = distinct !{!65, !33, !34} diff --git a/pocl_irs/adi_kernel5.ll b/pocl_irs/adi_kernel5.ll deleted file mode 100644 index c8a2cc1..0000000 --- a/pocl_irs/adi_kernel5.ll +++ /dev/null @@ -1,478 +0,0 @@ -; ModuleID = './BF/DFFLECFOLOBPKCKMNEPCKIANKJKKLLHBOGBCO/adi_kernel5/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_adi_kernel5(float* nocapture readnone %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %4, i64 %5, i64 %6, i64 %7) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { -vector.scevcheck: - %mul.i.i = shl i64 %5, 8 - %sub.i = add nsw i32 %3, -1 - %mul.i = mul nsw i32 %sub.i, %3 - %8 = add i32 %3, -1 - %9 = mul i32 %8, %3 - %10 = trunc i64 %5 to i32 - %11 = shl i32 %10, 8 - %12 = add i32 %9, %11 - %13 = icmp sgt i32 %12, 2147483392 - br i1 %13, label %pregion_for_entry.entry.i.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i - -vector.memcheck: ; preds = %vector.scevcheck - %14 = add i32 %3, -1 - %15 = mul i32 %14, %3 - %16 = trunc i64 %5 to i32 - %17 = shl i32 %16, 8 - %18 = add i32 %15, %17 - %19 = sext i32 %18 to i64 - %scevgep = getelementptr float, float* %2, i64 %19 - %20 = add nsw i64 %19, 256 - %scevgep2 = getelementptr float, float* %2, i64 %20 - %scevgep4 = getelementptr float, float* %1, i64 %19 - %scevgep6 = getelementptr float, float* %1, i64 %20 - %bound0 = icmp ult float* %scevgep, %scevgep6 - %bound1 = icmp ult float* %scevgep4, %scevgep2 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert8 = insertelement <8 x i32> undef, i32 %3, i32 0 - %broadcast.splat9 = shufflevector <8 x i32> %broadcast.splatinsert8, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %21 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %22 = trunc <8 x i64> %21 to <8 x i32> - %23 = icmp sgt <8 x i32> %broadcast.splat9, %22 - %24 = extractelement <8 x i32> %22, i32 0 - %25 = add nsw i32 %mul.i, %24 - %26 = sext i32 %25 to i64 - %27 = getelementptr inbounds float, float* %2, i64 %26 - %28 = bitcast float* %27 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %28, i32 4, <8 x i1> %23, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %29 = getelementptr inbounds float, float* %1, i64 %26 - %30 = bitcast float* %29 to <8 x float>* - %wide.masked.load10 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %30, i32 4, <8 x i1> %23, <8 x float> undef), !tbaa !12, !alias.scope !19 - %31 = fdiv <8 x float> %wide.masked.load, %wide.masked.load10, !fpmath !21 - %32 = bitcast float* %27 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %31, <8 x float>* %32, i32 4, <8 x i1> %23), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !22 - %vec.ind.next = add <8 x i64> %vec.ind, - %33 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %34 = trunc <8 x i64> %33 to <8 x i32> - %35 = icmp sgt <8 x i32> %broadcast.splat9, %34 - %36 = extractelement <8 x i32> %34, i32 0 - %37 = add nsw i32 %mul.i, %36 - %38 = sext i32 %37 to i64 - %39 = getelementptr inbounds float, float* %2, i64 %38 - %40 = bitcast float* %39 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %40, i32 4, <8 x i1> %35, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %41 = getelementptr inbounds float, float* %1, i64 %38 - %42 = bitcast float* %41 to <8 x float>* - %wide.masked.load10.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %42, i32 4, <8 x i1> %35, <8 x float> undef), !tbaa !12, !alias.scope !19 - %43 = fdiv <8 x float> %wide.masked.load.1, %wide.masked.load10.1, !fpmath !21 - %44 = bitcast float* %39 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %43, <8 x float>* %44, i32 4, <8 x i1> %35), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !22 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %45 = icmp eq i64 %index.next.1, 256 - br i1 %45, label %adi_kernel5.exit.loopexit12, label %vector.body, !llvm.loop !24 - -pregion_for_entry.entry.i: ; preds = %if.end.r_exit.i.1, %pregion_for_entry.entry.i.preheader - %_local_id_x.0 = phi i64 [ 0, %pregion_for_entry.entry.i.preheader ], [ %51, %if.end.r_exit.i.1 ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %conv.i = trunc i64 %add1.i.i to i32 - %cmp.i = icmp slt i32 %conv.i, %3 - br i1 %cmp.i, label %if.then.i, label %if.end.r_exit.i - -if.then.i: ; preds = %pregion_for_entry.entry.i - %add.i = add nsw i32 %mul.i, %conv.i - %idxprom.i = sext i32 %add.i to i64 - %arrayidx.i = getelementptr inbounds float, float* %2, i64 %idxprom.i - %46 = load float, float* %arrayidx.i, align 4, !tbaa !12 - %arrayidx6.i = getelementptr inbounds float, float* %1, i64 %idxprom.i - %47 = load float, float* %arrayidx6.i, align 4, !tbaa !12 - %div.i = fdiv float %46, %47, !fpmath !21 - store float %div.i, float* %arrayidx.i, align 4, !tbaa !12, !llvm.access.group !22 - br label %if.end.r_exit.i - -if.end.r_exit.i: ; preds = %if.then.i, %pregion_for_entry.entry.i - %48 = or i64 %_local_id_x.0, 1 - %add1.i.i.1 = add nuw nsw i64 %48, %mul.i.i - %conv.i.1 = trunc i64 %add1.i.i.1 to i32 - %cmp.i.1 = icmp slt i32 %conv.i.1, %3 - br i1 %cmp.i.1, label %if.then.i.1, label %if.end.r_exit.i.1 - -adi_kernel5.exit.loopexit: ; preds = %if.end.r_exit.i.1 - br label %adi_kernel5.exit - -adi_kernel5.exit.loopexit12: ; preds = %vector.body - br label %adi_kernel5.exit - -adi_kernel5.exit: ; preds = %adi_kernel5.exit.loopexit12, %adi_kernel5.exit.loopexit - ret void - -if.then.i.1: ; preds = %if.end.r_exit.i - %add.i.1 = add nsw i32 %mul.i, %conv.i.1 - %idxprom.i.1 = sext i32 %add.i.1 to i64 - %arrayidx.i.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.1 - %49 = load float, float* %arrayidx.i.1, align 4, !tbaa !12 - %arrayidx6.i.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.1 - %50 = load float, float* %arrayidx6.i.1, align 4, !tbaa !12 - %div.i.1 = fdiv float %49, %50, !fpmath !21 - store float %div.i.1, float* %arrayidx.i.1, align 4, !tbaa !12, !llvm.access.group !22 - br label %if.end.r_exit.i.1 - -if.end.r_exit.i.1: ; preds = %if.then.i.1, %if.end.r_exit.i - %51 = add nuw nsw i64 %_local_id_x.0, 2 - %exitcond.not.1 = icmp eq i64 %51, 256 - br i1 %exitcond.not.1, label %adi_kernel5.exit.loopexit, label %pregion_for_entry.entry.i, !llvm.loop !27 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_adi_kernel5_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -vector.scevcheck: - %5 = getelementptr i8*, i8** %0, i64 1 - %6 = bitcast i8** %5 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 2 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 3 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = add nsw i32 %16, -1 - %mul.i.i = mul nsw i32 %sub.i.i, %16 - %17 = add i32 %16, -1 - %18 = mul i32 %16, %17 - %19 = trunc i64 %2 to i32 - %20 = shl i32 %19, 8 - %21 = add i32 %18, %20 - %22 = icmp sgt i32 %21, 2147483392 - br i1 %22, label %pregion_for_entry.entry.i.i.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i - -vector.memcheck: ; preds = %vector.scevcheck - %23 = add i32 %16, -1 - %24 = mul i32 %16, %23 - %25 = trunc i64 %2 to i32 - %26 = shl i32 %25, 8 - %27 = add i32 %24, %26 - %28 = sext i32 %27 to i64 - %scevgep = getelementptr float, float* %12, i64 %28 - %29 = add nsw i64 %28, 256 - %scevgep2 = getelementptr float, float* %12, i64 %29 - %scevgep4 = getelementptr float, float* %8, i64 %28 - %scevgep6 = getelementptr float, float* %8, i64 %29 - %bound0 = icmp ult float* %scevgep, %scevgep6 - %bound1 = icmp ult float* %scevgep4, %scevgep2 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert8 = insertelement <8 x i32> undef, i32 %16, i32 0 - %broadcast.splat9 = shufflevector <8 x i32> %broadcast.splatinsert8, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %30 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %31 = trunc <8 x i64> %30 to <8 x i32> - %32 = icmp sgt <8 x i32> %broadcast.splat9, %31 - %33 = extractelement <8 x i32> %31, i32 0 - %34 = add nsw i32 %mul.i.i, %33 - %35 = sext i32 %34 to i64 - %36 = getelementptr inbounds float, float* %12, i64 %35 - %37 = bitcast float* %36 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %37, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !28, !noalias !31 - %38 = getelementptr inbounds float, float* %8, i64 %35 - %39 = bitcast float* %38 to <8 x float>* - %wide.masked.load10 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %39, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !31 - %40 = fdiv <8 x float> %wide.masked.load, %wide.masked.load10, !fpmath !21 - %41 = bitcast float* %36 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %40, <8 x float>* %41, i32 4, <8 x i1> %32), !tbaa !12, !alias.scope !28, !noalias !31, !llvm.access.group !22 - %vec.ind.next = add <8 x i64> %vec.ind, - %42 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %43 = trunc <8 x i64> %42 to <8 x i32> - %44 = icmp sgt <8 x i32> %broadcast.splat9, %43 - %45 = extractelement <8 x i32> %43, i32 0 - %46 = add nsw i32 %mul.i.i, %45 - %47 = sext i32 %46 to i64 - %48 = getelementptr inbounds float, float* %12, i64 %47 - %49 = bitcast float* %48 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %49, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12, !alias.scope !28, !noalias !31 - %50 = getelementptr inbounds float, float* %8, i64 %47 - %51 = bitcast float* %50 to <8 x float>* - %wide.masked.load10.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %51, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12, !alias.scope !31 - %52 = fdiv <8 x float> %wide.masked.load.1, %wide.masked.load10.1, !fpmath !21 - %53 = bitcast float* %48 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %52, <8 x float>* %53, i32 4, <8 x i1> %44), !tbaa !12, !alias.scope !28, !noalias !31, !llvm.access.group !22 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %54 = icmp eq i64 %index.next.1, 256 - br i1 %54, label %_pocl_kernel_adi_kernel5.exit.loopexit12, label %vector.body, !llvm.loop !33 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ 0, %pregion_for_entry.entry.i.i.preheader ], [ %60, %if.end.r_exit.i.i.1 ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %16, %conv.i.i - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %add.i.i = add nsw i32 %mul.i.i, %conv.i.i - %idxprom.i.i = sext i32 %add.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %12, i64 %idxprom.i.i - %55 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %arrayidx6.i.i = getelementptr inbounds float, float* %8, i64 %idxprom.i.i - %56 = load float, float* %arrayidx6.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %55, %56, !fpmath !21 - store float %div.i.i, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !22 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %57 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %57, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %16, %conv.i.i.1 - br i1 %cmp.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_adi_kernel5.exit.loopexit: ; preds = %if.end.r_exit.i.i.1 - br label %_pocl_kernel_adi_kernel5.exit - -_pocl_kernel_adi_kernel5.exit.loopexit12: ; preds = %vector.body - br label %_pocl_kernel_adi_kernel5.exit - -_pocl_kernel_adi_kernel5.exit: ; preds = %_pocl_kernel_adi_kernel5.exit.loopexit12, %_pocl_kernel_adi_kernel5.exit.loopexit - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %add.i.i.1 = add nsw i32 %mul.i.i, %conv.i.i.1 - %idxprom.i.i.1 = sext i32 %add.i.i.1 to i64 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.1 - %58 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %arrayidx6.i.i.1 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.1 - %59 = load float, float* %arrayidx6.i.i.1, align 4, !tbaa !12 - %div.i.i.1 = fdiv float %58, %59, !fpmath !21 - store float %div.i.i.1, float* %arrayidx.i.i.1, align 4, !tbaa !12, !llvm.access.group !22 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %60 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond.not.1 = icmp eq i64 %60, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_adi_kernel5.exit.loopexit, label %pregion_for_entry.entry.i.i, !llvm.loop !34 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_adi_kernel5_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -vector.scevcheck: - %5 = getelementptr i8*, i8** %0, i64 1 - %6 = bitcast i8** %5 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 2 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 3 - %12 = bitcast i8** %11 to i32** - %13 = load i32*, i32** %12, align 8 - %14 = load i32, i32* %13, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = add nsw i32 %14, -1 - %mul.i.i = mul nsw i32 %sub.i.i, %14 - %15 = add i32 %14, -1 - %16 = mul i32 %14, %15 - %17 = trunc i64 %2 to i32 - %18 = shl i32 %17, 8 - %19 = add i32 %16, %18 - %20 = icmp sgt i32 %19, 2147483392 - br i1 %20, label %pregion_for_entry.entry.i.i.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i - -vector.memcheck: ; preds = %vector.scevcheck - %21 = add i32 %14, -1 - %22 = mul i32 %14, %21 - %23 = trunc i64 %2 to i32 - %24 = shl i32 %23, 8 - %25 = add i32 %22, %24 - %26 = sext i32 %25 to i64 - %scevgep = getelementptr float, float* %10, i64 %26 - %27 = add nsw i64 %26, 256 - %scevgep2 = getelementptr float, float* %10, i64 %27 - %scevgep4 = getelementptr float, float* %7, i64 %26 - %scevgep6 = getelementptr float, float* %7, i64 %27 - %bound0 = icmp ult float* %scevgep, %scevgep6 - %bound1 = icmp ult float* %scevgep4, %scevgep2 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert8 = insertelement <8 x i32> undef, i32 %14, i32 0 - %broadcast.splat9 = shufflevector <8 x i32> %broadcast.splatinsert8, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %28 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %29 = trunc <8 x i64> %28 to <8 x i32> - %30 = icmp sgt <8 x i32> %broadcast.splat9, %29 - %31 = extractelement <8 x i32> %29, i32 0 - %32 = add nsw i32 %mul.i.i, %31 - %33 = sext i32 %32 to i64 - %34 = getelementptr inbounds float, float* %10, i64 %33 - %35 = bitcast float* %34 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %35, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !38 - %36 = getelementptr inbounds float, float* %7, i64 %33 - %37 = bitcast float* %36 to <8 x float>* - %wide.masked.load10 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %37, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !38 - %38 = fdiv <8 x float> %wide.masked.load, %wide.masked.load10, !fpmath !21 - %39 = bitcast float* %34 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %38, <8 x float>* %39, i32 4, <8 x i1> %30), !tbaa !12, !alias.scope !35, !noalias !38, !llvm.access.group !22 - %vec.ind.next = add <8 x i64> %vec.ind, - %40 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %41 = trunc <8 x i64> %40 to <8 x i32> - %42 = icmp sgt <8 x i32> %broadcast.splat9, %41 - %43 = extractelement <8 x i32> %41, i32 0 - %44 = add nsw i32 %mul.i.i, %43 - %45 = sext i32 %44 to i64 - %46 = getelementptr inbounds float, float* %10, i64 %45 - %47 = bitcast float* %46 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %47, i32 4, <8 x i1> %42, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !38 - %48 = getelementptr inbounds float, float* %7, i64 %45 - %49 = bitcast float* %48 to <8 x float>* - %wide.masked.load10.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %49, i32 4, <8 x i1> %42, <8 x float> undef), !tbaa !12, !alias.scope !38 - %50 = fdiv <8 x float> %wide.masked.load.1, %wide.masked.load10.1, !fpmath !21 - %51 = bitcast float* %46 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %50, <8 x float>* %51, i32 4, <8 x i1> %42), !tbaa !12, !alias.scope !35, !noalias !38, !llvm.access.group !22 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %52 = icmp eq i64 %index.next.1, 256 - br i1 %52, label %_pocl_kernel_adi_kernel5.exit.loopexit12, label %vector.body, !llvm.loop !40 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ 0, %pregion_for_entry.entry.i.i.preheader ], [ %58, %if.end.r_exit.i.i.1 ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %14, %conv.i.i - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %add.i.i = add nsw i32 %mul.i.i, %conv.i.i - %idxprom.i.i = sext i32 %add.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %10, i64 %idxprom.i.i - %53 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %arrayidx6.i.i = getelementptr inbounds float, float* %7, i64 %idxprom.i.i - %54 = load float, float* %arrayidx6.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %53, %54, !fpmath !21 - store float %div.i.i, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !22 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %55 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %55, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %14, %conv.i.i.1 - br i1 %cmp.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_adi_kernel5.exit.loopexit: ; preds = %if.end.r_exit.i.i.1 - br label %_pocl_kernel_adi_kernel5.exit - -_pocl_kernel_adi_kernel5.exit.loopexit12: ; preds = %vector.body - br label %_pocl_kernel_adi_kernel5.exit - -_pocl_kernel_adi_kernel5.exit: ; preds = %_pocl_kernel_adi_kernel5.exit.loopexit12, %_pocl_kernel_adi_kernel5.exit.loopexit - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %add.i.i.1 = add nsw i32 %mul.i.i, %conv.i.i.1 - %idxprom.i.i.1 = sext i32 %add.i.i.1 to i64 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.1 - %56 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %arrayidx6.i.i.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.1 - %57 = load float, float* %arrayidx6.i.i.1, align 4, !tbaa !12 - %div.i.i.1 = fdiv float %56, %57, !fpmath !21 - store float %div.i.i.1, float* %arrayidx.i.i.1, align 4, !tbaa !12, !llvm.access.group !22 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %58 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond.not.1 = icmp eq i64 %58, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_adi_kernel5.exit.loopexit, label %pregion_for_entry.entry.i.i, !llvm.loop !41 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #2 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #3 - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nofree norecurse nounwind } -attributes #2 = { argmemonly nounwind readonly willreturn } -attributes #3 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int"} -!9 = !{!"", !"", !"", !""} -!10 = !{!"A", !"B", !"X", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20} -!20 = distinct !{!20, !18} -!21 = !{float 2.500000e+00} -!22 = !{!23} -!23 = distinct !{} -!24 = distinct !{!24, !25, !26} -!25 = !{!"llvm.loop.parallel_accesses", !23} -!26 = !{!"llvm.loop.isvectorized", i32 1} -!27 = distinct !{!27, !25, !26} -!28 = !{!29} -!29 = distinct !{!29, !30} -!30 = distinct !{!30, !"LVerDomain"} -!31 = !{!32} -!32 = distinct !{!32, !30} -!33 = distinct !{!33, !25, !26} -!34 = distinct !{!34, !25, !26} -!35 = !{!36} -!36 = distinct !{!36, !37} -!37 = distinct !{!37, !"LVerDomain"} -!38 = !{!39} -!39 = distinct !{!39, !37} -!40 = distinct !{!40, !25, !26} -!41 = distinct !{!41, !25, !26} diff --git a/pocl_irs/adi_kernel6.ll b/pocl_irs/adi_kernel6.ll deleted file mode 100644 index 17153ce..0000000 --- a/pocl_irs/adi_kernel6.ll +++ /dev/null @@ -1,688 +0,0 @@ -; ModuleID = './BF/DFFLECFOLOBPKCKMNEPCKIANKJKKLLHBOGBCO/adi_kernel6/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_adi_kernel6(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, i32 %4, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %5, i64 %6, i64 %7, i64 %8) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { -vector.scevcheck: - %mul.i.i = shl i64 %6, 8 - %sub.i = sub i32 -2, %3 - %sub2.i = add i32 %sub.i, %4 - %mul.i = mul nsw i32 %sub2.i, %4 - %sub3.i = sub i32 -3, %3 - %sub4.i = add i32 %sub3.i, %4 - %mul5.i = mul nsw i32 %sub4.i, %4 - %9 = mul i32 %sub2.i, %4 - %10 = trunc i64 %6 to i32 - %11 = shl i32 %10, 8 - %12 = add i32 %9, %11 - %13 = icmp sgt i32 %12, 2147483392 - %14 = mul i32 %sub4.i, %4 - %15 = add i32 %14, %11 - %16 = icmp sgt i32 %15, 2147483392 - %17 = or i1 %13, %16 - br i1 %17, label %pregion_for_entry.entry.i.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i - -vector.memcheck: ; preds = %vector.scevcheck - %18 = mul i32 %sub2.i, %4 - %19 = trunc i64 %6 to i32 - %20 = shl i32 %19, 8 - %21 = add i32 %18, %20 - %22 = sext i32 %21 to i64 - %scevgep = getelementptr float, float* %2, i64 %22 - %23 = add nsw i64 %22, 256 - %scevgep5 = getelementptr float, float* %2, i64 %23 - %24 = mul i32 %sub4.i, %4 - %25 = add i32 %24, %20 - %26 = sext i32 %25 to i64 - %scevgep7 = getelementptr float, float* %2, i64 %26 - %27 = add nsw i64 %26, 256 - %scevgep9 = getelementptr float, float* %2, i64 %27 - %scevgep11 = getelementptr float, float* %0, i64 %26 - %scevgep13 = getelementptr float, float* %0, i64 %27 - %scevgep15 = getelementptr float, float* %1, i64 %22 - %scevgep17 = getelementptr float, float* %1, i64 %23 - %bound0 = icmp ult float* %scevgep, %scevgep9 - %bound1 = icmp ult float* %scevgep7, %scevgep5 - %found.conflict = and i1 %bound0, %bound1 - %bound019 = icmp ult float* %scevgep, %scevgep13 - %bound120 = icmp ult float* %scevgep11, %scevgep5 - %found.conflict21 = and i1 %bound019, %bound120 - %conflict.rdx = or i1 %found.conflict, %found.conflict21 - %bound022 = icmp ult float* %scevgep, %scevgep17 - %bound123 = icmp ult float* %scevgep15, %scevgep5 - %found.conflict24 = and i1 %bound022, %bound123 - %conflict.rdx25 = or i1 %conflict.rdx, %found.conflict24 - br i1 %conflict.rdx25, label %pregion_for_entry.entry.i.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert26 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat27 = shufflevector <8 x i32> %broadcast.splatinsert26, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %28 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %29 = trunc <8 x i64> %28 to <8 x i32> - %30 = icmp sgt <8 x i32> %broadcast.splat27, %29 - %31 = extractelement <8 x i32> %29, i32 0 - %32 = add nsw i32 %mul.i, %31 - %33 = sext i32 %32 to i64 - %34 = getelementptr inbounds float, float* %2, i64 %33 - %35 = bitcast float* %34 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %35, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %36 = add nsw i32 %mul5.i, %31 - %37 = sext i32 %36 to i64 - %38 = getelementptr inbounds float, float* %2, i64 %37 - %39 = bitcast float* %38 to <8 x float>* - %wide.masked.load28 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %39, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !23 - %40 = getelementptr inbounds float, float* %0, i64 %37 - %41 = bitcast float* %40 to <8 x float>* - %wide.masked.load29 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %41, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !24 - %42 = fneg <8 x float> %wide.masked.load28 - %43 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %42, <8 x float> %wide.masked.load29, <8 x float> %wide.masked.load) - %44 = getelementptr inbounds float, float* %1, i64 %33 - %45 = bitcast float* %44 to <8 x float>* - %wide.masked.load30 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %45, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !25 - %46 = fdiv <8 x float> %43, %wide.masked.load30, !fpmath !26 - %47 = bitcast float* %34 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %46, <8 x float>* %47, i32 4, <8 x i1> %30), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !27 - %vec.ind.next = add <8 x i64> %vec.ind, - %48 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %49 = trunc <8 x i64> %48 to <8 x i32> - %50 = icmp sgt <8 x i32> %broadcast.splat27, %49 - %51 = extractelement <8 x i32> %49, i32 0 - %52 = add nsw i32 %mul.i, %51 - %53 = sext i32 %52 to i64 - %54 = getelementptr inbounds float, float* %2, i64 %53 - %55 = bitcast float* %54 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %55, i32 4, <8 x i1> %50, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %56 = add nsw i32 %mul5.i, %51 - %57 = sext i32 %56 to i64 - %58 = getelementptr inbounds float, float* %2, i64 %57 - %59 = bitcast float* %58 to <8 x float>* - %wide.masked.load28.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %59, i32 4, <8 x i1> %50, <8 x float> undef), !tbaa !12, !alias.scope !23 - %60 = getelementptr inbounds float, float* %0, i64 %57 - %61 = bitcast float* %60 to <8 x float>* - %wide.masked.load29.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %61, i32 4, <8 x i1> %50, <8 x float> undef), !tbaa !12, !alias.scope !24 - %62 = fneg <8 x float> %wide.masked.load28.1 - %63 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %62, <8 x float> %wide.masked.load29.1, <8 x float> %wide.masked.load.1) - %64 = getelementptr inbounds float, float* %1, i64 %53 - %65 = bitcast float* %64 to <8 x float>* - %wide.masked.load30.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %65, i32 4, <8 x i1> %50, <8 x float> undef), !tbaa !12, !alias.scope !25 - %66 = fdiv <8 x float> %63, %wide.masked.load30.1, !fpmath !26 - %67 = bitcast float* %54 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %66, <8 x float>* %67, i32 4, <8 x i1> %50), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !27 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %68 = icmp eq i64 %index.next.1, 256 - br i1 %68, label %adi_kernel6.exit.loopexit32, label %vector.body, !llvm.loop !29 - -pregion_for_entry.entry.i: ; preds = %if.end.r_exit.i.1, %pregion_for_entry.entry.i.preheader - %_local_id_x.0 = phi i64 [ 0, %pregion_for_entry.entry.i.preheader ], [ %80, %if.end.r_exit.i.1 ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %conv.i = trunc i64 %add1.i.i to i32 - %cmp.i = icmp slt i32 %conv.i, %4 - br i1 %cmp.i, label %if.then.i, label %if.end.r_exit.i - -if.then.i: ; preds = %pregion_for_entry.entry.i - %add.i = add nsw i32 %mul.i, %conv.i - %idxprom.i = sext i32 %add.i to i64 - %arrayidx.i = getelementptr inbounds float, float* %2, i64 %idxprom.i - %69 = load float, float* %arrayidx.i, align 4, !tbaa !12 - %add6.i = add nsw i32 %mul5.i, %conv.i - %idxprom7.i = sext i32 %add6.i to i64 - %arrayidx8.i = getelementptr inbounds float, float* %2, i64 %idxprom7.i - %70 = load float, float* %arrayidx8.i, align 4, !tbaa !12 - %arrayidx14.i = getelementptr inbounds float, float* %0, i64 %idxprom7.i - %71 = load float, float* %arrayidx14.i, align 4, !tbaa !12 - %neg.i = fneg float %70 - %72 = tail call float @llvm.fmuladd.f32(float %neg.i, float %71, float %69) #5 - %arrayidx21.i = getelementptr inbounds float, float* %1, i64 %idxprom.i - %73 = load float, float* %arrayidx21.i, align 4, !tbaa !12 - %div.i = fdiv float %72, %73, !fpmath !26 - store float %div.i, float* %arrayidx.i, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i - -if.end.r_exit.i: ; preds = %if.then.i, %pregion_for_entry.entry.i - %74 = or i64 %_local_id_x.0, 1 - %add1.i.i.1 = add nuw nsw i64 %74, %mul.i.i - %conv.i.1 = trunc i64 %add1.i.i.1 to i32 - %cmp.i.1 = icmp slt i32 %conv.i.1, %4 - br i1 %cmp.i.1, label %if.then.i.1, label %if.end.r_exit.i.1 - -adi_kernel6.exit.loopexit: ; preds = %if.end.r_exit.i.1 - br label %adi_kernel6.exit - -adi_kernel6.exit.loopexit32: ; preds = %vector.body - br label %adi_kernel6.exit - -adi_kernel6.exit: ; preds = %adi_kernel6.exit.loopexit32, %adi_kernel6.exit.loopexit - ret void - -if.then.i.1: ; preds = %if.end.r_exit.i - %add.i.1 = add nsw i32 %mul.i, %conv.i.1 - %idxprom.i.1 = sext i32 %add.i.1 to i64 - %arrayidx.i.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.1 - %75 = load float, float* %arrayidx.i.1, align 4, !tbaa !12 - %add6.i.1 = add nsw i32 %mul5.i, %conv.i.1 - %idxprom7.i.1 = sext i32 %add6.i.1 to i64 - %arrayidx8.i.1 = getelementptr inbounds float, float* %2, i64 %idxprom7.i.1 - %76 = load float, float* %arrayidx8.i.1, align 4, !tbaa !12 - %arrayidx14.i.1 = getelementptr inbounds float, float* %0, i64 %idxprom7.i.1 - %77 = load float, float* %arrayidx14.i.1, align 4, !tbaa !12 - %neg.i.1 = fneg float %76 - %78 = tail call float @llvm.fmuladd.f32(float %neg.i.1, float %77, float %75) #5 - %arrayidx21.i.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.1 - %79 = load float, float* %arrayidx21.i.1, align 4, !tbaa !12 - %div.i.1 = fdiv float %78, %79, !fpmath !26 - store float %div.i.1, float* %arrayidx.i.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.1 - -if.end.r_exit.i.1: ; preds = %if.then.i.1, %if.end.r_exit.i - %80 = add nuw nsw i64 %_local_id_x.0, 2 - %exitcond.not.1 = icmp eq i64 %80, 256 - br i1 %exitcond.not.1, label %adi_kernel6.exit.loopexit, label %pregion_for_entry.entry.i, !llvm.loop !32 -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_adi_kernel6_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { -vector.scevcheck: - %5 = bitcast i8** %0 to float*** - %6 = load float**, float*** %5, align 8 - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float*** - %10 = load float**, float*** %9, align 8 - %11 = load float*, float** %10, align 8 - %12 = getelementptr i8*, i8** %0, i64 2 - %13 = bitcast i8** %12 to float*** - %14 = load float**, float*** %13, align 8 - %15 = load float*, float** %14, align 8 - %16 = getelementptr i8*, i8** %0, i64 3 - %17 = bitcast i8** %16 to i32** - %18 = load i32*, i32** %17, align 8 - %19 = load i32, i32* %18, align 4 - %20 = getelementptr i8*, i8** %0, i64 4 - %21 = bitcast i8** %20 to i32** - %22 = load i32*, i32** %21, align 8 - %23 = load i32, i32* %22, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = sub i32 -2, %19 - %sub2.i.i = add i32 %sub.i.i, %23 - %mul.i.i = mul nsw i32 %sub2.i.i, %23 - %sub3.i.i = sub i32 -3, %19 - %sub4.i.i = add i32 %sub3.i.i, %23 - %mul5.i.i = mul nsw i32 %sub4.i.i, %23 - %24 = mul i32 %23, %sub2.i.i - %25 = trunc i64 %2 to i32 - %26 = shl i32 %25, 8 - %27 = add i32 %24, %26 - %28 = icmp sgt i32 %27, 2147483392 - %29 = mul i32 %23, %sub4.i.i - %30 = add i32 %29, %26 - %31 = icmp sgt i32 %30, 2147483392 - %32 = or i1 %28, %31 - br i1 %32, label %pregion_for_entry.entry.i.i.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i - -vector.memcheck: ; preds = %vector.scevcheck - %33 = mul i32 %23, %sub2.i.i - %34 = trunc i64 %2 to i32 - %35 = shl i32 %34, 8 - %36 = add i32 %33, %35 - %37 = sext i32 %36 to i64 - %scevgep = getelementptr float, float* %15, i64 %37 - %38 = add nsw i64 %37, 256 - %scevgep5 = getelementptr float, float* %15, i64 %38 - %39 = mul i32 %23, %sub4.i.i - %40 = add i32 %39, %35 - %41 = sext i32 %40 to i64 - %scevgep7 = getelementptr float, float* %15, i64 %41 - %42 = add nsw i64 %41, 256 - %scevgep9 = getelementptr float, float* %15, i64 %42 - %scevgep11 = getelementptr float, float* %7, i64 %41 - %scevgep13 = getelementptr float, float* %7, i64 %42 - %scevgep15 = getelementptr float, float* %11, i64 %37 - %scevgep17 = getelementptr float, float* %11, i64 %38 - %bound0 = icmp ult float* %scevgep, %scevgep9 - %bound1 = icmp ult float* %scevgep7, %scevgep5 - %found.conflict = and i1 %bound0, %bound1 - %bound019 = icmp ult float* %scevgep, %scevgep13 - %bound120 = icmp ult float* %scevgep11, %scevgep5 - %found.conflict21 = and i1 %bound019, %bound120 - %conflict.rdx = or i1 %found.conflict, %found.conflict21 - %bound022 = icmp ult float* %scevgep, %scevgep17 - %bound123 = icmp ult float* %scevgep15, %scevgep5 - %found.conflict24 = and i1 %bound022, %bound123 - %conflict.rdx25 = or i1 %conflict.rdx, %found.conflict24 - br i1 %conflict.rdx25, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert26 = insertelement <8 x i32> undef, i32 %23, i32 0 - %broadcast.splat27 = shufflevector <8 x i32> %broadcast.splatinsert26, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %43 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %44 = trunc <8 x i64> %43 to <8 x i32> - %45 = icmp sgt <8 x i32> %broadcast.splat27, %44 - %46 = extractelement <8 x i32> %44, i32 0 - %47 = add nsw i32 %mul.i.i, %46 - %48 = sext i32 %47 to i64 - %49 = getelementptr inbounds float, float* %15, i64 %48 - %50 = bitcast float* %49 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %50, i32 4, <8 x i1> %45, <8 x float> undef), !tbaa !12, !alias.scope !33, !noalias !36 - %51 = add nsw i32 %mul5.i.i, %46 - %52 = sext i32 %51 to i64 - %53 = getelementptr inbounds float, float* %15, i64 %52 - %54 = bitcast float* %53 to <8 x float>* - %wide.masked.load28 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %54, i32 4, <8 x i1> %45, <8 x float> undef), !tbaa !12, !alias.scope !40 - %55 = getelementptr inbounds float, float* %7, i64 %52 - %56 = bitcast float* %55 to <8 x float>* - %wide.masked.load29 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %56, i32 4, <8 x i1> %45, <8 x float> undef), !tbaa !12, !alias.scope !41 - %57 = fneg <8 x float> %wide.masked.load28 - %58 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %57, <8 x float> %wide.masked.load29, <8 x float> %wide.masked.load) - %59 = getelementptr inbounds float, float* %11, i64 %48 - %60 = bitcast float* %59 to <8 x float>* - %wide.masked.load30 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %60, i32 4, <8 x i1> %45, <8 x float> undef), !tbaa !12, !alias.scope !42 - %61 = fdiv <8 x float> %58, %wide.masked.load30, !fpmath !26 - %62 = bitcast float* %49 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %61, <8 x float>* %62, i32 4, <8 x i1> %45), !tbaa !12, !alias.scope !33, !noalias !36, !llvm.access.group !27 - %vec.ind.next = add <8 x i64> %vec.ind, - %63 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %64 = trunc <8 x i64> %63 to <8 x i32> - %65 = icmp sgt <8 x i32> %broadcast.splat27, %64 - %66 = extractelement <8 x i32> %64, i32 0 - %67 = add nsw i32 %mul.i.i, %66 - %68 = sext i32 %67 to i64 - %69 = getelementptr inbounds float, float* %15, i64 %68 - %70 = bitcast float* %69 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %70, i32 4, <8 x i1> %65, <8 x float> undef), !tbaa !12, !alias.scope !33, !noalias !36 - %71 = add nsw i32 %mul5.i.i, %66 - %72 = sext i32 %71 to i64 - %73 = getelementptr inbounds float, float* %15, i64 %72 - %74 = bitcast float* %73 to <8 x float>* - %wide.masked.load28.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %74, i32 4, <8 x i1> %65, <8 x float> undef), !tbaa !12, !alias.scope !40 - %75 = getelementptr inbounds float, float* %7, i64 %72 - %76 = bitcast float* %75 to <8 x float>* - %wide.masked.load29.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %76, i32 4, <8 x i1> %65, <8 x float> undef), !tbaa !12, !alias.scope !41 - %77 = fneg <8 x float> %wide.masked.load28.1 - %78 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %77, <8 x float> %wide.masked.load29.1, <8 x float> %wide.masked.load.1) - %79 = getelementptr inbounds float, float* %11, i64 %68 - %80 = bitcast float* %79 to <8 x float>* - %wide.masked.load30.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %80, i32 4, <8 x i1> %65, <8 x float> undef), !tbaa !12, !alias.scope !42 - %81 = fdiv <8 x float> %78, %wide.masked.load30.1, !fpmath !26 - %82 = bitcast float* %69 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %81, <8 x float>* %82, i32 4, <8 x i1> %65), !tbaa !12, !alias.scope !33, !noalias !36, !llvm.access.group !27 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %83 = icmp eq i64 %index.next.1, 256 - br i1 %83, label %_pocl_kernel_adi_kernel6.exit.loopexit32, label %vector.body, !llvm.loop !43 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ 0, %pregion_for_entry.entry.i.i.preheader ], [ %95, %if.end.r_exit.i.i.1 ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %23, %conv.i.i - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %add.i.i = add nsw i32 %mul.i.i, %conv.i.i - %idxprom.i.i = sext i32 %add.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %15, i64 %idxprom.i.i - %84 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %add6.i.i = add nsw i32 %mul5.i.i, %conv.i.i - %idxprom7.i.i = sext i32 %add6.i.i to i64 - %arrayidx8.i.i = getelementptr inbounds float, float* %15, i64 %idxprom7.i.i - %85 = load float, float* %arrayidx8.i.i, align 4, !tbaa !12 - %arrayidx14.i.i = getelementptr inbounds float, float* %7, i64 %idxprom7.i.i - %86 = load float, float* %arrayidx14.i.i, align 4, !tbaa !12 - %neg.i.i = fneg float %85 - %87 = tail call float @llvm.fmuladd.f32(float %neg.i.i, float %86, float %84) #5 - %arrayidx21.i.i = getelementptr inbounds float, float* %11, i64 %idxprom.i.i - %88 = load float, float* %arrayidx21.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %87, %88, !fpmath !26 - store float %div.i.i, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %89 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %89, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %23, %conv.i.i.1 - br i1 %cmp.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_adi_kernel6.exit.loopexit: ; preds = %if.end.r_exit.i.i.1 - br label %_pocl_kernel_adi_kernel6.exit - -_pocl_kernel_adi_kernel6.exit.loopexit32: ; preds = %vector.body - br label %_pocl_kernel_adi_kernel6.exit - -_pocl_kernel_adi_kernel6.exit: ; preds = %_pocl_kernel_adi_kernel6.exit.loopexit32, %_pocl_kernel_adi_kernel6.exit.loopexit - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %add.i.i.1 = add nsw i32 %mul.i.i, %conv.i.i.1 - %idxprom.i.i.1 = sext i32 %add.i.i.1 to i64 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %15, i64 %idxprom.i.i.1 - %90 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %add6.i.i.1 = add nsw i32 %mul5.i.i, %conv.i.i.1 - %idxprom7.i.i.1 = sext i32 %add6.i.i.1 to i64 - %arrayidx8.i.i.1 = getelementptr inbounds float, float* %15, i64 %idxprom7.i.i.1 - %91 = load float, float* %arrayidx8.i.i.1, align 4, !tbaa !12 - %arrayidx14.i.i.1 = getelementptr inbounds float, float* %7, i64 %idxprom7.i.i.1 - %92 = load float, float* %arrayidx14.i.i.1, align 4, !tbaa !12 - %neg.i.i.1 = fneg float %91 - %93 = tail call float @llvm.fmuladd.f32(float %neg.i.i.1, float %92, float %90) #5 - %arrayidx21.i.i.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.1 - %94 = load float, float* %arrayidx21.i.i.1, align 4, !tbaa !12 - %div.i.i.1 = fdiv float %93, %94, !fpmath !26 - store float %div.i.i.1, float* %arrayidx.i.i.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %95 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond.not.1 = icmp eq i64 %95, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_adi_kernel6.exit.loopexit, label %pregion_for_entry.entry.i.i, !llvm.loop !44 -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_adi_kernel6_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { -vector.scevcheck: - %5 = bitcast i8** %0 to float** - %6 = load float*, float** %5, align 8 - %7 = getelementptr i8*, i8** %0, i64 1 - %8 = bitcast i8** %7 to float** - %9 = load float*, float** %8, align 8 - %10 = getelementptr i8*, i8** %0, i64 2 - %11 = bitcast i8** %10 to float** - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 3 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %17 = getelementptr i8*, i8** %0, i64 4 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = sub i32 -2, %16 - %sub2.i.i = add i32 %sub.i.i, %20 - %mul.i.i = mul nsw i32 %sub2.i.i, %20 - %sub3.i.i = sub i32 -3, %16 - %sub4.i.i = add i32 %sub3.i.i, %20 - %mul5.i.i = mul nsw i32 %sub4.i.i, %20 - %21 = mul i32 %20, %sub2.i.i - %22 = trunc i64 %2 to i32 - %23 = shl i32 %22, 8 - %24 = add i32 %21, %23 - %25 = icmp sgt i32 %24, 2147483392 - %26 = mul i32 %20, %sub4.i.i - %27 = add i32 %26, %23 - %28 = icmp sgt i32 %27, 2147483392 - %29 = or i1 %25, %28 - br i1 %29, label %pregion_for_entry.entry.i.i.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i - -vector.memcheck: ; preds = %vector.scevcheck - %30 = mul i32 %20, %sub2.i.i - %31 = trunc i64 %2 to i32 - %32 = shl i32 %31, 8 - %33 = add i32 %30, %32 - %34 = sext i32 %33 to i64 - %scevgep = getelementptr float, float* %12, i64 %34 - %35 = add nsw i64 %34, 256 - %scevgep5 = getelementptr float, float* %12, i64 %35 - %36 = mul i32 %20, %sub4.i.i - %37 = add i32 %36, %32 - %38 = sext i32 %37 to i64 - %scevgep7 = getelementptr float, float* %12, i64 %38 - %39 = add nsw i64 %38, 256 - %scevgep9 = getelementptr float, float* %12, i64 %39 - %scevgep11 = getelementptr float, float* %6, i64 %38 - %scevgep13 = getelementptr float, float* %6, i64 %39 - %scevgep15 = getelementptr float, float* %9, i64 %34 - %scevgep17 = getelementptr float, float* %9, i64 %35 - %bound0 = icmp ult float* %scevgep, %scevgep9 - %bound1 = icmp ult float* %scevgep7, %scevgep5 - %found.conflict = and i1 %bound0, %bound1 - %bound019 = icmp ult float* %scevgep, %scevgep13 - %bound120 = icmp ult float* %scevgep11, %scevgep5 - %found.conflict21 = and i1 %bound019, %bound120 - %conflict.rdx = or i1 %found.conflict, %found.conflict21 - %bound022 = icmp ult float* %scevgep, %scevgep17 - %bound123 = icmp ult float* %scevgep15, %scevgep5 - %found.conflict24 = and i1 %bound022, %bound123 - %conflict.rdx25 = or i1 %conflict.rdx, %found.conflict24 - br i1 %conflict.rdx25, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert26 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat27 = shufflevector <8 x i32> %broadcast.splatinsert26, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %40 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %41 = trunc <8 x i64> %40 to <8 x i32> - %42 = icmp sgt <8 x i32> %broadcast.splat27, %41 - %43 = extractelement <8 x i32> %41, i32 0 - %44 = add nsw i32 %mul.i.i, %43 - %45 = sext i32 %44 to i64 - %46 = getelementptr inbounds float, float* %12, i64 %45 - %47 = bitcast float* %46 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %47, i32 4, <8 x i1> %42, <8 x float> undef), !tbaa !12, !alias.scope !45, !noalias !48 - %48 = add nsw i32 %mul5.i.i, %43 - %49 = sext i32 %48 to i64 - %50 = getelementptr inbounds float, float* %12, i64 %49 - %51 = bitcast float* %50 to <8 x float>* - %wide.masked.load28 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %51, i32 4, <8 x i1> %42, <8 x float> undef), !tbaa !12, !alias.scope !52 - %52 = getelementptr inbounds float, float* %6, i64 %49 - %53 = bitcast float* %52 to <8 x float>* - %wide.masked.load29 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %53, i32 4, <8 x i1> %42, <8 x float> undef), !tbaa !12, !alias.scope !53 - %54 = fneg <8 x float> %wide.masked.load28 - %55 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %54, <8 x float> %wide.masked.load29, <8 x float> %wide.masked.load) - %56 = getelementptr inbounds float, float* %9, i64 %45 - %57 = bitcast float* %56 to <8 x float>* - %wide.masked.load30 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %57, i32 4, <8 x i1> %42, <8 x float> undef), !tbaa !12, !alias.scope !54 - %58 = fdiv <8 x float> %55, %wide.masked.load30, !fpmath !26 - %59 = bitcast float* %46 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %58, <8 x float>* %59, i32 4, <8 x i1> %42), !tbaa !12, !alias.scope !45, !noalias !48, !llvm.access.group !27 - %vec.ind.next = add <8 x i64> %vec.ind, - %60 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %61 = trunc <8 x i64> %60 to <8 x i32> - %62 = icmp sgt <8 x i32> %broadcast.splat27, %61 - %63 = extractelement <8 x i32> %61, i32 0 - %64 = add nsw i32 %mul.i.i, %63 - %65 = sext i32 %64 to i64 - %66 = getelementptr inbounds float, float* %12, i64 %65 - %67 = bitcast float* %66 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %67, i32 4, <8 x i1> %62, <8 x float> undef), !tbaa !12, !alias.scope !45, !noalias !48 - %68 = add nsw i32 %mul5.i.i, %63 - %69 = sext i32 %68 to i64 - %70 = getelementptr inbounds float, float* %12, i64 %69 - %71 = bitcast float* %70 to <8 x float>* - %wide.masked.load28.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %71, i32 4, <8 x i1> %62, <8 x float> undef), !tbaa !12, !alias.scope !52 - %72 = getelementptr inbounds float, float* %6, i64 %69 - %73 = bitcast float* %72 to <8 x float>* - %wide.masked.load29.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %73, i32 4, <8 x i1> %62, <8 x float> undef), !tbaa !12, !alias.scope !53 - %74 = fneg <8 x float> %wide.masked.load28.1 - %75 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %74, <8 x float> %wide.masked.load29.1, <8 x float> %wide.masked.load.1) - %76 = getelementptr inbounds float, float* %9, i64 %65 - %77 = bitcast float* %76 to <8 x float>* - %wide.masked.load30.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %77, i32 4, <8 x i1> %62, <8 x float> undef), !tbaa !12, !alias.scope !54 - %78 = fdiv <8 x float> %75, %wide.masked.load30.1, !fpmath !26 - %79 = bitcast float* %66 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %78, <8 x float>* %79, i32 4, <8 x i1> %62), !tbaa !12, !alias.scope !45, !noalias !48, !llvm.access.group !27 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %80 = icmp eq i64 %index.next.1, 256 - br i1 %80, label %_pocl_kernel_adi_kernel6.exit.loopexit32, label %vector.body, !llvm.loop !55 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ 0, %pregion_for_entry.entry.i.i.preheader ], [ %92, %if.end.r_exit.i.i.1 ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %20, %conv.i.i - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %add.i.i = add nsw i32 %mul.i.i, %conv.i.i - %idxprom.i.i = sext i32 %add.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %12, i64 %idxprom.i.i - %81 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %add6.i.i = add nsw i32 %mul5.i.i, %conv.i.i - %idxprom7.i.i = sext i32 %add6.i.i to i64 - %arrayidx8.i.i = getelementptr inbounds float, float* %12, i64 %idxprom7.i.i - %82 = load float, float* %arrayidx8.i.i, align 4, !tbaa !12 - %arrayidx14.i.i = getelementptr inbounds float, float* %6, i64 %idxprom7.i.i - %83 = load float, float* %arrayidx14.i.i, align 4, !tbaa !12 - %neg.i.i = fneg float %82 - %84 = tail call float @llvm.fmuladd.f32(float %neg.i.i, float %83, float %81) #5 - %arrayidx21.i.i = getelementptr inbounds float, float* %9, i64 %idxprom.i.i - %85 = load float, float* %arrayidx21.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %84, %85, !fpmath !26 - store float %div.i.i, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %86 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %86, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %20, %conv.i.i.1 - br i1 %cmp.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_adi_kernel6.exit.loopexit: ; preds = %if.end.r_exit.i.i.1 - br label %_pocl_kernel_adi_kernel6.exit - -_pocl_kernel_adi_kernel6.exit.loopexit32: ; preds = %vector.body - br label %_pocl_kernel_adi_kernel6.exit - -_pocl_kernel_adi_kernel6.exit: ; preds = %_pocl_kernel_adi_kernel6.exit.loopexit32, %_pocl_kernel_adi_kernel6.exit.loopexit - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %add.i.i.1 = add nsw i32 %mul.i.i, %conv.i.i.1 - %idxprom.i.i.1 = sext i32 %add.i.i.1 to i64 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.1 - %87 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %add6.i.i.1 = add nsw i32 %mul5.i.i, %conv.i.i.1 - %idxprom7.i.i.1 = sext i32 %add6.i.i.1 to i64 - %arrayidx8.i.i.1 = getelementptr inbounds float, float* %12, i64 %idxprom7.i.i.1 - %88 = load float, float* %arrayidx8.i.i.1, align 4, !tbaa !12 - %arrayidx14.i.i.1 = getelementptr inbounds float, float* %6, i64 %idxprom7.i.i.1 - %89 = load float, float* %arrayidx14.i.i.1, align 4, !tbaa !12 - %neg.i.i.1 = fneg float %88 - %90 = tail call float @llvm.fmuladd.f32(float %neg.i.i.1, float %89, float %87) #5 - %arrayidx21.i.i.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.1 - %91 = load float, float* %arrayidx21.i.i.1, align 4, !tbaa !12 - %div.i.i.1 = fdiv float %90, %91, !fpmath !26 - store float %div.i.i.1, float* %arrayidx.i.i.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %92 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond.not.1 = icmp eq i64 %92, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_adi_kernel6.exit.loopexit, label %pregion_for_entry.entry.i.i, !llvm.loop !56 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) #0 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #4 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nofree nounwind } -attributes #3 = { argmemonly nounwind readonly willreturn } -attributes #4 = { argmemonly nounwind willreturn } -attributes #5 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !""} -!10 = !{!"A", !"B", !"X", !"i1", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20, !21, !22} -!20 = distinct !{!20, !18} -!21 = distinct !{!21, !18} -!22 = distinct !{!22, !18} -!23 = !{!20} -!24 = !{!21} -!25 = !{!22} -!26 = !{float 2.500000e+00} -!27 = !{!28} -!28 = distinct !{} -!29 = distinct !{!29, !30, !31} -!30 = !{!"llvm.loop.parallel_accesses", !28} -!31 = !{!"llvm.loop.isvectorized", i32 1} -!32 = distinct !{!32, !30, !31} -!33 = !{!34} -!34 = distinct !{!34, !35} -!35 = distinct !{!35, !"LVerDomain"} -!36 = !{!37, !38, !39} -!37 = distinct !{!37, !35} -!38 = distinct !{!38, !35} -!39 = distinct !{!39, !35} -!40 = !{!37} -!41 = !{!38} -!42 = !{!39} -!43 = distinct !{!43, !30, !31} -!44 = distinct !{!44, !30, !31} -!45 = !{!46} -!46 = distinct !{!46, !47} -!47 = distinct !{!47, !"LVerDomain"} -!48 = !{!49, !50, !51} -!49 = distinct !{!49, !47} -!50 = distinct !{!50, !47} -!51 = distinct !{!51, !47} -!52 = !{!49} -!53 = !{!50} -!54 = !{!51} -!55 = distinct !{!55, !30, !31} -!56 = distinct !{!56, !30, !31} diff --git a/pocl_irs/atax_kernel1.ll b/pocl_irs/atax_kernel1.ll deleted file mode 100644 index ddf0c7a..0000000 --- a/pocl_irs/atax_kernel1.ll +++ /dev/null @@ -1,338 +0,0 @@ -; ModuleID = './BA/MLCHIBMEHLNJHOLIEAILJNHPJELIMDLCMGIJP/atax_kernel1/32-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_atax_kernel1(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, i32 %4, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %5, i64 %6, i64 %7, i64 %8) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %6, 5 - %cmp217.i = icmp sgt i32 %4, 0 - %wide.trip.count.i = zext i32 %4 to i64 - br i1 %cmp217.i, label %pregion_for_entry.entry.i.us.preheader, label %atax_kernel1.exit - -pregion_for_entry.entry.i.us.preheader: ; preds = %9 - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us.1, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %23, %if.end.r_exit.i.us.1 ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %3 - br i1 %cmp.i.us, label %for.body.lr.ph.i.us, label %if.end.r_exit.i.us - -for.body.lr.ph.i.us: ; preds = %pregion_for_entry.entry.i.us - %mul.i.us = mul nsw i32 %conv.i.us, %4 - %sext.i.us = shl i64 %add1.i.i.us, 32 - %idxprom7.i.us = ashr exact i64 %sext.i.us, 32 - %arrayidx8.i.us = getelementptr inbounds float, float* %2, i64 %idxprom7.i.us - %10 = sext i32 %mul.i.us to i64 - %.pre.i1.us4 = load float, float* %arrayidx8.i.us, align 4, !tbaa !12 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %for.body.lr.ph.i.us - %indvars.iv.next.i3.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %for.body.lr.ph.i.us ] - %11 = phi float [ %15, %for.body.i.us ], [ %.pre.i1.us4, %for.body.lr.ph.i.us ] - %12 = add nsw i64 %indvars.iv.next.i3.us, %10 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %12 - %13 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %arrayidx5.i.us = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.i3.us - %14 = load float, float* %arrayidx5.i.us, align 4, !tbaa !12 - %15 = tail call float @llvm.fmuladd.f32(float %13, float %14, float %11) #2 - store float %15, float* %arrayidx8.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i3.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %if.end.r_exit.i.us.loopexit, label %for.body.i.us, !llvm.loop !18 - -if.end.r_exit.i.us.loopexit: ; preds = %for.body.i.us - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %if.end.r_exit.i.us.loopexit, %pregion_for_entry.entry.i.us - %16 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1 = add nuw nsw i64 %16, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp.i.us.1 = icmp slt i32 %conv.i.us.1, %3 - br i1 %cmp.i.us.1, label %for.body.lr.ph.i.us.1, label %if.end.r_exit.i.us.1 - -atax_kernel1.exit.loopexit: ; preds = %if.end.r_exit.i.us.1 - br label %atax_kernel1.exit - -atax_kernel1.exit: ; preds = %atax_kernel1.exit.loopexit, %9 - ret void - -for.body.lr.ph.i.us.1: ; preds = %if.end.r_exit.i.us - %mul.i.us.1 = mul nsw i32 %conv.i.us.1, %4 - %sext.i.us.1 = shl i64 %add1.i.i.us.1, 32 - %idxprom7.i.us.1 = ashr exact i64 %sext.i.us.1, 32 - %arrayidx8.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom7.i.us.1 - %17 = sext i32 %mul.i.us.1 to i64 - %.pre.i1.us4.1 = load float, float* %arrayidx8.i.us.1, align 4, !tbaa !12 - br label %for.body.i.us.1 - -for.body.i.us.1: ; preds = %for.body.i.us.1, %for.body.lr.ph.i.us.1 - %indvars.iv.next.i3.us.1 = phi i64 [ %indvars.iv.next.i.us.1, %for.body.i.us.1 ], [ 0, %for.body.lr.ph.i.us.1 ] - %18 = phi float [ %22, %for.body.i.us.1 ], [ %.pre.i1.us4.1, %for.body.lr.ph.i.us.1 ] - %19 = add nsw i64 %indvars.iv.next.i3.us.1, %17 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %0, i64 %19 - %20 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %arrayidx5.i.us.1 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.i3.us.1 - %21 = load float, float* %arrayidx5.i.us.1, align 4, !tbaa !12 - %22 = tail call float @llvm.fmuladd.f32(float %20, float %21, float %18) #2 - store float %22, float* %arrayidx8.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.1, 1 - %exitcond.not.i.us.1 = icmp eq i64 %indvars.iv.next.i.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.1, label %if.end.r_exit.i.us.1.loopexit, label %for.body.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.us.1.loopexit: ; preds = %for.body.i.us.1 - br label %if.end.r_exit.i.us.1 - -if.end.r_exit.i.us.1: ; preds = %if.end.r_exit.i.us.1.loopexit, %if.end.r_exit.i.us - %23 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %23, 32 - br i1 %exitcond.not.1, label %atax_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_atax_kernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %mul.i.i.i = shl i64 %2, 5 - %cmp217.i.i = icmp sgt i32 %24, 0 - %wide.trip.count.i.i = zext i32 %24 to i64 - br i1 %cmp217.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %_pocl_kernel_atax_kernel1.exit - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %38, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %20, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.r_exit.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %24, %conv.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom7.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx8.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom7.i.i.us - %25 = sext i32 %mul.i.i.us to i64 - %.pre.i.i1.us4 = load float, float* %arrayidx8.i.i.us, align 4, !tbaa !12 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i3.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %26 = phi float [ %30, %for.body.i.i.us ], [ %.pre.i.i1.us4, %for.body.lr.ph.i.i.us ] - %27 = add nsw i64 %indvars.iv.next.i.i3.us, %25 - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %27 - %28 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %arrayidx5.i.i.us = getelementptr inbounds float, float* %12, i64 %indvars.iv.next.i.i3.us - %29 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %30 = tail call float @llvm.fmuladd.f32(float %28, float %29, float %26) #2 - store float %30, float* %arrayidx8.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i3.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.r_exit.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.r_exit.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.end.r_exit.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %31 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %31, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %20, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %for.body.lr.ph.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -_pocl_kernel_atax_kernel1.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_atax_kernel1.exit - -_pocl_kernel_atax_kernel1.exit: ; preds = %_pocl_kernel_atax_kernel1.exit.loopexit, %5 - ret void - -for.body.lr.ph.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %mul.i.i.us.1 = mul nsw i32 %24, %conv.i.i.us.1 - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom7.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx8.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom7.i.i.us.1 - %32 = sext i32 %mul.i.i.us.1 to i64 - %.pre.i.i1.us4.1 = load float, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %for.body.lr.ph.i.i.us.1 - %indvars.iv.next.i.i3.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %for.body.lr.ph.i.i.us.1 ] - %33 = phi float [ %37, %for.body.i.i.us.1 ], [ %.pre.i.i1.us4.1, %for.body.lr.ph.i.i.us.1 ] - %34 = add nsw i64 %indvars.iv.next.i.i3.us.1, %32 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %8, i64 %34 - %35 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %arrayidx5.i.i.us.1 = getelementptr inbounds float, float* %12, i64 %indvars.iv.next.i.i3.us.1 - %36 = load float, float* %arrayidx5.i.i.us.1, align 4, !tbaa !12 - %37 = tail call float @llvm.fmuladd.f32(float %35, float %36, float %33) #2 - store float %37, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.1, label %if.end.r_exit.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.loopexit, %if.end.r_exit.i.i.us - %38 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %38, 32 - br i1 %exitcond.not.1, label %_pocl_kernel_atax_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_atax_kernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %mul.i.i.i = shl i64 %2, 5 - %cmp217.i.i = icmp sgt i32 %21, 0 - %wide.trip.count.i.i = zext i32 %21 to i64 - br i1 %cmp217.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %_pocl_kernel_atax_kernel1.exit - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %35, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %17, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.r_exit.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %21, %conv.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom7.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx8.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom7.i.i.us - %22 = sext i32 %mul.i.i.us to i64 - %.pre.i.i1.us4 = load float, float* %arrayidx8.i.i.us, align 4, !tbaa !12 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i3.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %23 = phi float [ %27, %for.body.i.i.us ], [ %.pre.i.i1.us4, %for.body.lr.ph.i.i.us ] - %24 = add nsw i64 %indvars.iv.next.i.i3.us, %22 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %24 - %25 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %arrayidx5.i.i.us = getelementptr inbounds float, float* %10, i64 %indvars.iv.next.i.i3.us - %26 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %27 = tail call float @llvm.fmuladd.f32(float %25, float %26, float %23) #2 - store float %27, float* %arrayidx8.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i3.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.r_exit.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.r_exit.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.end.r_exit.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %28 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %28, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %17, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %for.body.lr.ph.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -_pocl_kernel_atax_kernel1.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_atax_kernel1.exit - -_pocl_kernel_atax_kernel1.exit: ; preds = %_pocl_kernel_atax_kernel1.exit.loopexit, %5 - ret void - -for.body.lr.ph.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %mul.i.i.us.1 = mul nsw i32 %21, %conv.i.i.us.1 - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom7.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx8.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom7.i.i.us.1 - %29 = sext i32 %mul.i.i.us.1 to i64 - %.pre.i.i1.us4.1 = load float, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %for.body.lr.ph.i.i.us.1 - %indvars.iv.next.i.i3.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %for.body.lr.ph.i.i.us.1 ] - %30 = phi float [ %34, %for.body.i.i.us.1 ], [ %.pre.i.i1.us4.1, %for.body.lr.ph.i.i.us.1 ] - %31 = add nsw i64 %indvars.iv.next.i.i3.us.1, %29 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %31 - %32 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %arrayidx5.i.i.us.1 = getelementptr inbounds float, float* %10, i64 %indvars.iv.next.i.i3.us.1 - %33 = load float, float* %arrayidx5.i.i.us.1, align 4, !tbaa !12 - %34 = tail call float @llvm.fmuladd.f32(float %32, float %33, float %30) #2 - store float %34, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.1, label %if.end.r_exit.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.loopexit, %if.end.r_exit.i.i.us - %35 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %35, 32 - br i1 %exitcond.not.1, label %_pocl_kernel_atax_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !""} -!10 = !{!"A", !"x", !"tmp", !"nx", !"ny"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = distinct !{!20, !21} -!21 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/atax_kernel2.ll b/pocl_irs/atax_kernel2.ll deleted file mode 100644 index 75081e6..0000000 --- a/pocl_irs/atax_kernel2.ll +++ /dev/null @@ -1,335 +0,0 @@ -; ModuleID = './BA/MLCHIBMEHLNJHOLIEAILJNHPJELIMDLCMGIJP/atax_kernel2/32-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_atax_kernel2(float* nocapture readonly %0, float* nocapture %1, float* nocapture readonly %2, i32 %3, i32 %4, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %5, i64 %6, i64 %7, i64 %8) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %6, 5 - %cmp217.i = icmp sgt i32 %3, 0 - %10 = sext i32 %4 to i64 - %wide.trip.count.i = zext i32 %3 to i64 - br i1 %cmp217.i, label %pregion_for_entry.entry.i.us.preheader, label %atax_kernel2.exit - -pregion_for_entry.entry.i.us.preheader: ; preds = %9 - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us.1, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %24, %if.end.r_exit.i.us.1 ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %4 - br i1 %cmp.i.us, label %for.body.lr.ph.i.us, label %if.end.r_exit.i.us - -for.body.lr.ph.i.us: ; preds = %pregion_for_entry.entry.i.us - %sext.i.us = shl i64 %add1.i.i.us, 32 - %idxprom7.i.us = ashr exact i64 %sext.i.us, 32 - %arrayidx8.i.us = getelementptr inbounds float, float* %1, i64 %idxprom7.i.us - %.pre.i2.us5 = load float, float* %arrayidx8.i.us, align 4, !tbaa !12 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %for.body.lr.ph.i.us - %indvars.iv.next.i4.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %for.body.lr.ph.i.us ] - %11 = phi float [ %16, %for.body.i.us ], [ %.pre.i2.us5, %for.body.lr.ph.i.us ] - %12 = mul nsw i64 %indvars.iv.next.i4.us, %10 - %13 = add nsw i64 %12, %idxprom7.i.us - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %13 - %14 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %arrayidx5.i.us = getelementptr inbounds float, float* %2, i64 %indvars.iv.next.i4.us - %15 = load float, float* %arrayidx5.i.us, align 4, !tbaa !12 - %16 = tail call float @llvm.fmuladd.f32(float %14, float %15, float %11) #2 - store float %16, float* %arrayidx8.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i4.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %if.end.r_exit.i.us.loopexit, label %for.body.i.us, !llvm.loop !18 - -if.end.r_exit.i.us.loopexit: ; preds = %for.body.i.us - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %if.end.r_exit.i.us.loopexit, %pregion_for_entry.entry.i.us - %17 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1 = add nuw nsw i64 %17, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp.i.us.1 = icmp slt i32 %conv.i.us.1, %4 - br i1 %cmp.i.us.1, label %for.body.lr.ph.i.us.1, label %if.end.r_exit.i.us.1 - -atax_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.us.1 - br label %atax_kernel2.exit - -atax_kernel2.exit: ; preds = %atax_kernel2.exit.loopexit, %9 - ret void - -for.body.lr.ph.i.us.1: ; preds = %if.end.r_exit.i.us - %sext.i.us.1 = shl i64 %add1.i.i.us.1, 32 - %idxprom7.i.us.1 = ashr exact i64 %sext.i.us.1, 32 - %arrayidx8.i.us.1 = getelementptr inbounds float, float* %1, i64 %idxprom7.i.us.1 - %.pre.i2.us5.1 = load float, float* %arrayidx8.i.us.1, align 4, !tbaa !12 - br label %for.body.i.us.1 - -for.body.i.us.1: ; preds = %for.body.i.us.1, %for.body.lr.ph.i.us.1 - %indvars.iv.next.i4.us.1 = phi i64 [ %indvars.iv.next.i.us.1, %for.body.i.us.1 ], [ 0, %for.body.lr.ph.i.us.1 ] - %18 = phi float [ %23, %for.body.i.us.1 ], [ %.pre.i2.us5.1, %for.body.lr.ph.i.us.1 ] - %19 = mul nsw i64 %indvars.iv.next.i4.us.1, %10 - %20 = add nsw i64 %19, %idxprom7.i.us.1 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %0, i64 %20 - %21 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %arrayidx5.i.us.1 = getelementptr inbounds float, float* %2, i64 %indvars.iv.next.i4.us.1 - %22 = load float, float* %arrayidx5.i.us.1, align 4, !tbaa !12 - %23 = tail call float @llvm.fmuladd.f32(float %21, float %22, float %18) #2 - store float %23, float* %arrayidx8.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.1 = add nuw nsw i64 %indvars.iv.next.i4.us.1, 1 - %exitcond.not.i.us.1 = icmp eq i64 %indvars.iv.next.i.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.1, label %if.end.r_exit.i.us.1.loopexit, label %for.body.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.us.1.loopexit: ; preds = %for.body.i.us.1 - br label %if.end.r_exit.i.us.1 - -if.end.r_exit.i.us.1: ; preds = %if.end.r_exit.i.us.1.loopexit, %if.end.r_exit.i.us - %24 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %24, 32 - br i1 %exitcond.not.1, label %atax_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_atax_kernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %mul.i.i.i = shl i64 %2, 5 - %cmp217.i.i = icmp sgt i32 %20, 0 - %25 = sext i32 %24 to i64 - %wide.trip.count.i.i = zext i32 %20 to i64 - br i1 %cmp217.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %_pocl_kernel_atax_kernel2.exit - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %39, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %24, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.r_exit.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom7.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx8.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom7.i.i.us - %.pre.i.i2.us5 = load float, float* %arrayidx8.i.i.us, align 4, !tbaa !12 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i4.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %26 = phi float [ %31, %for.body.i.i.us ], [ %.pre.i.i2.us5, %for.body.lr.ph.i.i.us ] - %27 = mul nsw i64 %indvars.iv.next.i.i4.us, %25 - %28 = add nsw i64 %27, %idxprom7.i.i.us - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %28 - %29 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %arrayidx5.i.i.us = getelementptr inbounds float, float* %16, i64 %indvars.iv.next.i.i4.us - %30 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %31 = tail call float @llvm.fmuladd.f32(float %29, float %30, float %26) #2 - store float %31, float* %arrayidx8.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i4.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.r_exit.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.r_exit.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.end.r_exit.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %32 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %32, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %24, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %for.body.lr.ph.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -_pocl_kernel_atax_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_atax_kernel2.exit - -_pocl_kernel_atax_kernel2.exit: ; preds = %_pocl_kernel_atax_kernel2.exit.loopexit, %5 - ret void - -for.body.lr.ph.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom7.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx8.i.i.us.1 = getelementptr inbounds float, float* %12, i64 %idxprom7.i.i.us.1 - %.pre.i.i2.us5.1 = load float, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %for.body.lr.ph.i.i.us.1 - %indvars.iv.next.i.i4.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %for.body.lr.ph.i.i.us.1 ] - %33 = phi float [ %38, %for.body.i.i.us.1 ], [ %.pre.i.i2.us5.1, %for.body.lr.ph.i.i.us.1 ] - %34 = mul nsw i64 %indvars.iv.next.i.i4.us.1, %25 - %35 = add nsw i64 %34, %idxprom7.i.i.us.1 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %8, i64 %35 - %36 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %arrayidx5.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %indvars.iv.next.i.i4.us.1 - %37 = load float, float* %arrayidx5.i.i.us.1, align 4, !tbaa !12 - %38 = tail call float @llvm.fmuladd.f32(float %36, float %37, float %33) #2 - store float %38, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i4.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.1, label %if.end.r_exit.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.loopexit, %if.end.r_exit.i.i.us - %39 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %39, 32 - br i1 %exitcond.not.1, label %_pocl_kernel_atax_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_atax_kernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %mul.i.i.i = shl i64 %2, 5 - %cmp217.i.i = icmp sgt i32 %17, 0 - %22 = sext i32 %21 to i64 - %wide.trip.count.i.i = zext i32 %17 to i64 - br i1 %cmp217.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %_pocl_kernel_atax_kernel2.exit - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %36, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %21, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.r_exit.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom7.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx8.i.i.us = getelementptr inbounds float, float* %10, i64 %idxprom7.i.i.us - %.pre.i.i2.us5 = load float, float* %arrayidx8.i.i.us, align 4, !tbaa !12 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i4.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %23 = phi float [ %28, %for.body.i.i.us ], [ %.pre.i.i2.us5, %for.body.lr.ph.i.i.us ] - %24 = mul nsw i64 %indvars.iv.next.i.i4.us, %22 - %25 = add nsw i64 %24, %idxprom7.i.i.us - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %25 - %26 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %arrayidx5.i.i.us = getelementptr inbounds float, float* %13, i64 %indvars.iv.next.i.i4.us - %27 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %28 = tail call float @llvm.fmuladd.f32(float %26, float %27, float %23) #2 - store float %28, float* %arrayidx8.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i4.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.r_exit.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.r_exit.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.end.r_exit.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %29 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %29, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %21, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %for.body.lr.ph.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -_pocl_kernel_atax_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_atax_kernel2.exit - -_pocl_kernel_atax_kernel2.exit: ; preds = %_pocl_kernel_atax_kernel2.exit.loopexit, %5 - ret void - -for.body.lr.ph.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom7.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx8.i.i.us.1 = getelementptr inbounds float, float* %10, i64 %idxprom7.i.i.us.1 - %.pre.i.i2.us5.1 = load float, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %for.body.lr.ph.i.i.us.1 - %indvars.iv.next.i.i4.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %for.body.lr.ph.i.i.us.1 ] - %30 = phi float [ %35, %for.body.i.i.us.1 ], [ %.pre.i.i2.us5.1, %for.body.lr.ph.i.i.us.1 ] - %31 = mul nsw i64 %indvars.iv.next.i.i4.us.1, %22 - %32 = add nsw i64 %31, %idxprom7.i.i.us.1 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %32 - %33 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %arrayidx5.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %indvars.iv.next.i.i4.us.1 - %34 = load float, float* %arrayidx5.i.i.us.1, align 4, !tbaa !12 - %35 = tail call float @llvm.fmuladd.f32(float %33, float %34, float %30) #2 - store float %35, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i4.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.1, label %if.end.r_exit.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.loopexit, %if.end.r_exit.i.i.us - %36 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %36, 32 - br i1 %exitcond.not.1, label %_pocl_kernel_atax_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !""} -!10 = !{!"A", !"y", !"tmp", !"nx", !"ny"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = distinct !{!20, !21} -!21 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/bicg_kernel1.ll b/pocl_irs/bicg_kernel1.ll deleted file mode 100644 index c205ae4..0000000 --- a/pocl_irs/bicg_kernel1.ll +++ /dev/null @@ -1,1233 +0,0 @@ -; ModuleID = './DC/PEIMDLPPEFFJPJMNMNJKJHDPNNPAAECGDPFEA/bicgKernel1/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_bicgKernel1(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, i32 %4, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %5, i64 %6, i64 %7, i64 %8) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %6, 8 - %cmp221.i = icmp sgt i32 %4, 0 - %wide.trip.count.i = zext i32 %4 to i64 - br i1 %cmp221.i, label %pregion_for_entry.entry.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.us.preheader: ; preds = %9 - br label %pregion_for_entry.entry.i.us - -vector.ph: ; preds = %9 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %3, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert12 = insertelement <8 x i32> undef, i32 %3, i32 0 - %broadcast.splat13 = shufflevector <8 x i32> %broadcast.splatinsert12, <8 x i32> undef, <8 x i32> zeroinitializer - %10 = or <8 x i64> %broadcast.splat, - %11 = trunc <8 x i64> %10 to <8 x i32> - %12 = trunc i64 %mul.i.i to i32 - %13 = or i32 %12, 8 - %14 = insertelement <8 x i32> undef, i32 %13, i64 0 - %15 = shufflevector <8 x i32> %14, <8 x i32> undef, <8 x i32> zeroinitializer - %16 = or <8 x i32> %15, - %17 = icmp sgt <8 x i32> %broadcast.splat11, %11 - %18 = icmp sgt <8 x i32> %broadcast.splat13, %16 - %19 = extractelement <8 x i64> %10, i32 0 - %20 = shl i64 %19, 32 - %21 = ashr exact i64 %20, 32 - %22 = getelementptr inbounds float, float* %2, i64 %21 - %23 = bitcast float* %22 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %23, i32 4, <8 x i1> %17), !tbaa !12, !llvm.access.group !16 - %24 = getelementptr inbounds float, float* %22, i64 8 - %25 = bitcast float* %24 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %25, i32 4, <8 x i1> %18), !tbaa !12, !llvm.access.group !16 - %26 = or <8 x i64> %broadcast.splat, - %27 = trunc <8 x i64> %26 to <8 x i32> - %28 = trunc i64 %mul.i.i to i32 - %29 = or i32 %28, 8 - %30 = insertelement <8 x i32> undef, i32 %29, i64 0 - %31 = shufflevector <8 x i32> %30, <8 x i32> undef, <8 x i32> zeroinitializer - %32 = or <8 x i32> %31, - %33 = icmp sgt <8 x i32> %broadcast.splat11, %27 - %34 = icmp sgt <8 x i32> %broadcast.splat13, %32 - %35 = extractelement <8 x i64> %26, i32 0 - %36 = shl i64 %35, 32 - %37 = ashr exact i64 %36, 32 - %38 = getelementptr inbounds float, float* %2, i64 %37 - %39 = bitcast float* %38 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %39, i32 4, <8 x i1> %33), !tbaa !12, !llvm.access.group !16 - %40 = getelementptr inbounds float, float* %38, i64 8 - %41 = bitcast float* %40 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %41, i32 4, <8 x i1> %34), !tbaa !12, !llvm.access.group !16 - %42 = or <8 x i64> %broadcast.splat, - %43 = trunc <8 x i64> %42 to <8 x i32> - %44 = trunc i64 %mul.i.i to i32 - %45 = or i32 %44, 8 - %46 = insertelement <8 x i32> undef, i32 %45, i64 0 - %47 = shufflevector <8 x i32> %46, <8 x i32> undef, <8 x i32> zeroinitializer - %48 = or <8 x i32> %47, - %49 = icmp sgt <8 x i32> %broadcast.splat11, %43 - %50 = icmp sgt <8 x i32> %broadcast.splat13, %48 - %51 = extractelement <8 x i64> %42, i32 0 - %52 = shl i64 %51, 32 - %53 = ashr exact i64 %52, 32 - %54 = getelementptr inbounds float, float* %2, i64 %53 - %55 = bitcast float* %54 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %55, i32 4, <8 x i1> %49), !tbaa !12, !llvm.access.group !16 - %56 = getelementptr inbounds float, float* %54, i64 8 - %57 = bitcast float* %56 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %57, i32 4, <8 x i1> %50), !tbaa !12, !llvm.access.group !16 - %58 = or <8 x i64> %broadcast.splat, - %59 = trunc <8 x i64> %58 to <8 x i32> - %60 = trunc i64 %mul.i.i to i32 - %61 = or i32 %60, 8 - %62 = insertelement <8 x i32> undef, i32 %61, i64 0 - %63 = shufflevector <8 x i32> %62, <8 x i32> undef, <8 x i32> zeroinitializer - %64 = or <8 x i32> %63, - %65 = icmp sgt <8 x i32> %broadcast.splat11, %59 - %66 = icmp sgt <8 x i32> %broadcast.splat13, %64 - %67 = extractelement <8 x i64> %58, i32 0 - %68 = shl i64 %67, 32 - %69 = ashr exact i64 %68, 32 - %70 = getelementptr inbounds float, float* %2, i64 %69 - %71 = bitcast float* %70 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %71, i32 4, <8 x i1> %65), !tbaa !12, !llvm.access.group !16 - %72 = getelementptr inbounds float, float* %70, i64 8 - %73 = bitcast float* %72 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %73, i32 4, <8 x i1> %66), !tbaa !12, !llvm.access.group !16 - %74 = or <8 x i64> %broadcast.splat, - %75 = trunc <8 x i64> %74 to <8 x i32> - %76 = trunc i64 %mul.i.i to i32 - %77 = or i32 %76, 8 - %78 = insertelement <8 x i32> undef, i32 %77, i64 0 - %79 = shufflevector <8 x i32> %78, <8 x i32> undef, <8 x i32> zeroinitializer - %80 = or <8 x i32> %79, - %81 = icmp sgt <8 x i32> %broadcast.splat11, %75 - %82 = icmp sgt <8 x i32> %broadcast.splat13, %80 - %83 = extractelement <8 x i64> %74, i32 0 - %84 = shl i64 %83, 32 - %85 = ashr exact i64 %84, 32 - %86 = getelementptr inbounds float, float* %2, i64 %85 - %87 = bitcast float* %86 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %87, i32 4, <8 x i1> %81), !tbaa !12, !llvm.access.group !16 - %88 = getelementptr inbounds float, float* %86, i64 8 - %89 = bitcast float* %88 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %89, i32 4, <8 x i1> %82), !tbaa !12, !llvm.access.group !16 - %90 = or <8 x i64> %broadcast.splat, - %91 = trunc <8 x i64> %90 to <8 x i32> - %92 = trunc i64 %mul.i.i to i32 - %93 = or i32 %92, 8 - %94 = insertelement <8 x i32> undef, i32 %93, i64 0 - %95 = shufflevector <8 x i32> %94, <8 x i32> undef, <8 x i32> zeroinitializer - %96 = or <8 x i32> %95, - %97 = icmp sgt <8 x i32> %broadcast.splat11, %91 - %98 = icmp sgt <8 x i32> %broadcast.splat13, %96 - %99 = extractelement <8 x i64> %90, i32 0 - %100 = shl i64 %99, 32 - %101 = ashr exact i64 %100, 32 - %102 = getelementptr inbounds float, float* %2, i64 %101 - %103 = bitcast float* %102 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %103, i32 4, <8 x i1> %97), !tbaa !12, !llvm.access.group !16 - %104 = getelementptr inbounds float, float* %102, i64 8 - %105 = bitcast float* %104 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %105, i32 4, <8 x i1> %98), !tbaa !12, !llvm.access.group !16 - %106 = or <8 x i64> %broadcast.splat, - %107 = trunc <8 x i64> %106 to <8 x i32> - %108 = trunc i64 %mul.i.i to i32 - %109 = or i32 %108, 8 - %110 = insertelement <8 x i32> undef, i32 %109, i64 0 - %111 = shufflevector <8 x i32> %110, <8 x i32> undef, <8 x i32> zeroinitializer - %112 = or <8 x i32> %111, - %113 = icmp sgt <8 x i32> %broadcast.splat11, %107 - %114 = icmp sgt <8 x i32> %broadcast.splat13, %112 - %115 = extractelement <8 x i64> %106, i32 0 - %116 = shl i64 %115, 32 - %117 = ashr exact i64 %116, 32 - %118 = getelementptr inbounds float, float* %2, i64 %117 - %119 = bitcast float* %118 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %119, i32 4, <8 x i1> %113), !tbaa !12, !llvm.access.group !16 - %120 = getelementptr inbounds float, float* %118, i64 8 - %121 = bitcast float* %120 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %121, i32 4, <8 x i1> %114), !tbaa !12, !llvm.access.group !16 - %122 = or <8 x i64> %broadcast.splat, - %123 = trunc <8 x i64> %122 to <8 x i32> - %124 = trunc i64 %mul.i.i to i32 - %125 = or i32 %124, 8 - %126 = insertelement <8 x i32> undef, i32 %125, i64 0 - %127 = shufflevector <8 x i32> %126, <8 x i32> undef, <8 x i32> zeroinitializer - %128 = or <8 x i32> %127, - %129 = icmp sgt <8 x i32> %broadcast.splat11, %123 - %130 = icmp sgt <8 x i32> %broadcast.splat13, %128 - %131 = extractelement <8 x i64> %122, i32 0 - %132 = shl i64 %131, 32 - %133 = ashr exact i64 %132, 32 - %134 = getelementptr inbounds float, float* %2, i64 %133 - %135 = bitcast float* %134 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %135, i32 4, <8 x i1> %129), !tbaa !12, !llvm.access.group !16 - %136 = getelementptr inbounds float, float* %134, i64 8 - %137 = bitcast float* %136 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %137, i32 4, <8 x i1> %130), !tbaa !12, !llvm.access.group !16 - %138 = or <8 x i64> %broadcast.splat, - %139 = trunc <8 x i64> %138 to <8 x i32> - %140 = trunc i64 %mul.i.i to i32 - %141 = or i32 %140, 8 - %142 = insertelement <8 x i32> undef, i32 %141, i64 0 - %143 = shufflevector <8 x i32> %142, <8 x i32> undef, <8 x i32> zeroinitializer - %144 = or <8 x i32> %143, - %145 = icmp sgt <8 x i32> %broadcast.splat11, %139 - %146 = icmp sgt <8 x i32> %broadcast.splat13, %144 - %147 = extractelement <8 x i64> %138, i32 0 - %148 = shl i64 %147, 32 - %149 = ashr exact i64 %148, 32 - %150 = getelementptr inbounds float, float* %2, i64 %149 - %151 = bitcast float* %150 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %151, i32 4, <8 x i1> %145), !tbaa !12, !llvm.access.group !16 - %152 = getelementptr inbounds float, float* %150, i64 8 - %153 = bitcast float* %152 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %153, i32 4, <8 x i1> %146), !tbaa !12, !llvm.access.group !16 - %154 = or <8 x i64> %broadcast.splat, - %155 = trunc <8 x i64> %154 to <8 x i32> - %156 = trunc i64 %mul.i.i to i32 - %157 = or i32 %156, 8 - %158 = insertelement <8 x i32> undef, i32 %157, i64 0 - %159 = shufflevector <8 x i32> %158, <8 x i32> undef, <8 x i32> zeroinitializer - %160 = or <8 x i32> %159, - %161 = icmp sgt <8 x i32> %broadcast.splat11, %155 - %162 = icmp sgt <8 x i32> %broadcast.splat13, %160 - %163 = extractelement <8 x i64> %154, i32 0 - %164 = shl i64 %163, 32 - %165 = ashr exact i64 %164, 32 - %166 = getelementptr inbounds float, float* %2, i64 %165 - %167 = bitcast float* %166 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %167, i32 4, <8 x i1> %161), !tbaa !12, !llvm.access.group !16 - %168 = getelementptr inbounds float, float* %166, i64 8 - %169 = bitcast float* %168 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %169, i32 4, <8 x i1> %162), !tbaa !12, !llvm.access.group !16 - %170 = or <8 x i64> %broadcast.splat, - %171 = trunc <8 x i64> %170 to <8 x i32> - %172 = trunc i64 %mul.i.i to i32 - %173 = or i32 %172, 8 - %174 = insertelement <8 x i32> undef, i32 %173, i64 0 - %175 = shufflevector <8 x i32> %174, <8 x i32> undef, <8 x i32> zeroinitializer - %176 = or <8 x i32> %175, - %177 = icmp sgt <8 x i32> %broadcast.splat11, %171 - %178 = icmp sgt <8 x i32> %broadcast.splat13, %176 - %179 = extractelement <8 x i64> %170, i32 0 - %180 = shl i64 %179, 32 - %181 = ashr exact i64 %180, 32 - %182 = getelementptr inbounds float, float* %2, i64 %181 - %183 = bitcast float* %182 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %183, i32 4, <8 x i1> %177), !tbaa !12, !llvm.access.group !16 - %184 = getelementptr inbounds float, float* %182, i64 8 - %185 = bitcast float* %184 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %185, i32 4, <8 x i1> %178), !tbaa !12, !llvm.access.group !16 - %186 = or <8 x i64> %broadcast.splat, - %187 = trunc <8 x i64> %186 to <8 x i32> - %188 = trunc i64 %mul.i.i to i32 - %189 = or i32 %188, 8 - %190 = insertelement <8 x i32> undef, i32 %189, i64 0 - %191 = shufflevector <8 x i32> %190, <8 x i32> undef, <8 x i32> zeroinitializer - %192 = or <8 x i32> %191, - %193 = icmp sgt <8 x i32> %broadcast.splat11, %187 - %194 = icmp sgt <8 x i32> %broadcast.splat13, %192 - %195 = extractelement <8 x i64> %186, i32 0 - %196 = shl i64 %195, 32 - %197 = ashr exact i64 %196, 32 - %198 = getelementptr inbounds float, float* %2, i64 %197 - %199 = bitcast float* %198 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %199, i32 4, <8 x i1> %193), !tbaa !12, !llvm.access.group !16 - %200 = getelementptr inbounds float, float* %198, i64 8 - %201 = bitcast float* %200 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %201, i32 4, <8 x i1> %194), !tbaa !12, !llvm.access.group !16 - %202 = or <8 x i64> %broadcast.splat, - %203 = trunc <8 x i64> %202 to <8 x i32> - %204 = trunc i64 %mul.i.i to i32 - %205 = or i32 %204, 8 - %206 = insertelement <8 x i32> undef, i32 %205, i64 0 - %207 = shufflevector <8 x i32> %206, <8 x i32> undef, <8 x i32> zeroinitializer - %208 = or <8 x i32> %207, - %209 = icmp sgt <8 x i32> %broadcast.splat11, %203 - %210 = icmp sgt <8 x i32> %broadcast.splat13, %208 - %211 = extractelement <8 x i64> %202, i32 0 - %212 = shl i64 %211, 32 - %213 = ashr exact i64 %212, 32 - %214 = getelementptr inbounds float, float* %2, i64 %213 - %215 = bitcast float* %214 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %215, i32 4, <8 x i1> %209), !tbaa !12, !llvm.access.group !16 - %216 = getelementptr inbounds float, float* %214, i64 8 - %217 = bitcast float* %216 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %217, i32 4, <8 x i1> %210), !tbaa !12, !llvm.access.group !16 - %218 = or <8 x i64> %broadcast.splat, - %219 = trunc <8 x i64> %218 to <8 x i32> - %220 = trunc i64 %mul.i.i to i32 - %221 = or i32 %220, 8 - %222 = insertelement <8 x i32> undef, i32 %221, i64 0 - %223 = shufflevector <8 x i32> %222, <8 x i32> undef, <8 x i32> zeroinitializer - %224 = or <8 x i32> %223, - %225 = icmp sgt <8 x i32> %broadcast.splat11, %219 - %226 = icmp sgt <8 x i32> %broadcast.splat13, %224 - %227 = extractelement <8 x i64> %218, i32 0 - %228 = shl i64 %227, 32 - %229 = ashr exact i64 %228, 32 - %230 = getelementptr inbounds float, float* %2, i64 %229 - %231 = bitcast float* %230 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %231, i32 4, <8 x i1> %225), !tbaa !12, !llvm.access.group !16 - %232 = getelementptr inbounds float, float* %230, i64 8 - %233 = bitcast float* %232 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %233, i32 4, <8 x i1> %226), !tbaa !12, !llvm.access.group !16 - %234 = or <8 x i64> %broadcast.splat, - %235 = trunc <8 x i64> %234 to <8 x i32> - %236 = trunc i64 %mul.i.i to i32 - %237 = or i32 %236, 8 - %238 = insertelement <8 x i32> undef, i32 %237, i64 0 - %239 = shufflevector <8 x i32> %238, <8 x i32> undef, <8 x i32> zeroinitializer - %240 = or <8 x i32> %239, - %241 = icmp sgt <8 x i32> %broadcast.splat11, %235 - %242 = icmp sgt <8 x i32> %broadcast.splat13, %240 - %243 = extractelement <8 x i64> %234, i32 0 - %244 = shl i64 %243, 32 - %245 = ashr exact i64 %244, 32 - %246 = getelementptr inbounds float, float* %2, i64 %245 - %247 = bitcast float* %246 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %247, i32 4, <8 x i1> %241), !tbaa !12, !llvm.access.group !16 - %248 = getelementptr inbounds float, float* %246, i64 8 - %249 = bitcast float* %248 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %249, i32 4, <8 x i1> %242), !tbaa !12, !llvm.access.group !16 - %250 = or <8 x i64> %broadcast.splat, - %251 = trunc <8 x i64> %250 to <8 x i32> - %252 = trunc i64 %mul.i.i to i32 - %253 = or i32 %252, 8 - %254 = insertelement <8 x i32> undef, i32 %253, i64 0 - %255 = shufflevector <8 x i32> %254, <8 x i32> undef, <8 x i32> zeroinitializer - %256 = or <8 x i32> %255, - %257 = icmp sgt <8 x i32> %broadcast.splat11, %251 - %258 = icmp sgt <8 x i32> %broadcast.splat13, %256 - %259 = extractelement <8 x i64> %250, i32 0 - %260 = shl i64 %259, 32 - %261 = ashr exact i64 %260, 32 - %262 = getelementptr inbounds float, float* %2, i64 %261 - %263 = bitcast float* %262 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %263, i32 4, <8 x i1> %257), !tbaa !12, !llvm.access.group !16 - %264 = getelementptr inbounds float, float* %262, i64 8 - %265 = bitcast float* %264 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %265, i32 4, <8 x i1> %258), !tbaa !12, !llvm.access.group !16 - br label %bicgKernel1.exit - -pregion_for_entry.entry.i.us: ; preds = %if.end.i.us.1, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %279, %if.end.i.us.1 ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %3 - br i1 %cmp.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %sext.i.us = shl i64 %add1.i.i.us, 32 - %idxprom.i.us = ashr exact i64 %sext.i.us, 32 - %arrayidx.i.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us - store float 0.000000e+00, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %mul.i.us = mul nsw i32 %conv.i.us, %4 - %266 = sext i32 %mul.i.us to i64 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %if.then.i.us - %indvars.iv.next.i2.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %if.then.i.us ] - %267 = phi float [ %271, %for.body.i.us ], [ 0.000000e+00, %if.then.i.us ] - %268 = add nsw i64 %indvars.iv.next.i2.us, %266 - %arrayidx5.i.us = getelementptr inbounds float, float* %0, i64 %268 - %269 = load float, float* %arrayidx5.i.us, align 4, !tbaa !12 - %arrayidx7.i.us = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.i2.us - %270 = load float, float* %arrayidx7.i.us, align 4, !tbaa !12 - %271 = tail call float @llvm.fmuladd.f32(float %269, float %270, float %267) #2 - store float %271, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i2.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %if.end.i.us.loopexit, label %for.body.i.us, !llvm.loop !18 - -if.end.i.us.loopexit: ; preds = %for.body.i.us - br label %if.end.i.us - -if.end.i.us: ; preds = %if.end.i.us.loopexit, %pregion_for_entry.entry.i.us - %272 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1 = add nuw nsw i64 %272, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp.i.us.1 = icmp slt i32 %conv.i.us.1, %3 - br i1 %cmp.i.us.1, label %if.then.i.us.1, label %if.end.i.us.1 - -bicgKernel1.exit.loopexit: ; preds = %if.end.i.us.1 - br label %bicgKernel1.exit - -bicgKernel1.exit: ; preds = %bicgKernel1.exit.loopexit, %vector.ph - ret void - -if.then.i.us.1: ; preds = %if.end.i.us - %sext.i.us.1 = shl i64 %add1.i.i.us.1, 32 - %idxprom.i.us.1 = ashr exact i64 %sext.i.us.1, 32 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %mul.i.us.1 = mul nsw i32 %conv.i.us.1, %4 - %273 = sext i32 %mul.i.us.1 to i64 - br label %for.body.i.us.1 - -for.body.i.us.1: ; preds = %for.body.i.us.1, %if.then.i.us.1 - %indvars.iv.next.i2.us.1 = phi i64 [ %indvars.iv.next.i.us.1, %for.body.i.us.1 ], [ 0, %if.then.i.us.1 ] - %274 = phi float [ %278, %for.body.i.us.1 ], [ 0.000000e+00, %if.then.i.us.1 ] - %275 = add nsw i64 %indvars.iv.next.i2.us.1, %273 - %arrayidx5.i.us.1 = getelementptr inbounds float, float* %0, i64 %275 - %276 = load float, float* %arrayidx5.i.us.1, align 4, !tbaa !12 - %arrayidx7.i.us.1 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.i2.us.1 - %277 = load float, float* %arrayidx7.i.us.1, align 4, !tbaa !12 - %278 = tail call float @llvm.fmuladd.f32(float %276, float %277, float %274) #2 - store float %278, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.1 = add nuw nsw i64 %indvars.iv.next.i2.us.1, 1 - %exitcond.not.i.us.1 = icmp eq i64 %indvars.iv.next.i.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.1, label %if.end.i.us.1.loopexit, label %for.body.i.us.1, !llvm.loop !18 - -if.end.i.us.1.loopexit: ; preds = %for.body.i.us.1 - br label %if.end.i.us.1 - -if.end.i.us.1: ; preds = %if.end.i.us.1.loopexit, %if.end.i.us - %279 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %279, 256 - br i1 %exitcond.not.1, label %bicgKernel1.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_bicgKernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp221.i.i = icmp sgt i32 %24, 0 - %wide.trip.count.i.i = zext i32 %24 to i64 - br i1 %cmp221.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -vector.ph: ; preds = %5 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert12 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat13 = shufflevector <8 x i32> %broadcast.splatinsert12, <8 x i32> undef, <8 x i32> zeroinitializer - %25 = or <8 x i64> %broadcast.splat, - %26 = trunc <8 x i64> %25 to <8 x i32> - %27 = trunc i64 %mul.i.i.i to i32 - %28 = or i32 %27, 8 - %29 = insertelement <8 x i32> undef, i32 %28, i64 0 - %30 = shufflevector <8 x i32> %29, <8 x i32> undef, <8 x i32> zeroinitializer - %31 = or <8 x i32> %30, - %32 = icmp sgt <8 x i32> %broadcast.splat11, %26 - %33 = icmp sgt <8 x i32> %broadcast.splat13, %31 - %34 = extractelement <8 x i64> %25, i32 0 - %35 = shl i64 %34, 32 - %36 = ashr exact i64 %35, 32 - %37 = getelementptr inbounds float, float* %16, i64 %36 - %38 = bitcast float* %37 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %38, i32 4, <8 x i1> %32), !tbaa !12, !llvm.access.group !16 - %39 = getelementptr inbounds float, float* %37, i64 8 - %40 = bitcast float* %39 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %40, i32 4, <8 x i1> %33), !tbaa !12, !llvm.access.group !16 - %41 = or <8 x i64> %broadcast.splat, - %42 = trunc <8 x i64> %41 to <8 x i32> - %43 = trunc i64 %mul.i.i.i to i32 - %44 = or i32 %43, 8 - %45 = insertelement <8 x i32> undef, i32 %44, i64 0 - %46 = shufflevector <8 x i32> %45, <8 x i32> undef, <8 x i32> zeroinitializer - %47 = or <8 x i32> %46, - %48 = icmp sgt <8 x i32> %broadcast.splat11, %42 - %49 = icmp sgt <8 x i32> %broadcast.splat13, %47 - %50 = extractelement <8 x i64> %41, i32 0 - %51 = shl i64 %50, 32 - %52 = ashr exact i64 %51, 32 - %53 = getelementptr inbounds float, float* %16, i64 %52 - %54 = bitcast float* %53 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %54, i32 4, <8 x i1> %48), !tbaa !12, !llvm.access.group !16 - %55 = getelementptr inbounds float, float* %53, i64 8 - %56 = bitcast float* %55 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %56, i32 4, <8 x i1> %49), !tbaa !12, !llvm.access.group !16 - %57 = or <8 x i64> %broadcast.splat, - %58 = trunc <8 x i64> %57 to <8 x i32> - %59 = trunc i64 %mul.i.i.i to i32 - %60 = or i32 %59, 8 - %61 = insertelement <8 x i32> undef, i32 %60, i64 0 - %62 = shufflevector <8 x i32> %61, <8 x i32> undef, <8 x i32> zeroinitializer - %63 = or <8 x i32> %62, - %64 = icmp sgt <8 x i32> %broadcast.splat11, %58 - %65 = icmp sgt <8 x i32> %broadcast.splat13, %63 - %66 = extractelement <8 x i64> %57, i32 0 - %67 = shl i64 %66, 32 - %68 = ashr exact i64 %67, 32 - %69 = getelementptr inbounds float, float* %16, i64 %68 - %70 = bitcast float* %69 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %70, i32 4, <8 x i1> %64), !tbaa !12, !llvm.access.group !16 - %71 = getelementptr inbounds float, float* %69, i64 8 - %72 = bitcast float* %71 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %72, i32 4, <8 x i1> %65), !tbaa !12, !llvm.access.group !16 - %73 = or <8 x i64> %broadcast.splat, - %74 = trunc <8 x i64> %73 to <8 x i32> - %75 = trunc i64 %mul.i.i.i to i32 - %76 = or i32 %75, 8 - %77 = insertelement <8 x i32> undef, i32 %76, i64 0 - %78 = shufflevector <8 x i32> %77, <8 x i32> undef, <8 x i32> zeroinitializer - %79 = or <8 x i32> %78, - %80 = icmp sgt <8 x i32> %broadcast.splat11, %74 - %81 = icmp sgt <8 x i32> %broadcast.splat13, %79 - %82 = extractelement <8 x i64> %73, i32 0 - %83 = shl i64 %82, 32 - %84 = ashr exact i64 %83, 32 - %85 = getelementptr inbounds float, float* %16, i64 %84 - %86 = bitcast float* %85 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %86, i32 4, <8 x i1> %80), !tbaa !12, !llvm.access.group !16 - %87 = getelementptr inbounds float, float* %85, i64 8 - %88 = bitcast float* %87 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %88, i32 4, <8 x i1> %81), !tbaa !12, !llvm.access.group !16 - %89 = or <8 x i64> %broadcast.splat, - %90 = trunc <8 x i64> %89 to <8 x i32> - %91 = trunc i64 %mul.i.i.i to i32 - %92 = or i32 %91, 8 - %93 = insertelement <8 x i32> undef, i32 %92, i64 0 - %94 = shufflevector <8 x i32> %93, <8 x i32> undef, <8 x i32> zeroinitializer - %95 = or <8 x i32> %94, - %96 = icmp sgt <8 x i32> %broadcast.splat11, %90 - %97 = icmp sgt <8 x i32> %broadcast.splat13, %95 - %98 = extractelement <8 x i64> %89, i32 0 - %99 = shl i64 %98, 32 - %100 = ashr exact i64 %99, 32 - %101 = getelementptr inbounds float, float* %16, i64 %100 - %102 = bitcast float* %101 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %102, i32 4, <8 x i1> %96), !tbaa !12, !llvm.access.group !16 - %103 = getelementptr inbounds float, float* %101, i64 8 - %104 = bitcast float* %103 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %104, i32 4, <8 x i1> %97), !tbaa !12, !llvm.access.group !16 - %105 = or <8 x i64> %broadcast.splat, - %106 = trunc <8 x i64> %105 to <8 x i32> - %107 = trunc i64 %mul.i.i.i to i32 - %108 = or i32 %107, 8 - %109 = insertelement <8 x i32> undef, i32 %108, i64 0 - %110 = shufflevector <8 x i32> %109, <8 x i32> undef, <8 x i32> zeroinitializer - %111 = or <8 x i32> %110, - %112 = icmp sgt <8 x i32> %broadcast.splat11, %106 - %113 = icmp sgt <8 x i32> %broadcast.splat13, %111 - %114 = extractelement <8 x i64> %105, i32 0 - %115 = shl i64 %114, 32 - %116 = ashr exact i64 %115, 32 - %117 = getelementptr inbounds float, float* %16, i64 %116 - %118 = bitcast float* %117 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %118, i32 4, <8 x i1> %112), !tbaa !12, !llvm.access.group !16 - %119 = getelementptr inbounds float, float* %117, i64 8 - %120 = bitcast float* %119 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %120, i32 4, <8 x i1> %113), !tbaa !12, !llvm.access.group !16 - %121 = or <8 x i64> %broadcast.splat, - %122 = trunc <8 x i64> %121 to <8 x i32> - %123 = trunc i64 %mul.i.i.i to i32 - %124 = or i32 %123, 8 - %125 = insertelement <8 x i32> undef, i32 %124, i64 0 - %126 = shufflevector <8 x i32> %125, <8 x i32> undef, <8 x i32> zeroinitializer - %127 = or <8 x i32> %126, - %128 = icmp sgt <8 x i32> %broadcast.splat11, %122 - %129 = icmp sgt <8 x i32> %broadcast.splat13, %127 - %130 = extractelement <8 x i64> %121, i32 0 - %131 = shl i64 %130, 32 - %132 = ashr exact i64 %131, 32 - %133 = getelementptr inbounds float, float* %16, i64 %132 - %134 = bitcast float* %133 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %134, i32 4, <8 x i1> %128), !tbaa !12, !llvm.access.group !16 - %135 = getelementptr inbounds float, float* %133, i64 8 - %136 = bitcast float* %135 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %136, i32 4, <8 x i1> %129), !tbaa !12, !llvm.access.group !16 - %137 = or <8 x i64> %broadcast.splat, - %138 = trunc <8 x i64> %137 to <8 x i32> - %139 = trunc i64 %mul.i.i.i to i32 - %140 = or i32 %139, 8 - %141 = insertelement <8 x i32> undef, i32 %140, i64 0 - %142 = shufflevector <8 x i32> %141, <8 x i32> undef, <8 x i32> zeroinitializer - %143 = or <8 x i32> %142, - %144 = icmp sgt <8 x i32> %broadcast.splat11, %138 - %145 = icmp sgt <8 x i32> %broadcast.splat13, %143 - %146 = extractelement <8 x i64> %137, i32 0 - %147 = shl i64 %146, 32 - %148 = ashr exact i64 %147, 32 - %149 = getelementptr inbounds float, float* %16, i64 %148 - %150 = bitcast float* %149 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %150, i32 4, <8 x i1> %144), !tbaa !12, !llvm.access.group !16 - %151 = getelementptr inbounds float, float* %149, i64 8 - %152 = bitcast float* %151 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %152, i32 4, <8 x i1> %145), !tbaa !12, !llvm.access.group !16 - %153 = or <8 x i64> %broadcast.splat, - %154 = trunc <8 x i64> %153 to <8 x i32> - %155 = trunc i64 %mul.i.i.i to i32 - %156 = or i32 %155, 8 - %157 = insertelement <8 x i32> undef, i32 %156, i64 0 - %158 = shufflevector <8 x i32> %157, <8 x i32> undef, <8 x i32> zeroinitializer - %159 = or <8 x i32> %158, - %160 = icmp sgt <8 x i32> %broadcast.splat11, %154 - %161 = icmp sgt <8 x i32> %broadcast.splat13, %159 - %162 = extractelement <8 x i64> %153, i32 0 - %163 = shl i64 %162, 32 - %164 = ashr exact i64 %163, 32 - %165 = getelementptr inbounds float, float* %16, i64 %164 - %166 = bitcast float* %165 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %166, i32 4, <8 x i1> %160), !tbaa !12, !llvm.access.group !16 - %167 = getelementptr inbounds float, float* %165, i64 8 - %168 = bitcast float* %167 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %168, i32 4, <8 x i1> %161), !tbaa !12, !llvm.access.group !16 - %169 = or <8 x i64> %broadcast.splat, - %170 = trunc <8 x i64> %169 to <8 x i32> - %171 = trunc i64 %mul.i.i.i to i32 - %172 = or i32 %171, 8 - %173 = insertelement <8 x i32> undef, i32 %172, i64 0 - %174 = shufflevector <8 x i32> %173, <8 x i32> undef, <8 x i32> zeroinitializer - %175 = or <8 x i32> %174, - %176 = icmp sgt <8 x i32> %broadcast.splat11, %170 - %177 = icmp sgt <8 x i32> %broadcast.splat13, %175 - %178 = extractelement <8 x i64> %169, i32 0 - %179 = shl i64 %178, 32 - %180 = ashr exact i64 %179, 32 - %181 = getelementptr inbounds float, float* %16, i64 %180 - %182 = bitcast float* %181 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %182, i32 4, <8 x i1> %176), !tbaa !12, !llvm.access.group !16 - %183 = getelementptr inbounds float, float* %181, i64 8 - %184 = bitcast float* %183 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %184, i32 4, <8 x i1> %177), !tbaa !12, !llvm.access.group !16 - %185 = or <8 x i64> %broadcast.splat, - %186 = trunc <8 x i64> %185 to <8 x i32> - %187 = trunc i64 %mul.i.i.i to i32 - %188 = or i32 %187, 8 - %189 = insertelement <8 x i32> undef, i32 %188, i64 0 - %190 = shufflevector <8 x i32> %189, <8 x i32> undef, <8 x i32> zeroinitializer - %191 = or <8 x i32> %190, - %192 = icmp sgt <8 x i32> %broadcast.splat11, %186 - %193 = icmp sgt <8 x i32> %broadcast.splat13, %191 - %194 = extractelement <8 x i64> %185, i32 0 - %195 = shl i64 %194, 32 - %196 = ashr exact i64 %195, 32 - %197 = getelementptr inbounds float, float* %16, i64 %196 - %198 = bitcast float* %197 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %198, i32 4, <8 x i1> %192), !tbaa !12, !llvm.access.group !16 - %199 = getelementptr inbounds float, float* %197, i64 8 - %200 = bitcast float* %199 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %200, i32 4, <8 x i1> %193), !tbaa !12, !llvm.access.group !16 - %201 = or <8 x i64> %broadcast.splat, - %202 = trunc <8 x i64> %201 to <8 x i32> - %203 = trunc i64 %mul.i.i.i to i32 - %204 = or i32 %203, 8 - %205 = insertelement <8 x i32> undef, i32 %204, i64 0 - %206 = shufflevector <8 x i32> %205, <8 x i32> undef, <8 x i32> zeroinitializer - %207 = or <8 x i32> %206, - %208 = icmp sgt <8 x i32> %broadcast.splat11, %202 - %209 = icmp sgt <8 x i32> %broadcast.splat13, %207 - %210 = extractelement <8 x i64> %201, i32 0 - %211 = shl i64 %210, 32 - %212 = ashr exact i64 %211, 32 - %213 = getelementptr inbounds float, float* %16, i64 %212 - %214 = bitcast float* %213 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %214, i32 4, <8 x i1> %208), !tbaa !12, !llvm.access.group !16 - %215 = getelementptr inbounds float, float* %213, i64 8 - %216 = bitcast float* %215 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %216, i32 4, <8 x i1> %209), !tbaa !12, !llvm.access.group !16 - %217 = or <8 x i64> %broadcast.splat, - %218 = trunc <8 x i64> %217 to <8 x i32> - %219 = trunc i64 %mul.i.i.i to i32 - %220 = or i32 %219, 8 - %221 = insertelement <8 x i32> undef, i32 %220, i64 0 - %222 = shufflevector <8 x i32> %221, <8 x i32> undef, <8 x i32> zeroinitializer - %223 = or <8 x i32> %222, - %224 = icmp sgt <8 x i32> %broadcast.splat11, %218 - %225 = icmp sgt <8 x i32> %broadcast.splat13, %223 - %226 = extractelement <8 x i64> %217, i32 0 - %227 = shl i64 %226, 32 - %228 = ashr exact i64 %227, 32 - %229 = getelementptr inbounds float, float* %16, i64 %228 - %230 = bitcast float* %229 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %230, i32 4, <8 x i1> %224), !tbaa !12, !llvm.access.group !16 - %231 = getelementptr inbounds float, float* %229, i64 8 - %232 = bitcast float* %231 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %232, i32 4, <8 x i1> %225), !tbaa !12, !llvm.access.group !16 - %233 = or <8 x i64> %broadcast.splat, - %234 = trunc <8 x i64> %233 to <8 x i32> - %235 = trunc i64 %mul.i.i.i to i32 - %236 = or i32 %235, 8 - %237 = insertelement <8 x i32> undef, i32 %236, i64 0 - %238 = shufflevector <8 x i32> %237, <8 x i32> undef, <8 x i32> zeroinitializer - %239 = or <8 x i32> %238, - %240 = icmp sgt <8 x i32> %broadcast.splat11, %234 - %241 = icmp sgt <8 x i32> %broadcast.splat13, %239 - %242 = extractelement <8 x i64> %233, i32 0 - %243 = shl i64 %242, 32 - %244 = ashr exact i64 %243, 32 - %245 = getelementptr inbounds float, float* %16, i64 %244 - %246 = bitcast float* %245 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %246, i32 4, <8 x i1> %240), !tbaa !12, !llvm.access.group !16 - %247 = getelementptr inbounds float, float* %245, i64 8 - %248 = bitcast float* %247 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %248, i32 4, <8 x i1> %241), !tbaa !12, !llvm.access.group !16 - %249 = or <8 x i64> %broadcast.splat, - %250 = trunc <8 x i64> %249 to <8 x i32> - %251 = trunc i64 %mul.i.i.i to i32 - %252 = or i32 %251, 8 - %253 = insertelement <8 x i32> undef, i32 %252, i64 0 - %254 = shufflevector <8 x i32> %253, <8 x i32> undef, <8 x i32> zeroinitializer - %255 = or <8 x i32> %254, - %256 = icmp sgt <8 x i32> %broadcast.splat11, %250 - %257 = icmp sgt <8 x i32> %broadcast.splat13, %255 - %258 = extractelement <8 x i64> %249, i32 0 - %259 = shl i64 %258, 32 - %260 = ashr exact i64 %259, 32 - %261 = getelementptr inbounds float, float* %16, i64 %260 - %262 = bitcast float* %261 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %262, i32 4, <8 x i1> %256), !tbaa !12, !llvm.access.group !16 - %263 = getelementptr inbounds float, float* %261, i64 8 - %264 = bitcast float* %263 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %264, i32 4, <8 x i1> %257), !tbaa !12, !llvm.access.group !16 - %265 = or <8 x i64> %broadcast.splat, - %266 = trunc <8 x i64> %265 to <8 x i32> - %267 = trunc i64 %mul.i.i.i to i32 - %268 = or i32 %267, 8 - %269 = insertelement <8 x i32> undef, i32 %268, i64 0 - %270 = shufflevector <8 x i32> %269, <8 x i32> undef, <8 x i32> zeroinitializer - %271 = or <8 x i32> %270, - %272 = icmp sgt <8 x i32> %broadcast.splat11, %266 - %273 = icmp sgt <8 x i32> %broadcast.splat13, %271 - %274 = extractelement <8 x i64> %265, i32 0 - %275 = shl i64 %274, 32 - %276 = ashr exact i64 %275, 32 - %277 = getelementptr inbounds float, float* %16, i64 %276 - %278 = bitcast float* %277 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %278, i32 4, <8 x i1> %272), !tbaa !12, !llvm.access.group !16 - %279 = getelementptr inbounds float, float* %277, i64 8 - %280 = bitcast float* %279 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %280, i32 4, <8 x i1> %273), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_bicgKernel1.exit - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %294, %if.end.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %20, %conv.i.i.us - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %mul.i.i.us = mul nsw i32 %24, %conv.i.i.us - %281 = sext i32 %mul.i.i.us to i64 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %if.then.i.i.us - %indvars.iv.next.i.i2.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %if.then.i.i.us ] - %282 = phi float [ %286, %for.body.i.i.us ], [ 0.000000e+00, %if.then.i.i.us ] - %283 = add nsw i64 %indvars.iv.next.i.i2.us, %281 - %arrayidx5.i.i.us = getelementptr inbounds float, float* %8, i64 %283 - %284 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %arrayidx7.i.i.us = getelementptr inbounds float, float* %12, i64 %indvars.iv.next.i.i2.us - %285 = load float, float* %arrayidx7.i.i.us, align 4, !tbaa !12 - %286 = tail call float @llvm.fmuladd.f32(float %284, float %285, float %282) #2 - store float %286, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i2.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.end.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %287 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %287, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %20, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -_pocl_kernel_bicgKernel1.exit.loopexit: ; preds = %if.end.i.i.us.1 - br label %_pocl_kernel_bicgKernel1.exit - -_pocl_kernel_bicgKernel1.exit: ; preds = %_pocl_kernel_bicgKernel1.exit.loopexit, %vector.ph - ret void - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %mul.i.i.us.1 = mul nsw i32 %24, %conv.i.i.us.1 - %288 = sext i32 %mul.i.i.us.1 to i64 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %if.then.i.i.us.1 - %indvars.iv.next.i.i2.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %if.then.i.i.us.1 ] - %289 = phi float [ %293, %for.body.i.i.us.1 ], [ 0.000000e+00, %if.then.i.i.us.1 ] - %290 = add nsw i64 %indvars.iv.next.i.i2.us.1, %288 - %arrayidx5.i.i.us.1 = getelementptr inbounds float, float* %8, i64 %290 - %291 = load float, float* %arrayidx5.i.i.us.1, align 4, !tbaa !12 - %arrayidx7.i.i.us.1 = getelementptr inbounds float, float* %12, i64 %indvars.iv.next.i.i2.us.1 - %292 = load float, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12 - %293 = tail call float @llvm.fmuladd.f32(float %291, float %292, float %289) #2 - store float %293, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i2.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.1, label %if.end.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.end.i.i.us.1.loopexit, %if.end.i.i.us - %294 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %294, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_bicgKernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_bicgKernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp221.i.i = icmp sgt i32 %21, 0 - %wide.trip.count.i.i = zext i32 %21 to i64 - br i1 %cmp221.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -vector.ph: ; preds = %5 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %17, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert12 = insertelement <8 x i32> undef, i32 %17, i32 0 - %broadcast.splat13 = shufflevector <8 x i32> %broadcast.splatinsert12, <8 x i32> undef, <8 x i32> zeroinitializer - %22 = or <8 x i64> %broadcast.splat, - %23 = trunc <8 x i64> %22 to <8 x i32> - %24 = trunc i64 %mul.i.i.i to i32 - %25 = or i32 %24, 8 - %26 = insertelement <8 x i32> undef, i32 %25, i64 0 - %27 = shufflevector <8 x i32> %26, <8 x i32> undef, <8 x i32> zeroinitializer - %28 = or <8 x i32> %27, - %29 = icmp sgt <8 x i32> %broadcast.splat11, %23 - %30 = icmp sgt <8 x i32> %broadcast.splat13, %28 - %31 = extractelement <8 x i64> %22, i32 0 - %32 = shl i64 %31, 32 - %33 = ashr exact i64 %32, 32 - %34 = getelementptr inbounds float, float* %13, i64 %33 - %35 = bitcast float* %34 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %35, i32 4, <8 x i1> %29), !tbaa !12, !llvm.access.group !16 - %36 = getelementptr inbounds float, float* %34, i64 8 - %37 = bitcast float* %36 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %37, i32 4, <8 x i1> %30), !tbaa !12, !llvm.access.group !16 - %38 = or <8 x i64> %broadcast.splat, - %39 = trunc <8 x i64> %38 to <8 x i32> - %40 = trunc i64 %mul.i.i.i to i32 - %41 = or i32 %40, 8 - %42 = insertelement <8 x i32> undef, i32 %41, i64 0 - %43 = shufflevector <8 x i32> %42, <8 x i32> undef, <8 x i32> zeroinitializer - %44 = or <8 x i32> %43, - %45 = icmp sgt <8 x i32> %broadcast.splat11, %39 - %46 = icmp sgt <8 x i32> %broadcast.splat13, %44 - %47 = extractelement <8 x i64> %38, i32 0 - %48 = shl i64 %47, 32 - %49 = ashr exact i64 %48, 32 - %50 = getelementptr inbounds float, float* %13, i64 %49 - %51 = bitcast float* %50 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %51, i32 4, <8 x i1> %45), !tbaa !12, !llvm.access.group !16 - %52 = getelementptr inbounds float, float* %50, i64 8 - %53 = bitcast float* %52 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %53, i32 4, <8 x i1> %46), !tbaa !12, !llvm.access.group !16 - %54 = or <8 x i64> %broadcast.splat, - %55 = trunc <8 x i64> %54 to <8 x i32> - %56 = trunc i64 %mul.i.i.i to i32 - %57 = or i32 %56, 8 - %58 = insertelement <8 x i32> undef, i32 %57, i64 0 - %59 = shufflevector <8 x i32> %58, <8 x i32> undef, <8 x i32> zeroinitializer - %60 = or <8 x i32> %59, - %61 = icmp sgt <8 x i32> %broadcast.splat11, %55 - %62 = icmp sgt <8 x i32> %broadcast.splat13, %60 - %63 = extractelement <8 x i64> %54, i32 0 - %64 = shl i64 %63, 32 - %65 = ashr exact i64 %64, 32 - %66 = getelementptr inbounds float, float* %13, i64 %65 - %67 = bitcast float* %66 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %67, i32 4, <8 x i1> %61), !tbaa !12, !llvm.access.group !16 - %68 = getelementptr inbounds float, float* %66, i64 8 - %69 = bitcast float* %68 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %69, i32 4, <8 x i1> %62), !tbaa !12, !llvm.access.group !16 - %70 = or <8 x i64> %broadcast.splat, - %71 = trunc <8 x i64> %70 to <8 x i32> - %72 = trunc i64 %mul.i.i.i to i32 - %73 = or i32 %72, 8 - %74 = insertelement <8 x i32> undef, i32 %73, i64 0 - %75 = shufflevector <8 x i32> %74, <8 x i32> undef, <8 x i32> zeroinitializer - %76 = or <8 x i32> %75, - %77 = icmp sgt <8 x i32> %broadcast.splat11, %71 - %78 = icmp sgt <8 x i32> %broadcast.splat13, %76 - %79 = extractelement <8 x i64> %70, i32 0 - %80 = shl i64 %79, 32 - %81 = ashr exact i64 %80, 32 - %82 = getelementptr inbounds float, float* %13, i64 %81 - %83 = bitcast float* %82 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %83, i32 4, <8 x i1> %77), !tbaa !12, !llvm.access.group !16 - %84 = getelementptr inbounds float, float* %82, i64 8 - %85 = bitcast float* %84 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %85, i32 4, <8 x i1> %78), !tbaa !12, !llvm.access.group !16 - %86 = or <8 x i64> %broadcast.splat, - %87 = trunc <8 x i64> %86 to <8 x i32> - %88 = trunc i64 %mul.i.i.i to i32 - %89 = or i32 %88, 8 - %90 = insertelement <8 x i32> undef, i32 %89, i64 0 - %91 = shufflevector <8 x i32> %90, <8 x i32> undef, <8 x i32> zeroinitializer - %92 = or <8 x i32> %91, - %93 = icmp sgt <8 x i32> %broadcast.splat11, %87 - %94 = icmp sgt <8 x i32> %broadcast.splat13, %92 - %95 = extractelement <8 x i64> %86, i32 0 - %96 = shl i64 %95, 32 - %97 = ashr exact i64 %96, 32 - %98 = getelementptr inbounds float, float* %13, i64 %97 - %99 = bitcast float* %98 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %99, i32 4, <8 x i1> %93), !tbaa !12, !llvm.access.group !16 - %100 = getelementptr inbounds float, float* %98, i64 8 - %101 = bitcast float* %100 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %101, i32 4, <8 x i1> %94), !tbaa !12, !llvm.access.group !16 - %102 = or <8 x i64> %broadcast.splat, - %103 = trunc <8 x i64> %102 to <8 x i32> - %104 = trunc i64 %mul.i.i.i to i32 - %105 = or i32 %104, 8 - %106 = insertelement <8 x i32> undef, i32 %105, i64 0 - %107 = shufflevector <8 x i32> %106, <8 x i32> undef, <8 x i32> zeroinitializer - %108 = or <8 x i32> %107, - %109 = icmp sgt <8 x i32> %broadcast.splat11, %103 - %110 = icmp sgt <8 x i32> %broadcast.splat13, %108 - %111 = extractelement <8 x i64> %102, i32 0 - %112 = shl i64 %111, 32 - %113 = ashr exact i64 %112, 32 - %114 = getelementptr inbounds float, float* %13, i64 %113 - %115 = bitcast float* %114 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %115, i32 4, <8 x i1> %109), !tbaa !12, !llvm.access.group !16 - %116 = getelementptr inbounds float, float* %114, i64 8 - %117 = bitcast float* %116 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %117, i32 4, <8 x i1> %110), !tbaa !12, !llvm.access.group !16 - %118 = or <8 x i64> %broadcast.splat, - %119 = trunc <8 x i64> %118 to <8 x i32> - %120 = trunc i64 %mul.i.i.i to i32 - %121 = or i32 %120, 8 - %122 = insertelement <8 x i32> undef, i32 %121, i64 0 - %123 = shufflevector <8 x i32> %122, <8 x i32> undef, <8 x i32> zeroinitializer - %124 = or <8 x i32> %123, - %125 = icmp sgt <8 x i32> %broadcast.splat11, %119 - %126 = icmp sgt <8 x i32> %broadcast.splat13, %124 - %127 = extractelement <8 x i64> %118, i32 0 - %128 = shl i64 %127, 32 - %129 = ashr exact i64 %128, 32 - %130 = getelementptr inbounds float, float* %13, i64 %129 - %131 = bitcast float* %130 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %131, i32 4, <8 x i1> %125), !tbaa !12, !llvm.access.group !16 - %132 = getelementptr inbounds float, float* %130, i64 8 - %133 = bitcast float* %132 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %133, i32 4, <8 x i1> %126), !tbaa !12, !llvm.access.group !16 - %134 = or <8 x i64> %broadcast.splat, - %135 = trunc <8 x i64> %134 to <8 x i32> - %136 = trunc i64 %mul.i.i.i to i32 - %137 = or i32 %136, 8 - %138 = insertelement <8 x i32> undef, i32 %137, i64 0 - %139 = shufflevector <8 x i32> %138, <8 x i32> undef, <8 x i32> zeroinitializer - %140 = or <8 x i32> %139, - %141 = icmp sgt <8 x i32> %broadcast.splat11, %135 - %142 = icmp sgt <8 x i32> %broadcast.splat13, %140 - %143 = extractelement <8 x i64> %134, i32 0 - %144 = shl i64 %143, 32 - %145 = ashr exact i64 %144, 32 - %146 = getelementptr inbounds float, float* %13, i64 %145 - %147 = bitcast float* %146 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %147, i32 4, <8 x i1> %141), !tbaa !12, !llvm.access.group !16 - %148 = getelementptr inbounds float, float* %146, i64 8 - %149 = bitcast float* %148 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %149, i32 4, <8 x i1> %142), !tbaa !12, !llvm.access.group !16 - %150 = or <8 x i64> %broadcast.splat, - %151 = trunc <8 x i64> %150 to <8 x i32> - %152 = trunc i64 %mul.i.i.i to i32 - %153 = or i32 %152, 8 - %154 = insertelement <8 x i32> undef, i32 %153, i64 0 - %155 = shufflevector <8 x i32> %154, <8 x i32> undef, <8 x i32> zeroinitializer - %156 = or <8 x i32> %155, - %157 = icmp sgt <8 x i32> %broadcast.splat11, %151 - %158 = icmp sgt <8 x i32> %broadcast.splat13, %156 - %159 = extractelement <8 x i64> %150, i32 0 - %160 = shl i64 %159, 32 - %161 = ashr exact i64 %160, 32 - %162 = getelementptr inbounds float, float* %13, i64 %161 - %163 = bitcast float* %162 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %163, i32 4, <8 x i1> %157), !tbaa !12, !llvm.access.group !16 - %164 = getelementptr inbounds float, float* %162, i64 8 - %165 = bitcast float* %164 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %165, i32 4, <8 x i1> %158), !tbaa !12, !llvm.access.group !16 - %166 = or <8 x i64> %broadcast.splat, - %167 = trunc <8 x i64> %166 to <8 x i32> - %168 = trunc i64 %mul.i.i.i to i32 - %169 = or i32 %168, 8 - %170 = insertelement <8 x i32> undef, i32 %169, i64 0 - %171 = shufflevector <8 x i32> %170, <8 x i32> undef, <8 x i32> zeroinitializer - %172 = or <8 x i32> %171, - %173 = icmp sgt <8 x i32> %broadcast.splat11, %167 - %174 = icmp sgt <8 x i32> %broadcast.splat13, %172 - %175 = extractelement <8 x i64> %166, i32 0 - %176 = shl i64 %175, 32 - %177 = ashr exact i64 %176, 32 - %178 = getelementptr inbounds float, float* %13, i64 %177 - %179 = bitcast float* %178 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %179, i32 4, <8 x i1> %173), !tbaa !12, !llvm.access.group !16 - %180 = getelementptr inbounds float, float* %178, i64 8 - %181 = bitcast float* %180 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %181, i32 4, <8 x i1> %174), !tbaa !12, !llvm.access.group !16 - %182 = or <8 x i64> %broadcast.splat, - %183 = trunc <8 x i64> %182 to <8 x i32> - %184 = trunc i64 %mul.i.i.i to i32 - %185 = or i32 %184, 8 - %186 = insertelement <8 x i32> undef, i32 %185, i64 0 - %187 = shufflevector <8 x i32> %186, <8 x i32> undef, <8 x i32> zeroinitializer - %188 = or <8 x i32> %187, - %189 = icmp sgt <8 x i32> %broadcast.splat11, %183 - %190 = icmp sgt <8 x i32> %broadcast.splat13, %188 - %191 = extractelement <8 x i64> %182, i32 0 - %192 = shl i64 %191, 32 - %193 = ashr exact i64 %192, 32 - %194 = getelementptr inbounds float, float* %13, i64 %193 - %195 = bitcast float* %194 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %195, i32 4, <8 x i1> %189), !tbaa !12, !llvm.access.group !16 - %196 = getelementptr inbounds float, float* %194, i64 8 - %197 = bitcast float* %196 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %197, i32 4, <8 x i1> %190), !tbaa !12, !llvm.access.group !16 - %198 = or <8 x i64> %broadcast.splat, - %199 = trunc <8 x i64> %198 to <8 x i32> - %200 = trunc i64 %mul.i.i.i to i32 - %201 = or i32 %200, 8 - %202 = insertelement <8 x i32> undef, i32 %201, i64 0 - %203 = shufflevector <8 x i32> %202, <8 x i32> undef, <8 x i32> zeroinitializer - %204 = or <8 x i32> %203, - %205 = icmp sgt <8 x i32> %broadcast.splat11, %199 - %206 = icmp sgt <8 x i32> %broadcast.splat13, %204 - %207 = extractelement <8 x i64> %198, i32 0 - %208 = shl i64 %207, 32 - %209 = ashr exact i64 %208, 32 - %210 = getelementptr inbounds float, float* %13, i64 %209 - %211 = bitcast float* %210 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %211, i32 4, <8 x i1> %205), !tbaa !12, !llvm.access.group !16 - %212 = getelementptr inbounds float, float* %210, i64 8 - %213 = bitcast float* %212 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %213, i32 4, <8 x i1> %206), !tbaa !12, !llvm.access.group !16 - %214 = or <8 x i64> %broadcast.splat, - %215 = trunc <8 x i64> %214 to <8 x i32> - %216 = trunc i64 %mul.i.i.i to i32 - %217 = or i32 %216, 8 - %218 = insertelement <8 x i32> undef, i32 %217, i64 0 - %219 = shufflevector <8 x i32> %218, <8 x i32> undef, <8 x i32> zeroinitializer - %220 = or <8 x i32> %219, - %221 = icmp sgt <8 x i32> %broadcast.splat11, %215 - %222 = icmp sgt <8 x i32> %broadcast.splat13, %220 - %223 = extractelement <8 x i64> %214, i32 0 - %224 = shl i64 %223, 32 - %225 = ashr exact i64 %224, 32 - %226 = getelementptr inbounds float, float* %13, i64 %225 - %227 = bitcast float* %226 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %227, i32 4, <8 x i1> %221), !tbaa !12, !llvm.access.group !16 - %228 = getelementptr inbounds float, float* %226, i64 8 - %229 = bitcast float* %228 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %229, i32 4, <8 x i1> %222), !tbaa !12, !llvm.access.group !16 - %230 = or <8 x i64> %broadcast.splat, - %231 = trunc <8 x i64> %230 to <8 x i32> - %232 = trunc i64 %mul.i.i.i to i32 - %233 = or i32 %232, 8 - %234 = insertelement <8 x i32> undef, i32 %233, i64 0 - %235 = shufflevector <8 x i32> %234, <8 x i32> undef, <8 x i32> zeroinitializer - %236 = or <8 x i32> %235, - %237 = icmp sgt <8 x i32> %broadcast.splat11, %231 - %238 = icmp sgt <8 x i32> %broadcast.splat13, %236 - %239 = extractelement <8 x i64> %230, i32 0 - %240 = shl i64 %239, 32 - %241 = ashr exact i64 %240, 32 - %242 = getelementptr inbounds float, float* %13, i64 %241 - %243 = bitcast float* %242 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %243, i32 4, <8 x i1> %237), !tbaa !12, !llvm.access.group !16 - %244 = getelementptr inbounds float, float* %242, i64 8 - %245 = bitcast float* %244 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %245, i32 4, <8 x i1> %238), !tbaa !12, !llvm.access.group !16 - %246 = or <8 x i64> %broadcast.splat, - %247 = trunc <8 x i64> %246 to <8 x i32> - %248 = trunc i64 %mul.i.i.i to i32 - %249 = or i32 %248, 8 - %250 = insertelement <8 x i32> undef, i32 %249, i64 0 - %251 = shufflevector <8 x i32> %250, <8 x i32> undef, <8 x i32> zeroinitializer - %252 = or <8 x i32> %251, - %253 = icmp sgt <8 x i32> %broadcast.splat11, %247 - %254 = icmp sgt <8 x i32> %broadcast.splat13, %252 - %255 = extractelement <8 x i64> %246, i32 0 - %256 = shl i64 %255, 32 - %257 = ashr exact i64 %256, 32 - %258 = getelementptr inbounds float, float* %13, i64 %257 - %259 = bitcast float* %258 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %259, i32 4, <8 x i1> %253), !tbaa !12, !llvm.access.group !16 - %260 = getelementptr inbounds float, float* %258, i64 8 - %261 = bitcast float* %260 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %261, i32 4, <8 x i1> %254), !tbaa !12, !llvm.access.group !16 - %262 = or <8 x i64> %broadcast.splat, - %263 = trunc <8 x i64> %262 to <8 x i32> - %264 = trunc i64 %mul.i.i.i to i32 - %265 = or i32 %264, 8 - %266 = insertelement <8 x i32> undef, i32 %265, i64 0 - %267 = shufflevector <8 x i32> %266, <8 x i32> undef, <8 x i32> zeroinitializer - %268 = or <8 x i32> %267, - %269 = icmp sgt <8 x i32> %broadcast.splat11, %263 - %270 = icmp sgt <8 x i32> %broadcast.splat13, %268 - %271 = extractelement <8 x i64> %262, i32 0 - %272 = shl i64 %271, 32 - %273 = ashr exact i64 %272, 32 - %274 = getelementptr inbounds float, float* %13, i64 %273 - %275 = bitcast float* %274 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %275, i32 4, <8 x i1> %269), !tbaa !12, !llvm.access.group !16 - %276 = getelementptr inbounds float, float* %274, i64 8 - %277 = bitcast float* %276 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %277, i32 4, <8 x i1> %270), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_bicgKernel1.exit - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %291, %if.end.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %17, %conv.i.i.us - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %mul.i.i.us = mul nsw i32 %21, %conv.i.i.us - %278 = sext i32 %mul.i.i.us to i64 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %if.then.i.i.us - %indvars.iv.next.i.i2.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %if.then.i.i.us ] - %279 = phi float [ %283, %for.body.i.i.us ], [ 0.000000e+00, %if.then.i.i.us ] - %280 = add nsw i64 %indvars.iv.next.i.i2.us, %278 - %arrayidx5.i.i.us = getelementptr inbounds float, float* %7, i64 %280 - %281 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %arrayidx7.i.i.us = getelementptr inbounds float, float* %10, i64 %indvars.iv.next.i.i2.us - %282 = load float, float* %arrayidx7.i.i.us, align 4, !tbaa !12 - %283 = tail call float @llvm.fmuladd.f32(float %281, float %282, float %279) #2 - store float %283, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i2.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.end.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %284 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %284, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %17, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -_pocl_kernel_bicgKernel1.exit.loopexit: ; preds = %if.end.i.i.us.1 - br label %_pocl_kernel_bicgKernel1.exit - -_pocl_kernel_bicgKernel1.exit: ; preds = %_pocl_kernel_bicgKernel1.exit.loopexit, %vector.ph - ret void - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %mul.i.i.us.1 = mul nsw i32 %21, %conv.i.i.us.1 - %285 = sext i32 %mul.i.i.us.1 to i64 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %if.then.i.i.us.1 - %indvars.iv.next.i.i2.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %if.then.i.i.us.1 ] - %286 = phi float [ %290, %for.body.i.i.us.1 ], [ 0.000000e+00, %if.then.i.i.us.1 ] - %287 = add nsw i64 %indvars.iv.next.i.i2.us.1, %285 - %arrayidx5.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %287 - %288 = load float, float* %arrayidx5.i.i.us.1, align 4, !tbaa !12 - %arrayidx7.i.i.us.1 = getelementptr inbounds float, float* %10, i64 %indvars.iv.next.i.i2.us.1 - %289 = load float, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12 - %290 = tail call float @llvm.fmuladd.f32(float %288, float %289, float %286) #2 - store float %290, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i2.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.1, label %if.end.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.end.i.i.us.1.loopexit, %if.end.i.i.us - %291 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %291, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_bicgKernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #3 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } -attributes #3 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !""} -!10 = !{!"A", !"p", !"q", !"nx", !"ny"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = distinct !{!20, !21} -!21 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/bicg_kernel2.ll b/pocl_irs/bicg_kernel2.ll deleted file mode 100644 index 04df963..0000000 --- a/pocl_irs/bicg_kernel2.ll +++ /dev/null @@ -1,1230 +0,0 @@ -; ModuleID = './DC/PEIMDLPPEFFJPJMNMNJKJHDPNNPAAECGDPFEA/bicgKernel2/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_bicgKernel2(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, i32 %4, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %5, i64 %6, i64 %7, i64 %8) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %6, 8 - %cmp221.i = icmp sgt i32 %3, 0 - %10 = sext i32 %4 to i64 - %wide.trip.count.i = zext i32 %3 to i64 - br i1 %cmp221.i, label %pregion_for_entry.entry.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.us.preheader: ; preds = %9 - br label %pregion_for_entry.entry.i.us - -vector.ph: ; preds = %9 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert11 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat12 = shufflevector <8 x i32> %broadcast.splatinsert11, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert13 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat14 = shufflevector <8 x i32> %broadcast.splatinsert13, <8 x i32> undef, <8 x i32> zeroinitializer - %11 = or <8 x i64> %broadcast.splat, - %12 = trunc <8 x i64> %11 to <8 x i32> - %13 = trunc i64 %mul.i.i to i32 - %14 = or i32 %13, 8 - %15 = insertelement <8 x i32> undef, i32 %14, i64 0 - %16 = shufflevector <8 x i32> %15, <8 x i32> undef, <8 x i32> zeroinitializer - %17 = or <8 x i32> %16, - %18 = icmp sgt <8 x i32> %broadcast.splat12, %12 - %19 = icmp sgt <8 x i32> %broadcast.splat14, %17 - %20 = extractelement <8 x i64> %11, i32 0 - %21 = shl i64 %20, 32 - %22 = ashr exact i64 %21, 32 - %23 = getelementptr inbounds float, float* %2, i64 %22 - %24 = bitcast float* %23 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %24, i32 4, <8 x i1> %18), !tbaa !12, !llvm.access.group !16 - %25 = getelementptr inbounds float, float* %23, i64 8 - %26 = bitcast float* %25 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %26, i32 4, <8 x i1> %19), !tbaa !12, !llvm.access.group !16 - %27 = or <8 x i64> %broadcast.splat, - %28 = trunc <8 x i64> %27 to <8 x i32> - %29 = trunc i64 %mul.i.i to i32 - %30 = or i32 %29, 8 - %31 = insertelement <8 x i32> undef, i32 %30, i64 0 - %32 = shufflevector <8 x i32> %31, <8 x i32> undef, <8 x i32> zeroinitializer - %33 = or <8 x i32> %32, - %34 = icmp sgt <8 x i32> %broadcast.splat12, %28 - %35 = icmp sgt <8 x i32> %broadcast.splat14, %33 - %36 = extractelement <8 x i64> %27, i32 0 - %37 = shl i64 %36, 32 - %38 = ashr exact i64 %37, 32 - %39 = getelementptr inbounds float, float* %2, i64 %38 - %40 = bitcast float* %39 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %40, i32 4, <8 x i1> %34), !tbaa !12, !llvm.access.group !16 - %41 = getelementptr inbounds float, float* %39, i64 8 - %42 = bitcast float* %41 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %42, i32 4, <8 x i1> %35), !tbaa !12, !llvm.access.group !16 - %43 = or <8 x i64> %broadcast.splat, - %44 = trunc <8 x i64> %43 to <8 x i32> - %45 = trunc i64 %mul.i.i to i32 - %46 = or i32 %45, 8 - %47 = insertelement <8 x i32> undef, i32 %46, i64 0 - %48 = shufflevector <8 x i32> %47, <8 x i32> undef, <8 x i32> zeroinitializer - %49 = or <8 x i32> %48, - %50 = icmp sgt <8 x i32> %broadcast.splat12, %44 - %51 = icmp sgt <8 x i32> %broadcast.splat14, %49 - %52 = extractelement <8 x i64> %43, i32 0 - %53 = shl i64 %52, 32 - %54 = ashr exact i64 %53, 32 - %55 = getelementptr inbounds float, float* %2, i64 %54 - %56 = bitcast float* %55 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %56, i32 4, <8 x i1> %50), !tbaa !12, !llvm.access.group !16 - %57 = getelementptr inbounds float, float* %55, i64 8 - %58 = bitcast float* %57 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %58, i32 4, <8 x i1> %51), !tbaa !12, !llvm.access.group !16 - %59 = or <8 x i64> %broadcast.splat, - %60 = trunc <8 x i64> %59 to <8 x i32> - %61 = trunc i64 %mul.i.i to i32 - %62 = or i32 %61, 8 - %63 = insertelement <8 x i32> undef, i32 %62, i64 0 - %64 = shufflevector <8 x i32> %63, <8 x i32> undef, <8 x i32> zeroinitializer - %65 = or <8 x i32> %64, - %66 = icmp sgt <8 x i32> %broadcast.splat12, %60 - %67 = icmp sgt <8 x i32> %broadcast.splat14, %65 - %68 = extractelement <8 x i64> %59, i32 0 - %69 = shl i64 %68, 32 - %70 = ashr exact i64 %69, 32 - %71 = getelementptr inbounds float, float* %2, i64 %70 - %72 = bitcast float* %71 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %72, i32 4, <8 x i1> %66), !tbaa !12, !llvm.access.group !16 - %73 = getelementptr inbounds float, float* %71, i64 8 - %74 = bitcast float* %73 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %74, i32 4, <8 x i1> %67), !tbaa !12, !llvm.access.group !16 - %75 = or <8 x i64> %broadcast.splat, - %76 = trunc <8 x i64> %75 to <8 x i32> - %77 = trunc i64 %mul.i.i to i32 - %78 = or i32 %77, 8 - %79 = insertelement <8 x i32> undef, i32 %78, i64 0 - %80 = shufflevector <8 x i32> %79, <8 x i32> undef, <8 x i32> zeroinitializer - %81 = or <8 x i32> %80, - %82 = icmp sgt <8 x i32> %broadcast.splat12, %76 - %83 = icmp sgt <8 x i32> %broadcast.splat14, %81 - %84 = extractelement <8 x i64> %75, i32 0 - %85 = shl i64 %84, 32 - %86 = ashr exact i64 %85, 32 - %87 = getelementptr inbounds float, float* %2, i64 %86 - %88 = bitcast float* %87 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %88, i32 4, <8 x i1> %82), !tbaa !12, !llvm.access.group !16 - %89 = getelementptr inbounds float, float* %87, i64 8 - %90 = bitcast float* %89 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %90, i32 4, <8 x i1> %83), !tbaa !12, !llvm.access.group !16 - %91 = or <8 x i64> %broadcast.splat, - %92 = trunc <8 x i64> %91 to <8 x i32> - %93 = trunc i64 %mul.i.i to i32 - %94 = or i32 %93, 8 - %95 = insertelement <8 x i32> undef, i32 %94, i64 0 - %96 = shufflevector <8 x i32> %95, <8 x i32> undef, <8 x i32> zeroinitializer - %97 = or <8 x i32> %96, - %98 = icmp sgt <8 x i32> %broadcast.splat12, %92 - %99 = icmp sgt <8 x i32> %broadcast.splat14, %97 - %100 = extractelement <8 x i64> %91, i32 0 - %101 = shl i64 %100, 32 - %102 = ashr exact i64 %101, 32 - %103 = getelementptr inbounds float, float* %2, i64 %102 - %104 = bitcast float* %103 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %104, i32 4, <8 x i1> %98), !tbaa !12, !llvm.access.group !16 - %105 = getelementptr inbounds float, float* %103, i64 8 - %106 = bitcast float* %105 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %106, i32 4, <8 x i1> %99), !tbaa !12, !llvm.access.group !16 - %107 = or <8 x i64> %broadcast.splat, - %108 = trunc <8 x i64> %107 to <8 x i32> - %109 = trunc i64 %mul.i.i to i32 - %110 = or i32 %109, 8 - %111 = insertelement <8 x i32> undef, i32 %110, i64 0 - %112 = shufflevector <8 x i32> %111, <8 x i32> undef, <8 x i32> zeroinitializer - %113 = or <8 x i32> %112, - %114 = icmp sgt <8 x i32> %broadcast.splat12, %108 - %115 = icmp sgt <8 x i32> %broadcast.splat14, %113 - %116 = extractelement <8 x i64> %107, i32 0 - %117 = shl i64 %116, 32 - %118 = ashr exact i64 %117, 32 - %119 = getelementptr inbounds float, float* %2, i64 %118 - %120 = bitcast float* %119 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %120, i32 4, <8 x i1> %114), !tbaa !12, !llvm.access.group !16 - %121 = getelementptr inbounds float, float* %119, i64 8 - %122 = bitcast float* %121 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %122, i32 4, <8 x i1> %115), !tbaa !12, !llvm.access.group !16 - %123 = or <8 x i64> %broadcast.splat, - %124 = trunc <8 x i64> %123 to <8 x i32> - %125 = trunc i64 %mul.i.i to i32 - %126 = or i32 %125, 8 - %127 = insertelement <8 x i32> undef, i32 %126, i64 0 - %128 = shufflevector <8 x i32> %127, <8 x i32> undef, <8 x i32> zeroinitializer - %129 = or <8 x i32> %128, - %130 = icmp sgt <8 x i32> %broadcast.splat12, %124 - %131 = icmp sgt <8 x i32> %broadcast.splat14, %129 - %132 = extractelement <8 x i64> %123, i32 0 - %133 = shl i64 %132, 32 - %134 = ashr exact i64 %133, 32 - %135 = getelementptr inbounds float, float* %2, i64 %134 - %136 = bitcast float* %135 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %136, i32 4, <8 x i1> %130), !tbaa !12, !llvm.access.group !16 - %137 = getelementptr inbounds float, float* %135, i64 8 - %138 = bitcast float* %137 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %138, i32 4, <8 x i1> %131), !tbaa !12, !llvm.access.group !16 - %139 = or <8 x i64> %broadcast.splat, - %140 = trunc <8 x i64> %139 to <8 x i32> - %141 = trunc i64 %mul.i.i to i32 - %142 = or i32 %141, 8 - %143 = insertelement <8 x i32> undef, i32 %142, i64 0 - %144 = shufflevector <8 x i32> %143, <8 x i32> undef, <8 x i32> zeroinitializer - %145 = or <8 x i32> %144, - %146 = icmp sgt <8 x i32> %broadcast.splat12, %140 - %147 = icmp sgt <8 x i32> %broadcast.splat14, %145 - %148 = extractelement <8 x i64> %139, i32 0 - %149 = shl i64 %148, 32 - %150 = ashr exact i64 %149, 32 - %151 = getelementptr inbounds float, float* %2, i64 %150 - %152 = bitcast float* %151 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %152, i32 4, <8 x i1> %146), !tbaa !12, !llvm.access.group !16 - %153 = getelementptr inbounds float, float* %151, i64 8 - %154 = bitcast float* %153 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %154, i32 4, <8 x i1> %147), !tbaa !12, !llvm.access.group !16 - %155 = or <8 x i64> %broadcast.splat, - %156 = trunc <8 x i64> %155 to <8 x i32> - %157 = trunc i64 %mul.i.i to i32 - %158 = or i32 %157, 8 - %159 = insertelement <8 x i32> undef, i32 %158, i64 0 - %160 = shufflevector <8 x i32> %159, <8 x i32> undef, <8 x i32> zeroinitializer - %161 = or <8 x i32> %160, - %162 = icmp sgt <8 x i32> %broadcast.splat12, %156 - %163 = icmp sgt <8 x i32> %broadcast.splat14, %161 - %164 = extractelement <8 x i64> %155, i32 0 - %165 = shl i64 %164, 32 - %166 = ashr exact i64 %165, 32 - %167 = getelementptr inbounds float, float* %2, i64 %166 - %168 = bitcast float* %167 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %168, i32 4, <8 x i1> %162), !tbaa !12, !llvm.access.group !16 - %169 = getelementptr inbounds float, float* %167, i64 8 - %170 = bitcast float* %169 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %170, i32 4, <8 x i1> %163), !tbaa !12, !llvm.access.group !16 - %171 = or <8 x i64> %broadcast.splat, - %172 = trunc <8 x i64> %171 to <8 x i32> - %173 = trunc i64 %mul.i.i to i32 - %174 = or i32 %173, 8 - %175 = insertelement <8 x i32> undef, i32 %174, i64 0 - %176 = shufflevector <8 x i32> %175, <8 x i32> undef, <8 x i32> zeroinitializer - %177 = or <8 x i32> %176, - %178 = icmp sgt <8 x i32> %broadcast.splat12, %172 - %179 = icmp sgt <8 x i32> %broadcast.splat14, %177 - %180 = extractelement <8 x i64> %171, i32 0 - %181 = shl i64 %180, 32 - %182 = ashr exact i64 %181, 32 - %183 = getelementptr inbounds float, float* %2, i64 %182 - %184 = bitcast float* %183 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %184, i32 4, <8 x i1> %178), !tbaa !12, !llvm.access.group !16 - %185 = getelementptr inbounds float, float* %183, i64 8 - %186 = bitcast float* %185 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %186, i32 4, <8 x i1> %179), !tbaa !12, !llvm.access.group !16 - %187 = or <8 x i64> %broadcast.splat, - %188 = trunc <8 x i64> %187 to <8 x i32> - %189 = trunc i64 %mul.i.i to i32 - %190 = or i32 %189, 8 - %191 = insertelement <8 x i32> undef, i32 %190, i64 0 - %192 = shufflevector <8 x i32> %191, <8 x i32> undef, <8 x i32> zeroinitializer - %193 = or <8 x i32> %192, - %194 = icmp sgt <8 x i32> %broadcast.splat12, %188 - %195 = icmp sgt <8 x i32> %broadcast.splat14, %193 - %196 = extractelement <8 x i64> %187, i32 0 - %197 = shl i64 %196, 32 - %198 = ashr exact i64 %197, 32 - %199 = getelementptr inbounds float, float* %2, i64 %198 - %200 = bitcast float* %199 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %200, i32 4, <8 x i1> %194), !tbaa !12, !llvm.access.group !16 - %201 = getelementptr inbounds float, float* %199, i64 8 - %202 = bitcast float* %201 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %202, i32 4, <8 x i1> %195), !tbaa !12, !llvm.access.group !16 - %203 = or <8 x i64> %broadcast.splat, - %204 = trunc <8 x i64> %203 to <8 x i32> - %205 = trunc i64 %mul.i.i to i32 - %206 = or i32 %205, 8 - %207 = insertelement <8 x i32> undef, i32 %206, i64 0 - %208 = shufflevector <8 x i32> %207, <8 x i32> undef, <8 x i32> zeroinitializer - %209 = or <8 x i32> %208, - %210 = icmp sgt <8 x i32> %broadcast.splat12, %204 - %211 = icmp sgt <8 x i32> %broadcast.splat14, %209 - %212 = extractelement <8 x i64> %203, i32 0 - %213 = shl i64 %212, 32 - %214 = ashr exact i64 %213, 32 - %215 = getelementptr inbounds float, float* %2, i64 %214 - %216 = bitcast float* %215 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %216, i32 4, <8 x i1> %210), !tbaa !12, !llvm.access.group !16 - %217 = getelementptr inbounds float, float* %215, i64 8 - %218 = bitcast float* %217 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %218, i32 4, <8 x i1> %211), !tbaa !12, !llvm.access.group !16 - %219 = or <8 x i64> %broadcast.splat, - %220 = trunc <8 x i64> %219 to <8 x i32> - %221 = trunc i64 %mul.i.i to i32 - %222 = or i32 %221, 8 - %223 = insertelement <8 x i32> undef, i32 %222, i64 0 - %224 = shufflevector <8 x i32> %223, <8 x i32> undef, <8 x i32> zeroinitializer - %225 = or <8 x i32> %224, - %226 = icmp sgt <8 x i32> %broadcast.splat12, %220 - %227 = icmp sgt <8 x i32> %broadcast.splat14, %225 - %228 = extractelement <8 x i64> %219, i32 0 - %229 = shl i64 %228, 32 - %230 = ashr exact i64 %229, 32 - %231 = getelementptr inbounds float, float* %2, i64 %230 - %232 = bitcast float* %231 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %232, i32 4, <8 x i1> %226), !tbaa !12, !llvm.access.group !16 - %233 = getelementptr inbounds float, float* %231, i64 8 - %234 = bitcast float* %233 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %234, i32 4, <8 x i1> %227), !tbaa !12, !llvm.access.group !16 - %235 = or <8 x i64> %broadcast.splat, - %236 = trunc <8 x i64> %235 to <8 x i32> - %237 = trunc i64 %mul.i.i to i32 - %238 = or i32 %237, 8 - %239 = insertelement <8 x i32> undef, i32 %238, i64 0 - %240 = shufflevector <8 x i32> %239, <8 x i32> undef, <8 x i32> zeroinitializer - %241 = or <8 x i32> %240, - %242 = icmp sgt <8 x i32> %broadcast.splat12, %236 - %243 = icmp sgt <8 x i32> %broadcast.splat14, %241 - %244 = extractelement <8 x i64> %235, i32 0 - %245 = shl i64 %244, 32 - %246 = ashr exact i64 %245, 32 - %247 = getelementptr inbounds float, float* %2, i64 %246 - %248 = bitcast float* %247 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %248, i32 4, <8 x i1> %242), !tbaa !12, !llvm.access.group !16 - %249 = getelementptr inbounds float, float* %247, i64 8 - %250 = bitcast float* %249 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %250, i32 4, <8 x i1> %243), !tbaa !12, !llvm.access.group !16 - %251 = or <8 x i64> %broadcast.splat, - %252 = trunc <8 x i64> %251 to <8 x i32> - %253 = trunc i64 %mul.i.i to i32 - %254 = or i32 %253, 8 - %255 = insertelement <8 x i32> undef, i32 %254, i64 0 - %256 = shufflevector <8 x i32> %255, <8 x i32> undef, <8 x i32> zeroinitializer - %257 = or <8 x i32> %256, - %258 = icmp sgt <8 x i32> %broadcast.splat12, %252 - %259 = icmp sgt <8 x i32> %broadcast.splat14, %257 - %260 = extractelement <8 x i64> %251, i32 0 - %261 = shl i64 %260, 32 - %262 = ashr exact i64 %261, 32 - %263 = getelementptr inbounds float, float* %2, i64 %262 - %264 = bitcast float* %263 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %264, i32 4, <8 x i1> %258), !tbaa !12, !llvm.access.group !16 - %265 = getelementptr inbounds float, float* %263, i64 8 - %266 = bitcast float* %265 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %266, i32 4, <8 x i1> %259), !tbaa !12, !llvm.access.group !16 - br label %bicgKernel2.exit - -pregion_for_entry.entry.i.us: ; preds = %if.end.i.us.1, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %280, %if.end.i.us.1 ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %4 - br i1 %cmp.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %sext.i.us = shl i64 %add1.i.i.us, 32 - %idxprom.i.us = ashr exact i64 %sext.i.us, 32 - %arrayidx.i.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us - store float 0.000000e+00, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %if.then.i.us - %indvars.iv.next.i3.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %if.then.i.us ] - %267 = phi float [ %272, %for.body.i.us ], [ 0.000000e+00, %if.then.i.us ] - %268 = mul nsw i64 %indvars.iv.next.i3.us, %10 - %269 = add nsw i64 %268, %idxprom.i.us - %arrayidx5.i.us = getelementptr inbounds float, float* %0, i64 %269 - %270 = load float, float* %arrayidx5.i.us, align 4, !tbaa !12 - %arrayidx7.i.us = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.i3.us - %271 = load float, float* %arrayidx7.i.us, align 4, !tbaa !12 - %272 = tail call float @llvm.fmuladd.f32(float %270, float %271, float %267) #2 - store float %272, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i3.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %if.end.i.us.loopexit, label %for.body.i.us, !llvm.loop !18 - -if.end.i.us.loopexit: ; preds = %for.body.i.us - br label %if.end.i.us - -if.end.i.us: ; preds = %if.end.i.us.loopexit, %pregion_for_entry.entry.i.us - %273 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1 = add nuw nsw i64 %273, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp.i.us.1 = icmp slt i32 %conv.i.us.1, %4 - br i1 %cmp.i.us.1, label %if.then.i.us.1, label %if.end.i.us.1 - -bicgKernel2.exit.loopexit: ; preds = %if.end.i.us.1 - br label %bicgKernel2.exit - -bicgKernel2.exit: ; preds = %bicgKernel2.exit.loopexit, %vector.ph - ret void - -if.then.i.us.1: ; preds = %if.end.i.us - %sext.i.us.1 = shl i64 %add1.i.i.us.1, 32 - %idxprom.i.us.1 = ashr exact i64 %sext.i.us.1, 32 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body.i.us.1 - -for.body.i.us.1: ; preds = %for.body.i.us.1, %if.then.i.us.1 - %indvars.iv.next.i3.us.1 = phi i64 [ %indvars.iv.next.i.us.1, %for.body.i.us.1 ], [ 0, %if.then.i.us.1 ] - %274 = phi float [ %279, %for.body.i.us.1 ], [ 0.000000e+00, %if.then.i.us.1 ] - %275 = mul nsw i64 %indvars.iv.next.i3.us.1, %10 - %276 = add nsw i64 %275, %idxprom.i.us.1 - %arrayidx5.i.us.1 = getelementptr inbounds float, float* %0, i64 %276 - %277 = load float, float* %arrayidx5.i.us.1, align 4, !tbaa !12 - %arrayidx7.i.us.1 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.i3.us.1 - %278 = load float, float* %arrayidx7.i.us.1, align 4, !tbaa !12 - %279 = tail call float @llvm.fmuladd.f32(float %277, float %278, float %274) #2 - store float %279, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.1, 1 - %exitcond.not.i.us.1 = icmp eq i64 %indvars.iv.next.i.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.1, label %if.end.i.us.1.loopexit, label %for.body.i.us.1, !llvm.loop !18 - -if.end.i.us.1.loopexit: ; preds = %for.body.i.us.1 - br label %if.end.i.us.1 - -if.end.i.us.1: ; preds = %if.end.i.us.1.loopexit, %if.end.i.us - %280 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %280, 256 - br i1 %exitcond.not.1, label %bicgKernel2.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_bicgKernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp221.i.i = icmp sgt i32 %20, 0 - %25 = sext i32 %24 to i64 - %wide.trip.count.i.i = zext i32 %20 to i64 - br i1 %cmp221.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -vector.ph: ; preds = %5 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert11 = insertelement <8 x i32> undef, i32 %24, i32 0 - %broadcast.splat12 = shufflevector <8 x i32> %broadcast.splatinsert11, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert13 = insertelement <8 x i32> undef, i32 %24, i32 0 - %broadcast.splat14 = shufflevector <8 x i32> %broadcast.splatinsert13, <8 x i32> undef, <8 x i32> zeroinitializer - %26 = or <8 x i64> %broadcast.splat, - %27 = trunc <8 x i64> %26 to <8 x i32> - %28 = trunc i64 %mul.i.i.i to i32 - %29 = or i32 %28, 8 - %30 = insertelement <8 x i32> undef, i32 %29, i64 0 - %31 = shufflevector <8 x i32> %30, <8 x i32> undef, <8 x i32> zeroinitializer - %32 = or <8 x i32> %31, - %33 = icmp sgt <8 x i32> %broadcast.splat12, %27 - %34 = icmp sgt <8 x i32> %broadcast.splat14, %32 - %35 = extractelement <8 x i64> %26, i32 0 - %36 = shl i64 %35, 32 - %37 = ashr exact i64 %36, 32 - %38 = getelementptr inbounds float, float* %16, i64 %37 - %39 = bitcast float* %38 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %39, i32 4, <8 x i1> %33), !tbaa !12, !llvm.access.group !16 - %40 = getelementptr inbounds float, float* %38, i64 8 - %41 = bitcast float* %40 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %41, i32 4, <8 x i1> %34), !tbaa !12, !llvm.access.group !16 - %42 = or <8 x i64> %broadcast.splat, - %43 = trunc <8 x i64> %42 to <8 x i32> - %44 = trunc i64 %mul.i.i.i to i32 - %45 = or i32 %44, 8 - %46 = insertelement <8 x i32> undef, i32 %45, i64 0 - %47 = shufflevector <8 x i32> %46, <8 x i32> undef, <8 x i32> zeroinitializer - %48 = or <8 x i32> %47, - %49 = icmp sgt <8 x i32> %broadcast.splat12, %43 - %50 = icmp sgt <8 x i32> %broadcast.splat14, %48 - %51 = extractelement <8 x i64> %42, i32 0 - %52 = shl i64 %51, 32 - %53 = ashr exact i64 %52, 32 - %54 = getelementptr inbounds float, float* %16, i64 %53 - %55 = bitcast float* %54 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %55, i32 4, <8 x i1> %49), !tbaa !12, !llvm.access.group !16 - %56 = getelementptr inbounds float, float* %54, i64 8 - %57 = bitcast float* %56 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %57, i32 4, <8 x i1> %50), !tbaa !12, !llvm.access.group !16 - %58 = or <8 x i64> %broadcast.splat, - %59 = trunc <8 x i64> %58 to <8 x i32> - %60 = trunc i64 %mul.i.i.i to i32 - %61 = or i32 %60, 8 - %62 = insertelement <8 x i32> undef, i32 %61, i64 0 - %63 = shufflevector <8 x i32> %62, <8 x i32> undef, <8 x i32> zeroinitializer - %64 = or <8 x i32> %63, - %65 = icmp sgt <8 x i32> %broadcast.splat12, %59 - %66 = icmp sgt <8 x i32> %broadcast.splat14, %64 - %67 = extractelement <8 x i64> %58, i32 0 - %68 = shl i64 %67, 32 - %69 = ashr exact i64 %68, 32 - %70 = getelementptr inbounds float, float* %16, i64 %69 - %71 = bitcast float* %70 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %71, i32 4, <8 x i1> %65), !tbaa !12, !llvm.access.group !16 - %72 = getelementptr inbounds float, float* %70, i64 8 - %73 = bitcast float* %72 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %73, i32 4, <8 x i1> %66), !tbaa !12, !llvm.access.group !16 - %74 = or <8 x i64> %broadcast.splat, - %75 = trunc <8 x i64> %74 to <8 x i32> - %76 = trunc i64 %mul.i.i.i to i32 - %77 = or i32 %76, 8 - %78 = insertelement <8 x i32> undef, i32 %77, i64 0 - %79 = shufflevector <8 x i32> %78, <8 x i32> undef, <8 x i32> zeroinitializer - %80 = or <8 x i32> %79, - %81 = icmp sgt <8 x i32> %broadcast.splat12, %75 - %82 = icmp sgt <8 x i32> %broadcast.splat14, %80 - %83 = extractelement <8 x i64> %74, i32 0 - %84 = shl i64 %83, 32 - %85 = ashr exact i64 %84, 32 - %86 = getelementptr inbounds float, float* %16, i64 %85 - %87 = bitcast float* %86 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %87, i32 4, <8 x i1> %81), !tbaa !12, !llvm.access.group !16 - %88 = getelementptr inbounds float, float* %86, i64 8 - %89 = bitcast float* %88 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %89, i32 4, <8 x i1> %82), !tbaa !12, !llvm.access.group !16 - %90 = or <8 x i64> %broadcast.splat, - %91 = trunc <8 x i64> %90 to <8 x i32> - %92 = trunc i64 %mul.i.i.i to i32 - %93 = or i32 %92, 8 - %94 = insertelement <8 x i32> undef, i32 %93, i64 0 - %95 = shufflevector <8 x i32> %94, <8 x i32> undef, <8 x i32> zeroinitializer - %96 = or <8 x i32> %95, - %97 = icmp sgt <8 x i32> %broadcast.splat12, %91 - %98 = icmp sgt <8 x i32> %broadcast.splat14, %96 - %99 = extractelement <8 x i64> %90, i32 0 - %100 = shl i64 %99, 32 - %101 = ashr exact i64 %100, 32 - %102 = getelementptr inbounds float, float* %16, i64 %101 - %103 = bitcast float* %102 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %103, i32 4, <8 x i1> %97), !tbaa !12, !llvm.access.group !16 - %104 = getelementptr inbounds float, float* %102, i64 8 - %105 = bitcast float* %104 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %105, i32 4, <8 x i1> %98), !tbaa !12, !llvm.access.group !16 - %106 = or <8 x i64> %broadcast.splat, - %107 = trunc <8 x i64> %106 to <8 x i32> - %108 = trunc i64 %mul.i.i.i to i32 - %109 = or i32 %108, 8 - %110 = insertelement <8 x i32> undef, i32 %109, i64 0 - %111 = shufflevector <8 x i32> %110, <8 x i32> undef, <8 x i32> zeroinitializer - %112 = or <8 x i32> %111, - %113 = icmp sgt <8 x i32> %broadcast.splat12, %107 - %114 = icmp sgt <8 x i32> %broadcast.splat14, %112 - %115 = extractelement <8 x i64> %106, i32 0 - %116 = shl i64 %115, 32 - %117 = ashr exact i64 %116, 32 - %118 = getelementptr inbounds float, float* %16, i64 %117 - %119 = bitcast float* %118 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %119, i32 4, <8 x i1> %113), !tbaa !12, !llvm.access.group !16 - %120 = getelementptr inbounds float, float* %118, i64 8 - %121 = bitcast float* %120 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %121, i32 4, <8 x i1> %114), !tbaa !12, !llvm.access.group !16 - %122 = or <8 x i64> %broadcast.splat, - %123 = trunc <8 x i64> %122 to <8 x i32> - %124 = trunc i64 %mul.i.i.i to i32 - %125 = or i32 %124, 8 - %126 = insertelement <8 x i32> undef, i32 %125, i64 0 - %127 = shufflevector <8 x i32> %126, <8 x i32> undef, <8 x i32> zeroinitializer - %128 = or <8 x i32> %127, - %129 = icmp sgt <8 x i32> %broadcast.splat12, %123 - %130 = icmp sgt <8 x i32> %broadcast.splat14, %128 - %131 = extractelement <8 x i64> %122, i32 0 - %132 = shl i64 %131, 32 - %133 = ashr exact i64 %132, 32 - %134 = getelementptr inbounds float, float* %16, i64 %133 - %135 = bitcast float* %134 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %135, i32 4, <8 x i1> %129), !tbaa !12, !llvm.access.group !16 - %136 = getelementptr inbounds float, float* %134, i64 8 - %137 = bitcast float* %136 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %137, i32 4, <8 x i1> %130), !tbaa !12, !llvm.access.group !16 - %138 = or <8 x i64> %broadcast.splat, - %139 = trunc <8 x i64> %138 to <8 x i32> - %140 = trunc i64 %mul.i.i.i to i32 - %141 = or i32 %140, 8 - %142 = insertelement <8 x i32> undef, i32 %141, i64 0 - %143 = shufflevector <8 x i32> %142, <8 x i32> undef, <8 x i32> zeroinitializer - %144 = or <8 x i32> %143, - %145 = icmp sgt <8 x i32> %broadcast.splat12, %139 - %146 = icmp sgt <8 x i32> %broadcast.splat14, %144 - %147 = extractelement <8 x i64> %138, i32 0 - %148 = shl i64 %147, 32 - %149 = ashr exact i64 %148, 32 - %150 = getelementptr inbounds float, float* %16, i64 %149 - %151 = bitcast float* %150 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %151, i32 4, <8 x i1> %145), !tbaa !12, !llvm.access.group !16 - %152 = getelementptr inbounds float, float* %150, i64 8 - %153 = bitcast float* %152 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %153, i32 4, <8 x i1> %146), !tbaa !12, !llvm.access.group !16 - %154 = or <8 x i64> %broadcast.splat, - %155 = trunc <8 x i64> %154 to <8 x i32> - %156 = trunc i64 %mul.i.i.i to i32 - %157 = or i32 %156, 8 - %158 = insertelement <8 x i32> undef, i32 %157, i64 0 - %159 = shufflevector <8 x i32> %158, <8 x i32> undef, <8 x i32> zeroinitializer - %160 = or <8 x i32> %159, - %161 = icmp sgt <8 x i32> %broadcast.splat12, %155 - %162 = icmp sgt <8 x i32> %broadcast.splat14, %160 - %163 = extractelement <8 x i64> %154, i32 0 - %164 = shl i64 %163, 32 - %165 = ashr exact i64 %164, 32 - %166 = getelementptr inbounds float, float* %16, i64 %165 - %167 = bitcast float* %166 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %167, i32 4, <8 x i1> %161), !tbaa !12, !llvm.access.group !16 - %168 = getelementptr inbounds float, float* %166, i64 8 - %169 = bitcast float* %168 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %169, i32 4, <8 x i1> %162), !tbaa !12, !llvm.access.group !16 - %170 = or <8 x i64> %broadcast.splat, - %171 = trunc <8 x i64> %170 to <8 x i32> - %172 = trunc i64 %mul.i.i.i to i32 - %173 = or i32 %172, 8 - %174 = insertelement <8 x i32> undef, i32 %173, i64 0 - %175 = shufflevector <8 x i32> %174, <8 x i32> undef, <8 x i32> zeroinitializer - %176 = or <8 x i32> %175, - %177 = icmp sgt <8 x i32> %broadcast.splat12, %171 - %178 = icmp sgt <8 x i32> %broadcast.splat14, %176 - %179 = extractelement <8 x i64> %170, i32 0 - %180 = shl i64 %179, 32 - %181 = ashr exact i64 %180, 32 - %182 = getelementptr inbounds float, float* %16, i64 %181 - %183 = bitcast float* %182 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %183, i32 4, <8 x i1> %177), !tbaa !12, !llvm.access.group !16 - %184 = getelementptr inbounds float, float* %182, i64 8 - %185 = bitcast float* %184 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %185, i32 4, <8 x i1> %178), !tbaa !12, !llvm.access.group !16 - %186 = or <8 x i64> %broadcast.splat, - %187 = trunc <8 x i64> %186 to <8 x i32> - %188 = trunc i64 %mul.i.i.i to i32 - %189 = or i32 %188, 8 - %190 = insertelement <8 x i32> undef, i32 %189, i64 0 - %191 = shufflevector <8 x i32> %190, <8 x i32> undef, <8 x i32> zeroinitializer - %192 = or <8 x i32> %191, - %193 = icmp sgt <8 x i32> %broadcast.splat12, %187 - %194 = icmp sgt <8 x i32> %broadcast.splat14, %192 - %195 = extractelement <8 x i64> %186, i32 0 - %196 = shl i64 %195, 32 - %197 = ashr exact i64 %196, 32 - %198 = getelementptr inbounds float, float* %16, i64 %197 - %199 = bitcast float* %198 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %199, i32 4, <8 x i1> %193), !tbaa !12, !llvm.access.group !16 - %200 = getelementptr inbounds float, float* %198, i64 8 - %201 = bitcast float* %200 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %201, i32 4, <8 x i1> %194), !tbaa !12, !llvm.access.group !16 - %202 = or <8 x i64> %broadcast.splat, - %203 = trunc <8 x i64> %202 to <8 x i32> - %204 = trunc i64 %mul.i.i.i to i32 - %205 = or i32 %204, 8 - %206 = insertelement <8 x i32> undef, i32 %205, i64 0 - %207 = shufflevector <8 x i32> %206, <8 x i32> undef, <8 x i32> zeroinitializer - %208 = or <8 x i32> %207, - %209 = icmp sgt <8 x i32> %broadcast.splat12, %203 - %210 = icmp sgt <8 x i32> %broadcast.splat14, %208 - %211 = extractelement <8 x i64> %202, i32 0 - %212 = shl i64 %211, 32 - %213 = ashr exact i64 %212, 32 - %214 = getelementptr inbounds float, float* %16, i64 %213 - %215 = bitcast float* %214 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %215, i32 4, <8 x i1> %209), !tbaa !12, !llvm.access.group !16 - %216 = getelementptr inbounds float, float* %214, i64 8 - %217 = bitcast float* %216 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %217, i32 4, <8 x i1> %210), !tbaa !12, !llvm.access.group !16 - %218 = or <8 x i64> %broadcast.splat, - %219 = trunc <8 x i64> %218 to <8 x i32> - %220 = trunc i64 %mul.i.i.i to i32 - %221 = or i32 %220, 8 - %222 = insertelement <8 x i32> undef, i32 %221, i64 0 - %223 = shufflevector <8 x i32> %222, <8 x i32> undef, <8 x i32> zeroinitializer - %224 = or <8 x i32> %223, - %225 = icmp sgt <8 x i32> %broadcast.splat12, %219 - %226 = icmp sgt <8 x i32> %broadcast.splat14, %224 - %227 = extractelement <8 x i64> %218, i32 0 - %228 = shl i64 %227, 32 - %229 = ashr exact i64 %228, 32 - %230 = getelementptr inbounds float, float* %16, i64 %229 - %231 = bitcast float* %230 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %231, i32 4, <8 x i1> %225), !tbaa !12, !llvm.access.group !16 - %232 = getelementptr inbounds float, float* %230, i64 8 - %233 = bitcast float* %232 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %233, i32 4, <8 x i1> %226), !tbaa !12, !llvm.access.group !16 - %234 = or <8 x i64> %broadcast.splat, - %235 = trunc <8 x i64> %234 to <8 x i32> - %236 = trunc i64 %mul.i.i.i to i32 - %237 = or i32 %236, 8 - %238 = insertelement <8 x i32> undef, i32 %237, i64 0 - %239 = shufflevector <8 x i32> %238, <8 x i32> undef, <8 x i32> zeroinitializer - %240 = or <8 x i32> %239, - %241 = icmp sgt <8 x i32> %broadcast.splat12, %235 - %242 = icmp sgt <8 x i32> %broadcast.splat14, %240 - %243 = extractelement <8 x i64> %234, i32 0 - %244 = shl i64 %243, 32 - %245 = ashr exact i64 %244, 32 - %246 = getelementptr inbounds float, float* %16, i64 %245 - %247 = bitcast float* %246 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %247, i32 4, <8 x i1> %241), !tbaa !12, !llvm.access.group !16 - %248 = getelementptr inbounds float, float* %246, i64 8 - %249 = bitcast float* %248 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %249, i32 4, <8 x i1> %242), !tbaa !12, !llvm.access.group !16 - %250 = or <8 x i64> %broadcast.splat, - %251 = trunc <8 x i64> %250 to <8 x i32> - %252 = trunc i64 %mul.i.i.i to i32 - %253 = or i32 %252, 8 - %254 = insertelement <8 x i32> undef, i32 %253, i64 0 - %255 = shufflevector <8 x i32> %254, <8 x i32> undef, <8 x i32> zeroinitializer - %256 = or <8 x i32> %255, - %257 = icmp sgt <8 x i32> %broadcast.splat12, %251 - %258 = icmp sgt <8 x i32> %broadcast.splat14, %256 - %259 = extractelement <8 x i64> %250, i32 0 - %260 = shl i64 %259, 32 - %261 = ashr exact i64 %260, 32 - %262 = getelementptr inbounds float, float* %16, i64 %261 - %263 = bitcast float* %262 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %263, i32 4, <8 x i1> %257), !tbaa !12, !llvm.access.group !16 - %264 = getelementptr inbounds float, float* %262, i64 8 - %265 = bitcast float* %264 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %265, i32 4, <8 x i1> %258), !tbaa !12, !llvm.access.group !16 - %266 = or <8 x i64> %broadcast.splat, - %267 = trunc <8 x i64> %266 to <8 x i32> - %268 = trunc i64 %mul.i.i.i to i32 - %269 = or i32 %268, 8 - %270 = insertelement <8 x i32> undef, i32 %269, i64 0 - %271 = shufflevector <8 x i32> %270, <8 x i32> undef, <8 x i32> zeroinitializer - %272 = or <8 x i32> %271, - %273 = icmp sgt <8 x i32> %broadcast.splat12, %267 - %274 = icmp sgt <8 x i32> %broadcast.splat14, %272 - %275 = extractelement <8 x i64> %266, i32 0 - %276 = shl i64 %275, 32 - %277 = ashr exact i64 %276, 32 - %278 = getelementptr inbounds float, float* %16, i64 %277 - %279 = bitcast float* %278 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %279, i32 4, <8 x i1> %273), !tbaa !12, !llvm.access.group !16 - %280 = getelementptr inbounds float, float* %278, i64 8 - %281 = bitcast float* %280 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %281, i32 4, <8 x i1> %274), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_bicgKernel2.exit - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %295, %if.end.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %24, %conv.i.i.us - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %if.then.i.i.us - %indvars.iv.next.i.i3.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %if.then.i.i.us ] - %282 = phi float [ %287, %for.body.i.i.us ], [ 0.000000e+00, %if.then.i.i.us ] - %283 = mul nsw i64 %indvars.iv.next.i.i3.us, %25 - %284 = add nsw i64 %283, %idxprom.i.i.us - %arrayidx5.i.i.us = getelementptr inbounds float, float* %8, i64 %284 - %285 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %arrayidx7.i.i.us = getelementptr inbounds float, float* %12, i64 %indvars.iv.next.i.i3.us - %286 = load float, float* %arrayidx7.i.i.us, align 4, !tbaa !12 - %287 = tail call float @llvm.fmuladd.f32(float %285, float %286, float %282) #2 - store float %287, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i3.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.end.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %288 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %288, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %24, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -_pocl_kernel_bicgKernel2.exit.loopexit: ; preds = %if.end.i.i.us.1 - br label %_pocl_kernel_bicgKernel2.exit - -_pocl_kernel_bicgKernel2.exit: ; preds = %_pocl_kernel_bicgKernel2.exit.loopexit, %vector.ph - ret void - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %if.then.i.i.us.1 - %indvars.iv.next.i.i3.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %if.then.i.i.us.1 ] - %289 = phi float [ %294, %for.body.i.i.us.1 ], [ 0.000000e+00, %if.then.i.i.us.1 ] - %290 = mul nsw i64 %indvars.iv.next.i.i3.us.1, %25 - %291 = add nsw i64 %290, %idxprom.i.i.us.1 - %arrayidx5.i.i.us.1 = getelementptr inbounds float, float* %8, i64 %291 - %292 = load float, float* %arrayidx5.i.i.us.1, align 4, !tbaa !12 - %arrayidx7.i.i.us.1 = getelementptr inbounds float, float* %12, i64 %indvars.iv.next.i.i3.us.1 - %293 = load float, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12 - %294 = tail call float @llvm.fmuladd.f32(float %292, float %293, float %289) #2 - store float %294, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.1, label %if.end.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.end.i.i.us.1.loopexit, %if.end.i.i.us - %295 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %295, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_bicgKernel2.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_bicgKernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp221.i.i = icmp sgt i32 %17, 0 - %22 = sext i32 %21 to i64 - %wide.trip.count.i.i = zext i32 %17 to i64 - br i1 %cmp221.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -vector.ph: ; preds = %5 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert11 = insertelement <8 x i32> undef, i32 %21, i32 0 - %broadcast.splat12 = shufflevector <8 x i32> %broadcast.splatinsert11, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert13 = insertelement <8 x i32> undef, i32 %21, i32 0 - %broadcast.splat14 = shufflevector <8 x i32> %broadcast.splatinsert13, <8 x i32> undef, <8 x i32> zeroinitializer - %23 = or <8 x i64> %broadcast.splat, - %24 = trunc <8 x i64> %23 to <8 x i32> - %25 = trunc i64 %mul.i.i.i to i32 - %26 = or i32 %25, 8 - %27 = insertelement <8 x i32> undef, i32 %26, i64 0 - %28 = shufflevector <8 x i32> %27, <8 x i32> undef, <8 x i32> zeroinitializer - %29 = or <8 x i32> %28, - %30 = icmp sgt <8 x i32> %broadcast.splat12, %24 - %31 = icmp sgt <8 x i32> %broadcast.splat14, %29 - %32 = extractelement <8 x i64> %23, i32 0 - %33 = shl i64 %32, 32 - %34 = ashr exact i64 %33, 32 - %35 = getelementptr inbounds float, float* %13, i64 %34 - %36 = bitcast float* %35 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %36, i32 4, <8 x i1> %30), !tbaa !12, !llvm.access.group !16 - %37 = getelementptr inbounds float, float* %35, i64 8 - %38 = bitcast float* %37 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %38, i32 4, <8 x i1> %31), !tbaa !12, !llvm.access.group !16 - %39 = or <8 x i64> %broadcast.splat, - %40 = trunc <8 x i64> %39 to <8 x i32> - %41 = trunc i64 %mul.i.i.i to i32 - %42 = or i32 %41, 8 - %43 = insertelement <8 x i32> undef, i32 %42, i64 0 - %44 = shufflevector <8 x i32> %43, <8 x i32> undef, <8 x i32> zeroinitializer - %45 = or <8 x i32> %44, - %46 = icmp sgt <8 x i32> %broadcast.splat12, %40 - %47 = icmp sgt <8 x i32> %broadcast.splat14, %45 - %48 = extractelement <8 x i64> %39, i32 0 - %49 = shl i64 %48, 32 - %50 = ashr exact i64 %49, 32 - %51 = getelementptr inbounds float, float* %13, i64 %50 - %52 = bitcast float* %51 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %52, i32 4, <8 x i1> %46), !tbaa !12, !llvm.access.group !16 - %53 = getelementptr inbounds float, float* %51, i64 8 - %54 = bitcast float* %53 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %54, i32 4, <8 x i1> %47), !tbaa !12, !llvm.access.group !16 - %55 = or <8 x i64> %broadcast.splat, - %56 = trunc <8 x i64> %55 to <8 x i32> - %57 = trunc i64 %mul.i.i.i to i32 - %58 = or i32 %57, 8 - %59 = insertelement <8 x i32> undef, i32 %58, i64 0 - %60 = shufflevector <8 x i32> %59, <8 x i32> undef, <8 x i32> zeroinitializer - %61 = or <8 x i32> %60, - %62 = icmp sgt <8 x i32> %broadcast.splat12, %56 - %63 = icmp sgt <8 x i32> %broadcast.splat14, %61 - %64 = extractelement <8 x i64> %55, i32 0 - %65 = shl i64 %64, 32 - %66 = ashr exact i64 %65, 32 - %67 = getelementptr inbounds float, float* %13, i64 %66 - %68 = bitcast float* %67 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %68, i32 4, <8 x i1> %62), !tbaa !12, !llvm.access.group !16 - %69 = getelementptr inbounds float, float* %67, i64 8 - %70 = bitcast float* %69 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %70, i32 4, <8 x i1> %63), !tbaa !12, !llvm.access.group !16 - %71 = or <8 x i64> %broadcast.splat, - %72 = trunc <8 x i64> %71 to <8 x i32> - %73 = trunc i64 %mul.i.i.i to i32 - %74 = or i32 %73, 8 - %75 = insertelement <8 x i32> undef, i32 %74, i64 0 - %76 = shufflevector <8 x i32> %75, <8 x i32> undef, <8 x i32> zeroinitializer - %77 = or <8 x i32> %76, - %78 = icmp sgt <8 x i32> %broadcast.splat12, %72 - %79 = icmp sgt <8 x i32> %broadcast.splat14, %77 - %80 = extractelement <8 x i64> %71, i32 0 - %81 = shl i64 %80, 32 - %82 = ashr exact i64 %81, 32 - %83 = getelementptr inbounds float, float* %13, i64 %82 - %84 = bitcast float* %83 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %84, i32 4, <8 x i1> %78), !tbaa !12, !llvm.access.group !16 - %85 = getelementptr inbounds float, float* %83, i64 8 - %86 = bitcast float* %85 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %86, i32 4, <8 x i1> %79), !tbaa !12, !llvm.access.group !16 - %87 = or <8 x i64> %broadcast.splat, - %88 = trunc <8 x i64> %87 to <8 x i32> - %89 = trunc i64 %mul.i.i.i to i32 - %90 = or i32 %89, 8 - %91 = insertelement <8 x i32> undef, i32 %90, i64 0 - %92 = shufflevector <8 x i32> %91, <8 x i32> undef, <8 x i32> zeroinitializer - %93 = or <8 x i32> %92, - %94 = icmp sgt <8 x i32> %broadcast.splat12, %88 - %95 = icmp sgt <8 x i32> %broadcast.splat14, %93 - %96 = extractelement <8 x i64> %87, i32 0 - %97 = shl i64 %96, 32 - %98 = ashr exact i64 %97, 32 - %99 = getelementptr inbounds float, float* %13, i64 %98 - %100 = bitcast float* %99 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %100, i32 4, <8 x i1> %94), !tbaa !12, !llvm.access.group !16 - %101 = getelementptr inbounds float, float* %99, i64 8 - %102 = bitcast float* %101 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %102, i32 4, <8 x i1> %95), !tbaa !12, !llvm.access.group !16 - %103 = or <8 x i64> %broadcast.splat, - %104 = trunc <8 x i64> %103 to <8 x i32> - %105 = trunc i64 %mul.i.i.i to i32 - %106 = or i32 %105, 8 - %107 = insertelement <8 x i32> undef, i32 %106, i64 0 - %108 = shufflevector <8 x i32> %107, <8 x i32> undef, <8 x i32> zeroinitializer - %109 = or <8 x i32> %108, - %110 = icmp sgt <8 x i32> %broadcast.splat12, %104 - %111 = icmp sgt <8 x i32> %broadcast.splat14, %109 - %112 = extractelement <8 x i64> %103, i32 0 - %113 = shl i64 %112, 32 - %114 = ashr exact i64 %113, 32 - %115 = getelementptr inbounds float, float* %13, i64 %114 - %116 = bitcast float* %115 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %116, i32 4, <8 x i1> %110), !tbaa !12, !llvm.access.group !16 - %117 = getelementptr inbounds float, float* %115, i64 8 - %118 = bitcast float* %117 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %118, i32 4, <8 x i1> %111), !tbaa !12, !llvm.access.group !16 - %119 = or <8 x i64> %broadcast.splat, - %120 = trunc <8 x i64> %119 to <8 x i32> - %121 = trunc i64 %mul.i.i.i to i32 - %122 = or i32 %121, 8 - %123 = insertelement <8 x i32> undef, i32 %122, i64 0 - %124 = shufflevector <8 x i32> %123, <8 x i32> undef, <8 x i32> zeroinitializer - %125 = or <8 x i32> %124, - %126 = icmp sgt <8 x i32> %broadcast.splat12, %120 - %127 = icmp sgt <8 x i32> %broadcast.splat14, %125 - %128 = extractelement <8 x i64> %119, i32 0 - %129 = shl i64 %128, 32 - %130 = ashr exact i64 %129, 32 - %131 = getelementptr inbounds float, float* %13, i64 %130 - %132 = bitcast float* %131 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %132, i32 4, <8 x i1> %126), !tbaa !12, !llvm.access.group !16 - %133 = getelementptr inbounds float, float* %131, i64 8 - %134 = bitcast float* %133 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %134, i32 4, <8 x i1> %127), !tbaa !12, !llvm.access.group !16 - %135 = or <8 x i64> %broadcast.splat, - %136 = trunc <8 x i64> %135 to <8 x i32> - %137 = trunc i64 %mul.i.i.i to i32 - %138 = or i32 %137, 8 - %139 = insertelement <8 x i32> undef, i32 %138, i64 0 - %140 = shufflevector <8 x i32> %139, <8 x i32> undef, <8 x i32> zeroinitializer - %141 = or <8 x i32> %140, - %142 = icmp sgt <8 x i32> %broadcast.splat12, %136 - %143 = icmp sgt <8 x i32> %broadcast.splat14, %141 - %144 = extractelement <8 x i64> %135, i32 0 - %145 = shl i64 %144, 32 - %146 = ashr exact i64 %145, 32 - %147 = getelementptr inbounds float, float* %13, i64 %146 - %148 = bitcast float* %147 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %148, i32 4, <8 x i1> %142), !tbaa !12, !llvm.access.group !16 - %149 = getelementptr inbounds float, float* %147, i64 8 - %150 = bitcast float* %149 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %150, i32 4, <8 x i1> %143), !tbaa !12, !llvm.access.group !16 - %151 = or <8 x i64> %broadcast.splat, - %152 = trunc <8 x i64> %151 to <8 x i32> - %153 = trunc i64 %mul.i.i.i to i32 - %154 = or i32 %153, 8 - %155 = insertelement <8 x i32> undef, i32 %154, i64 0 - %156 = shufflevector <8 x i32> %155, <8 x i32> undef, <8 x i32> zeroinitializer - %157 = or <8 x i32> %156, - %158 = icmp sgt <8 x i32> %broadcast.splat12, %152 - %159 = icmp sgt <8 x i32> %broadcast.splat14, %157 - %160 = extractelement <8 x i64> %151, i32 0 - %161 = shl i64 %160, 32 - %162 = ashr exact i64 %161, 32 - %163 = getelementptr inbounds float, float* %13, i64 %162 - %164 = bitcast float* %163 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %164, i32 4, <8 x i1> %158), !tbaa !12, !llvm.access.group !16 - %165 = getelementptr inbounds float, float* %163, i64 8 - %166 = bitcast float* %165 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %166, i32 4, <8 x i1> %159), !tbaa !12, !llvm.access.group !16 - %167 = or <8 x i64> %broadcast.splat, - %168 = trunc <8 x i64> %167 to <8 x i32> - %169 = trunc i64 %mul.i.i.i to i32 - %170 = or i32 %169, 8 - %171 = insertelement <8 x i32> undef, i32 %170, i64 0 - %172 = shufflevector <8 x i32> %171, <8 x i32> undef, <8 x i32> zeroinitializer - %173 = or <8 x i32> %172, - %174 = icmp sgt <8 x i32> %broadcast.splat12, %168 - %175 = icmp sgt <8 x i32> %broadcast.splat14, %173 - %176 = extractelement <8 x i64> %167, i32 0 - %177 = shl i64 %176, 32 - %178 = ashr exact i64 %177, 32 - %179 = getelementptr inbounds float, float* %13, i64 %178 - %180 = bitcast float* %179 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %180, i32 4, <8 x i1> %174), !tbaa !12, !llvm.access.group !16 - %181 = getelementptr inbounds float, float* %179, i64 8 - %182 = bitcast float* %181 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %182, i32 4, <8 x i1> %175), !tbaa !12, !llvm.access.group !16 - %183 = or <8 x i64> %broadcast.splat, - %184 = trunc <8 x i64> %183 to <8 x i32> - %185 = trunc i64 %mul.i.i.i to i32 - %186 = or i32 %185, 8 - %187 = insertelement <8 x i32> undef, i32 %186, i64 0 - %188 = shufflevector <8 x i32> %187, <8 x i32> undef, <8 x i32> zeroinitializer - %189 = or <8 x i32> %188, - %190 = icmp sgt <8 x i32> %broadcast.splat12, %184 - %191 = icmp sgt <8 x i32> %broadcast.splat14, %189 - %192 = extractelement <8 x i64> %183, i32 0 - %193 = shl i64 %192, 32 - %194 = ashr exact i64 %193, 32 - %195 = getelementptr inbounds float, float* %13, i64 %194 - %196 = bitcast float* %195 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %196, i32 4, <8 x i1> %190), !tbaa !12, !llvm.access.group !16 - %197 = getelementptr inbounds float, float* %195, i64 8 - %198 = bitcast float* %197 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %198, i32 4, <8 x i1> %191), !tbaa !12, !llvm.access.group !16 - %199 = or <8 x i64> %broadcast.splat, - %200 = trunc <8 x i64> %199 to <8 x i32> - %201 = trunc i64 %mul.i.i.i to i32 - %202 = or i32 %201, 8 - %203 = insertelement <8 x i32> undef, i32 %202, i64 0 - %204 = shufflevector <8 x i32> %203, <8 x i32> undef, <8 x i32> zeroinitializer - %205 = or <8 x i32> %204, - %206 = icmp sgt <8 x i32> %broadcast.splat12, %200 - %207 = icmp sgt <8 x i32> %broadcast.splat14, %205 - %208 = extractelement <8 x i64> %199, i32 0 - %209 = shl i64 %208, 32 - %210 = ashr exact i64 %209, 32 - %211 = getelementptr inbounds float, float* %13, i64 %210 - %212 = bitcast float* %211 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %212, i32 4, <8 x i1> %206), !tbaa !12, !llvm.access.group !16 - %213 = getelementptr inbounds float, float* %211, i64 8 - %214 = bitcast float* %213 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %214, i32 4, <8 x i1> %207), !tbaa !12, !llvm.access.group !16 - %215 = or <8 x i64> %broadcast.splat, - %216 = trunc <8 x i64> %215 to <8 x i32> - %217 = trunc i64 %mul.i.i.i to i32 - %218 = or i32 %217, 8 - %219 = insertelement <8 x i32> undef, i32 %218, i64 0 - %220 = shufflevector <8 x i32> %219, <8 x i32> undef, <8 x i32> zeroinitializer - %221 = or <8 x i32> %220, - %222 = icmp sgt <8 x i32> %broadcast.splat12, %216 - %223 = icmp sgt <8 x i32> %broadcast.splat14, %221 - %224 = extractelement <8 x i64> %215, i32 0 - %225 = shl i64 %224, 32 - %226 = ashr exact i64 %225, 32 - %227 = getelementptr inbounds float, float* %13, i64 %226 - %228 = bitcast float* %227 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %228, i32 4, <8 x i1> %222), !tbaa !12, !llvm.access.group !16 - %229 = getelementptr inbounds float, float* %227, i64 8 - %230 = bitcast float* %229 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %230, i32 4, <8 x i1> %223), !tbaa !12, !llvm.access.group !16 - %231 = or <8 x i64> %broadcast.splat, - %232 = trunc <8 x i64> %231 to <8 x i32> - %233 = trunc i64 %mul.i.i.i to i32 - %234 = or i32 %233, 8 - %235 = insertelement <8 x i32> undef, i32 %234, i64 0 - %236 = shufflevector <8 x i32> %235, <8 x i32> undef, <8 x i32> zeroinitializer - %237 = or <8 x i32> %236, - %238 = icmp sgt <8 x i32> %broadcast.splat12, %232 - %239 = icmp sgt <8 x i32> %broadcast.splat14, %237 - %240 = extractelement <8 x i64> %231, i32 0 - %241 = shl i64 %240, 32 - %242 = ashr exact i64 %241, 32 - %243 = getelementptr inbounds float, float* %13, i64 %242 - %244 = bitcast float* %243 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %244, i32 4, <8 x i1> %238), !tbaa !12, !llvm.access.group !16 - %245 = getelementptr inbounds float, float* %243, i64 8 - %246 = bitcast float* %245 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %246, i32 4, <8 x i1> %239), !tbaa !12, !llvm.access.group !16 - %247 = or <8 x i64> %broadcast.splat, - %248 = trunc <8 x i64> %247 to <8 x i32> - %249 = trunc i64 %mul.i.i.i to i32 - %250 = or i32 %249, 8 - %251 = insertelement <8 x i32> undef, i32 %250, i64 0 - %252 = shufflevector <8 x i32> %251, <8 x i32> undef, <8 x i32> zeroinitializer - %253 = or <8 x i32> %252, - %254 = icmp sgt <8 x i32> %broadcast.splat12, %248 - %255 = icmp sgt <8 x i32> %broadcast.splat14, %253 - %256 = extractelement <8 x i64> %247, i32 0 - %257 = shl i64 %256, 32 - %258 = ashr exact i64 %257, 32 - %259 = getelementptr inbounds float, float* %13, i64 %258 - %260 = bitcast float* %259 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %260, i32 4, <8 x i1> %254), !tbaa !12, !llvm.access.group !16 - %261 = getelementptr inbounds float, float* %259, i64 8 - %262 = bitcast float* %261 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %262, i32 4, <8 x i1> %255), !tbaa !12, !llvm.access.group !16 - %263 = or <8 x i64> %broadcast.splat, - %264 = trunc <8 x i64> %263 to <8 x i32> - %265 = trunc i64 %mul.i.i.i to i32 - %266 = or i32 %265, 8 - %267 = insertelement <8 x i32> undef, i32 %266, i64 0 - %268 = shufflevector <8 x i32> %267, <8 x i32> undef, <8 x i32> zeroinitializer - %269 = or <8 x i32> %268, - %270 = icmp sgt <8 x i32> %broadcast.splat12, %264 - %271 = icmp sgt <8 x i32> %broadcast.splat14, %269 - %272 = extractelement <8 x i64> %263, i32 0 - %273 = shl i64 %272, 32 - %274 = ashr exact i64 %273, 32 - %275 = getelementptr inbounds float, float* %13, i64 %274 - %276 = bitcast float* %275 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %276, i32 4, <8 x i1> %270), !tbaa !12, !llvm.access.group !16 - %277 = getelementptr inbounds float, float* %275, i64 8 - %278 = bitcast float* %277 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %278, i32 4, <8 x i1> %271), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_bicgKernel2.exit - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %292, %if.end.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %21, %conv.i.i.us - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %if.then.i.i.us - %indvars.iv.next.i.i3.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %if.then.i.i.us ] - %279 = phi float [ %284, %for.body.i.i.us ], [ 0.000000e+00, %if.then.i.i.us ] - %280 = mul nsw i64 %indvars.iv.next.i.i3.us, %22 - %281 = add nsw i64 %280, %idxprom.i.i.us - %arrayidx5.i.i.us = getelementptr inbounds float, float* %7, i64 %281 - %282 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %arrayidx7.i.i.us = getelementptr inbounds float, float* %10, i64 %indvars.iv.next.i.i3.us - %283 = load float, float* %arrayidx7.i.i.us, align 4, !tbaa !12 - %284 = tail call float @llvm.fmuladd.f32(float %282, float %283, float %279) #2 - store float %284, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i3.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.end.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %285 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %285, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %21, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -_pocl_kernel_bicgKernel2.exit.loopexit: ; preds = %if.end.i.i.us.1 - br label %_pocl_kernel_bicgKernel2.exit - -_pocl_kernel_bicgKernel2.exit: ; preds = %_pocl_kernel_bicgKernel2.exit.loopexit, %vector.ph - ret void - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %if.then.i.i.us.1 - %indvars.iv.next.i.i3.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %if.then.i.i.us.1 ] - %286 = phi float [ %291, %for.body.i.i.us.1 ], [ 0.000000e+00, %if.then.i.i.us.1 ] - %287 = mul nsw i64 %indvars.iv.next.i.i3.us.1, %22 - %288 = add nsw i64 %287, %idxprom.i.i.us.1 - %arrayidx5.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %288 - %289 = load float, float* %arrayidx5.i.i.us.1, align 4, !tbaa !12 - %arrayidx7.i.i.us.1 = getelementptr inbounds float, float* %10, i64 %indvars.iv.next.i.i3.us.1 - %290 = load float, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12 - %291 = tail call float @llvm.fmuladd.f32(float %289, float %290, float %286) #2 - store float %291, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.1, label %if.end.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.end.i.i.us.1.loopexit, %if.end.i.i.us - %292 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %292, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_bicgKernel2.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #3 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } -attributes #3 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !""} -!10 = !{!"A", !"r", !"s", !"nx", !"ny"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = distinct !{!20, !21} -!21 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/correlation_corr.ll b/pocl_irs/correlation_corr.ll deleted file mode 100644 index 22bc19f..0000000 --- a/pocl_irs/correlation_corr.ll +++ /dev/null @@ -1,485 +0,0 @@ -; ModuleID = './KK/KIGILFKIOCMFGODNJKKCPJJIDHNBMKHCFFGPF/corr_kernel/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_corr_kernel(float* nocapture %0, float* nocapture readonly %1, i32 %2, i32 %3, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %4, i64 %5, i64 %6, i64 %7) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %5, 8 - %sub.i = add nsw i32 %2, -1 - %cmp662.i = icmp sgt i32 %3, 0 - %9 = sext i32 %2 to i64 - %wide.trip.count.i = zext i32 %3 to i64 - br i1 %cmp662.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_entry.entry.i.preheader - -pregion_for_entry.entry.i.preheader: ; preds = %8 - br label %pregion_for_entry.entry.i - -pregion_for_entry.entry.i.us.preheader: ; preds = %8 - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.us: ; preds = %if.end.i.us, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %23, %if.end.i.us ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp sgt i32 %sub.i, %conv.i.us - br i1 %cmp.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %mul.i.us = mul nsw i32 %conv.i.us, %2 - %add.i.us = add nsw i32 %mul.i.us, %conv.i.us - %idxprom.i.us = sext i32 %add.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us - store float 1.000000e+00, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %j2.064.i.us = add nsw i32 %conv.i.us, 1 - %cmp365.i.us = icmp slt i32 %j2.064.i.us, %2 - br i1 %cmp365.i.us, label %for.cond5.preheader.lr.ph.i.us, label %if.end.i.us - -for.cond5.preheader.lr.ph.i.us: ; preds = %if.then.i.us - %sext.i.us = shl i64 %add1.i.i.us, 32 - %10 = ashr exact i64 %sext.i.us, 32 - %11 = add nsw i64 %10, 1 - %12 = sext i32 %mul.i.us to i64 - br label %for.cond5.preheader.us.i.us - -for.cond5.preheader.us.i.us: ; preds = %for.end.loopexit.us.i.us, %for.cond5.preheader.lr.ph.i.us - %indvars.iv.next71.us.i9.us = phi i64 [ %indvars.iv.next71.us.i.us, %for.end.loopexit.us.i.us ], [ %11, %for.cond5.preheader.lr.ph.i.us ] - %13 = add nsw i64 %indvars.iv.next71.us.i9.us, %12 - %arrayidx21.us.i.us = getelementptr inbounds float, float* %0, i64 %13 - %.pre.us.i1.us11 = load float, float* %arrayidx21.us.i.us, align 4, !tbaa !12 - br label %for.body8.us.i.us - -for.body8.us.i.us: ; preds = %for.body8.us.i.us, %for.cond5.preheader.us.i.us - %indvars.iv.next.us.i7.us = phi i64 [ %indvars.iv.next.us.i.us, %for.body8.us.i.us ], [ 0, %for.cond5.preheader.us.i.us ] - %14 = phi float [ %20, %for.body8.us.i.us ], [ %.pre.us.i1.us11, %for.cond5.preheader.us.i.us ] - %15 = mul nsw i64 %indvars.iv.next.us.i7.us, %9 - %16 = add nsw i64 %15, %10 - %arrayidx12.us.i.us = getelementptr inbounds float, float* %1, i64 %16 - %17 = load float, float* %arrayidx12.us.i.us, align 4, !tbaa !12 - %18 = add nsw i64 %15, %indvars.iv.next71.us.i9.us - %arrayidx16.us.i.us = getelementptr inbounds float, float* %1, i64 %18 - %19 = load float, float* %arrayidx16.us.i.us, align 4, !tbaa !12 - %20 = tail call float @llvm.fmuladd.f32(float %17, float %19, float %14) #2 - store float %20, float* %arrayidx21.us.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.us.i.us = add nuw nsw i64 %indvars.iv.next.us.i7.us, 1 - %exitcond.not.us.i.us = icmp eq i64 %indvars.iv.next.us.i.us, %wide.trip.count.i - br i1 %exitcond.not.us.i.us, label %for.end.loopexit.us.i.us, label %for.body8.us.i.us, !llvm.loop !18 - -for.end.loopexit.us.i.us: ; preds = %for.body8.us.i.us - %.lcssa = phi float [ %20, %for.body8.us.i.us ] - %21 = mul nsw i64 %indvars.iv.next71.us.i9.us, %9 - %22 = add nsw i64 %21, %10 - %arrayidx29.us.i.us = getelementptr inbounds float, float* %0, i64 %22 - store float %.lcssa, float* %arrayidx29.us.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next71.us.i.us = add nsw i64 %indvars.iv.next71.us.i9.us, 1 - %lftr.wideiv.us.i.us = trunc i64 %indvars.iv.next71.us.i.us to i32 - %exitcond76.not.us.i.us = icmp eq i32 %lftr.wideiv.us.i.us, %2 - br i1 %exitcond76.not.us.i.us, label %if.end.i.us.loopexit, label %for.cond5.preheader.us.i.us, !llvm.loop !20 - -if.end.i.us.loopexit: ; preds = %for.end.loopexit.us.i.us - br label %if.end.i.us - -if.end.i.us: ; preds = %if.end.i.us.loopexit, %if.then.i.us, %pregion_for_entry.entry.i.us - %23 = add nuw nsw i64 %_local_id_x.0.us, 1 - %exitcond.not = icmp eq i64 %23, 256 - br i1 %exitcond.not, label %corr_kernel.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !21 - -pregion_for_entry.entry.i: ; preds = %if.end.i, %pregion_for_entry.entry.i.preheader - %_local_id_x.0 = phi i64 [ %33, %if.end.i ], [ 0, %pregion_for_entry.entry.i.preheader ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %conv.i = trunc i64 %add1.i.i to i32 - %cmp.i = icmp sgt i32 %sub.i, %conv.i - br i1 %cmp.i, label %if.then.i, label %if.end.i - -if.then.i: ; preds = %pregion_for_entry.entry.i - %mul.i = mul nsw i32 %conv.i, %2 - %add.i = add nsw i32 %mul.i, %conv.i - %idxprom.i = sext i32 %add.i to i64 - %arrayidx.i = getelementptr inbounds float, float* %0, i64 %idxprom.i - store float 1.000000e+00, float* %arrayidx.i, align 4, !tbaa !12, !llvm.access.group !16 - %j2.064.i = add nsw i32 %conv.i, 1 - %cmp365.i = icmp slt i32 %j2.064.i, %2 - br i1 %cmp365.i, label %for.cond5.preheader.lr.ph.i, label %if.end.i - -for.cond5.preheader.lr.ph.i: ; preds = %if.then.i - %sext.i = shl i64 %add1.i.i, 32 - %24 = ashr exact i64 %sext.i, 32 - %25 = add nsw i64 %24, 1 - %26 = sext i32 %mul.i to i64 - br label %for.cond5.preheader.i - -for.cond5.preheader.i: ; preds = %for.cond5.preheader.i, %for.cond5.preheader.lr.ph.i - %indvars.iv.next71.i3 = phi i64 [ %indvars.iv.next71.i, %for.cond5.preheader.i ], [ %25, %for.cond5.preheader.lr.ph.i ] - %27 = add nsw i64 %indvars.iv.next71.i3, %26 - %arrayidx25.i = getelementptr inbounds float, float* %0, i64 %27 - %28 = bitcast float* %arrayidx25.i to i32* - %29 = load i32, i32* %28, align 4, !tbaa !12 - %30 = mul nsw i64 %indvars.iv.next71.i3, %9 - %31 = add nsw i64 %30, %24 - %arrayidx29.i = getelementptr inbounds float, float* %0, i64 %31 - %32 = bitcast float* %arrayidx29.i to i32* - store i32 %29, i32* %32, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next71.i = add nsw i64 %indvars.iv.next71.i3, 1 - %lftr.wideiv.i = trunc i64 %indvars.iv.next71.i to i32 - %exitcond76.not.i = icmp eq i32 %lftr.wideiv.i, %2 - br i1 %exitcond76.not.i, label %if.end.i.loopexit, label %for.cond5.preheader.i, !llvm.loop !20 - -if.end.i.loopexit: ; preds = %for.cond5.preheader.i - br label %if.end.i - -if.end.i: ; preds = %if.end.i.loopexit, %if.then.i, %pregion_for_entry.entry.i - %33 = add nuw nsw i64 %_local_id_x.0, 1 - %exitcond13.not = icmp eq i64 %33, 256 - br i1 %exitcond13.not, label %corr_kernel.exit.loopexit17, label %pregion_for_entry.entry.i, !llvm.loop !21 - -corr_kernel.exit.loopexit: ; preds = %if.end.i.us - br label %corr_kernel.exit - -corr_kernel.exit.loopexit17: ; preds = %if.end.i - br label %corr_kernel.exit - -corr_kernel.exit: ; preds = %corr_kernel.exit.loopexit17, %corr_kernel.exit.loopexit - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_corr_kernel_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = add nsw i32 %16, -1 - %cmp662.i.i = icmp sgt i32 %20, 0 - %21 = sext i32 %16 to i64 - %wide.trip.count.i.i = zext i32 %20 to i64 - br i1 %cmp662.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_entry.entry.i.i.preheader - -pregion_for_entry.entry.i.i.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %35, %if.end.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %sub.i.i, %conv.i.i.us - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %16, %conv.i.i.us - %add.i.i.us = add nsw i32 %mul.i.i.us, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us - store float 1.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %j2.064.i.i.us = add nsw i32 %conv.i.i.us, 1 - %cmp365.i.i.us = icmp slt i32 %j2.064.i.i.us, %16 - br i1 %cmp365.i.i.us, label %for.cond5.preheader.lr.ph.i.i.us, label %if.end.i.i.us - -for.cond5.preheader.lr.ph.i.i.us: ; preds = %if.then.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %22 = ashr exact i64 %sext.i.i.us, 32 - %23 = add nsw i64 %22, 1 - %24 = sext i32 %mul.i.i.us to i64 - br label %for.cond5.preheader.us.i.i.us - -for.cond5.preheader.us.i.i.us: ; preds = %for.end.loopexit.us.i.i.us, %for.cond5.preheader.lr.ph.i.i.us - %indvars.iv.next71.us.i.i9.us = phi i64 [ %indvars.iv.next71.us.i.i.us, %for.end.loopexit.us.i.i.us ], [ %23, %for.cond5.preheader.lr.ph.i.i.us ] - %25 = add nsw i64 %indvars.iv.next71.us.i.i9.us, %24 - %arrayidx21.us.i.i.us = getelementptr inbounds float, float* %8, i64 %25 - %.pre.us.i.i1.us11 = load float, float* %arrayidx21.us.i.i.us, align 4, !tbaa !12 - br label %for.body8.us.i.i.us - -for.body8.us.i.i.us: ; preds = %for.body8.us.i.i.us, %for.cond5.preheader.us.i.i.us - %indvars.iv.next.us.i.i7.us = phi i64 [ %indvars.iv.next.us.i.i.us, %for.body8.us.i.i.us ], [ 0, %for.cond5.preheader.us.i.i.us ] - %26 = phi float [ %32, %for.body8.us.i.i.us ], [ %.pre.us.i.i1.us11, %for.cond5.preheader.us.i.i.us ] - %27 = mul nsw i64 %indvars.iv.next.us.i.i7.us, %21 - %28 = add nsw i64 %27, %22 - %arrayidx12.us.i.i.us = getelementptr inbounds float, float* %12, i64 %28 - %29 = load float, float* %arrayidx12.us.i.i.us, align 4, !tbaa !12 - %30 = add nsw i64 %27, %indvars.iv.next71.us.i.i9.us - %arrayidx16.us.i.i.us = getelementptr inbounds float, float* %12, i64 %30 - %31 = load float, float* %arrayidx16.us.i.i.us, align 4, !tbaa !12 - %32 = tail call float @llvm.fmuladd.f32(float %29, float %31, float %26) #2 - store float %32, float* %arrayidx21.us.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.us.i.i.us = add nuw nsw i64 %indvars.iv.next.us.i.i7.us, 1 - %exitcond.not.us.i.i.us = icmp eq i64 %indvars.iv.next.us.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.us.i.i.us, label %for.end.loopexit.us.i.i.us, label %for.body8.us.i.i.us, !llvm.loop !18 - -for.end.loopexit.us.i.i.us: ; preds = %for.body8.us.i.i.us - %.lcssa = phi float [ %32, %for.body8.us.i.i.us ] - %33 = mul nsw i64 %indvars.iv.next71.us.i.i9.us, %21 - %34 = add nsw i64 %33, %22 - %arrayidx29.us.i.i.us = getelementptr inbounds float, float* %8, i64 %34 - store float %.lcssa, float* %arrayidx29.us.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next71.us.i.i.us = add nsw i64 %indvars.iv.next71.us.i.i9.us, 1 - %lftr.wideiv.us.i.i.us = trunc i64 %indvars.iv.next71.us.i.i.us to i32 - %exitcond76.not.us.i.i.us = icmp eq i32 %16, %lftr.wideiv.us.i.i.us - br i1 %exitcond76.not.us.i.i.us, label %if.end.i.i.us.loopexit, label %for.cond5.preheader.us.i.i.us, !llvm.loop !20 - -if.end.i.i.us.loopexit: ; preds = %for.end.loopexit.us.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.end.i.i.us.loopexit, %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %35 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %35, 256 - br i1 %exitcond.not, label %_pocl_kernel_corr_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -pregion_for_entry.entry.i.i: ; preds = %if.end.i.i, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ %45, %if.end.i.i ], [ 0, %pregion_for_entry.entry.i.i.preheader ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %sub.i.i, %conv.i.i - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %mul.i.i = mul nsw i32 %16, %conv.i.i - %add.i.i = add nsw i32 %mul.i.i, %conv.i.i - %idxprom.i.i = sext i32 %add.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %8, i64 %idxprom.i.i - store float 1.000000e+00, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !16 - %j2.064.i.i = add nsw i32 %conv.i.i, 1 - %cmp365.i.i = icmp slt i32 %j2.064.i.i, %16 - br i1 %cmp365.i.i, label %for.cond5.preheader.lr.ph.i.i, label %if.end.i.i - -for.cond5.preheader.lr.ph.i.i: ; preds = %if.then.i.i - %sext.i.i = shl i64 %add1.i.i.i, 32 - %36 = ashr exact i64 %sext.i.i, 32 - %37 = add nsw i64 %36, 1 - %38 = sext i32 %mul.i.i to i64 - br label %for.cond5.preheader.i.i - -for.cond5.preheader.i.i: ; preds = %for.cond5.preheader.i.i, %for.cond5.preheader.lr.ph.i.i - %indvars.iv.next71.i.i3 = phi i64 [ %indvars.iv.next71.i.i, %for.cond5.preheader.i.i ], [ %37, %for.cond5.preheader.lr.ph.i.i ] - %39 = add nsw i64 %indvars.iv.next71.i.i3, %38 - %arrayidx25.i.i = getelementptr inbounds float, float* %8, i64 %39 - %40 = bitcast float* %arrayidx25.i.i to i32* - %41 = load i32, i32* %40, align 4, !tbaa !12 - %42 = mul nsw i64 %indvars.iv.next71.i.i3, %21 - %43 = add nsw i64 %42, %36 - %arrayidx29.i.i = getelementptr inbounds float, float* %8, i64 %43 - %44 = bitcast float* %arrayidx29.i.i to i32* - store i32 %41, i32* %44, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next71.i.i = add nsw i64 %indvars.iv.next71.i.i3, 1 - %lftr.wideiv.i.i = trunc i64 %indvars.iv.next71.i.i to i32 - %exitcond76.not.i.i = icmp eq i32 %16, %lftr.wideiv.i.i - br i1 %exitcond76.not.i.i, label %if.end.i.i.loopexit, label %for.cond5.preheader.i.i, !llvm.loop !20 - -if.end.i.i.loopexit: ; preds = %for.cond5.preheader.i.i - br label %if.end.i.i - -if.end.i.i: ; preds = %if.end.i.i.loopexit, %if.then.i.i, %pregion_for_entry.entry.i.i - %45 = add nuw nsw i64 %_local_id_x.i.0, 1 - %exitcond13.not = icmp eq i64 %45, 256 - br i1 %exitcond13.not, label %_pocl_kernel_corr_kernel.exit.loopexit17, label %pregion_for_entry.entry.i.i, !llvm.loop !21 - -_pocl_kernel_corr_kernel.exit.loopexit: ; preds = %if.end.i.i.us - br label %_pocl_kernel_corr_kernel.exit - -_pocl_kernel_corr_kernel.exit.loopexit17: ; preds = %if.end.i.i - br label %_pocl_kernel_corr_kernel.exit - -_pocl_kernel_corr_kernel.exit: ; preds = %_pocl_kernel_corr_kernel.exit.loopexit17, %_pocl_kernel_corr_kernel.exit.loopexit - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_corr_kernel_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to i32** - %13 = load i32*, i32** %12, align 8 - %14 = load i32, i32* %13, align 4 - %15 = getelementptr i8*, i8** %0, i64 3 - %16 = bitcast i8** %15 to i32** - %17 = load i32*, i32** %16, align 8 - %18 = load i32, i32* %17, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = add nsw i32 %14, -1 - %cmp662.i.i = icmp sgt i32 %18, 0 - %19 = sext i32 %14 to i64 - %wide.trip.count.i.i = zext i32 %18 to i64 - br i1 %cmp662.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_entry.entry.i.i.preheader - -pregion_for_entry.entry.i.i.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %33, %if.end.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %sub.i.i, %conv.i.i.us - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %14, %conv.i.i.us - %add.i.i.us = add nsw i32 %mul.i.i.us, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us - store float 1.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %j2.064.i.i.us = add nsw i32 %conv.i.i.us, 1 - %cmp365.i.i.us = icmp slt i32 %j2.064.i.i.us, %14 - br i1 %cmp365.i.i.us, label %for.cond5.preheader.lr.ph.i.i.us, label %if.end.i.i.us - -for.cond5.preheader.lr.ph.i.i.us: ; preds = %if.then.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %20 = ashr exact i64 %sext.i.i.us, 32 - %21 = add nsw i64 %20, 1 - %22 = sext i32 %mul.i.i.us to i64 - br label %for.cond5.preheader.us.i.i.us - -for.cond5.preheader.us.i.i.us: ; preds = %for.end.loopexit.us.i.i.us, %for.cond5.preheader.lr.ph.i.i.us - %indvars.iv.next71.us.i.i9.us = phi i64 [ %indvars.iv.next71.us.i.i.us, %for.end.loopexit.us.i.i.us ], [ %21, %for.cond5.preheader.lr.ph.i.i.us ] - %23 = add nsw i64 %indvars.iv.next71.us.i.i9.us, %22 - %arrayidx21.us.i.i.us = getelementptr inbounds float, float* %7, i64 %23 - %.pre.us.i.i1.us11 = load float, float* %arrayidx21.us.i.i.us, align 4, !tbaa !12 - br label %for.body8.us.i.i.us - -for.body8.us.i.i.us: ; preds = %for.body8.us.i.i.us, %for.cond5.preheader.us.i.i.us - %indvars.iv.next.us.i.i7.us = phi i64 [ %indvars.iv.next.us.i.i.us, %for.body8.us.i.i.us ], [ 0, %for.cond5.preheader.us.i.i.us ] - %24 = phi float [ %30, %for.body8.us.i.i.us ], [ %.pre.us.i.i1.us11, %for.cond5.preheader.us.i.i.us ] - %25 = mul nsw i64 %indvars.iv.next.us.i.i7.us, %19 - %26 = add nsw i64 %25, %20 - %arrayidx12.us.i.i.us = getelementptr inbounds float, float* %10, i64 %26 - %27 = load float, float* %arrayidx12.us.i.i.us, align 4, !tbaa !12 - %28 = add nsw i64 %25, %indvars.iv.next71.us.i.i9.us - %arrayidx16.us.i.i.us = getelementptr inbounds float, float* %10, i64 %28 - %29 = load float, float* %arrayidx16.us.i.i.us, align 4, !tbaa !12 - %30 = tail call float @llvm.fmuladd.f32(float %27, float %29, float %24) #2 - store float %30, float* %arrayidx21.us.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.us.i.i.us = add nuw nsw i64 %indvars.iv.next.us.i.i7.us, 1 - %exitcond.not.us.i.i.us = icmp eq i64 %indvars.iv.next.us.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.us.i.i.us, label %for.end.loopexit.us.i.i.us, label %for.body8.us.i.i.us, !llvm.loop !18 - -for.end.loopexit.us.i.i.us: ; preds = %for.body8.us.i.i.us - %.lcssa = phi float [ %30, %for.body8.us.i.i.us ] - %31 = mul nsw i64 %indvars.iv.next71.us.i.i9.us, %19 - %32 = add nsw i64 %31, %20 - %arrayidx29.us.i.i.us = getelementptr inbounds float, float* %7, i64 %32 - store float %.lcssa, float* %arrayidx29.us.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next71.us.i.i.us = add nsw i64 %indvars.iv.next71.us.i.i9.us, 1 - %lftr.wideiv.us.i.i.us = trunc i64 %indvars.iv.next71.us.i.i.us to i32 - %exitcond76.not.us.i.i.us = icmp eq i32 %14, %lftr.wideiv.us.i.i.us - br i1 %exitcond76.not.us.i.i.us, label %if.end.i.i.us.loopexit, label %for.cond5.preheader.us.i.i.us, !llvm.loop !20 - -if.end.i.i.us.loopexit: ; preds = %for.end.loopexit.us.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.end.i.i.us.loopexit, %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %33 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %33, 256 - br i1 %exitcond.not, label %_pocl_kernel_corr_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -pregion_for_entry.entry.i.i: ; preds = %if.end.i.i, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ %43, %if.end.i.i ], [ 0, %pregion_for_entry.entry.i.i.preheader ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %sub.i.i, %conv.i.i - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %mul.i.i = mul nsw i32 %14, %conv.i.i - %add.i.i = add nsw i32 %mul.i.i, %conv.i.i - %idxprom.i.i = sext i32 %add.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %7, i64 %idxprom.i.i - store float 1.000000e+00, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !16 - %j2.064.i.i = add nsw i32 %conv.i.i, 1 - %cmp365.i.i = icmp slt i32 %j2.064.i.i, %14 - br i1 %cmp365.i.i, label %for.cond5.preheader.lr.ph.i.i, label %if.end.i.i - -for.cond5.preheader.lr.ph.i.i: ; preds = %if.then.i.i - %sext.i.i = shl i64 %add1.i.i.i, 32 - %34 = ashr exact i64 %sext.i.i, 32 - %35 = add nsw i64 %34, 1 - %36 = sext i32 %mul.i.i to i64 - br label %for.cond5.preheader.i.i - -for.cond5.preheader.i.i: ; preds = %for.cond5.preheader.i.i, %for.cond5.preheader.lr.ph.i.i - %indvars.iv.next71.i.i3 = phi i64 [ %indvars.iv.next71.i.i, %for.cond5.preheader.i.i ], [ %35, %for.cond5.preheader.lr.ph.i.i ] - %37 = add nsw i64 %indvars.iv.next71.i.i3, %36 - %arrayidx25.i.i = getelementptr inbounds float, float* %7, i64 %37 - %38 = bitcast float* %arrayidx25.i.i to i32* - %39 = load i32, i32* %38, align 4, !tbaa !12 - %40 = mul nsw i64 %indvars.iv.next71.i.i3, %19 - %41 = add nsw i64 %40, %34 - %arrayidx29.i.i = getelementptr inbounds float, float* %7, i64 %41 - %42 = bitcast float* %arrayidx29.i.i to i32* - store i32 %39, i32* %42, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next71.i.i = add nsw i64 %indvars.iv.next71.i.i3, 1 - %lftr.wideiv.i.i = trunc i64 %indvars.iv.next71.i.i to i32 - %exitcond76.not.i.i = icmp eq i32 %14, %lftr.wideiv.i.i - br i1 %exitcond76.not.i.i, label %if.end.i.i.loopexit, label %for.cond5.preheader.i.i, !llvm.loop !20 - -if.end.i.i.loopexit: ; preds = %for.cond5.preheader.i.i - br label %if.end.i.i - -if.end.i.i: ; preds = %if.end.i.i.loopexit, %if.then.i.i, %pregion_for_entry.entry.i.i - %43 = add nuw nsw i64 %_local_id_x.i.0, 1 - %exitcond13.not = icmp eq i64 %43, 256 - br i1 %exitcond13.not, label %_pocl_kernel_corr_kernel.exit.loopexit17, label %pregion_for_entry.entry.i.i, !llvm.loop !21 - -_pocl_kernel_corr_kernel.exit.loopexit: ; preds = %if.end.i.i.us - br label %_pocl_kernel_corr_kernel.exit - -_pocl_kernel_corr_kernel.exit.loopexit17: ; preds = %if.end.i.i - br label %_pocl_kernel_corr_kernel.exit - -_pocl_kernel_corr_kernel.exit: ; preds = %_pocl_kernel_corr_kernel.exit.loopexit17, %_pocl_kernel_corr_kernel.exit.loopexit - ret void -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"float*", !"int", !"int"} -!9 = !{!"", !"", !"", !""} -!10 = !{!"symmat", !"data", !"m", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = distinct !{!20, !19} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/correlation_mean.ll b/pocl_irs/correlation_mean.ll deleted file mode 100644 index eda7a88..0000000 --- a/pocl_irs/correlation_mean.ll +++ /dev/null @@ -1,1150 +0,0 @@ -; ModuleID = './KK/KIGILFKIOCMFGODNJKKCPJJIDHNBMKHCFFGPF/mean_kernel/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_mean_kernel(float* nocapture %0, float* nocapture readonly %1, float %2, i32 %3, i32 %4, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %5, i64 %6, i64 %7, i64 %8) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %6, 8 - %cmp222.i = icmp sgt i32 %4, 0 - %10 = sext i32 %3 to i64 - %wide.trip.count.i = zext i32 %4 to i64 - br i1 %cmp222.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_entry.entry.i.preheader - -pregion_for_entry.entry.i.us.preheader: ; preds = %9 - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.preheader: ; preds = %9 - %div.i = fdiv float 0.000000e+00, %2 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert13 = insertelement <8 x i32> undef, i32 %3, i32 0 - %broadcast.splat14 = shufflevector <8 x i32> %broadcast.splatinsert13, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert15 = insertelement <8 x i32> undef, i32 %3, i32 0 - %broadcast.splat16 = shufflevector <8 x i32> %broadcast.splatinsert15, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert17 = insertelement <8 x float> undef, float %div.i, i32 0 - %broadcast.splat18 = shufflevector <8 x float> %broadcast.splatinsert17, <8 x float> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert19 = insertelement <8 x float> undef, float %div.i, i32 0 - %broadcast.splat20 = shufflevector <8 x float> %broadcast.splatinsert19, <8 x float> undef, <8 x i32> zeroinitializer - %11 = or <8 x i64> %broadcast.splat, - %12 = trunc <8 x i64> %11 to <8 x i32> - %13 = trunc i64 %mul.i.i to i32 - %14 = or i32 %13, 8 - %15 = insertelement <8 x i32> undef, i32 %14, i64 0 - %16 = shufflevector <8 x i32> %15, <8 x i32> undef, <8 x i32> zeroinitializer - %17 = or <8 x i32> %16, - %18 = icmp sgt <8 x i32> %broadcast.splat14, %12 - %19 = icmp sgt <8 x i32> %broadcast.splat16, %17 - %20 = extractelement <8 x i64> %11, i32 0 - %21 = shl i64 %20, 32 - %22 = ashr exact i64 %21, 32 - %23 = getelementptr inbounds float, float* %0, i64 %22 - %24 = bitcast float* %23 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %24, i32 4, <8 x i1> %18), !tbaa !12, !llvm.access.group !16 - %25 = getelementptr inbounds float, float* %23, i64 8 - %26 = bitcast float* %25 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %26, i32 4, <8 x i1> %19), !tbaa !12, !llvm.access.group !16 - %27 = or <8 x i64> %broadcast.splat, - %28 = trunc <8 x i64> %27 to <8 x i32> - %29 = trunc i64 %mul.i.i to i32 - %30 = or i32 %29, 8 - %31 = insertelement <8 x i32> undef, i32 %30, i64 0 - %32 = shufflevector <8 x i32> %31, <8 x i32> undef, <8 x i32> zeroinitializer - %33 = or <8 x i32> %32, - %34 = icmp sgt <8 x i32> %broadcast.splat14, %28 - %35 = icmp sgt <8 x i32> %broadcast.splat16, %33 - %36 = extractelement <8 x i64> %27, i32 0 - %37 = shl i64 %36, 32 - %38 = ashr exact i64 %37, 32 - %39 = getelementptr inbounds float, float* %0, i64 %38 - %40 = bitcast float* %39 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %40, i32 4, <8 x i1> %34), !tbaa !12, !llvm.access.group !16 - %41 = getelementptr inbounds float, float* %39, i64 8 - %42 = bitcast float* %41 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %42, i32 4, <8 x i1> %35), !tbaa !12, !llvm.access.group !16 - %43 = or <8 x i64> %broadcast.splat, - %44 = trunc <8 x i64> %43 to <8 x i32> - %45 = trunc i64 %mul.i.i to i32 - %46 = or i32 %45, 8 - %47 = insertelement <8 x i32> undef, i32 %46, i64 0 - %48 = shufflevector <8 x i32> %47, <8 x i32> undef, <8 x i32> zeroinitializer - %49 = or <8 x i32> %48, - %50 = icmp sgt <8 x i32> %broadcast.splat14, %44 - %51 = icmp sgt <8 x i32> %broadcast.splat16, %49 - %52 = extractelement <8 x i64> %43, i32 0 - %53 = shl i64 %52, 32 - %54 = ashr exact i64 %53, 32 - %55 = getelementptr inbounds float, float* %0, i64 %54 - %56 = bitcast float* %55 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %56, i32 4, <8 x i1> %50), !tbaa !12, !llvm.access.group !16 - %57 = getelementptr inbounds float, float* %55, i64 8 - %58 = bitcast float* %57 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %58, i32 4, <8 x i1> %51), !tbaa !12, !llvm.access.group !16 - %59 = or <8 x i64> %broadcast.splat, - %60 = trunc <8 x i64> %59 to <8 x i32> - %61 = trunc i64 %mul.i.i to i32 - %62 = or i32 %61, 8 - %63 = insertelement <8 x i32> undef, i32 %62, i64 0 - %64 = shufflevector <8 x i32> %63, <8 x i32> undef, <8 x i32> zeroinitializer - %65 = or <8 x i32> %64, - %66 = icmp sgt <8 x i32> %broadcast.splat14, %60 - %67 = icmp sgt <8 x i32> %broadcast.splat16, %65 - %68 = extractelement <8 x i64> %59, i32 0 - %69 = shl i64 %68, 32 - %70 = ashr exact i64 %69, 32 - %71 = getelementptr inbounds float, float* %0, i64 %70 - %72 = bitcast float* %71 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %72, i32 4, <8 x i1> %66), !tbaa !12, !llvm.access.group !16 - %73 = getelementptr inbounds float, float* %71, i64 8 - %74 = bitcast float* %73 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %74, i32 4, <8 x i1> %67), !tbaa !12, !llvm.access.group !16 - %75 = or <8 x i64> %broadcast.splat, - %76 = trunc <8 x i64> %75 to <8 x i32> - %77 = trunc i64 %mul.i.i to i32 - %78 = or i32 %77, 8 - %79 = insertelement <8 x i32> undef, i32 %78, i64 0 - %80 = shufflevector <8 x i32> %79, <8 x i32> undef, <8 x i32> zeroinitializer - %81 = or <8 x i32> %80, - %82 = icmp sgt <8 x i32> %broadcast.splat14, %76 - %83 = icmp sgt <8 x i32> %broadcast.splat16, %81 - %84 = extractelement <8 x i64> %75, i32 0 - %85 = shl i64 %84, 32 - %86 = ashr exact i64 %85, 32 - %87 = getelementptr inbounds float, float* %0, i64 %86 - %88 = bitcast float* %87 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %88, i32 4, <8 x i1> %82), !tbaa !12, !llvm.access.group !16 - %89 = getelementptr inbounds float, float* %87, i64 8 - %90 = bitcast float* %89 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %90, i32 4, <8 x i1> %83), !tbaa !12, !llvm.access.group !16 - %91 = or <8 x i64> %broadcast.splat, - %92 = trunc <8 x i64> %91 to <8 x i32> - %93 = trunc i64 %mul.i.i to i32 - %94 = or i32 %93, 8 - %95 = insertelement <8 x i32> undef, i32 %94, i64 0 - %96 = shufflevector <8 x i32> %95, <8 x i32> undef, <8 x i32> zeroinitializer - %97 = or <8 x i32> %96, - %98 = icmp sgt <8 x i32> %broadcast.splat14, %92 - %99 = icmp sgt <8 x i32> %broadcast.splat16, %97 - %100 = extractelement <8 x i64> %91, i32 0 - %101 = shl i64 %100, 32 - %102 = ashr exact i64 %101, 32 - %103 = getelementptr inbounds float, float* %0, i64 %102 - %104 = bitcast float* %103 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %104, i32 4, <8 x i1> %98), !tbaa !12, !llvm.access.group !16 - %105 = getelementptr inbounds float, float* %103, i64 8 - %106 = bitcast float* %105 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %106, i32 4, <8 x i1> %99), !tbaa !12, !llvm.access.group !16 - %107 = or <8 x i64> %broadcast.splat, - %108 = trunc <8 x i64> %107 to <8 x i32> - %109 = trunc i64 %mul.i.i to i32 - %110 = or i32 %109, 8 - %111 = insertelement <8 x i32> undef, i32 %110, i64 0 - %112 = shufflevector <8 x i32> %111, <8 x i32> undef, <8 x i32> zeroinitializer - %113 = or <8 x i32> %112, - %114 = icmp sgt <8 x i32> %broadcast.splat14, %108 - %115 = icmp sgt <8 x i32> %broadcast.splat16, %113 - %116 = extractelement <8 x i64> %107, i32 0 - %117 = shl i64 %116, 32 - %118 = ashr exact i64 %117, 32 - %119 = getelementptr inbounds float, float* %0, i64 %118 - %120 = bitcast float* %119 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %120, i32 4, <8 x i1> %114), !tbaa !12, !llvm.access.group !16 - %121 = getelementptr inbounds float, float* %119, i64 8 - %122 = bitcast float* %121 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %122, i32 4, <8 x i1> %115), !tbaa !12, !llvm.access.group !16 - %123 = or <8 x i64> %broadcast.splat, - %124 = trunc <8 x i64> %123 to <8 x i32> - %125 = trunc i64 %mul.i.i to i32 - %126 = or i32 %125, 8 - %127 = insertelement <8 x i32> undef, i32 %126, i64 0 - %128 = shufflevector <8 x i32> %127, <8 x i32> undef, <8 x i32> zeroinitializer - %129 = or <8 x i32> %128, - %130 = icmp sgt <8 x i32> %broadcast.splat14, %124 - %131 = icmp sgt <8 x i32> %broadcast.splat16, %129 - %132 = extractelement <8 x i64> %123, i32 0 - %133 = shl i64 %132, 32 - %134 = ashr exact i64 %133, 32 - %135 = getelementptr inbounds float, float* %0, i64 %134 - %136 = bitcast float* %135 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %136, i32 4, <8 x i1> %130), !tbaa !12, !llvm.access.group !16 - %137 = getelementptr inbounds float, float* %135, i64 8 - %138 = bitcast float* %137 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %138, i32 4, <8 x i1> %131), !tbaa !12, !llvm.access.group !16 - %139 = or <8 x i64> %broadcast.splat, - %140 = trunc <8 x i64> %139 to <8 x i32> - %141 = trunc i64 %mul.i.i to i32 - %142 = or i32 %141, 8 - %143 = insertelement <8 x i32> undef, i32 %142, i64 0 - %144 = shufflevector <8 x i32> %143, <8 x i32> undef, <8 x i32> zeroinitializer - %145 = or <8 x i32> %144, - %146 = icmp sgt <8 x i32> %broadcast.splat14, %140 - %147 = icmp sgt <8 x i32> %broadcast.splat16, %145 - %148 = extractelement <8 x i64> %139, i32 0 - %149 = shl i64 %148, 32 - %150 = ashr exact i64 %149, 32 - %151 = getelementptr inbounds float, float* %0, i64 %150 - %152 = bitcast float* %151 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %152, i32 4, <8 x i1> %146), !tbaa !12, !llvm.access.group !16 - %153 = getelementptr inbounds float, float* %151, i64 8 - %154 = bitcast float* %153 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %154, i32 4, <8 x i1> %147), !tbaa !12, !llvm.access.group !16 - %155 = or <8 x i64> %broadcast.splat, - %156 = trunc <8 x i64> %155 to <8 x i32> - %157 = trunc i64 %mul.i.i to i32 - %158 = or i32 %157, 8 - %159 = insertelement <8 x i32> undef, i32 %158, i64 0 - %160 = shufflevector <8 x i32> %159, <8 x i32> undef, <8 x i32> zeroinitializer - %161 = or <8 x i32> %160, - %162 = icmp sgt <8 x i32> %broadcast.splat14, %156 - %163 = icmp sgt <8 x i32> %broadcast.splat16, %161 - %164 = extractelement <8 x i64> %155, i32 0 - %165 = shl i64 %164, 32 - %166 = ashr exact i64 %165, 32 - %167 = getelementptr inbounds float, float* %0, i64 %166 - %168 = bitcast float* %167 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %168, i32 4, <8 x i1> %162), !tbaa !12, !llvm.access.group !16 - %169 = getelementptr inbounds float, float* %167, i64 8 - %170 = bitcast float* %169 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %170, i32 4, <8 x i1> %163), !tbaa !12, !llvm.access.group !16 - %171 = or <8 x i64> %broadcast.splat, - %172 = trunc <8 x i64> %171 to <8 x i32> - %173 = trunc i64 %mul.i.i to i32 - %174 = or i32 %173, 8 - %175 = insertelement <8 x i32> undef, i32 %174, i64 0 - %176 = shufflevector <8 x i32> %175, <8 x i32> undef, <8 x i32> zeroinitializer - %177 = or <8 x i32> %176, - %178 = icmp sgt <8 x i32> %broadcast.splat14, %172 - %179 = icmp sgt <8 x i32> %broadcast.splat16, %177 - %180 = extractelement <8 x i64> %171, i32 0 - %181 = shl i64 %180, 32 - %182 = ashr exact i64 %181, 32 - %183 = getelementptr inbounds float, float* %0, i64 %182 - %184 = bitcast float* %183 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %184, i32 4, <8 x i1> %178), !tbaa !12, !llvm.access.group !16 - %185 = getelementptr inbounds float, float* %183, i64 8 - %186 = bitcast float* %185 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %186, i32 4, <8 x i1> %179), !tbaa !12, !llvm.access.group !16 - %187 = or <8 x i64> %broadcast.splat, - %188 = trunc <8 x i64> %187 to <8 x i32> - %189 = trunc i64 %mul.i.i to i32 - %190 = or i32 %189, 8 - %191 = insertelement <8 x i32> undef, i32 %190, i64 0 - %192 = shufflevector <8 x i32> %191, <8 x i32> undef, <8 x i32> zeroinitializer - %193 = or <8 x i32> %192, - %194 = icmp sgt <8 x i32> %broadcast.splat14, %188 - %195 = icmp sgt <8 x i32> %broadcast.splat16, %193 - %196 = extractelement <8 x i64> %187, i32 0 - %197 = shl i64 %196, 32 - %198 = ashr exact i64 %197, 32 - %199 = getelementptr inbounds float, float* %0, i64 %198 - %200 = bitcast float* %199 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %200, i32 4, <8 x i1> %194), !tbaa !12, !llvm.access.group !16 - %201 = getelementptr inbounds float, float* %199, i64 8 - %202 = bitcast float* %201 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %202, i32 4, <8 x i1> %195), !tbaa !12, !llvm.access.group !16 - %203 = or <8 x i64> %broadcast.splat, - %204 = trunc <8 x i64> %203 to <8 x i32> - %205 = trunc i64 %mul.i.i to i32 - %206 = or i32 %205, 8 - %207 = insertelement <8 x i32> undef, i32 %206, i64 0 - %208 = shufflevector <8 x i32> %207, <8 x i32> undef, <8 x i32> zeroinitializer - %209 = or <8 x i32> %208, - %210 = icmp sgt <8 x i32> %broadcast.splat14, %204 - %211 = icmp sgt <8 x i32> %broadcast.splat16, %209 - %212 = extractelement <8 x i64> %203, i32 0 - %213 = shl i64 %212, 32 - %214 = ashr exact i64 %213, 32 - %215 = getelementptr inbounds float, float* %0, i64 %214 - %216 = bitcast float* %215 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %216, i32 4, <8 x i1> %210), !tbaa !12, !llvm.access.group !16 - %217 = getelementptr inbounds float, float* %215, i64 8 - %218 = bitcast float* %217 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %218, i32 4, <8 x i1> %211), !tbaa !12, !llvm.access.group !16 - %219 = or <8 x i64> %broadcast.splat, - %220 = trunc <8 x i64> %219 to <8 x i32> - %221 = trunc i64 %mul.i.i to i32 - %222 = or i32 %221, 8 - %223 = insertelement <8 x i32> undef, i32 %222, i64 0 - %224 = shufflevector <8 x i32> %223, <8 x i32> undef, <8 x i32> zeroinitializer - %225 = or <8 x i32> %224, - %226 = icmp sgt <8 x i32> %broadcast.splat14, %220 - %227 = icmp sgt <8 x i32> %broadcast.splat16, %225 - %228 = extractelement <8 x i64> %219, i32 0 - %229 = shl i64 %228, 32 - %230 = ashr exact i64 %229, 32 - %231 = getelementptr inbounds float, float* %0, i64 %230 - %232 = bitcast float* %231 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %232, i32 4, <8 x i1> %226), !tbaa !12, !llvm.access.group !16 - %233 = getelementptr inbounds float, float* %231, i64 8 - %234 = bitcast float* %233 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %234, i32 4, <8 x i1> %227), !tbaa !12, !llvm.access.group !16 - %235 = or <8 x i64> %broadcast.splat, - %236 = trunc <8 x i64> %235 to <8 x i32> - %237 = trunc i64 %mul.i.i to i32 - %238 = or i32 %237, 8 - %239 = insertelement <8 x i32> undef, i32 %238, i64 0 - %240 = shufflevector <8 x i32> %239, <8 x i32> undef, <8 x i32> zeroinitializer - %241 = or <8 x i32> %240, - %242 = icmp sgt <8 x i32> %broadcast.splat14, %236 - %243 = icmp sgt <8 x i32> %broadcast.splat16, %241 - %244 = extractelement <8 x i64> %235, i32 0 - %245 = shl i64 %244, 32 - %246 = ashr exact i64 %245, 32 - %247 = getelementptr inbounds float, float* %0, i64 %246 - %248 = bitcast float* %247 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %248, i32 4, <8 x i1> %242), !tbaa !12, !llvm.access.group !16 - %249 = getelementptr inbounds float, float* %247, i64 8 - %250 = bitcast float* %249 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %250, i32 4, <8 x i1> %243), !tbaa !12, !llvm.access.group !16 - %251 = or <8 x i64> %broadcast.splat, - %252 = trunc <8 x i64> %251 to <8 x i32> - %253 = trunc i64 %mul.i.i to i32 - %254 = or i32 %253, 8 - %255 = insertelement <8 x i32> undef, i32 %254, i64 0 - %256 = shufflevector <8 x i32> %255, <8 x i32> undef, <8 x i32> zeroinitializer - %257 = or <8 x i32> %256, - %258 = icmp sgt <8 x i32> %broadcast.splat14, %252 - %259 = icmp sgt <8 x i32> %broadcast.splat16, %257 - %260 = extractelement <8 x i64> %251, i32 0 - %261 = shl i64 %260, 32 - %262 = ashr exact i64 %261, 32 - %263 = getelementptr inbounds float, float* %0, i64 %262 - %264 = bitcast float* %263 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %264, i32 4, <8 x i1> %258), !tbaa !12, !llvm.access.group !16 - %265 = getelementptr inbounds float, float* %263, i64 8 - %266 = bitcast float* %265 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %266, i32 4, <8 x i1> %259), !tbaa !12, !llvm.access.group !16 - br label %mean_kernel.exit - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %270, %if.end.r_exit.i.us ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %3 - br i1 %cmp.i.us, label %if.then.i.us, label %if.end.r_exit.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %sext.i.us = shl i64 %add1.i.i.us, 32 - %idxprom.i.us = ashr exact i64 %sext.i.us, 32 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us - store float 0.000000e+00, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %if.then.i.us - %indvars.iv.next.i5.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %if.then.i.us ] - %add8.i2.us = phi float [ %add8.i.us, %for.body.i.us ], [ 0.000000e+00, %if.then.i.us ] - %267 = mul nsw i64 %indvars.iv.next.i5.us, %10 - %268 = add nsw i64 %267, %idxprom.i.us - %arrayidx5.i.us = getelementptr inbounds float, float* %1, i64 %268 - %269 = load float, float* %arrayidx5.i.us, align 4, !tbaa !12 - %add8.i.us = fadd float %add8.i2.us, %269 - store float %add8.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i5.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %for.end.loopexit.i.us, label %for.body.i.us, !llvm.loop !18 - -for.end.loopexit.i.us: ; preds = %for.body.i.us - %add8.i.us.lcssa = phi float [ %add8.i.us, %for.body.i.us ] - %div.i.us = fdiv float %add8.i.us.lcssa, %2, !fpmath !20 - store float %div.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %for.end.loopexit.i.us, %pregion_for_entry.entry.i.us - %270 = add nuw nsw i64 %_local_id_x.0.us, 1 - %exitcond.not = icmp eq i64 %270, 256 - br i1 %exitcond.not, label %mean_kernel.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !21 - -mean_kernel.exit.loopexit: ; preds = %if.end.r_exit.i.us - br label %mean_kernel.exit - -mean_kernel.exit: ; preds = %mean_kernel.exit.loopexit, %pregion_for_entry.entry.i.preheader - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mean_kernel_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float** - %15 = load float*, float** %14, align 8 - %16 = load float, float* %15, align 4 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp222.i.i = icmp sgt i32 %24, 0 - %25 = sext i32 %20 to i64 - %wide.trip.count.i.i = zext i32 %24 to i64 - br i1 %cmp222.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_entry.entry.i.i.preheader - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.preheader: ; preds = %5 - %div.i.i = fdiv float 0.000000e+00, %16 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert13 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat14 = shufflevector <8 x i32> %broadcast.splatinsert13, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert15 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat16 = shufflevector <8 x i32> %broadcast.splatinsert15, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert17 = insertelement <8 x float> undef, float %div.i.i, i32 0 - %broadcast.splat18 = shufflevector <8 x float> %broadcast.splatinsert17, <8 x float> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert19 = insertelement <8 x float> undef, float %div.i.i, i32 0 - %broadcast.splat20 = shufflevector <8 x float> %broadcast.splatinsert19, <8 x float> undef, <8 x i32> zeroinitializer - %26 = or <8 x i64> %broadcast.splat, - %27 = trunc <8 x i64> %26 to <8 x i32> - %28 = trunc i64 %mul.i.i.i to i32 - %29 = or i32 %28, 8 - %30 = insertelement <8 x i32> undef, i32 %29, i64 0 - %31 = shufflevector <8 x i32> %30, <8 x i32> undef, <8 x i32> zeroinitializer - %32 = or <8 x i32> %31, - %33 = icmp sgt <8 x i32> %broadcast.splat14, %27 - %34 = icmp sgt <8 x i32> %broadcast.splat16, %32 - %35 = extractelement <8 x i64> %26, i32 0 - %36 = shl i64 %35, 32 - %37 = ashr exact i64 %36, 32 - %38 = getelementptr inbounds float, float* %8, i64 %37 - %39 = bitcast float* %38 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %39, i32 4, <8 x i1> %33), !tbaa !12, !llvm.access.group !16 - %40 = getelementptr inbounds float, float* %38, i64 8 - %41 = bitcast float* %40 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %41, i32 4, <8 x i1> %34), !tbaa !12, !llvm.access.group !16 - %42 = or <8 x i64> %broadcast.splat, - %43 = trunc <8 x i64> %42 to <8 x i32> - %44 = trunc i64 %mul.i.i.i to i32 - %45 = or i32 %44, 8 - %46 = insertelement <8 x i32> undef, i32 %45, i64 0 - %47 = shufflevector <8 x i32> %46, <8 x i32> undef, <8 x i32> zeroinitializer - %48 = or <8 x i32> %47, - %49 = icmp sgt <8 x i32> %broadcast.splat14, %43 - %50 = icmp sgt <8 x i32> %broadcast.splat16, %48 - %51 = extractelement <8 x i64> %42, i32 0 - %52 = shl i64 %51, 32 - %53 = ashr exact i64 %52, 32 - %54 = getelementptr inbounds float, float* %8, i64 %53 - %55 = bitcast float* %54 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %55, i32 4, <8 x i1> %49), !tbaa !12, !llvm.access.group !16 - %56 = getelementptr inbounds float, float* %54, i64 8 - %57 = bitcast float* %56 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %57, i32 4, <8 x i1> %50), !tbaa !12, !llvm.access.group !16 - %58 = or <8 x i64> %broadcast.splat, - %59 = trunc <8 x i64> %58 to <8 x i32> - %60 = trunc i64 %mul.i.i.i to i32 - %61 = or i32 %60, 8 - %62 = insertelement <8 x i32> undef, i32 %61, i64 0 - %63 = shufflevector <8 x i32> %62, <8 x i32> undef, <8 x i32> zeroinitializer - %64 = or <8 x i32> %63, - %65 = icmp sgt <8 x i32> %broadcast.splat14, %59 - %66 = icmp sgt <8 x i32> %broadcast.splat16, %64 - %67 = extractelement <8 x i64> %58, i32 0 - %68 = shl i64 %67, 32 - %69 = ashr exact i64 %68, 32 - %70 = getelementptr inbounds float, float* %8, i64 %69 - %71 = bitcast float* %70 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %71, i32 4, <8 x i1> %65), !tbaa !12, !llvm.access.group !16 - %72 = getelementptr inbounds float, float* %70, i64 8 - %73 = bitcast float* %72 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %73, i32 4, <8 x i1> %66), !tbaa !12, !llvm.access.group !16 - %74 = or <8 x i64> %broadcast.splat, - %75 = trunc <8 x i64> %74 to <8 x i32> - %76 = trunc i64 %mul.i.i.i to i32 - %77 = or i32 %76, 8 - %78 = insertelement <8 x i32> undef, i32 %77, i64 0 - %79 = shufflevector <8 x i32> %78, <8 x i32> undef, <8 x i32> zeroinitializer - %80 = or <8 x i32> %79, - %81 = icmp sgt <8 x i32> %broadcast.splat14, %75 - %82 = icmp sgt <8 x i32> %broadcast.splat16, %80 - %83 = extractelement <8 x i64> %74, i32 0 - %84 = shl i64 %83, 32 - %85 = ashr exact i64 %84, 32 - %86 = getelementptr inbounds float, float* %8, i64 %85 - %87 = bitcast float* %86 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %87, i32 4, <8 x i1> %81), !tbaa !12, !llvm.access.group !16 - %88 = getelementptr inbounds float, float* %86, i64 8 - %89 = bitcast float* %88 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %89, i32 4, <8 x i1> %82), !tbaa !12, !llvm.access.group !16 - %90 = or <8 x i64> %broadcast.splat, - %91 = trunc <8 x i64> %90 to <8 x i32> - %92 = trunc i64 %mul.i.i.i to i32 - %93 = or i32 %92, 8 - %94 = insertelement <8 x i32> undef, i32 %93, i64 0 - %95 = shufflevector <8 x i32> %94, <8 x i32> undef, <8 x i32> zeroinitializer - %96 = or <8 x i32> %95, - %97 = icmp sgt <8 x i32> %broadcast.splat14, %91 - %98 = icmp sgt <8 x i32> %broadcast.splat16, %96 - %99 = extractelement <8 x i64> %90, i32 0 - %100 = shl i64 %99, 32 - %101 = ashr exact i64 %100, 32 - %102 = getelementptr inbounds float, float* %8, i64 %101 - %103 = bitcast float* %102 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %103, i32 4, <8 x i1> %97), !tbaa !12, !llvm.access.group !16 - %104 = getelementptr inbounds float, float* %102, i64 8 - %105 = bitcast float* %104 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %105, i32 4, <8 x i1> %98), !tbaa !12, !llvm.access.group !16 - %106 = or <8 x i64> %broadcast.splat, - %107 = trunc <8 x i64> %106 to <8 x i32> - %108 = trunc i64 %mul.i.i.i to i32 - %109 = or i32 %108, 8 - %110 = insertelement <8 x i32> undef, i32 %109, i64 0 - %111 = shufflevector <8 x i32> %110, <8 x i32> undef, <8 x i32> zeroinitializer - %112 = or <8 x i32> %111, - %113 = icmp sgt <8 x i32> %broadcast.splat14, %107 - %114 = icmp sgt <8 x i32> %broadcast.splat16, %112 - %115 = extractelement <8 x i64> %106, i32 0 - %116 = shl i64 %115, 32 - %117 = ashr exact i64 %116, 32 - %118 = getelementptr inbounds float, float* %8, i64 %117 - %119 = bitcast float* %118 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %119, i32 4, <8 x i1> %113), !tbaa !12, !llvm.access.group !16 - %120 = getelementptr inbounds float, float* %118, i64 8 - %121 = bitcast float* %120 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %121, i32 4, <8 x i1> %114), !tbaa !12, !llvm.access.group !16 - %122 = or <8 x i64> %broadcast.splat, - %123 = trunc <8 x i64> %122 to <8 x i32> - %124 = trunc i64 %mul.i.i.i to i32 - %125 = or i32 %124, 8 - %126 = insertelement <8 x i32> undef, i32 %125, i64 0 - %127 = shufflevector <8 x i32> %126, <8 x i32> undef, <8 x i32> zeroinitializer - %128 = or <8 x i32> %127, - %129 = icmp sgt <8 x i32> %broadcast.splat14, %123 - %130 = icmp sgt <8 x i32> %broadcast.splat16, %128 - %131 = extractelement <8 x i64> %122, i32 0 - %132 = shl i64 %131, 32 - %133 = ashr exact i64 %132, 32 - %134 = getelementptr inbounds float, float* %8, i64 %133 - %135 = bitcast float* %134 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %135, i32 4, <8 x i1> %129), !tbaa !12, !llvm.access.group !16 - %136 = getelementptr inbounds float, float* %134, i64 8 - %137 = bitcast float* %136 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %137, i32 4, <8 x i1> %130), !tbaa !12, !llvm.access.group !16 - %138 = or <8 x i64> %broadcast.splat, - %139 = trunc <8 x i64> %138 to <8 x i32> - %140 = trunc i64 %mul.i.i.i to i32 - %141 = or i32 %140, 8 - %142 = insertelement <8 x i32> undef, i32 %141, i64 0 - %143 = shufflevector <8 x i32> %142, <8 x i32> undef, <8 x i32> zeroinitializer - %144 = or <8 x i32> %143, - %145 = icmp sgt <8 x i32> %broadcast.splat14, %139 - %146 = icmp sgt <8 x i32> %broadcast.splat16, %144 - %147 = extractelement <8 x i64> %138, i32 0 - %148 = shl i64 %147, 32 - %149 = ashr exact i64 %148, 32 - %150 = getelementptr inbounds float, float* %8, i64 %149 - %151 = bitcast float* %150 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %151, i32 4, <8 x i1> %145), !tbaa !12, !llvm.access.group !16 - %152 = getelementptr inbounds float, float* %150, i64 8 - %153 = bitcast float* %152 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %153, i32 4, <8 x i1> %146), !tbaa !12, !llvm.access.group !16 - %154 = or <8 x i64> %broadcast.splat, - %155 = trunc <8 x i64> %154 to <8 x i32> - %156 = trunc i64 %mul.i.i.i to i32 - %157 = or i32 %156, 8 - %158 = insertelement <8 x i32> undef, i32 %157, i64 0 - %159 = shufflevector <8 x i32> %158, <8 x i32> undef, <8 x i32> zeroinitializer - %160 = or <8 x i32> %159, - %161 = icmp sgt <8 x i32> %broadcast.splat14, %155 - %162 = icmp sgt <8 x i32> %broadcast.splat16, %160 - %163 = extractelement <8 x i64> %154, i32 0 - %164 = shl i64 %163, 32 - %165 = ashr exact i64 %164, 32 - %166 = getelementptr inbounds float, float* %8, i64 %165 - %167 = bitcast float* %166 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %167, i32 4, <8 x i1> %161), !tbaa !12, !llvm.access.group !16 - %168 = getelementptr inbounds float, float* %166, i64 8 - %169 = bitcast float* %168 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %169, i32 4, <8 x i1> %162), !tbaa !12, !llvm.access.group !16 - %170 = or <8 x i64> %broadcast.splat, - %171 = trunc <8 x i64> %170 to <8 x i32> - %172 = trunc i64 %mul.i.i.i to i32 - %173 = or i32 %172, 8 - %174 = insertelement <8 x i32> undef, i32 %173, i64 0 - %175 = shufflevector <8 x i32> %174, <8 x i32> undef, <8 x i32> zeroinitializer - %176 = or <8 x i32> %175, - %177 = icmp sgt <8 x i32> %broadcast.splat14, %171 - %178 = icmp sgt <8 x i32> %broadcast.splat16, %176 - %179 = extractelement <8 x i64> %170, i32 0 - %180 = shl i64 %179, 32 - %181 = ashr exact i64 %180, 32 - %182 = getelementptr inbounds float, float* %8, i64 %181 - %183 = bitcast float* %182 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %183, i32 4, <8 x i1> %177), !tbaa !12, !llvm.access.group !16 - %184 = getelementptr inbounds float, float* %182, i64 8 - %185 = bitcast float* %184 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %185, i32 4, <8 x i1> %178), !tbaa !12, !llvm.access.group !16 - %186 = or <8 x i64> %broadcast.splat, - %187 = trunc <8 x i64> %186 to <8 x i32> - %188 = trunc i64 %mul.i.i.i to i32 - %189 = or i32 %188, 8 - %190 = insertelement <8 x i32> undef, i32 %189, i64 0 - %191 = shufflevector <8 x i32> %190, <8 x i32> undef, <8 x i32> zeroinitializer - %192 = or <8 x i32> %191, - %193 = icmp sgt <8 x i32> %broadcast.splat14, %187 - %194 = icmp sgt <8 x i32> %broadcast.splat16, %192 - %195 = extractelement <8 x i64> %186, i32 0 - %196 = shl i64 %195, 32 - %197 = ashr exact i64 %196, 32 - %198 = getelementptr inbounds float, float* %8, i64 %197 - %199 = bitcast float* %198 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %199, i32 4, <8 x i1> %193), !tbaa !12, !llvm.access.group !16 - %200 = getelementptr inbounds float, float* %198, i64 8 - %201 = bitcast float* %200 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %201, i32 4, <8 x i1> %194), !tbaa !12, !llvm.access.group !16 - %202 = or <8 x i64> %broadcast.splat, - %203 = trunc <8 x i64> %202 to <8 x i32> - %204 = trunc i64 %mul.i.i.i to i32 - %205 = or i32 %204, 8 - %206 = insertelement <8 x i32> undef, i32 %205, i64 0 - %207 = shufflevector <8 x i32> %206, <8 x i32> undef, <8 x i32> zeroinitializer - %208 = or <8 x i32> %207, - %209 = icmp sgt <8 x i32> %broadcast.splat14, %203 - %210 = icmp sgt <8 x i32> %broadcast.splat16, %208 - %211 = extractelement <8 x i64> %202, i32 0 - %212 = shl i64 %211, 32 - %213 = ashr exact i64 %212, 32 - %214 = getelementptr inbounds float, float* %8, i64 %213 - %215 = bitcast float* %214 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %215, i32 4, <8 x i1> %209), !tbaa !12, !llvm.access.group !16 - %216 = getelementptr inbounds float, float* %214, i64 8 - %217 = bitcast float* %216 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %217, i32 4, <8 x i1> %210), !tbaa !12, !llvm.access.group !16 - %218 = or <8 x i64> %broadcast.splat, - %219 = trunc <8 x i64> %218 to <8 x i32> - %220 = trunc i64 %mul.i.i.i to i32 - %221 = or i32 %220, 8 - %222 = insertelement <8 x i32> undef, i32 %221, i64 0 - %223 = shufflevector <8 x i32> %222, <8 x i32> undef, <8 x i32> zeroinitializer - %224 = or <8 x i32> %223, - %225 = icmp sgt <8 x i32> %broadcast.splat14, %219 - %226 = icmp sgt <8 x i32> %broadcast.splat16, %224 - %227 = extractelement <8 x i64> %218, i32 0 - %228 = shl i64 %227, 32 - %229 = ashr exact i64 %228, 32 - %230 = getelementptr inbounds float, float* %8, i64 %229 - %231 = bitcast float* %230 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %231, i32 4, <8 x i1> %225), !tbaa !12, !llvm.access.group !16 - %232 = getelementptr inbounds float, float* %230, i64 8 - %233 = bitcast float* %232 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %233, i32 4, <8 x i1> %226), !tbaa !12, !llvm.access.group !16 - %234 = or <8 x i64> %broadcast.splat, - %235 = trunc <8 x i64> %234 to <8 x i32> - %236 = trunc i64 %mul.i.i.i to i32 - %237 = or i32 %236, 8 - %238 = insertelement <8 x i32> undef, i32 %237, i64 0 - %239 = shufflevector <8 x i32> %238, <8 x i32> undef, <8 x i32> zeroinitializer - %240 = or <8 x i32> %239, - %241 = icmp sgt <8 x i32> %broadcast.splat14, %235 - %242 = icmp sgt <8 x i32> %broadcast.splat16, %240 - %243 = extractelement <8 x i64> %234, i32 0 - %244 = shl i64 %243, 32 - %245 = ashr exact i64 %244, 32 - %246 = getelementptr inbounds float, float* %8, i64 %245 - %247 = bitcast float* %246 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %247, i32 4, <8 x i1> %241), !tbaa !12, !llvm.access.group !16 - %248 = getelementptr inbounds float, float* %246, i64 8 - %249 = bitcast float* %248 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %249, i32 4, <8 x i1> %242), !tbaa !12, !llvm.access.group !16 - %250 = or <8 x i64> %broadcast.splat, - %251 = trunc <8 x i64> %250 to <8 x i32> - %252 = trunc i64 %mul.i.i.i to i32 - %253 = or i32 %252, 8 - %254 = insertelement <8 x i32> undef, i32 %253, i64 0 - %255 = shufflevector <8 x i32> %254, <8 x i32> undef, <8 x i32> zeroinitializer - %256 = or <8 x i32> %255, - %257 = icmp sgt <8 x i32> %broadcast.splat14, %251 - %258 = icmp sgt <8 x i32> %broadcast.splat16, %256 - %259 = extractelement <8 x i64> %250, i32 0 - %260 = shl i64 %259, 32 - %261 = ashr exact i64 %260, 32 - %262 = getelementptr inbounds float, float* %8, i64 %261 - %263 = bitcast float* %262 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %263, i32 4, <8 x i1> %257), !tbaa !12, !llvm.access.group !16 - %264 = getelementptr inbounds float, float* %262, i64 8 - %265 = bitcast float* %264 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %265, i32 4, <8 x i1> %258), !tbaa !12, !llvm.access.group !16 - %266 = or <8 x i64> %broadcast.splat, - %267 = trunc <8 x i64> %266 to <8 x i32> - %268 = trunc i64 %mul.i.i.i to i32 - %269 = or i32 %268, 8 - %270 = insertelement <8 x i32> undef, i32 %269, i64 0 - %271 = shufflevector <8 x i32> %270, <8 x i32> undef, <8 x i32> zeroinitializer - %272 = or <8 x i32> %271, - %273 = icmp sgt <8 x i32> %broadcast.splat14, %267 - %274 = icmp sgt <8 x i32> %broadcast.splat16, %272 - %275 = extractelement <8 x i64> %266, i32 0 - %276 = shl i64 %275, 32 - %277 = ashr exact i64 %276, 32 - %278 = getelementptr inbounds float, float* %8, i64 %277 - %279 = bitcast float* %278 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %279, i32 4, <8 x i1> %273), !tbaa !12, !llvm.access.group !16 - %280 = getelementptr inbounds float, float* %278, i64 8 - %281 = bitcast float* %280 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %281, i32 4, <8 x i1> %274), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_mean_kernel.exit - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %285, %if.end.r_exit.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %20, %conv.i.i.us - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %if.then.i.i.us - %indvars.iv.next.i.i5.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %if.then.i.i.us ] - %add8.i.i2.us = phi float [ %add8.i.i.us, %for.body.i.i.us ], [ 0.000000e+00, %if.then.i.i.us ] - %282 = mul nsw i64 %indvars.iv.next.i.i5.us, %25 - %283 = add nsw i64 %282, %idxprom.i.i.us - %arrayidx5.i.i.us = getelementptr inbounds float, float* %12, i64 %283 - %284 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %add8.i.i.us = fadd float %add8.i.i2.us, %284 - store float %add8.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i5.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %for.end.loopexit.i.i.us, label %for.body.i.i.us, !llvm.loop !18 - -for.end.loopexit.i.i.us: ; preds = %for.body.i.i.us - %add8.i.i.us.lcssa = phi float [ %add8.i.i.us, %for.body.i.i.us ] - %div.i.i.us = fdiv float %add8.i.i.us.lcssa, %16, !fpmath !20 - store float %div.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %for.end.loopexit.i.i.us, %pregion_for_entry.entry.i.i.us - %285 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %285, 256 - br i1 %exitcond.not, label %_pocl_kernel_mean_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -_pocl_kernel_mean_kernel.exit.loopexit: ; preds = %if.end.r_exit.i.i.us - br label %_pocl_kernel_mean_kernel.exit - -_pocl_kernel_mean_kernel.exit: ; preds = %_pocl_kernel_mean_kernel.exit.loopexit, %pregion_for_entry.entry.i.i.preheader - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mean_kernel_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = load float, float* %13, align 4 - %15 = getelementptr i8*, i8** %0, i64 3 - %16 = bitcast i8** %15 to i32** - %17 = load i32*, i32** %16, align 8 - %18 = load i32, i32* %17, align 4 - %19 = getelementptr i8*, i8** %0, i64 4 - %20 = bitcast i8** %19 to i32** - %21 = load i32*, i32** %20, align 8 - %22 = load i32, i32* %21, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp222.i.i = icmp sgt i32 %22, 0 - %23 = sext i32 %18 to i64 - %wide.trip.count.i.i = zext i32 %22 to i64 - br i1 %cmp222.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_entry.entry.i.i.preheader - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.preheader: ; preds = %5 - %div.i.i = fdiv float 0.000000e+00, %14 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert13 = insertelement <8 x i32> undef, i32 %18, i32 0 - %broadcast.splat14 = shufflevector <8 x i32> %broadcast.splatinsert13, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert15 = insertelement <8 x i32> undef, i32 %18, i32 0 - %broadcast.splat16 = shufflevector <8 x i32> %broadcast.splatinsert15, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert17 = insertelement <8 x float> undef, float %div.i.i, i32 0 - %broadcast.splat18 = shufflevector <8 x float> %broadcast.splatinsert17, <8 x float> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert19 = insertelement <8 x float> undef, float %div.i.i, i32 0 - %broadcast.splat20 = shufflevector <8 x float> %broadcast.splatinsert19, <8 x float> undef, <8 x i32> zeroinitializer - %24 = or <8 x i64> %broadcast.splat, - %25 = trunc <8 x i64> %24 to <8 x i32> - %26 = trunc i64 %mul.i.i.i to i32 - %27 = or i32 %26, 8 - %28 = insertelement <8 x i32> undef, i32 %27, i64 0 - %29 = shufflevector <8 x i32> %28, <8 x i32> undef, <8 x i32> zeroinitializer - %30 = or <8 x i32> %29, - %31 = icmp sgt <8 x i32> %broadcast.splat14, %25 - %32 = icmp sgt <8 x i32> %broadcast.splat16, %30 - %33 = extractelement <8 x i64> %24, i32 0 - %34 = shl i64 %33, 32 - %35 = ashr exact i64 %34, 32 - %36 = getelementptr inbounds float, float* %7, i64 %35 - %37 = bitcast float* %36 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %37, i32 4, <8 x i1> %31), !tbaa !12, !llvm.access.group !16 - %38 = getelementptr inbounds float, float* %36, i64 8 - %39 = bitcast float* %38 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %39, i32 4, <8 x i1> %32), !tbaa !12, !llvm.access.group !16 - %40 = or <8 x i64> %broadcast.splat, - %41 = trunc <8 x i64> %40 to <8 x i32> - %42 = trunc i64 %mul.i.i.i to i32 - %43 = or i32 %42, 8 - %44 = insertelement <8 x i32> undef, i32 %43, i64 0 - %45 = shufflevector <8 x i32> %44, <8 x i32> undef, <8 x i32> zeroinitializer - %46 = or <8 x i32> %45, - %47 = icmp sgt <8 x i32> %broadcast.splat14, %41 - %48 = icmp sgt <8 x i32> %broadcast.splat16, %46 - %49 = extractelement <8 x i64> %40, i32 0 - %50 = shl i64 %49, 32 - %51 = ashr exact i64 %50, 32 - %52 = getelementptr inbounds float, float* %7, i64 %51 - %53 = bitcast float* %52 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %53, i32 4, <8 x i1> %47), !tbaa !12, !llvm.access.group !16 - %54 = getelementptr inbounds float, float* %52, i64 8 - %55 = bitcast float* %54 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %55, i32 4, <8 x i1> %48), !tbaa !12, !llvm.access.group !16 - %56 = or <8 x i64> %broadcast.splat, - %57 = trunc <8 x i64> %56 to <8 x i32> - %58 = trunc i64 %mul.i.i.i to i32 - %59 = or i32 %58, 8 - %60 = insertelement <8 x i32> undef, i32 %59, i64 0 - %61 = shufflevector <8 x i32> %60, <8 x i32> undef, <8 x i32> zeroinitializer - %62 = or <8 x i32> %61, - %63 = icmp sgt <8 x i32> %broadcast.splat14, %57 - %64 = icmp sgt <8 x i32> %broadcast.splat16, %62 - %65 = extractelement <8 x i64> %56, i32 0 - %66 = shl i64 %65, 32 - %67 = ashr exact i64 %66, 32 - %68 = getelementptr inbounds float, float* %7, i64 %67 - %69 = bitcast float* %68 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %69, i32 4, <8 x i1> %63), !tbaa !12, !llvm.access.group !16 - %70 = getelementptr inbounds float, float* %68, i64 8 - %71 = bitcast float* %70 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %71, i32 4, <8 x i1> %64), !tbaa !12, !llvm.access.group !16 - %72 = or <8 x i64> %broadcast.splat, - %73 = trunc <8 x i64> %72 to <8 x i32> - %74 = trunc i64 %mul.i.i.i to i32 - %75 = or i32 %74, 8 - %76 = insertelement <8 x i32> undef, i32 %75, i64 0 - %77 = shufflevector <8 x i32> %76, <8 x i32> undef, <8 x i32> zeroinitializer - %78 = or <8 x i32> %77, - %79 = icmp sgt <8 x i32> %broadcast.splat14, %73 - %80 = icmp sgt <8 x i32> %broadcast.splat16, %78 - %81 = extractelement <8 x i64> %72, i32 0 - %82 = shl i64 %81, 32 - %83 = ashr exact i64 %82, 32 - %84 = getelementptr inbounds float, float* %7, i64 %83 - %85 = bitcast float* %84 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %85, i32 4, <8 x i1> %79), !tbaa !12, !llvm.access.group !16 - %86 = getelementptr inbounds float, float* %84, i64 8 - %87 = bitcast float* %86 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %87, i32 4, <8 x i1> %80), !tbaa !12, !llvm.access.group !16 - %88 = or <8 x i64> %broadcast.splat, - %89 = trunc <8 x i64> %88 to <8 x i32> - %90 = trunc i64 %mul.i.i.i to i32 - %91 = or i32 %90, 8 - %92 = insertelement <8 x i32> undef, i32 %91, i64 0 - %93 = shufflevector <8 x i32> %92, <8 x i32> undef, <8 x i32> zeroinitializer - %94 = or <8 x i32> %93, - %95 = icmp sgt <8 x i32> %broadcast.splat14, %89 - %96 = icmp sgt <8 x i32> %broadcast.splat16, %94 - %97 = extractelement <8 x i64> %88, i32 0 - %98 = shl i64 %97, 32 - %99 = ashr exact i64 %98, 32 - %100 = getelementptr inbounds float, float* %7, i64 %99 - %101 = bitcast float* %100 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %101, i32 4, <8 x i1> %95), !tbaa !12, !llvm.access.group !16 - %102 = getelementptr inbounds float, float* %100, i64 8 - %103 = bitcast float* %102 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %103, i32 4, <8 x i1> %96), !tbaa !12, !llvm.access.group !16 - %104 = or <8 x i64> %broadcast.splat, - %105 = trunc <8 x i64> %104 to <8 x i32> - %106 = trunc i64 %mul.i.i.i to i32 - %107 = or i32 %106, 8 - %108 = insertelement <8 x i32> undef, i32 %107, i64 0 - %109 = shufflevector <8 x i32> %108, <8 x i32> undef, <8 x i32> zeroinitializer - %110 = or <8 x i32> %109, - %111 = icmp sgt <8 x i32> %broadcast.splat14, %105 - %112 = icmp sgt <8 x i32> %broadcast.splat16, %110 - %113 = extractelement <8 x i64> %104, i32 0 - %114 = shl i64 %113, 32 - %115 = ashr exact i64 %114, 32 - %116 = getelementptr inbounds float, float* %7, i64 %115 - %117 = bitcast float* %116 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %117, i32 4, <8 x i1> %111), !tbaa !12, !llvm.access.group !16 - %118 = getelementptr inbounds float, float* %116, i64 8 - %119 = bitcast float* %118 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %119, i32 4, <8 x i1> %112), !tbaa !12, !llvm.access.group !16 - %120 = or <8 x i64> %broadcast.splat, - %121 = trunc <8 x i64> %120 to <8 x i32> - %122 = trunc i64 %mul.i.i.i to i32 - %123 = or i32 %122, 8 - %124 = insertelement <8 x i32> undef, i32 %123, i64 0 - %125 = shufflevector <8 x i32> %124, <8 x i32> undef, <8 x i32> zeroinitializer - %126 = or <8 x i32> %125, - %127 = icmp sgt <8 x i32> %broadcast.splat14, %121 - %128 = icmp sgt <8 x i32> %broadcast.splat16, %126 - %129 = extractelement <8 x i64> %120, i32 0 - %130 = shl i64 %129, 32 - %131 = ashr exact i64 %130, 32 - %132 = getelementptr inbounds float, float* %7, i64 %131 - %133 = bitcast float* %132 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %133, i32 4, <8 x i1> %127), !tbaa !12, !llvm.access.group !16 - %134 = getelementptr inbounds float, float* %132, i64 8 - %135 = bitcast float* %134 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %135, i32 4, <8 x i1> %128), !tbaa !12, !llvm.access.group !16 - %136 = or <8 x i64> %broadcast.splat, - %137 = trunc <8 x i64> %136 to <8 x i32> - %138 = trunc i64 %mul.i.i.i to i32 - %139 = or i32 %138, 8 - %140 = insertelement <8 x i32> undef, i32 %139, i64 0 - %141 = shufflevector <8 x i32> %140, <8 x i32> undef, <8 x i32> zeroinitializer - %142 = or <8 x i32> %141, - %143 = icmp sgt <8 x i32> %broadcast.splat14, %137 - %144 = icmp sgt <8 x i32> %broadcast.splat16, %142 - %145 = extractelement <8 x i64> %136, i32 0 - %146 = shl i64 %145, 32 - %147 = ashr exact i64 %146, 32 - %148 = getelementptr inbounds float, float* %7, i64 %147 - %149 = bitcast float* %148 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %149, i32 4, <8 x i1> %143), !tbaa !12, !llvm.access.group !16 - %150 = getelementptr inbounds float, float* %148, i64 8 - %151 = bitcast float* %150 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %151, i32 4, <8 x i1> %144), !tbaa !12, !llvm.access.group !16 - %152 = or <8 x i64> %broadcast.splat, - %153 = trunc <8 x i64> %152 to <8 x i32> - %154 = trunc i64 %mul.i.i.i to i32 - %155 = or i32 %154, 8 - %156 = insertelement <8 x i32> undef, i32 %155, i64 0 - %157 = shufflevector <8 x i32> %156, <8 x i32> undef, <8 x i32> zeroinitializer - %158 = or <8 x i32> %157, - %159 = icmp sgt <8 x i32> %broadcast.splat14, %153 - %160 = icmp sgt <8 x i32> %broadcast.splat16, %158 - %161 = extractelement <8 x i64> %152, i32 0 - %162 = shl i64 %161, 32 - %163 = ashr exact i64 %162, 32 - %164 = getelementptr inbounds float, float* %7, i64 %163 - %165 = bitcast float* %164 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %165, i32 4, <8 x i1> %159), !tbaa !12, !llvm.access.group !16 - %166 = getelementptr inbounds float, float* %164, i64 8 - %167 = bitcast float* %166 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %167, i32 4, <8 x i1> %160), !tbaa !12, !llvm.access.group !16 - %168 = or <8 x i64> %broadcast.splat, - %169 = trunc <8 x i64> %168 to <8 x i32> - %170 = trunc i64 %mul.i.i.i to i32 - %171 = or i32 %170, 8 - %172 = insertelement <8 x i32> undef, i32 %171, i64 0 - %173 = shufflevector <8 x i32> %172, <8 x i32> undef, <8 x i32> zeroinitializer - %174 = or <8 x i32> %173, - %175 = icmp sgt <8 x i32> %broadcast.splat14, %169 - %176 = icmp sgt <8 x i32> %broadcast.splat16, %174 - %177 = extractelement <8 x i64> %168, i32 0 - %178 = shl i64 %177, 32 - %179 = ashr exact i64 %178, 32 - %180 = getelementptr inbounds float, float* %7, i64 %179 - %181 = bitcast float* %180 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %181, i32 4, <8 x i1> %175), !tbaa !12, !llvm.access.group !16 - %182 = getelementptr inbounds float, float* %180, i64 8 - %183 = bitcast float* %182 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %183, i32 4, <8 x i1> %176), !tbaa !12, !llvm.access.group !16 - %184 = or <8 x i64> %broadcast.splat, - %185 = trunc <8 x i64> %184 to <8 x i32> - %186 = trunc i64 %mul.i.i.i to i32 - %187 = or i32 %186, 8 - %188 = insertelement <8 x i32> undef, i32 %187, i64 0 - %189 = shufflevector <8 x i32> %188, <8 x i32> undef, <8 x i32> zeroinitializer - %190 = or <8 x i32> %189, - %191 = icmp sgt <8 x i32> %broadcast.splat14, %185 - %192 = icmp sgt <8 x i32> %broadcast.splat16, %190 - %193 = extractelement <8 x i64> %184, i32 0 - %194 = shl i64 %193, 32 - %195 = ashr exact i64 %194, 32 - %196 = getelementptr inbounds float, float* %7, i64 %195 - %197 = bitcast float* %196 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %197, i32 4, <8 x i1> %191), !tbaa !12, !llvm.access.group !16 - %198 = getelementptr inbounds float, float* %196, i64 8 - %199 = bitcast float* %198 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %199, i32 4, <8 x i1> %192), !tbaa !12, !llvm.access.group !16 - %200 = or <8 x i64> %broadcast.splat, - %201 = trunc <8 x i64> %200 to <8 x i32> - %202 = trunc i64 %mul.i.i.i to i32 - %203 = or i32 %202, 8 - %204 = insertelement <8 x i32> undef, i32 %203, i64 0 - %205 = shufflevector <8 x i32> %204, <8 x i32> undef, <8 x i32> zeroinitializer - %206 = or <8 x i32> %205, - %207 = icmp sgt <8 x i32> %broadcast.splat14, %201 - %208 = icmp sgt <8 x i32> %broadcast.splat16, %206 - %209 = extractelement <8 x i64> %200, i32 0 - %210 = shl i64 %209, 32 - %211 = ashr exact i64 %210, 32 - %212 = getelementptr inbounds float, float* %7, i64 %211 - %213 = bitcast float* %212 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %213, i32 4, <8 x i1> %207), !tbaa !12, !llvm.access.group !16 - %214 = getelementptr inbounds float, float* %212, i64 8 - %215 = bitcast float* %214 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %215, i32 4, <8 x i1> %208), !tbaa !12, !llvm.access.group !16 - %216 = or <8 x i64> %broadcast.splat, - %217 = trunc <8 x i64> %216 to <8 x i32> - %218 = trunc i64 %mul.i.i.i to i32 - %219 = or i32 %218, 8 - %220 = insertelement <8 x i32> undef, i32 %219, i64 0 - %221 = shufflevector <8 x i32> %220, <8 x i32> undef, <8 x i32> zeroinitializer - %222 = or <8 x i32> %221, - %223 = icmp sgt <8 x i32> %broadcast.splat14, %217 - %224 = icmp sgt <8 x i32> %broadcast.splat16, %222 - %225 = extractelement <8 x i64> %216, i32 0 - %226 = shl i64 %225, 32 - %227 = ashr exact i64 %226, 32 - %228 = getelementptr inbounds float, float* %7, i64 %227 - %229 = bitcast float* %228 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %229, i32 4, <8 x i1> %223), !tbaa !12, !llvm.access.group !16 - %230 = getelementptr inbounds float, float* %228, i64 8 - %231 = bitcast float* %230 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %231, i32 4, <8 x i1> %224), !tbaa !12, !llvm.access.group !16 - %232 = or <8 x i64> %broadcast.splat, - %233 = trunc <8 x i64> %232 to <8 x i32> - %234 = trunc i64 %mul.i.i.i to i32 - %235 = or i32 %234, 8 - %236 = insertelement <8 x i32> undef, i32 %235, i64 0 - %237 = shufflevector <8 x i32> %236, <8 x i32> undef, <8 x i32> zeroinitializer - %238 = or <8 x i32> %237, - %239 = icmp sgt <8 x i32> %broadcast.splat14, %233 - %240 = icmp sgt <8 x i32> %broadcast.splat16, %238 - %241 = extractelement <8 x i64> %232, i32 0 - %242 = shl i64 %241, 32 - %243 = ashr exact i64 %242, 32 - %244 = getelementptr inbounds float, float* %7, i64 %243 - %245 = bitcast float* %244 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %245, i32 4, <8 x i1> %239), !tbaa !12, !llvm.access.group !16 - %246 = getelementptr inbounds float, float* %244, i64 8 - %247 = bitcast float* %246 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %247, i32 4, <8 x i1> %240), !tbaa !12, !llvm.access.group !16 - %248 = or <8 x i64> %broadcast.splat, - %249 = trunc <8 x i64> %248 to <8 x i32> - %250 = trunc i64 %mul.i.i.i to i32 - %251 = or i32 %250, 8 - %252 = insertelement <8 x i32> undef, i32 %251, i64 0 - %253 = shufflevector <8 x i32> %252, <8 x i32> undef, <8 x i32> zeroinitializer - %254 = or <8 x i32> %253, - %255 = icmp sgt <8 x i32> %broadcast.splat14, %249 - %256 = icmp sgt <8 x i32> %broadcast.splat16, %254 - %257 = extractelement <8 x i64> %248, i32 0 - %258 = shl i64 %257, 32 - %259 = ashr exact i64 %258, 32 - %260 = getelementptr inbounds float, float* %7, i64 %259 - %261 = bitcast float* %260 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %261, i32 4, <8 x i1> %255), !tbaa !12, !llvm.access.group !16 - %262 = getelementptr inbounds float, float* %260, i64 8 - %263 = bitcast float* %262 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %263, i32 4, <8 x i1> %256), !tbaa !12, !llvm.access.group !16 - %264 = or <8 x i64> %broadcast.splat, - %265 = trunc <8 x i64> %264 to <8 x i32> - %266 = trunc i64 %mul.i.i.i to i32 - %267 = or i32 %266, 8 - %268 = insertelement <8 x i32> undef, i32 %267, i64 0 - %269 = shufflevector <8 x i32> %268, <8 x i32> undef, <8 x i32> zeroinitializer - %270 = or <8 x i32> %269, - %271 = icmp sgt <8 x i32> %broadcast.splat14, %265 - %272 = icmp sgt <8 x i32> %broadcast.splat16, %270 - %273 = extractelement <8 x i64> %264, i32 0 - %274 = shl i64 %273, 32 - %275 = ashr exact i64 %274, 32 - %276 = getelementptr inbounds float, float* %7, i64 %275 - %277 = bitcast float* %276 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %277, i32 4, <8 x i1> %271), !tbaa !12, !llvm.access.group !16 - %278 = getelementptr inbounds float, float* %276, i64 8 - %279 = bitcast float* %278 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %279, i32 4, <8 x i1> %272), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_mean_kernel.exit - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %283, %if.end.r_exit.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %18, %conv.i.i.us - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %if.then.i.i.us - %indvars.iv.next.i.i5.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %if.then.i.i.us ] - %add8.i.i2.us = phi float [ %add8.i.i.us, %for.body.i.i.us ], [ 0.000000e+00, %if.then.i.i.us ] - %280 = mul nsw i64 %indvars.iv.next.i.i5.us, %23 - %281 = add nsw i64 %280, %idxprom.i.i.us - %arrayidx5.i.i.us = getelementptr inbounds float, float* %10, i64 %281 - %282 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %add8.i.i.us = fadd float %add8.i.i2.us, %282 - store float %add8.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i5.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %for.end.loopexit.i.i.us, label %for.body.i.i.us, !llvm.loop !18 - -for.end.loopexit.i.i.us: ; preds = %for.body.i.i.us - %add8.i.i.us.lcssa = phi float [ %add8.i.i.us, %for.body.i.i.us ] - %div.i.i.us = fdiv float %add8.i.i.us.lcssa, %14, !fpmath !20 - store float %div.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %for.end.loopexit.i.i.us, %pregion_for_entry.entry.i.i.us - %283 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %283, 256 - br i1 %exitcond.not, label %_pocl_kernel_mean_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -_pocl_kernel_mean_kernel.exit.loopexit: ; preds = %if.end.r_exit.i.i.us - br label %_pocl_kernel_mean_kernel.exit - -_pocl_kernel_mean_kernel.exit: ; preds = %_pocl_kernel_mean_kernel.exit.loopexit, %pregion_for_entry.entry.i.i.preheader - ret void -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #2 - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind } -attributes #2 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !""} -!10 = !{!"mean", !"data", !"float_n", !"m", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = !{float 2.500000e+00} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/correlation_reduce.ll b/pocl_irs/correlation_reduce.ll deleted file mode 100644 index be20c9c..0000000 --- a/pocl_irs/correlation_reduce.ll +++ /dev/null @@ -1,5335 +0,0 @@ -; ModuleID = './KK/KIGILFKIOCMFGODNJKKCPJJIDHNBMKHCFFGPF/reduce_kernel/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_reduce_kernel(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, float %3, i32 %4, i32 %5, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %6, i64 %7, i64 %8, i64 %9) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { -pregion_for_entry.pregion_for_init.i: - %mul.i.i = shl i64 %7, 5 - %mul3.i.i = shl i64 %8, 3 - %10 = tail call float @llvm.sqrt.f32(float %3) #5 - %conv2.i = trunc i64 %mul3.i.i to i32 - %cmp.i = icmp slt i32 %conv2.i, %5 - %mul.i = mul nsw i32 %conv2.i, %4 - br i1 %cmp.i, label %vector.scevcheck, label %pregion_for_end.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i - %11 = trunc i64 %8 to i32 - %12 = mul i32 %11, %4 - %13 = shl i32 %12, 3 - %14 = trunc i64 %7 to i32 - %15 = shl i32 %14, 5 - %16 = add i32 %13, %15 - %17 = icmp sgt i32 %16, 2147483616 - br i1 %17, label %pregion_for_entry.entry.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %18 = trunc i64 %7 to i32 - %19 = shl i32 %18, 5 - %20 = sext i32 %19 to i64 - %scevgep = getelementptr float, float* %0, i64 %20 - %21 = add nsw i64 %20, 32 - %scevgep4 = getelementptr float, float* %0, i64 %21 - %22 = trunc i64 %8 to i32 - %23 = mul i32 %22, %4 - %24 = shl i32 %23, 3 - %25 = add i32 %24, %19 - %26 = sext i32 %25 to i64 - %scevgep6 = getelementptr float, float* %2, i64 %26 - %27 = add nsw i64 %26, 32 - %scevgep8 = getelementptr float, float* %2, i64 %27 - %scevgep10 = getelementptr float, float* %1, i64 %20 - %scevgep12 = getelementptr float, float* %1, i64 %21 - %bound0 = icmp ult float* %scevgep, %scevgep8 - %bound1 = icmp ult float* %scevgep6, %scevgep4 - %found.conflict = and i1 %bound0, %bound1 - %bound014 = icmp ult float* %scevgep10, %scevgep8 - %bound115 = icmp ult float* %scevgep6, %scevgep12 - %found.conflict16 = and i1 %bound014, %bound115 - %conflict.rdx = or i1 %found.conflict, %found.conflict16 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert17 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat18 = shufflevector <8 x i32> %broadcast.splatinsert17, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert21 = insertelement <8 x float> undef, float %10, i32 0 - %broadcast.splat22 = shufflevector <8 x float> %broadcast.splatinsert21, <8 x float> undef, <8 x i32> zeroinitializer - %28 = or <8 x i64> %broadcast.splat, - %29 = trunc <8 x i64> %28 to <8 x i32> - %30 = icmp sgt <8 x i32> %broadcast.splat18, %29 - %31 = extractelement <8 x i64> %28, i32 0 - %32 = shl i64 %31, 32 - %33 = ashr exact i64 %32, 32 - %34 = getelementptr inbounds float, float* %0, i64 %33 - %35 = bitcast float* %34 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %35, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %36 = extractelement <8 x i32> %29, i32 0 - %37 = add nsw i32 %mul.i, %36 - %38 = sext i32 %37 to i64 - %39 = getelementptr inbounds float, float* %2, i64 %38 - %40 = bitcast float* %39 to <8 x float>* - %wide.masked.load19 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %40, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !19 - %41 = fsub <8 x float> %wide.masked.load19, %wide.masked.load - %42 = bitcast float* %39 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %41, <8 x float>* %42, i32 4, <8 x i1> %30), !tbaa !12, !alias.scope !19, !llvm.access.group !21 - %43 = getelementptr inbounds float, float* %1, i64 %33 - %44 = bitcast float* %43 to <8 x float>* - %wide.masked.load20 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %44, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !24, !noalias !19 - %45 = fmul <8 x float> %broadcast.splat22, %wide.masked.load20 - %46 = fdiv <8 x float> %41, %45, !fpmath !26 - %47 = bitcast float* %39 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %46, <8 x float>* %47, i32 4, <8 x i1> %30), !tbaa !12, !alias.scope !19, !llvm.access.group !21 - %48 = or <8 x i64> %broadcast.splat, - %49 = trunc <8 x i64> %48 to <8 x i32> - %50 = icmp sgt <8 x i32> %broadcast.splat18, %49 - %51 = extractelement <8 x i64> %48, i32 0 - %52 = shl i64 %51, 32 - %53 = ashr exact i64 %52, 32 - %54 = getelementptr inbounds float, float* %0, i64 %53 - %55 = bitcast float* %54 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %55, i32 4, <8 x i1> %50, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %56 = extractelement <8 x i32> %49, i32 0 - %57 = add nsw i32 %mul.i, %56 - %58 = sext i32 %57 to i64 - %59 = getelementptr inbounds float, float* %2, i64 %58 - %60 = bitcast float* %59 to <8 x float>* - %wide.masked.load19.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %60, i32 4, <8 x i1> %50, <8 x float> undef), !tbaa !12, !alias.scope !19 - %61 = fsub <8 x float> %wide.masked.load19.1, %wide.masked.load.1 - %62 = bitcast float* %59 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %61, <8 x float>* %62, i32 4, <8 x i1> %50), !tbaa !12, !alias.scope !19, !llvm.access.group !21 - %63 = getelementptr inbounds float, float* %1, i64 %53 - %64 = bitcast float* %63 to <8 x float>* - %wide.masked.load20.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %64, i32 4, <8 x i1> %50, <8 x float> undef), !tbaa !12, !alias.scope !24, !noalias !19 - %65 = fmul <8 x float> %broadcast.splat22, %wide.masked.load20.1 - %66 = fdiv <8 x float> %61, %65, !fpmath !26 - %67 = bitcast float* %59 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %66, <8 x float>* %67, i32 4, <8 x i1> %50), !tbaa !12, !alias.scope !19, !llvm.access.group !21 - %68 = or <8 x i64> %broadcast.splat, - %69 = trunc <8 x i64> %68 to <8 x i32> - %70 = icmp sgt <8 x i32> %broadcast.splat18, %69 - %71 = extractelement <8 x i64> %68, i32 0 - %72 = shl i64 %71, 32 - %73 = ashr exact i64 %72, 32 - %74 = getelementptr inbounds float, float* %0, i64 %73 - %75 = bitcast float* %74 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %75, i32 4, <8 x i1> %70, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %76 = extractelement <8 x i32> %69, i32 0 - %77 = add nsw i32 %mul.i, %76 - %78 = sext i32 %77 to i64 - %79 = getelementptr inbounds float, float* %2, i64 %78 - %80 = bitcast float* %79 to <8 x float>* - %wide.masked.load19.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %80, i32 4, <8 x i1> %70, <8 x float> undef), !tbaa !12, !alias.scope !19 - %81 = fsub <8 x float> %wide.masked.load19.2, %wide.masked.load.2 - %82 = bitcast float* %79 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %81, <8 x float>* %82, i32 4, <8 x i1> %70), !tbaa !12, !alias.scope !19, !llvm.access.group !21 - %83 = getelementptr inbounds float, float* %1, i64 %73 - %84 = bitcast float* %83 to <8 x float>* - %wide.masked.load20.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %84, i32 4, <8 x i1> %70, <8 x float> undef), !tbaa !12, !alias.scope !24, !noalias !19 - %85 = fmul <8 x float> %broadcast.splat22, %wide.masked.load20.2 - %86 = fdiv <8 x float> %81, %85, !fpmath !26 - %87 = bitcast float* %79 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %86, <8 x float>* %87, i32 4, <8 x i1> %70), !tbaa !12, !alias.scope !19, !llvm.access.group !21 - %88 = or <8 x i64> %broadcast.splat, - %89 = trunc <8 x i64> %88 to <8 x i32> - %90 = icmp sgt <8 x i32> %broadcast.splat18, %89 - %91 = extractelement <8 x i64> %88, i32 0 - %92 = shl i64 %91, 32 - %93 = ashr exact i64 %92, 32 - %94 = getelementptr inbounds float, float* %0, i64 %93 - %95 = bitcast float* %94 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %95, i32 4, <8 x i1> %90, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %96 = extractelement <8 x i32> %89, i32 0 - %97 = add nsw i32 %mul.i, %96 - %98 = sext i32 %97 to i64 - %99 = getelementptr inbounds float, float* %2, i64 %98 - %100 = bitcast float* %99 to <8 x float>* - %wide.masked.load19.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %100, i32 4, <8 x i1> %90, <8 x float> undef), !tbaa !12, !alias.scope !19 - %101 = fsub <8 x float> %wide.masked.load19.3, %wide.masked.load.3 - %102 = bitcast float* %99 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %101, <8 x float>* %102, i32 4, <8 x i1> %90), !tbaa !12, !alias.scope !19, !llvm.access.group !21 - %103 = getelementptr inbounds float, float* %1, i64 %93 - %104 = bitcast float* %103 to <8 x float>* - %wide.masked.load20.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %104, i32 4, <8 x i1> %90, <8 x float> undef), !tbaa !12, !alias.scope !24, !noalias !19 - %105 = fmul <8 x float> %broadcast.splat22, %wide.masked.load20.3 - %106 = fdiv <8 x float> %101, %105, !fpmath !26 - %107 = bitcast float* %99 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %106, <8 x float>* %107, i32 4, <8 x i1> %90), !tbaa !12, !alias.scope !19, !llvm.access.group !21 - br label %pregion_for_end.i - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us.1369, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.us.preheader ], [ %829, %if.end.r_exit.i.us.1369 ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %4 - br i1 %cmp4.i.us, label %if.then.i.us, label %if.end.r_exit.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %sext.i.us = shl i64 %add1.i.i.us, 32 - %idxprom.i.us = ashr exact i64 %sext.i.us, 32 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us - %108 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %add.i.us = add nsw i32 %mul.i, %conv.i.us - %idxprom6.i.us = sext i32 %add.i.us to i64 - %arrayidx7.i.us = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us - %109 = load float, float* %arrayidx7.i.us, align 4, !tbaa !12 - %sub.i.us = fsub float %109, %108 - store float %sub.i.us, float* %arrayidx7.i.us, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us = getelementptr inbounds float, float* %1, i64 %idxprom.i.us - %110 = load float, float* %arrayidx10.i.us, align 4, !tbaa !12 - %mul11.i.us = fmul float %10, %110 - %div.i.us = fdiv float %sub.i.us, %mul11.i.us, !fpmath !26 - store float %div.i.us, float* %arrayidx7.i.us, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us - %111 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1354 = add nuw nsw i64 %111, %mul.i.i - %conv.i.us.1355 = trunc i64 %add1.i.i.us.1354 to i32 - %cmp4.i.us.1356 = icmp slt i32 %conv.i.us.1355, %4 - br i1 %cmp4.i.us.1356, label %if.then.i.us.1368, label %if.end.r_exit.i.us.1369 - -pregion_for_end.i.loopexit: ; preds = %if.end.r_exit.i.us.1369 - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i - %112 = trunc i64 %mul3.i.i to i32 - %conv2.i.1 = or i32 %112, 1 - %cmp.i.1 = icmp slt i32 %conv2.i.1, %5 - %mul.i.1 = mul nsw i32 %conv2.i.1, %4 - br i1 %cmp.i.1, label %vector.scevcheck30, label %pregion_for_end.i.1 - -vector.scevcheck30: ; preds = %pregion_for_end.i - %113 = mul i32 %conv2.i.1, %4 - %114 = trunc i64 %7 to i32 - %115 = shl i32 %114, 5 - %116 = add i32 %113, %115 - %117 = icmp sgt i32 %116, 2147483616 - br i1 %117, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.memcheck52 - -pregion_for_entry.entry.i.us.1.preheader: ; preds = %vector.memcheck52, %vector.scevcheck30 - br label %pregion_for_entry.entry.i.us.1 - -vector.memcheck52: ; preds = %vector.scevcheck30 - %118 = trunc i64 %7 to i32 - %119 = shl i32 %118, 5 - %120 = sext i32 %119 to i64 - %scevgep32 = getelementptr float, float* %0, i64 %120 - %121 = add nsw i64 %120, 32 - %scevgep34 = getelementptr float, float* %0, i64 %121 - %122 = mul i32 %conv2.i.1, %4 - %123 = add i32 %122, %119 - %124 = sext i32 %123 to i64 - %scevgep36 = getelementptr float, float* %2, i64 %124 - %125 = add nsw i64 %124, 32 - %scevgep38 = getelementptr float, float* %2, i64 %125 - %scevgep40 = getelementptr float, float* %1, i64 %120 - %scevgep42 = getelementptr float, float* %1, i64 %121 - %bound044 = icmp ult float* %scevgep32, %scevgep38 - %bound145 = icmp ult float* %scevgep36, %scevgep34 - %found.conflict46 = and i1 %bound044, %bound145 - %bound047 = icmp ult float* %scevgep40, %scevgep38 - %bound148 = icmp ult float* %scevgep36, %scevgep42 - %found.conflict49 = and i1 %bound047, %bound148 - %conflict.rdx50 = or i1 %found.conflict46, %found.conflict49 - br i1 %conflict.rdx50, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.ph53 - -vector.ph53: ; preds = %vector.memcheck52 - %broadcast.splatinsert60 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat61 = shufflevector <8 x i64> %broadcast.splatinsert60, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert62 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat63 = shufflevector <8 x i32> %broadcast.splatinsert62, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert67 = insertelement <8 x float> undef, float %10, i32 0 - %broadcast.splat68 = shufflevector <8 x float> %broadcast.splatinsert67, <8 x float> undef, <8 x i32> zeroinitializer - %126 = or <8 x i64> %broadcast.splat61, - %127 = trunc <8 x i64> %126 to <8 x i32> - %128 = icmp sgt <8 x i32> %broadcast.splat63, %127 - %129 = extractelement <8 x i64> %126, i32 0 - %130 = shl i64 %129, 32 - %131 = ashr exact i64 %130, 32 - %132 = getelementptr inbounds float, float* %0, i64 %131 - %133 = bitcast float* %132 to <8 x float>* - %wide.masked.load64 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %133, i32 4, <8 x i1> %128, <8 x float> undef), !tbaa !12, !alias.scope !27, !noalias !30 - %134 = extractelement <8 x i32> %127, i32 0 - %135 = add nsw i32 %mul.i.1, %134 - %136 = sext i32 %135 to i64 - %137 = getelementptr inbounds float, float* %2, i64 %136 - %138 = bitcast float* %137 to <8 x float>* - %wide.masked.load65 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %138, i32 4, <8 x i1> %128, <8 x float> undef), !tbaa !12, !alias.scope !30 - %139 = fsub <8 x float> %wide.masked.load65, %wide.masked.load64 - %140 = bitcast float* %137 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %139, <8 x float>* %140, i32 4, <8 x i1> %128), !tbaa !12, !alias.scope !30, !llvm.access.group !21 - %141 = getelementptr inbounds float, float* %1, i64 %131 - %142 = bitcast float* %141 to <8 x float>* - %wide.masked.load66 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %142, i32 4, <8 x i1> %128, <8 x float> undef), !tbaa !12, !alias.scope !32, !noalias !30 - %143 = fmul <8 x float> %broadcast.splat68, %wide.masked.load66 - %144 = fdiv <8 x float> %139, %143, !fpmath !26 - %145 = bitcast float* %137 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %144, <8 x float>* %145, i32 4, <8 x i1> %128), !tbaa !12, !alias.scope !30, !llvm.access.group !21 - %146 = or <8 x i64> %broadcast.splat61, - %147 = trunc <8 x i64> %146 to <8 x i32> - %148 = icmp sgt <8 x i32> %broadcast.splat63, %147 - %149 = extractelement <8 x i64> %146, i32 0 - %150 = shl i64 %149, 32 - %151 = ashr exact i64 %150, 32 - %152 = getelementptr inbounds float, float* %0, i64 %151 - %153 = bitcast float* %152 to <8 x float>* - %wide.masked.load64.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %153, i32 4, <8 x i1> %148, <8 x float> undef), !tbaa !12, !alias.scope !27, !noalias !30 - %154 = extractelement <8 x i32> %147, i32 0 - %155 = add nsw i32 %mul.i.1, %154 - %156 = sext i32 %155 to i64 - %157 = getelementptr inbounds float, float* %2, i64 %156 - %158 = bitcast float* %157 to <8 x float>* - %wide.masked.load65.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %158, i32 4, <8 x i1> %148, <8 x float> undef), !tbaa !12, !alias.scope !30 - %159 = fsub <8 x float> %wide.masked.load65.1, %wide.masked.load64.1 - %160 = bitcast float* %157 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %159, <8 x float>* %160, i32 4, <8 x i1> %148), !tbaa !12, !alias.scope !30, !llvm.access.group !21 - %161 = getelementptr inbounds float, float* %1, i64 %151 - %162 = bitcast float* %161 to <8 x float>* - %wide.masked.load66.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %162, i32 4, <8 x i1> %148, <8 x float> undef), !tbaa !12, !alias.scope !32, !noalias !30 - %163 = fmul <8 x float> %broadcast.splat68, %wide.masked.load66.1 - %164 = fdiv <8 x float> %159, %163, !fpmath !26 - %165 = bitcast float* %157 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %164, <8 x float>* %165, i32 4, <8 x i1> %148), !tbaa !12, !alias.scope !30, !llvm.access.group !21 - %166 = or <8 x i64> %broadcast.splat61, - %167 = trunc <8 x i64> %166 to <8 x i32> - %168 = icmp sgt <8 x i32> %broadcast.splat63, %167 - %169 = extractelement <8 x i64> %166, i32 0 - %170 = shl i64 %169, 32 - %171 = ashr exact i64 %170, 32 - %172 = getelementptr inbounds float, float* %0, i64 %171 - %173 = bitcast float* %172 to <8 x float>* - %wide.masked.load64.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %173, i32 4, <8 x i1> %168, <8 x float> undef), !tbaa !12, !alias.scope !27, !noalias !30 - %174 = extractelement <8 x i32> %167, i32 0 - %175 = add nsw i32 %mul.i.1, %174 - %176 = sext i32 %175 to i64 - %177 = getelementptr inbounds float, float* %2, i64 %176 - %178 = bitcast float* %177 to <8 x float>* - %wide.masked.load65.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %178, i32 4, <8 x i1> %168, <8 x float> undef), !tbaa !12, !alias.scope !30 - %179 = fsub <8 x float> %wide.masked.load65.2, %wide.masked.load64.2 - %180 = bitcast float* %177 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %179, <8 x float>* %180, i32 4, <8 x i1> %168), !tbaa !12, !alias.scope !30, !llvm.access.group !21 - %181 = getelementptr inbounds float, float* %1, i64 %171 - %182 = bitcast float* %181 to <8 x float>* - %wide.masked.load66.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %182, i32 4, <8 x i1> %168, <8 x float> undef), !tbaa !12, !alias.scope !32, !noalias !30 - %183 = fmul <8 x float> %broadcast.splat68, %wide.masked.load66.2 - %184 = fdiv <8 x float> %179, %183, !fpmath !26 - %185 = bitcast float* %177 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %184, <8 x float>* %185, i32 4, <8 x i1> %168), !tbaa !12, !alias.scope !30, !llvm.access.group !21 - %186 = or <8 x i64> %broadcast.splat61, - %187 = trunc <8 x i64> %186 to <8 x i32> - %188 = icmp sgt <8 x i32> %broadcast.splat63, %187 - %189 = extractelement <8 x i64> %186, i32 0 - %190 = shl i64 %189, 32 - %191 = ashr exact i64 %190, 32 - %192 = getelementptr inbounds float, float* %0, i64 %191 - %193 = bitcast float* %192 to <8 x float>* - %wide.masked.load64.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %193, i32 4, <8 x i1> %188, <8 x float> undef), !tbaa !12, !alias.scope !27, !noalias !30 - %194 = extractelement <8 x i32> %187, i32 0 - %195 = add nsw i32 %mul.i.1, %194 - %196 = sext i32 %195 to i64 - %197 = getelementptr inbounds float, float* %2, i64 %196 - %198 = bitcast float* %197 to <8 x float>* - %wide.masked.load65.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %198, i32 4, <8 x i1> %188, <8 x float> undef), !tbaa !12, !alias.scope !30 - %199 = fsub <8 x float> %wide.masked.load65.3, %wide.masked.load64.3 - %200 = bitcast float* %197 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %199, <8 x float>* %200, i32 4, <8 x i1> %188), !tbaa !12, !alias.scope !30, !llvm.access.group !21 - %201 = getelementptr inbounds float, float* %1, i64 %191 - %202 = bitcast float* %201 to <8 x float>* - %wide.masked.load66.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %202, i32 4, <8 x i1> %188, <8 x float> undef), !tbaa !12, !alias.scope !32, !noalias !30 - %203 = fmul <8 x float> %broadcast.splat68, %wide.masked.load66.3 - %204 = fdiv <8 x float> %199, %203, !fpmath !26 - %205 = bitcast float* %197 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %204, <8 x float>* %205, i32 4, <8 x i1> %188), !tbaa !12, !alias.scope !30, !llvm.access.group !21 - br label %pregion_for_end.i.1 - -pregion_for_entry.entry.i.us.1: ; preds = %if.end.r_exit.i.us.1.1, %pregion_for_entry.entry.i.us.1.preheader - %_local_id_x.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.us.1.preheader ], [ %825, %if.end.r_exit.i.us.1.1 ] - %add1.i.i.us.1 = add nuw nsw i64 %_local_id_x.0.us.1, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp4.i.us.1 = icmp slt i32 %conv.i.us.1, %4 - br i1 %cmp4.i.us.1, label %if.then.i.us.1, label %if.end.r_exit.i.us.1 - -if.then.i.us.1: ; preds = %pregion_for_entry.entry.i.us.1 - %sext.i.us.1 = shl i64 %add1.i.i.us.1, 32 - %idxprom.i.us.1 = ashr exact i64 %sext.i.us.1, 32 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1 - %206 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %add.i.us.1 = add nsw i32 %mul.i.1, %conv.i.us.1 - %idxprom6.i.us.1 = sext i32 %add.i.us.1 to i64 - %arrayidx7.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.1 - %207 = load float, float* %arrayidx7.i.us.1, align 4, !tbaa !12 - %sub.i.us.1 = fsub float %207, %206 - store float %sub.i.us.1, float* %arrayidx7.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.1 - %208 = load float, float* %arrayidx10.i.us.1, align 4, !tbaa !12 - %mul11.i.us.1 = fmul float %10, %208 - %div.i.us.1 = fdiv float %sub.i.us.1, %mul11.i.us.1, !fpmath !26 - store float %div.i.us.1, float* %arrayidx7.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.1 - -if.end.r_exit.i.us.1: ; preds = %if.then.i.us.1, %pregion_for_entry.entry.i.us.1 - %209 = or i64 %_local_id_x.0.us.1, 1 - %add1.i.i.us.1.1 = add nuw nsw i64 %209, %mul.i.i - %conv.i.us.1.1 = trunc i64 %add1.i.i.us.1.1 to i32 - %cmp4.i.us.1.1 = icmp slt i32 %conv.i.us.1.1, %4 - br i1 %cmp4.i.us.1.1, label %if.then.i.us.1.1, label %if.end.r_exit.i.us.1.1 - -pregion_for_end.i.1.loopexit: ; preds = %if.end.r_exit.i.us.1.1 - br label %pregion_for_end.i.1 - -pregion_for_end.i.1: ; preds = %pregion_for_end.i.1.loopexit, %vector.ph53, %pregion_for_end.i - %210 = trunc i64 %mul3.i.i to i32 - %conv2.i.2 = or i32 %210, 2 - %cmp.i.2 = icmp slt i32 %conv2.i.2, %5 - %mul.i.2 = mul nsw i32 %conv2.i.2, %4 - br i1 %cmp.i.2, label %vector.scevcheck76, label %pregion_for_end.i.2 - -vector.scevcheck76: ; preds = %pregion_for_end.i.1 - %211 = mul i32 %conv2.i.2, %4 - %212 = trunc i64 %7 to i32 - %213 = shl i32 %212, 5 - %214 = add i32 %211, %213 - %215 = icmp sgt i32 %214, 2147483616 - br i1 %215, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.memcheck98 - -pregion_for_entry.entry.i.us.2.preheader: ; preds = %vector.memcheck98, %vector.scevcheck76 - br label %pregion_for_entry.entry.i.us.2 - -vector.memcheck98: ; preds = %vector.scevcheck76 - %216 = trunc i64 %7 to i32 - %217 = shl i32 %216, 5 - %218 = sext i32 %217 to i64 - %scevgep78 = getelementptr float, float* %0, i64 %218 - %219 = add nsw i64 %218, 32 - %scevgep80 = getelementptr float, float* %0, i64 %219 - %220 = mul i32 %conv2.i.2, %4 - %221 = add i32 %220, %217 - %222 = sext i32 %221 to i64 - %scevgep82 = getelementptr float, float* %2, i64 %222 - %223 = add nsw i64 %222, 32 - %scevgep84 = getelementptr float, float* %2, i64 %223 - %scevgep86 = getelementptr float, float* %1, i64 %218 - %scevgep88 = getelementptr float, float* %1, i64 %219 - %bound090 = icmp ult float* %scevgep78, %scevgep84 - %bound191 = icmp ult float* %scevgep82, %scevgep80 - %found.conflict92 = and i1 %bound090, %bound191 - %bound093 = icmp ult float* %scevgep86, %scevgep84 - %bound194 = icmp ult float* %scevgep82, %scevgep88 - %found.conflict95 = and i1 %bound093, %bound194 - %conflict.rdx96 = or i1 %found.conflict92, %found.conflict95 - br i1 %conflict.rdx96, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.ph99 - -vector.ph99: ; preds = %vector.memcheck98 - %broadcast.splatinsert106 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat107 = shufflevector <8 x i64> %broadcast.splatinsert106, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert108 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat109 = shufflevector <8 x i32> %broadcast.splatinsert108, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert113 = insertelement <8 x float> undef, float %10, i32 0 - %broadcast.splat114 = shufflevector <8 x float> %broadcast.splatinsert113, <8 x float> undef, <8 x i32> zeroinitializer - %224 = or <8 x i64> %broadcast.splat107, - %225 = trunc <8 x i64> %224 to <8 x i32> - %226 = icmp sgt <8 x i32> %broadcast.splat109, %225 - %227 = extractelement <8 x i64> %224, i32 0 - %228 = shl i64 %227, 32 - %229 = ashr exact i64 %228, 32 - %230 = getelementptr inbounds float, float* %0, i64 %229 - %231 = bitcast float* %230 to <8 x float>* - %wide.masked.load110 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %231, i32 4, <8 x i1> %226, <8 x float> undef), !tbaa !12, !alias.scope !34, !noalias !37 - %232 = extractelement <8 x i32> %225, i32 0 - %233 = add nsw i32 %mul.i.2, %232 - %234 = sext i32 %233 to i64 - %235 = getelementptr inbounds float, float* %2, i64 %234 - %236 = bitcast float* %235 to <8 x float>* - %wide.masked.load111 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %236, i32 4, <8 x i1> %226, <8 x float> undef), !tbaa !12, !alias.scope !37 - %237 = fsub <8 x float> %wide.masked.load111, %wide.masked.load110 - %238 = bitcast float* %235 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %237, <8 x float>* %238, i32 4, <8 x i1> %226), !tbaa !12, !alias.scope !37, !llvm.access.group !21 - %239 = getelementptr inbounds float, float* %1, i64 %229 - %240 = bitcast float* %239 to <8 x float>* - %wide.masked.load112 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %240, i32 4, <8 x i1> %226, <8 x float> undef), !tbaa !12, !alias.scope !39, !noalias !37 - %241 = fmul <8 x float> %broadcast.splat114, %wide.masked.load112 - %242 = fdiv <8 x float> %237, %241, !fpmath !26 - %243 = bitcast float* %235 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %242, <8 x float>* %243, i32 4, <8 x i1> %226), !tbaa !12, !alias.scope !37, !llvm.access.group !21 - %244 = or <8 x i64> %broadcast.splat107, - %245 = trunc <8 x i64> %244 to <8 x i32> - %246 = icmp sgt <8 x i32> %broadcast.splat109, %245 - %247 = extractelement <8 x i64> %244, i32 0 - %248 = shl i64 %247, 32 - %249 = ashr exact i64 %248, 32 - %250 = getelementptr inbounds float, float* %0, i64 %249 - %251 = bitcast float* %250 to <8 x float>* - %wide.masked.load110.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %251, i32 4, <8 x i1> %246, <8 x float> undef), !tbaa !12, !alias.scope !34, !noalias !37 - %252 = extractelement <8 x i32> %245, i32 0 - %253 = add nsw i32 %mul.i.2, %252 - %254 = sext i32 %253 to i64 - %255 = getelementptr inbounds float, float* %2, i64 %254 - %256 = bitcast float* %255 to <8 x float>* - %wide.masked.load111.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %256, i32 4, <8 x i1> %246, <8 x float> undef), !tbaa !12, !alias.scope !37 - %257 = fsub <8 x float> %wide.masked.load111.1, %wide.masked.load110.1 - %258 = bitcast float* %255 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %257, <8 x float>* %258, i32 4, <8 x i1> %246), !tbaa !12, !alias.scope !37, !llvm.access.group !21 - %259 = getelementptr inbounds float, float* %1, i64 %249 - %260 = bitcast float* %259 to <8 x float>* - %wide.masked.load112.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %260, i32 4, <8 x i1> %246, <8 x float> undef), !tbaa !12, !alias.scope !39, !noalias !37 - %261 = fmul <8 x float> %broadcast.splat114, %wide.masked.load112.1 - %262 = fdiv <8 x float> %257, %261, !fpmath !26 - %263 = bitcast float* %255 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %262, <8 x float>* %263, i32 4, <8 x i1> %246), !tbaa !12, !alias.scope !37, !llvm.access.group !21 - %264 = or <8 x i64> %broadcast.splat107, - %265 = trunc <8 x i64> %264 to <8 x i32> - %266 = icmp sgt <8 x i32> %broadcast.splat109, %265 - %267 = extractelement <8 x i64> %264, i32 0 - %268 = shl i64 %267, 32 - %269 = ashr exact i64 %268, 32 - %270 = getelementptr inbounds float, float* %0, i64 %269 - %271 = bitcast float* %270 to <8 x float>* - %wide.masked.load110.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %271, i32 4, <8 x i1> %266, <8 x float> undef), !tbaa !12, !alias.scope !34, !noalias !37 - %272 = extractelement <8 x i32> %265, i32 0 - %273 = add nsw i32 %mul.i.2, %272 - %274 = sext i32 %273 to i64 - %275 = getelementptr inbounds float, float* %2, i64 %274 - %276 = bitcast float* %275 to <8 x float>* - %wide.masked.load111.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %276, i32 4, <8 x i1> %266, <8 x float> undef), !tbaa !12, !alias.scope !37 - %277 = fsub <8 x float> %wide.masked.load111.2, %wide.masked.load110.2 - %278 = bitcast float* %275 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %277, <8 x float>* %278, i32 4, <8 x i1> %266), !tbaa !12, !alias.scope !37, !llvm.access.group !21 - %279 = getelementptr inbounds float, float* %1, i64 %269 - %280 = bitcast float* %279 to <8 x float>* - %wide.masked.load112.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %280, i32 4, <8 x i1> %266, <8 x float> undef), !tbaa !12, !alias.scope !39, !noalias !37 - %281 = fmul <8 x float> %broadcast.splat114, %wide.masked.load112.2 - %282 = fdiv <8 x float> %277, %281, !fpmath !26 - %283 = bitcast float* %275 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %282, <8 x float>* %283, i32 4, <8 x i1> %266), !tbaa !12, !alias.scope !37, !llvm.access.group !21 - %284 = or <8 x i64> %broadcast.splat107, - %285 = trunc <8 x i64> %284 to <8 x i32> - %286 = icmp sgt <8 x i32> %broadcast.splat109, %285 - %287 = extractelement <8 x i64> %284, i32 0 - %288 = shl i64 %287, 32 - %289 = ashr exact i64 %288, 32 - %290 = getelementptr inbounds float, float* %0, i64 %289 - %291 = bitcast float* %290 to <8 x float>* - %wide.masked.load110.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %291, i32 4, <8 x i1> %286, <8 x float> undef), !tbaa !12, !alias.scope !34, !noalias !37 - %292 = extractelement <8 x i32> %285, i32 0 - %293 = add nsw i32 %mul.i.2, %292 - %294 = sext i32 %293 to i64 - %295 = getelementptr inbounds float, float* %2, i64 %294 - %296 = bitcast float* %295 to <8 x float>* - %wide.masked.load111.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %296, i32 4, <8 x i1> %286, <8 x float> undef), !tbaa !12, !alias.scope !37 - %297 = fsub <8 x float> %wide.masked.load111.3, %wide.masked.load110.3 - %298 = bitcast float* %295 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %297, <8 x float>* %298, i32 4, <8 x i1> %286), !tbaa !12, !alias.scope !37, !llvm.access.group !21 - %299 = getelementptr inbounds float, float* %1, i64 %289 - %300 = bitcast float* %299 to <8 x float>* - %wide.masked.load112.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %300, i32 4, <8 x i1> %286, <8 x float> undef), !tbaa !12, !alias.scope !39, !noalias !37 - %301 = fmul <8 x float> %broadcast.splat114, %wide.masked.load112.3 - %302 = fdiv <8 x float> %297, %301, !fpmath !26 - %303 = bitcast float* %295 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %302, <8 x float>* %303, i32 4, <8 x i1> %286), !tbaa !12, !alias.scope !37, !llvm.access.group !21 - br label %pregion_for_end.i.2 - -pregion_for_entry.entry.i.us.2: ; preds = %if.end.r_exit.i.us.2.1, %pregion_for_entry.entry.i.us.2.preheader - %_local_id_x.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.us.2.preheader ], [ %821, %if.end.r_exit.i.us.2.1 ] - %add1.i.i.us.2 = add nuw nsw i64 %_local_id_x.0.us.2, %mul.i.i - %conv.i.us.2 = trunc i64 %add1.i.i.us.2 to i32 - %cmp4.i.us.2 = icmp slt i32 %conv.i.us.2, %4 - br i1 %cmp4.i.us.2, label %if.then.i.us.2, label %if.end.r_exit.i.us.2 - -if.then.i.us.2: ; preds = %pregion_for_entry.entry.i.us.2 - %sext.i.us.2 = shl i64 %add1.i.i.us.2, 32 - %idxprom.i.us.2 = ashr exact i64 %sext.i.us.2, 32 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.2 - %304 = load float, float* %arrayidx.i.us.2, align 4, !tbaa !12 - %add.i.us.2 = add nsw i32 %mul.i.2, %conv.i.us.2 - %idxprom6.i.us.2 = sext i32 %add.i.us.2 to i64 - %arrayidx7.i.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.2 - %305 = load float, float* %arrayidx7.i.us.2, align 4, !tbaa !12 - %sub.i.us.2 = fsub float %305, %304 - store float %sub.i.us.2, float* %arrayidx7.i.us.2, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.2 - %306 = load float, float* %arrayidx10.i.us.2, align 4, !tbaa !12 - %mul11.i.us.2 = fmul float %10, %306 - %div.i.us.2 = fdiv float %sub.i.us.2, %mul11.i.us.2, !fpmath !26 - store float %div.i.us.2, float* %arrayidx7.i.us.2, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.2 - -if.end.r_exit.i.us.2: ; preds = %if.then.i.us.2, %pregion_for_entry.entry.i.us.2 - %307 = or i64 %_local_id_x.0.us.2, 1 - %add1.i.i.us.2.1 = add nuw nsw i64 %307, %mul.i.i - %conv.i.us.2.1 = trunc i64 %add1.i.i.us.2.1 to i32 - %cmp4.i.us.2.1 = icmp slt i32 %conv.i.us.2.1, %4 - br i1 %cmp4.i.us.2.1, label %if.then.i.us.2.1, label %if.end.r_exit.i.us.2.1 - -pregion_for_end.i.2.loopexit: ; preds = %if.end.r_exit.i.us.2.1 - br label %pregion_for_end.i.2 - -pregion_for_end.i.2: ; preds = %pregion_for_end.i.2.loopexit, %vector.ph99, %pregion_for_end.i.1 - %308 = trunc i64 %mul3.i.i to i32 - %conv2.i.3 = or i32 %308, 3 - %cmp.i.3 = icmp slt i32 %conv2.i.3, %5 - %mul.i.3 = mul nsw i32 %conv2.i.3, %4 - br i1 %cmp.i.3, label %vector.scevcheck122, label %pregion_for_end.i.3 - -vector.scevcheck122: ; preds = %pregion_for_end.i.2 - %309 = mul i32 %conv2.i.3, %4 - %310 = trunc i64 %7 to i32 - %311 = shl i32 %310, 5 - %312 = add i32 %309, %311 - %313 = icmp sgt i32 %312, 2147483616 - br i1 %313, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.memcheck144 - -pregion_for_entry.entry.i.us.3.preheader: ; preds = %vector.memcheck144, %vector.scevcheck122 - br label %pregion_for_entry.entry.i.us.3 - -vector.memcheck144: ; preds = %vector.scevcheck122 - %314 = trunc i64 %7 to i32 - %315 = shl i32 %314, 5 - %316 = sext i32 %315 to i64 - %scevgep124 = getelementptr float, float* %0, i64 %316 - %317 = add nsw i64 %316, 32 - %scevgep126 = getelementptr float, float* %0, i64 %317 - %318 = mul i32 %conv2.i.3, %4 - %319 = add i32 %318, %315 - %320 = sext i32 %319 to i64 - %scevgep128 = getelementptr float, float* %2, i64 %320 - %321 = add nsw i64 %320, 32 - %scevgep130 = getelementptr float, float* %2, i64 %321 - %scevgep132 = getelementptr float, float* %1, i64 %316 - %scevgep134 = getelementptr float, float* %1, i64 %317 - %bound0136 = icmp ult float* %scevgep124, %scevgep130 - %bound1137 = icmp ult float* %scevgep128, %scevgep126 - %found.conflict138 = and i1 %bound0136, %bound1137 - %bound0139 = icmp ult float* %scevgep132, %scevgep130 - %bound1140 = icmp ult float* %scevgep128, %scevgep134 - %found.conflict141 = and i1 %bound0139, %bound1140 - %conflict.rdx142 = or i1 %found.conflict138, %found.conflict141 - br i1 %conflict.rdx142, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.ph145 - -vector.ph145: ; preds = %vector.memcheck144 - %broadcast.splatinsert152 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat153 = shufflevector <8 x i64> %broadcast.splatinsert152, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert154 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat155 = shufflevector <8 x i32> %broadcast.splatinsert154, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert159 = insertelement <8 x float> undef, float %10, i32 0 - %broadcast.splat160 = shufflevector <8 x float> %broadcast.splatinsert159, <8 x float> undef, <8 x i32> zeroinitializer - %322 = or <8 x i64> %broadcast.splat153, - %323 = trunc <8 x i64> %322 to <8 x i32> - %324 = icmp sgt <8 x i32> %broadcast.splat155, %323 - %325 = extractelement <8 x i64> %322, i32 0 - %326 = shl i64 %325, 32 - %327 = ashr exact i64 %326, 32 - %328 = getelementptr inbounds float, float* %0, i64 %327 - %329 = bitcast float* %328 to <8 x float>* - %wide.masked.load156 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %329, i32 4, <8 x i1> %324, <8 x float> undef), !tbaa !12, !alias.scope !41, !noalias !44 - %330 = extractelement <8 x i32> %323, i32 0 - %331 = add nsw i32 %mul.i.3, %330 - %332 = sext i32 %331 to i64 - %333 = getelementptr inbounds float, float* %2, i64 %332 - %334 = bitcast float* %333 to <8 x float>* - %wide.masked.load157 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %334, i32 4, <8 x i1> %324, <8 x float> undef), !tbaa !12, !alias.scope !44 - %335 = fsub <8 x float> %wide.masked.load157, %wide.masked.load156 - %336 = bitcast float* %333 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %335, <8 x float>* %336, i32 4, <8 x i1> %324), !tbaa !12, !alias.scope !44, !llvm.access.group !21 - %337 = getelementptr inbounds float, float* %1, i64 %327 - %338 = bitcast float* %337 to <8 x float>* - %wide.masked.load158 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %338, i32 4, <8 x i1> %324, <8 x float> undef), !tbaa !12, !alias.scope !46, !noalias !44 - %339 = fmul <8 x float> %broadcast.splat160, %wide.masked.load158 - %340 = fdiv <8 x float> %335, %339, !fpmath !26 - %341 = bitcast float* %333 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %340, <8 x float>* %341, i32 4, <8 x i1> %324), !tbaa !12, !alias.scope !44, !llvm.access.group !21 - %342 = or <8 x i64> %broadcast.splat153, - %343 = trunc <8 x i64> %342 to <8 x i32> - %344 = icmp sgt <8 x i32> %broadcast.splat155, %343 - %345 = extractelement <8 x i64> %342, i32 0 - %346 = shl i64 %345, 32 - %347 = ashr exact i64 %346, 32 - %348 = getelementptr inbounds float, float* %0, i64 %347 - %349 = bitcast float* %348 to <8 x float>* - %wide.masked.load156.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %349, i32 4, <8 x i1> %344, <8 x float> undef), !tbaa !12, !alias.scope !41, !noalias !44 - %350 = extractelement <8 x i32> %343, i32 0 - %351 = add nsw i32 %mul.i.3, %350 - %352 = sext i32 %351 to i64 - %353 = getelementptr inbounds float, float* %2, i64 %352 - %354 = bitcast float* %353 to <8 x float>* - %wide.masked.load157.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %354, i32 4, <8 x i1> %344, <8 x float> undef), !tbaa !12, !alias.scope !44 - %355 = fsub <8 x float> %wide.masked.load157.1, %wide.masked.load156.1 - %356 = bitcast float* %353 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %355, <8 x float>* %356, i32 4, <8 x i1> %344), !tbaa !12, !alias.scope !44, !llvm.access.group !21 - %357 = getelementptr inbounds float, float* %1, i64 %347 - %358 = bitcast float* %357 to <8 x float>* - %wide.masked.load158.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %358, i32 4, <8 x i1> %344, <8 x float> undef), !tbaa !12, !alias.scope !46, !noalias !44 - %359 = fmul <8 x float> %broadcast.splat160, %wide.masked.load158.1 - %360 = fdiv <8 x float> %355, %359, !fpmath !26 - %361 = bitcast float* %353 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %360, <8 x float>* %361, i32 4, <8 x i1> %344), !tbaa !12, !alias.scope !44, !llvm.access.group !21 - %362 = or <8 x i64> %broadcast.splat153, - %363 = trunc <8 x i64> %362 to <8 x i32> - %364 = icmp sgt <8 x i32> %broadcast.splat155, %363 - %365 = extractelement <8 x i64> %362, i32 0 - %366 = shl i64 %365, 32 - %367 = ashr exact i64 %366, 32 - %368 = getelementptr inbounds float, float* %0, i64 %367 - %369 = bitcast float* %368 to <8 x float>* - %wide.masked.load156.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %369, i32 4, <8 x i1> %364, <8 x float> undef), !tbaa !12, !alias.scope !41, !noalias !44 - %370 = extractelement <8 x i32> %363, i32 0 - %371 = add nsw i32 %mul.i.3, %370 - %372 = sext i32 %371 to i64 - %373 = getelementptr inbounds float, float* %2, i64 %372 - %374 = bitcast float* %373 to <8 x float>* - %wide.masked.load157.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %374, i32 4, <8 x i1> %364, <8 x float> undef), !tbaa !12, !alias.scope !44 - %375 = fsub <8 x float> %wide.masked.load157.2, %wide.masked.load156.2 - %376 = bitcast float* %373 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %375, <8 x float>* %376, i32 4, <8 x i1> %364), !tbaa !12, !alias.scope !44, !llvm.access.group !21 - %377 = getelementptr inbounds float, float* %1, i64 %367 - %378 = bitcast float* %377 to <8 x float>* - %wide.masked.load158.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %378, i32 4, <8 x i1> %364, <8 x float> undef), !tbaa !12, !alias.scope !46, !noalias !44 - %379 = fmul <8 x float> %broadcast.splat160, %wide.masked.load158.2 - %380 = fdiv <8 x float> %375, %379, !fpmath !26 - %381 = bitcast float* %373 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %380, <8 x float>* %381, i32 4, <8 x i1> %364), !tbaa !12, !alias.scope !44, !llvm.access.group !21 - %382 = or <8 x i64> %broadcast.splat153, - %383 = trunc <8 x i64> %382 to <8 x i32> - %384 = icmp sgt <8 x i32> %broadcast.splat155, %383 - %385 = extractelement <8 x i64> %382, i32 0 - %386 = shl i64 %385, 32 - %387 = ashr exact i64 %386, 32 - %388 = getelementptr inbounds float, float* %0, i64 %387 - %389 = bitcast float* %388 to <8 x float>* - %wide.masked.load156.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %389, i32 4, <8 x i1> %384, <8 x float> undef), !tbaa !12, !alias.scope !41, !noalias !44 - %390 = extractelement <8 x i32> %383, i32 0 - %391 = add nsw i32 %mul.i.3, %390 - %392 = sext i32 %391 to i64 - %393 = getelementptr inbounds float, float* %2, i64 %392 - %394 = bitcast float* %393 to <8 x float>* - %wide.masked.load157.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %394, i32 4, <8 x i1> %384, <8 x float> undef), !tbaa !12, !alias.scope !44 - %395 = fsub <8 x float> %wide.masked.load157.3, %wide.masked.load156.3 - %396 = bitcast float* %393 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %395, <8 x float>* %396, i32 4, <8 x i1> %384), !tbaa !12, !alias.scope !44, !llvm.access.group !21 - %397 = getelementptr inbounds float, float* %1, i64 %387 - %398 = bitcast float* %397 to <8 x float>* - %wide.masked.load158.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %398, i32 4, <8 x i1> %384, <8 x float> undef), !tbaa !12, !alias.scope !46, !noalias !44 - %399 = fmul <8 x float> %broadcast.splat160, %wide.masked.load158.3 - %400 = fdiv <8 x float> %395, %399, !fpmath !26 - %401 = bitcast float* %393 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %400, <8 x float>* %401, i32 4, <8 x i1> %384), !tbaa !12, !alias.scope !44, !llvm.access.group !21 - br label %pregion_for_end.i.3 - -pregion_for_entry.entry.i.us.3: ; preds = %if.end.r_exit.i.us.3.1, %pregion_for_entry.entry.i.us.3.preheader - %_local_id_x.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.us.3.preheader ], [ %817, %if.end.r_exit.i.us.3.1 ] - %add1.i.i.us.3 = add nuw nsw i64 %_local_id_x.0.us.3, %mul.i.i - %conv.i.us.3 = trunc i64 %add1.i.i.us.3 to i32 - %cmp4.i.us.3 = icmp slt i32 %conv.i.us.3, %4 - br i1 %cmp4.i.us.3, label %if.then.i.us.3, label %if.end.r_exit.i.us.3 - -if.then.i.us.3: ; preds = %pregion_for_entry.entry.i.us.3 - %sext.i.us.3 = shl i64 %add1.i.i.us.3, 32 - %idxprom.i.us.3 = ashr exact i64 %sext.i.us.3, 32 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.3 - %402 = load float, float* %arrayidx.i.us.3, align 4, !tbaa !12 - %add.i.us.3 = add nsw i32 %mul.i.3, %conv.i.us.3 - %idxprom6.i.us.3 = sext i32 %add.i.us.3 to i64 - %arrayidx7.i.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.3 - %403 = load float, float* %arrayidx7.i.us.3, align 4, !tbaa !12 - %sub.i.us.3 = fsub float %403, %402 - store float %sub.i.us.3, float* %arrayidx7.i.us.3, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.3 - %404 = load float, float* %arrayidx10.i.us.3, align 4, !tbaa !12 - %mul11.i.us.3 = fmul float %10, %404 - %div.i.us.3 = fdiv float %sub.i.us.3, %mul11.i.us.3, !fpmath !26 - store float %div.i.us.3, float* %arrayidx7.i.us.3, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.3 - -if.end.r_exit.i.us.3: ; preds = %if.then.i.us.3, %pregion_for_entry.entry.i.us.3 - %405 = or i64 %_local_id_x.0.us.3, 1 - %add1.i.i.us.3.1 = add nuw nsw i64 %405, %mul.i.i - %conv.i.us.3.1 = trunc i64 %add1.i.i.us.3.1 to i32 - %cmp4.i.us.3.1 = icmp slt i32 %conv.i.us.3.1, %4 - br i1 %cmp4.i.us.3.1, label %if.then.i.us.3.1, label %if.end.r_exit.i.us.3.1 - -pregion_for_end.i.3.loopexit: ; preds = %if.end.r_exit.i.us.3.1 - br label %pregion_for_end.i.3 - -pregion_for_end.i.3: ; preds = %pregion_for_end.i.3.loopexit, %vector.ph145, %pregion_for_end.i.2 - %406 = trunc i64 %mul3.i.i to i32 - %conv2.i.4 = or i32 %406, 4 - %cmp.i.4 = icmp slt i32 %conv2.i.4, %5 - %mul.i.4 = mul nsw i32 %conv2.i.4, %4 - br i1 %cmp.i.4, label %vector.scevcheck168, label %pregion_for_end.i.4 - -vector.scevcheck168: ; preds = %pregion_for_end.i.3 - %407 = mul i32 %conv2.i.4, %4 - %408 = trunc i64 %7 to i32 - %409 = shl i32 %408, 5 - %410 = add i32 %407, %409 - %411 = icmp sgt i32 %410, 2147483616 - br i1 %411, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.memcheck190 - -pregion_for_entry.entry.i.us.4.preheader: ; preds = %vector.memcheck190, %vector.scevcheck168 - br label %pregion_for_entry.entry.i.us.4 - -vector.memcheck190: ; preds = %vector.scevcheck168 - %412 = trunc i64 %7 to i32 - %413 = shl i32 %412, 5 - %414 = sext i32 %413 to i64 - %scevgep170 = getelementptr float, float* %0, i64 %414 - %415 = add nsw i64 %414, 32 - %scevgep172 = getelementptr float, float* %0, i64 %415 - %416 = mul i32 %conv2.i.4, %4 - %417 = add i32 %416, %413 - %418 = sext i32 %417 to i64 - %scevgep174 = getelementptr float, float* %2, i64 %418 - %419 = add nsw i64 %418, 32 - %scevgep176 = getelementptr float, float* %2, i64 %419 - %scevgep178 = getelementptr float, float* %1, i64 %414 - %scevgep180 = getelementptr float, float* %1, i64 %415 - %bound0182 = icmp ult float* %scevgep170, %scevgep176 - %bound1183 = icmp ult float* %scevgep174, %scevgep172 - %found.conflict184 = and i1 %bound0182, %bound1183 - %bound0185 = icmp ult float* %scevgep178, %scevgep176 - %bound1186 = icmp ult float* %scevgep174, %scevgep180 - %found.conflict187 = and i1 %bound0185, %bound1186 - %conflict.rdx188 = or i1 %found.conflict184, %found.conflict187 - br i1 %conflict.rdx188, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.ph191 - -vector.ph191: ; preds = %vector.memcheck190 - %broadcast.splatinsert198 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat199 = shufflevector <8 x i64> %broadcast.splatinsert198, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert200 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat201 = shufflevector <8 x i32> %broadcast.splatinsert200, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert205 = insertelement <8 x float> undef, float %10, i32 0 - %broadcast.splat206 = shufflevector <8 x float> %broadcast.splatinsert205, <8 x float> undef, <8 x i32> zeroinitializer - %420 = or <8 x i64> %broadcast.splat199, - %421 = trunc <8 x i64> %420 to <8 x i32> - %422 = icmp sgt <8 x i32> %broadcast.splat201, %421 - %423 = extractelement <8 x i64> %420, i32 0 - %424 = shl i64 %423, 32 - %425 = ashr exact i64 %424, 32 - %426 = getelementptr inbounds float, float* %0, i64 %425 - %427 = bitcast float* %426 to <8 x float>* - %wide.masked.load202 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %427, i32 4, <8 x i1> %422, <8 x float> undef), !tbaa !12, !alias.scope !48, !noalias !51 - %428 = extractelement <8 x i32> %421, i32 0 - %429 = add nsw i32 %mul.i.4, %428 - %430 = sext i32 %429 to i64 - %431 = getelementptr inbounds float, float* %2, i64 %430 - %432 = bitcast float* %431 to <8 x float>* - %wide.masked.load203 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %432, i32 4, <8 x i1> %422, <8 x float> undef), !tbaa !12, !alias.scope !51 - %433 = fsub <8 x float> %wide.masked.load203, %wide.masked.load202 - %434 = bitcast float* %431 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %433, <8 x float>* %434, i32 4, <8 x i1> %422), !tbaa !12, !alias.scope !51, !llvm.access.group !21 - %435 = getelementptr inbounds float, float* %1, i64 %425 - %436 = bitcast float* %435 to <8 x float>* - %wide.masked.load204 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %436, i32 4, <8 x i1> %422, <8 x float> undef), !tbaa !12, !alias.scope !53, !noalias !51 - %437 = fmul <8 x float> %broadcast.splat206, %wide.masked.load204 - %438 = fdiv <8 x float> %433, %437, !fpmath !26 - %439 = bitcast float* %431 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %438, <8 x float>* %439, i32 4, <8 x i1> %422), !tbaa !12, !alias.scope !51, !llvm.access.group !21 - %440 = or <8 x i64> %broadcast.splat199, - %441 = trunc <8 x i64> %440 to <8 x i32> - %442 = icmp sgt <8 x i32> %broadcast.splat201, %441 - %443 = extractelement <8 x i64> %440, i32 0 - %444 = shl i64 %443, 32 - %445 = ashr exact i64 %444, 32 - %446 = getelementptr inbounds float, float* %0, i64 %445 - %447 = bitcast float* %446 to <8 x float>* - %wide.masked.load202.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %447, i32 4, <8 x i1> %442, <8 x float> undef), !tbaa !12, !alias.scope !48, !noalias !51 - %448 = extractelement <8 x i32> %441, i32 0 - %449 = add nsw i32 %mul.i.4, %448 - %450 = sext i32 %449 to i64 - %451 = getelementptr inbounds float, float* %2, i64 %450 - %452 = bitcast float* %451 to <8 x float>* - %wide.masked.load203.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %452, i32 4, <8 x i1> %442, <8 x float> undef), !tbaa !12, !alias.scope !51 - %453 = fsub <8 x float> %wide.masked.load203.1, %wide.masked.load202.1 - %454 = bitcast float* %451 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %453, <8 x float>* %454, i32 4, <8 x i1> %442), !tbaa !12, !alias.scope !51, !llvm.access.group !21 - %455 = getelementptr inbounds float, float* %1, i64 %445 - %456 = bitcast float* %455 to <8 x float>* - %wide.masked.load204.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %456, i32 4, <8 x i1> %442, <8 x float> undef), !tbaa !12, !alias.scope !53, !noalias !51 - %457 = fmul <8 x float> %broadcast.splat206, %wide.masked.load204.1 - %458 = fdiv <8 x float> %453, %457, !fpmath !26 - %459 = bitcast float* %451 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %458, <8 x float>* %459, i32 4, <8 x i1> %442), !tbaa !12, !alias.scope !51, !llvm.access.group !21 - %460 = or <8 x i64> %broadcast.splat199, - %461 = trunc <8 x i64> %460 to <8 x i32> - %462 = icmp sgt <8 x i32> %broadcast.splat201, %461 - %463 = extractelement <8 x i64> %460, i32 0 - %464 = shl i64 %463, 32 - %465 = ashr exact i64 %464, 32 - %466 = getelementptr inbounds float, float* %0, i64 %465 - %467 = bitcast float* %466 to <8 x float>* - %wide.masked.load202.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %467, i32 4, <8 x i1> %462, <8 x float> undef), !tbaa !12, !alias.scope !48, !noalias !51 - %468 = extractelement <8 x i32> %461, i32 0 - %469 = add nsw i32 %mul.i.4, %468 - %470 = sext i32 %469 to i64 - %471 = getelementptr inbounds float, float* %2, i64 %470 - %472 = bitcast float* %471 to <8 x float>* - %wide.masked.load203.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %472, i32 4, <8 x i1> %462, <8 x float> undef), !tbaa !12, !alias.scope !51 - %473 = fsub <8 x float> %wide.masked.load203.2, %wide.masked.load202.2 - %474 = bitcast float* %471 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %473, <8 x float>* %474, i32 4, <8 x i1> %462), !tbaa !12, !alias.scope !51, !llvm.access.group !21 - %475 = getelementptr inbounds float, float* %1, i64 %465 - %476 = bitcast float* %475 to <8 x float>* - %wide.masked.load204.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %476, i32 4, <8 x i1> %462, <8 x float> undef), !tbaa !12, !alias.scope !53, !noalias !51 - %477 = fmul <8 x float> %broadcast.splat206, %wide.masked.load204.2 - %478 = fdiv <8 x float> %473, %477, !fpmath !26 - %479 = bitcast float* %471 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %478, <8 x float>* %479, i32 4, <8 x i1> %462), !tbaa !12, !alias.scope !51, !llvm.access.group !21 - %480 = or <8 x i64> %broadcast.splat199, - %481 = trunc <8 x i64> %480 to <8 x i32> - %482 = icmp sgt <8 x i32> %broadcast.splat201, %481 - %483 = extractelement <8 x i64> %480, i32 0 - %484 = shl i64 %483, 32 - %485 = ashr exact i64 %484, 32 - %486 = getelementptr inbounds float, float* %0, i64 %485 - %487 = bitcast float* %486 to <8 x float>* - %wide.masked.load202.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %487, i32 4, <8 x i1> %482, <8 x float> undef), !tbaa !12, !alias.scope !48, !noalias !51 - %488 = extractelement <8 x i32> %481, i32 0 - %489 = add nsw i32 %mul.i.4, %488 - %490 = sext i32 %489 to i64 - %491 = getelementptr inbounds float, float* %2, i64 %490 - %492 = bitcast float* %491 to <8 x float>* - %wide.masked.load203.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %492, i32 4, <8 x i1> %482, <8 x float> undef), !tbaa !12, !alias.scope !51 - %493 = fsub <8 x float> %wide.masked.load203.3, %wide.masked.load202.3 - %494 = bitcast float* %491 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %493, <8 x float>* %494, i32 4, <8 x i1> %482), !tbaa !12, !alias.scope !51, !llvm.access.group !21 - %495 = getelementptr inbounds float, float* %1, i64 %485 - %496 = bitcast float* %495 to <8 x float>* - %wide.masked.load204.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %496, i32 4, <8 x i1> %482, <8 x float> undef), !tbaa !12, !alias.scope !53, !noalias !51 - %497 = fmul <8 x float> %broadcast.splat206, %wide.masked.load204.3 - %498 = fdiv <8 x float> %493, %497, !fpmath !26 - %499 = bitcast float* %491 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %498, <8 x float>* %499, i32 4, <8 x i1> %482), !tbaa !12, !alias.scope !51, !llvm.access.group !21 - br label %pregion_for_end.i.4 - -pregion_for_entry.entry.i.us.4: ; preds = %if.end.r_exit.i.us.4.1, %pregion_for_entry.entry.i.us.4.preheader - %_local_id_x.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.us.4.preheader ], [ %813, %if.end.r_exit.i.us.4.1 ] - %add1.i.i.us.4 = add nuw nsw i64 %_local_id_x.0.us.4, %mul.i.i - %conv.i.us.4 = trunc i64 %add1.i.i.us.4 to i32 - %cmp4.i.us.4 = icmp slt i32 %conv.i.us.4, %4 - br i1 %cmp4.i.us.4, label %if.then.i.us.4, label %if.end.r_exit.i.us.4 - -if.then.i.us.4: ; preds = %pregion_for_entry.entry.i.us.4 - %sext.i.us.4 = shl i64 %add1.i.i.us.4, 32 - %idxprom.i.us.4 = ashr exact i64 %sext.i.us.4, 32 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.4 - %500 = load float, float* %arrayidx.i.us.4, align 4, !tbaa !12 - %add.i.us.4 = add nsw i32 %mul.i.4, %conv.i.us.4 - %idxprom6.i.us.4 = sext i32 %add.i.us.4 to i64 - %arrayidx7.i.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.4 - %501 = load float, float* %arrayidx7.i.us.4, align 4, !tbaa !12 - %sub.i.us.4 = fsub float %501, %500 - store float %sub.i.us.4, float* %arrayidx7.i.us.4, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.4 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.4 - %502 = load float, float* %arrayidx10.i.us.4, align 4, !tbaa !12 - %mul11.i.us.4 = fmul float %10, %502 - %div.i.us.4 = fdiv float %sub.i.us.4, %mul11.i.us.4, !fpmath !26 - store float %div.i.us.4, float* %arrayidx7.i.us.4, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.4 - -if.end.r_exit.i.us.4: ; preds = %if.then.i.us.4, %pregion_for_entry.entry.i.us.4 - %503 = or i64 %_local_id_x.0.us.4, 1 - %add1.i.i.us.4.1 = add nuw nsw i64 %503, %mul.i.i - %conv.i.us.4.1 = trunc i64 %add1.i.i.us.4.1 to i32 - %cmp4.i.us.4.1 = icmp slt i32 %conv.i.us.4.1, %4 - br i1 %cmp4.i.us.4.1, label %if.then.i.us.4.1, label %if.end.r_exit.i.us.4.1 - -pregion_for_end.i.4.loopexit: ; preds = %if.end.r_exit.i.us.4.1 - br label %pregion_for_end.i.4 - -pregion_for_end.i.4: ; preds = %pregion_for_end.i.4.loopexit, %vector.ph191, %pregion_for_end.i.3 - %504 = trunc i64 %mul3.i.i to i32 - %conv2.i.5 = or i32 %504, 5 - %cmp.i.5 = icmp slt i32 %conv2.i.5, %5 - %mul.i.5 = mul nsw i32 %conv2.i.5, %4 - br i1 %cmp.i.5, label %vector.scevcheck214, label %pregion_for_end.i.5 - -vector.scevcheck214: ; preds = %pregion_for_end.i.4 - %505 = mul i32 %conv2.i.5, %4 - %506 = trunc i64 %7 to i32 - %507 = shl i32 %506, 5 - %508 = add i32 %505, %507 - %509 = icmp sgt i32 %508, 2147483616 - br i1 %509, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.memcheck236 - -pregion_for_entry.entry.i.us.5.preheader: ; preds = %vector.memcheck236, %vector.scevcheck214 - br label %pregion_for_entry.entry.i.us.5 - -vector.memcheck236: ; preds = %vector.scevcheck214 - %510 = trunc i64 %7 to i32 - %511 = shl i32 %510, 5 - %512 = sext i32 %511 to i64 - %scevgep216 = getelementptr float, float* %0, i64 %512 - %513 = add nsw i64 %512, 32 - %scevgep218 = getelementptr float, float* %0, i64 %513 - %514 = mul i32 %conv2.i.5, %4 - %515 = add i32 %514, %511 - %516 = sext i32 %515 to i64 - %scevgep220 = getelementptr float, float* %2, i64 %516 - %517 = add nsw i64 %516, 32 - %scevgep222 = getelementptr float, float* %2, i64 %517 - %scevgep224 = getelementptr float, float* %1, i64 %512 - %scevgep226 = getelementptr float, float* %1, i64 %513 - %bound0228 = icmp ult float* %scevgep216, %scevgep222 - %bound1229 = icmp ult float* %scevgep220, %scevgep218 - %found.conflict230 = and i1 %bound0228, %bound1229 - %bound0231 = icmp ult float* %scevgep224, %scevgep222 - %bound1232 = icmp ult float* %scevgep220, %scevgep226 - %found.conflict233 = and i1 %bound0231, %bound1232 - %conflict.rdx234 = or i1 %found.conflict230, %found.conflict233 - br i1 %conflict.rdx234, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.ph237 - -vector.ph237: ; preds = %vector.memcheck236 - %broadcast.splatinsert244 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat245 = shufflevector <8 x i64> %broadcast.splatinsert244, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert246 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat247 = shufflevector <8 x i32> %broadcast.splatinsert246, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert251 = insertelement <8 x float> undef, float %10, i32 0 - %broadcast.splat252 = shufflevector <8 x float> %broadcast.splatinsert251, <8 x float> undef, <8 x i32> zeroinitializer - %518 = or <8 x i64> %broadcast.splat245, - %519 = trunc <8 x i64> %518 to <8 x i32> - %520 = icmp sgt <8 x i32> %broadcast.splat247, %519 - %521 = extractelement <8 x i64> %518, i32 0 - %522 = shl i64 %521, 32 - %523 = ashr exact i64 %522, 32 - %524 = getelementptr inbounds float, float* %0, i64 %523 - %525 = bitcast float* %524 to <8 x float>* - %wide.masked.load248 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %525, i32 4, <8 x i1> %520, <8 x float> undef), !tbaa !12, !alias.scope !55, !noalias !58 - %526 = extractelement <8 x i32> %519, i32 0 - %527 = add nsw i32 %mul.i.5, %526 - %528 = sext i32 %527 to i64 - %529 = getelementptr inbounds float, float* %2, i64 %528 - %530 = bitcast float* %529 to <8 x float>* - %wide.masked.load249 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %530, i32 4, <8 x i1> %520, <8 x float> undef), !tbaa !12, !alias.scope !58 - %531 = fsub <8 x float> %wide.masked.load249, %wide.masked.load248 - %532 = bitcast float* %529 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %531, <8 x float>* %532, i32 4, <8 x i1> %520), !tbaa !12, !alias.scope !58, !llvm.access.group !21 - %533 = getelementptr inbounds float, float* %1, i64 %523 - %534 = bitcast float* %533 to <8 x float>* - %wide.masked.load250 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %534, i32 4, <8 x i1> %520, <8 x float> undef), !tbaa !12, !alias.scope !60, !noalias !58 - %535 = fmul <8 x float> %broadcast.splat252, %wide.masked.load250 - %536 = fdiv <8 x float> %531, %535, !fpmath !26 - %537 = bitcast float* %529 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %536, <8 x float>* %537, i32 4, <8 x i1> %520), !tbaa !12, !alias.scope !58, !llvm.access.group !21 - %538 = or <8 x i64> %broadcast.splat245, - %539 = trunc <8 x i64> %538 to <8 x i32> - %540 = icmp sgt <8 x i32> %broadcast.splat247, %539 - %541 = extractelement <8 x i64> %538, i32 0 - %542 = shl i64 %541, 32 - %543 = ashr exact i64 %542, 32 - %544 = getelementptr inbounds float, float* %0, i64 %543 - %545 = bitcast float* %544 to <8 x float>* - %wide.masked.load248.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %545, i32 4, <8 x i1> %540, <8 x float> undef), !tbaa !12, !alias.scope !55, !noalias !58 - %546 = extractelement <8 x i32> %539, i32 0 - %547 = add nsw i32 %mul.i.5, %546 - %548 = sext i32 %547 to i64 - %549 = getelementptr inbounds float, float* %2, i64 %548 - %550 = bitcast float* %549 to <8 x float>* - %wide.masked.load249.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %550, i32 4, <8 x i1> %540, <8 x float> undef), !tbaa !12, !alias.scope !58 - %551 = fsub <8 x float> %wide.masked.load249.1, %wide.masked.load248.1 - %552 = bitcast float* %549 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %551, <8 x float>* %552, i32 4, <8 x i1> %540), !tbaa !12, !alias.scope !58, !llvm.access.group !21 - %553 = getelementptr inbounds float, float* %1, i64 %543 - %554 = bitcast float* %553 to <8 x float>* - %wide.masked.load250.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %554, i32 4, <8 x i1> %540, <8 x float> undef), !tbaa !12, !alias.scope !60, !noalias !58 - %555 = fmul <8 x float> %broadcast.splat252, %wide.masked.load250.1 - %556 = fdiv <8 x float> %551, %555, !fpmath !26 - %557 = bitcast float* %549 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %556, <8 x float>* %557, i32 4, <8 x i1> %540), !tbaa !12, !alias.scope !58, !llvm.access.group !21 - %558 = or <8 x i64> %broadcast.splat245, - %559 = trunc <8 x i64> %558 to <8 x i32> - %560 = icmp sgt <8 x i32> %broadcast.splat247, %559 - %561 = extractelement <8 x i64> %558, i32 0 - %562 = shl i64 %561, 32 - %563 = ashr exact i64 %562, 32 - %564 = getelementptr inbounds float, float* %0, i64 %563 - %565 = bitcast float* %564 to <8 x float>* - %wide.masked.load248.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %565, i32 4, <8 x i1> %560, <8 x float> undef), !tbaa !12, !alias.scope !55, !noalias !58 - %566 = extractelement <8 x i32> %559, i32 0 - %567 = add nsw i32 %mul.i.5, %566 - %568 = sext i32 %567 to i64 - %569 = getelementptr inbounds float, float* %2, i64 %568 - %570 = bitcast float* %569 to <8 x float>* - %wide.masked.load249.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %570, i32 4, <8 x i1> %560, <8 x float> undef), !tbaa !12, !alias.scope !58 - %571 = fsub <8 x float> %wide.masked.load249.2, %wide.masked.load248.2 - %572 = bitcast float* %569 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %571, <8 x float>* %572, i32 4, <8 x i1> %560), !tbaa !12, !alias.scope !58, !llvm.access.group !21 - %573 = getelementptr inbounds float, float* %1, i64 %563 - %574 = bitcast float* %573 to <8 x float>* - %wide.masked.load250.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %574, i32 4, <8 x i1> %560, <8 x float> undef), !tbaa !12, !alias.scope !60, !noalias !58 - %575 = fmul <8 x float> %broadcast.splat252, %wide.masked.load250.2 - %576 = fdiv <8 x float> %571, %575, !fpmath !26 - %577 = bitcast float* %569 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %576, <8 x float>* %577, i32 4, <8 x i1> %560), !tbaa !12, !alias.scope !58, !llvm.access.group !21 - %578 = or <8 x i64> %broadcast.splat245, - %579 = trunc <8 x i64> %578 to <8 x i32> - %580 = icmp sgt <8 x i32> %broadcast.splat247, %579 - %581 = extractelement <8 x i64> %578, i32 0 - %582 = shl i64 %581, 32 - %583 = ashr exact i64 %582, 32 - %584 = getelementptr inbounds float, float* %0, i64 %583 - %585 = bitcast float* %584 to <8 x float>* - %wide.masked.load248.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %585, i32 4, <8 x i1> %580, <8 x float> undef), !tbaa !12, !alias.scope !55, !noalias !58 - %586 = extractelement <8 x i32> %579, i32 0 - %587 = add nsw i32 %mul.i.5, %586 - %588 = sext i32 %587 to i64 - %589 = getelementptr inbounds float, float* %2, i64 %588 - %590 = bitcast float* %589 to <8 x float>* - %wide.masked.load249.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %590, i32 4, <8 x i1> %580, <8 x float> undef), !tbaa !12, !alias.scope !58 - %591 = fsub <8 x float> %wide.masked.load249.3, %wide.masked.load248.3 - %592 = bitcast float* %589 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %591, <8 x float>* %592, i32 4, <8 x i1> %580), !tbaa !12, !alias.scope !58, !llvm.access.group !21 - %593 = getelementptr inbounds float, float* %1, i64 %583 - %594 = bitcast float* %593 to <8 x float>* - %wide.masked.load250.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %594, i32 4, <8 x i1> %580, <8 x float> undef), !tbaa !12, !alias.scope !60, !noalias !58 - %595 = fmul <8 x float> %broadcast.splat252, %wide.masked.load250.3 - %596 = fdiv <8 x float> %591, %595, !fpmath !26 - %597 = bitcast float* %589 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %596, <8 x float>* %597, i32 4, <8 x i1> %580), !tbaa !12, !alias.scope !58, !llvm.access.group !21 - br label %pregion_for_end.i.5 - -pregion_for_entry.entry.i.us.5: ; preds = %if.end.r_exit.i.us.5.1, %pregion_for_entry.entry.i.us.5.preheader - %_local_id_x.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.us.5.preheader ], [ %809, %if.end.r_exit.i.us.5.1 ] - %add1.i.i.us.5 = add nuw nsw i64 %_local_id_x.0.us.5, %mul.i.i - %conv.i.us.5 = trunc i64 %add1.i.i.us.5 to i32 - %cmp4.i.us.5 = icmp slt i32 %conv.i.us.5, %4 - br i1 %cmp4.i.us.5, label %if.then.i.us.5, label %if.end.r_exit.i.us.5 - -if.then.i.us.5: ; preds = %pregion_for_entry.entry.i.us.5 - %sext.i.us.5 = shl i64 %add1.i.i.us.5, 32 - %idxprom.i.us.5 = ashr exact i64 %sext.i.us.5, 32 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.5 - %598 = load float, float* %arrayidx.i.us.5, align 4, !tbaa !12 - %add.i.us.5 = add nsw i32 %mul.i.5, %conv.i.us.5 - %idxprom6.i.us.5 = sext i32 %add.i.us.5 to i64 - %arrayidx7.i.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.5 - %599 = load float, float* %arrayidx7.i.us.5, align 4, !tbaa !12 - %sub.i.us.5 = fsub float %599, %598 - store float %sub.i.us.5, float* %arrayidx7.i.us.5, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.5 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.5 - %600 = load float, float* %arrayidx10.i.us.5, align 4, !tbaa !12 - %mul11.i.us.5 = fmul float %10, %600 - %div.i.us.5 = fdiv float %sub.i.us.5, %mul11.i.us.5, !fpmath !26 - store float %div.i.us.5, float* %arrayidx7.i.us.5, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.5 - -if.end.r_exit.i.us.5: ; preds = %if.then.i.us.5, %pregion_for_entry.entry.i.us.5 - %601 = or i64 %_local_id_x.0.us.5, 1 - %add1.i.i.us.5.1 = add nuw nsw i64 %601, %mul.i.i - %conv.i.us.5.1 = trunc i64 %add1.i.i.us.5.1 to i32 - %cmp4.i.us.5.1 = icmp slt i32 %conv.i.us.5.1, %4 - br i1 %cmp4.i.us.5.1, label %if.then.i.us.5.1, label %if.end.r_exit.i.us.5.1 - -pregion_for_end.i.5.loopexit: ; preds = %if.end.r_exit.i.us.5.1 - br label %pregion_for_end.i.5 - -pregion_for_end.i.5: ; preds = %pregion_for_end.i.5.loopexit, %vector.ph237, %pregion_for_end.i.4 - %602 = trunc i64 %mul3.i.i to i32 - %conv2.i.6 = or i32 %602, 6 - %cmp.i.6 = icmp slt i32 %conv2.i.6, %5 - %mul.i.6 = mul nsw i32 %conv2.i.6, %4 - br i1 %cmp.i.6, label %vector.scevcheck260, label %pregion_for_end.i.6 - -vector.scevcheck260: ; preds = %pregion_for_end.i.5 - %603 = mul i32 %conv2.i.6, %4 - %604 = trunc i64 %7 to i32 - %605 = shl i32 %604, 5 - %606 = add i32 %603, %605 - %607 = icmp sgt i32 %606, 2147483616 - br i1 %607, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.memcheck282 - -pregion_for_entry.entry.i.us.6.preheader: ; preds = %vector.memcheck282, %vector.scevcheck260 - br label %pregion_for_entry.entry.i.us.6 - -vector.memcheck282: ; preds = %vector.scevcheck260 - %608 = trunc i64 %7 to i32 - %609 = shl i32 %608, 5 - %610 = sext i32 %609 to i64 - %scevgep262 = getelementptr float, float* %0, i64 %610 - %611 = add nsw i64 %610, 32 - %scevgep264 = getelementptr float, float* %0, i64 %611 - %612 = mul i32 %conv2.i.6, %4 - %613 = add i32 %612, %609 - %614 = sext i32 %613 to i64 - %scevgep266 = getelementptr float, float* %2, i64 %614 - %615 = add nsw i64 %614, 32 - %scevgep268 = getelementptr float, float* %2, i64 %615 - %scevgep270 = getelementptr float, float* %1, i64 %610 - %scevgep272 = getelementptr float, float* %1, i64 %611 - %bound0274 = icmp ult float* %scevgep262, %scevgep268 - %bound1275 = icmp ult float* %scevgep266, %scevgep264 - %found.conflict276 = and i1 %bound0274, %bound1275 - %bound0277 = icmp ult float* %scevgep270, %scevgep268 - %bound1278 = icmp ult float* %scevgep266, %scevgep272 - %found.conflict279 = and i1 %bound0277, %bound1278 - %conflict.rdx280 = or i1 %found.conflict276, %found.conflict279 - br i1 %conflict.rdx280, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.ph283 - -vector.ph283: ; preds = %vector.memcheck282 - %broadcast.splatinsert290 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat291 = shufflevector <8 x i64> %broadcast.splatinsert290, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert292 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat293 = shufflevector <8 x i32> %broadcast.splatinsert292, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert297 = insertelement <8 x float> undef, float %10, i32 0 - %broadcast.splat298 = shufflevector <8 x float> %broadcast.splatinsert297, <8 x float> undef, <8 x i32> zeroinitializer - %616 = or <8 x i64> %broadcast.splat291, - %617 = trunc <8 x i64> %616 to <8 x i32> - %618 = icmp sgt <8 x i32> %broadcast.splat293, %617 - %619 = extractelement <8 x i64> %616, i32 0 - %620 = shl i64 %619, 32 - %621 = ashr exact i64 %620, 32 - %622 = getelementptr inbounds float, float* %0, i64 %621 - %623 = bitcast float* %622 to <8 x float>* - %wide.masked.load294 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %623, i32 4, <8 x i1> %618, <8 x float> undef), !tbaa !12, !alias.scope !62, !noalias !65 - %624 = extractelement <8 x i32> %617, i32 0 - %625 = add nsw i32 %mul.i.6, %624 - %626 = sext i32 %625 to i64 - %627 = getelementptr inbounds float, float* %2, i64 %626 - %628 = bitcast float* %627 to <8 x float>* - %wide.masked.load295 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %628, i32 4, <8 x i1> %618, <8 x float> undef), !tbaa !12, !alias.scope !65 - %629 = fsub <8 x float> %wide.masked.load295, %wide.masked.load294 - %630 = bitcast float* %627 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %629, <8 x float>* %630, i32 4, <8 x i1> %618), !tbaa !12, !alias.scope !65, !llvm.access.group !21 - %631 = getelementptr inbounds float, float* %1, i64 %621 - %632 = bitcast float* %631 to <8 x float>* - %wide.masked.load296 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %632, i32 4, <8 x i1> %618, <8 x float> undef), !tbaa !12, !alias.scope !67, !noalias !65 - %633 = fmul <8 x float> %broadcast.splat298, %wide.masked.load296 - %634 = fdiv <8 x float> %629, %633, !fpmath !26 - %635 = bitcast float* %627 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %634, <8 x float>* %635, i32 4, <8 x i1> %618), !tbaa !12, !alias.scope !65, !llvm.access.group !21 - %636 = or <8 x i64> %broadcast.splat291, - %637 = trunc <8 x i64> %636 to <8 x i32> - %638 = icmp sgt <8 x i32> %broadcast.splat293, %637 - %639 = extractelement <8 x i64> %636, i32 0 - %640 = shl i64 %639, 32 - %641 = ashr exact i64 %640, 32 - %642 = getelementptr inbounds float, float* %0, i64 %641 - %643 = bitcast float* %642 to <8 x float>* - %wide.masked.load294.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %643, i32 4, <8 x i1> %638, <8 x float> undef), !tbaa !12, !alias.scope !62, !noalias !65 - %644 = extractelement <8 x i32> %637, i32 0 - %645 = add nsw i32 %mul.i.6, %644 - %646 = sext i32 %645 to i64 - %647 = getelementptr inbounds float, float* %2, i64 %646 - %648 = bitcast float* %647 to <8 x float>* - %wide.masked.load295.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %648, i32 4, <8 x i1> %638, <8 x float> undef), !tbaa !12, !alias.scope !65 - %649 = fsub <8 x float> %wide.masked.load295.1, %wide.masked.load294.1 - %650 = bitcast float* %647 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %649, <8 x float>* %650, i32 4, <8 x i1> %638), !tbaa !12, !alias.scope !65, !llvm.access.group !21 - %651 = getelementptr inbounds float, float* %1, i64 %641 - %652 = bitcast float* %651 to <8 x float>* - %wide.masked.load296.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %652, i32 4, <8 x i1> %638, <8 x float> undef), !tbaa !12, !alias.scope !67, !noalias !65 - %653 = fmul <8 x float> %broadcast.splat298, %wide.masked.load296.1 - %654 = fdiv <8 x float> %649, %653, !fpmath !26 - %655 = bitcast float* %647 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %654, <8 x float>* %655, i32 4, <8 x i1> %638), !tbaa !12, !alias.scope !65, !llvm.access.group !21 - %656 = or <8 x i64> %broadcast.splat291, - %657 = trunc <8 x i64> %656 to <8 x i32> - %658 = icmp sgt <8 x i32> %broadcast.splat293, %657 - %659 = extractelement <8 x i64> %656, i32 0 - %660 = shl i64 %659, 32 - %661 = ashr exact i64 %660, 32 - %662 = getelementptr inbounds float, float* %0, i64 %661 - %663 = bitcast float* %662 to <8 x float>* - %wide.masked.load294.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %663, i32 4, <8 x i1> %658, <8 x float> undef), !tbaa !12, !alias.scope !62, !noalias !65 - %664 = extractelement <8 x i32> %657, i32 0 - %665 = add nsw i32 %mul.i.6, %664 - %666 = sext i32 %665 to i64 - %667 = getelementptr inbounds float, float* %2, i64 %666 - %668 = bitcast float* %667 to <8 x float>* - %wide.masked.load295.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %668, i32 4, <8 x i1> %658, <8 x float> undef), !tbaa !12, !alias.scope !65 - %669 = fsub <8 x float> %wide.masked.load295.2, %wide.masked.load294.2 - %670 = bitcast float* %667 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %669, <8 x float>* %670, i32 4, <8 x i1> %658), !tbaa !12, !alias.scope !65, !llvm.access.group !21 - %671 = getelementptr inbounds float, float* %1, i64 %661 - %672 = bitcast float* %671 to <8 x float>* - %wide.masked.load296.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %672, i32 4, <8 x i1> %658, <8 x float> undef), !tbaa !12, !alias.scope !67, !noalias !65 - %673 = fmul <8 x float> %broadcast.splat298, %wide.masked.load296.2 - %674 = fdiv <8 x float> %669, %673, !fpmath !26 - %675 = bitcast float* %667 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %674, <8 x float>* %675, i32 4, <8 x i1> %658), !tbaa !12, !alias.scope !65, !llvm.access.group !21 - %676 = or <8 x i64> %broadcast.splat291, - %677 = trunc <8 x i64> %676 to <8 x i32> - %678 = icmp sgt <8 x i32> %broadcast.splat293, %677 - %679 = extractelement <8 x i64> %676, i32 0 - %680 = shl i64 %679, 32 - %681 = ashr exact i64 %680, 32 - %682 = getelementptr inbounds float, float* %0, i64 %681 - %683 = bitcast float* %682 to <8 x float>* - %wide.masked.load294.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %683, i32 4, <8 x i1> %678, <8 x float> undef), !tbaa !12, !alias.scope !62, !noalias !65 - %684 = extractelement <8 x i32> %677, i32 0 - %685 = add nsw i32 %mul.i.6, %684 - %686 = sext i32 %685 to i64 - %687 = getelementptr inbounds float, float* %2, i64 %686 - %688 = bitcast float* %687 to <8 x float>* - %wide.masked.load295.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %688, i32 4, <8 x i1> %678, <8 x float> undef), !tbaa !12, !alias.scope !65 - %689 = fsub <8 x float> %wide.masked.load295.3, %wide.masked.load294.3 - %690 = bitcast float* %687 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %689, <8 x float>* %690, i32 4, <8 x i1> %678), !tbaa !12, !alias.scope !65, !llvm.access.group !21 - %691 = getelementptr inbounds float, float* %1, i64 %681 - %692 = bitcast float* %691 to <8 x float>* - %wide.masked.load296.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %692, i32 4, <8 x i1> %678, <8 x float> undef), !tbaa !12, !alias.scope !67, !noalias !65 - %693 = fmul <8 x float> %broadcast.splat298, %wide.masked.load296.3 - %694 = fdiv <8 x float> %689, %693, !fpmath !26 - %695 = bitcast float* %687 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %694, <8 x float>* %695, i32 4, <8 x i1> %678), !tbaa !12, !alias.scope !65, !llvm.access.group !21 - br label %pregion_for_end.i.6 - -pregion_for_entry.entry.i.us.6: ; preds = %if.end.r_exit.i.us.6.1, %pregion_for_entry.entry.i.us.6.preheader - %_local_id_x.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.us.6.preheader ], [ %805, %if.end.r_exit.i.us.6.1 ] - %add1.i.i.us.6 = add nuw nsw i64 %_local_id_x.0.us.6, %mul.i.i - %conv.i.us.6 = trunc i64 %add1.i.i.us.6 to i32 - %cmp4.i.us.6 = icmp slt i32 %conv.i.us.6, %4 - br i1 %cmp4.i.us.6, label %if.then.i.us.6, label %if.end.r_exit.i.us.6 - -if.then.i.us.6: ; preds = %pregion_for_entry.entry.i.us.6 - %sext.i.us.6 = shl i64 %add1.i.i.us.6, 32 - %idxprom.i.us.6 = ashr exact i64 %sext.i.us.6, 32 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.6 - %696 = load float, float* %arrayidx.i.us.6, align 4, !tbaa !12 - %add.i.us.6 = add nsw i32 %mul.i.6, %conv.i.us.6 - %idxprom6.i.us.6 = sext i32 %add.i.us.6 to i64 - %arrayidx7.i.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.6 - %697 = load float, float* %arrayidx7.i.us.6, align 4, !tbaa !12 - %sub.i.us.6 = fsub float %697, %696 - store float %sub.i.us.6, float* %arrayidx7.i.us.6, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.6 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.6 - %698 = load float, float* %arrayidx10.i.us.6, align 4, !tbaa !12 - %mul11.i.us.6 = fmul float %10, %698 - %div.i.us.6 = fdiv float %sub.i.us.6, %mul11.i.us.6, !fpmath !26 - store float %div.i.us.6, float* %arrayidx7.i.us.6, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.6 - -if.end.r_exit.i.us.6: ; preds = %if.then.i.us.6, %pregion_for_entry.entry.i.us.6 - %699 = or i64 %_local_id_x.0.us.6, 1 - %add1.i.i.us.6.1 = add nuw nsw i64 %699, %mul.i.i - %conv.i.us.6.1 = trunc i64 %add1.i.i.us.6.1 to i32 - %cmp4.i.us.6.1 = icmp slt i32 %conv.i.us.6.1, %4 - br i1 %cmp4.i.us.6.1, label %if.then.i.us.6.1, label %if.end.r_exit.i.us.6.1 - -pregion_for_end.i.6.loopexit: ; preds = %if.end.r_exit.i.us.6.1 - br label %pregion_for_end.i.6 - -pregion_for_end.i.6: ; preds = %pregion_for_end.i.6.loopexit, %vector.ph283, %pregion_for_end.i.5 - %700 = trunc i64 %mul3.i.i to i32 - %conv2.i.7 = or i32 %700, 7 - %cmp.i.7 = icmp slt i32 %conv2.i.7, %5 - %mul.i.7 = mul nsw i32 %conv2.i.7, %4 - br i1 %cmp.i.7, label %vector.scevcheck306, label %pregion_for_end.i.7 - -vector.scevcheck306: ; preds = %pregion_for_end.i.6 - %701 = mul i32 %conv2.i.7, %4 - %702 = trunc i64 %7 to i32 - %703 = shl i32 %702, 5 - %704 = add i32 %701, %703 - %705 = icmp sgt i32 %704, 2147483616 - br i1 %705, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.memcheck328 - -pregion_for_entry.entry.i.us.7.preheader: ; preds = %vector.memcheck328, %vector.scevcheck306 - br label %pregion_for_entry.entry.i.us.7 - -vector.memcheck328: ; preds = %vector.scevcheck306 - %706 = trunc i64 %7 to i32 - %707 = shl i32 %706, 5 - %708 = sext i32 %707 to i64 - %scevgep308 = getelementptr float, float* %0, i64 %708 - %709 = add nsw i64 %708, 32 - %scevgep310 = getelementptr float, float* %0, i64 %709 - %710 = mul i32 %conv2.i.7, %4 - %711 = add i32 %710, %707 - %712 = sext i32 %711 to i64 - %scevgep312 = getelementptr float, float* %2, i64 %712 - %713 = add nsw i64 %712, 32 - %scevgep314 = getelementptr float, float* %2, i64 %713 - %scevgep316 = getelementptr float, float* %1, i64 %708 - %scevgep318 = getelementptr float, float* %1, i64 %709 - %bound0320 = icmp ult float* %scevgep308, %scevgep314 - %bound1321 = icmp ult float* %scevgep312, %scevgep310 - %found.conflict322 = and i1 %bound0320, %bound1321 - %bound0323 = icmp ult float* %scevgep316, %scevgep314 - %bound1324 = icmp ult float* %scevgep312, %scevgep318 - %found.conflict325 = and i1 %bound0323, %bound1324 - %conflict.rdx326 = or i1 %found.conflict322, %found.conflict325 - br i1 %conflict.rdx326, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.ph329 - -vector.ph329: ; preds = %vector.memcheck328 - %broadcast.splatinsert336 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat337 = shufflevector <8 x i64> %broadcast.splatinsert336, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert338 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat339 = shufflevector <8 x i32> %broadcast.splatinsert338, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert343 = insertelement <8 x float> undef, float %10, i32 0 - %broadcast.splat344 = shufflevector <8 x float> %broadcast.splatinsert343, <8 x float> undef, <8 x i32> zeroinitializer - %714 = or <8 x i64> %broadcast.splat337, - %715 = trunc <8 x i64> %714 to <8 x i32> - %716 = icmp sgt <8 x i32> %broadcast.splat339, %715 - %717 = extractelement <8 x i64> %714, i32 0 - %718 = shl i64 %717, 32 - %719 = ashr exact i64 %718, 32 - %720 = getelementptr inbounds float, float* %0, i64 %719 - %721 = bitcast float* %720 to <8 x float>* - %wide.masked.load340 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %721, i32 4, <8 x i1> %716, <8 x float> undef), !tbaa !12, !alias.scope !69, !noalias !72 - %722 = extractelement <8 x i32> %715, i32 0 - %723 = add nsw i32 %mul.i.7, %722 - %724 = sext i32 %723 to i64 - %725 = getelementptr inbounds float, float* %2, i64 %724 - %726 = bitcast float* %725 to <8 x float>* - %wide.masked.load341 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %726, i32 4, <8 x i1> %716, <8 x float> undef), !tbaa !12, !alias.scope !72 - %727 = fsub <8 x float> %wide.masked.load341, %wide.masked.load340 - %728 = bitcast float* %725 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %727, <8 x float>* %728, i32 4, <8 x i1> %716), !tbaa !12, !alias.scope !72, !llvm.access.group !21 - %729 = getelementptr inbounds float, float* %1, i64 %719 - %730 = bitcast float* %729 to <8 x float>* - %wide.masked.load342 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %730, i32 4, <8 x i1> %716, <8 x float> undef), !tbaa !12, !alias.scope !74, !noalias !72 - %731 = fmul <8 x float> %broadcast.splat344, %wide.masked.load342 - %732 = fdiv <8 x float> %727, %731, !fpmath !26 - %733 = bitcast float* %725 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %732, <8 x float>* %733, i32 4, <8 x i1> %716), !tbaa !12, !alias.scope !72, !llvm.access.group !21 - %734 = or <8 x i64> %broadcast.splat337, - %735 = trunc <8 x i64> %734 to <8 x i32> - %736 = icmp sgt <8 x i32> %broadcast.splat339, %735 - %737 = extractelement <8 x i64> %734, i32 0 - %738 = shl i64 %737, 32 - %739 = ashr exact i64 %738, 32 - %740 = getelementptr inbounds float, float* %0, i64 %739 - %741 = bitcast float* %740 to <8 x float>* - %wide.masked.load340.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %741, i32 4, <8 x i1> %736, <8 x float> undef), !tbaa !12, !alias.scope !69, !noalias !72 - %742 = extractelement <8 x i32> %735, i32 0 - %743 = add nsw i32 %mul.i.7, %742 - %744 = sext i32 %743 to i64 - %745 = getelementptr inbounds float, float* %2, i64 %744 - %746 = bitcast float* %745 to <8 x float>* - %wide.masked.load341.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %746, i32 4, <8 x i1> %736, <8 x float> undef), !tbaa !12, !alias.scope !72 - %747 = fsub <8 x float> %wide.masked.load341.1, %wide.masked.load340.1 - %748 = bitcast float* %745 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %747, <8 x float>* %748, i32 4, <8 x i1> %736), !tbaa !12, !alias.scope !72, !llvm.access.group !21 - %749 = getelementptr inbounds float, float* %1, i64 %739 - %750 = bitcast float* %749 to <8 x float>* - %wide.masked.load342.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %750, i32 4, <8 x i1> %736, <8 x float> undef), !tbaa !12, !alias.scope !74, !noalias !72 - %751 = fmul <8 x float> %broadcast.splat344, %wide.masked.load342.1 - %752 = fdiv <8 x float> %747, %751, !fpmath !26 - %753 = bitcast float* %745 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %752, <8 x float>* %753, i32 4, <8 x i1> %736), !tbaa !12, !alias.scope !72, !llvm.access.group !21 - %754 = or <8 x i64> %broadcast.splat337, - %755 = trunc <8 x i64> %754 to <8 x i32> - %756 = icmp sgt <8 x i32> %broadcast.splat339, %755 - %757 = extractelement <8 x i64> %754, i32 0 - %758 = shl i64 %757, 32 - %759 = ashr exact i64 %758, 32 - %760 = getelementptr inbounds float, float* %0, i64 %759 - %761 = bitcast float* %760 to <8 x float>* - %wide.masked.load340.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %761, i32 4, <8 x i1> %756, <8 x float> undef), !tbaa !12, !alias.scope !69, !noalias !72 - %762 = extractelement <8 x i32> %755, i32 0 - %763 = add nsw i32 %mul.i.7, %762 - %764 = sext i32 %763 to i64 - %765 = getelementptr inbounds float, float* %2, i64 %764 - %766 = bitcast float* %765 to <8 x float>* - %wide.masked.load341.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %766, i32 4, <8 x i1> %756, <8 x float> undef), !tbaa !12, !alias.scope !72 - %767 = fsub <8 x float> %wide.masked.load341.2, %wide.masked.load340.2 - %768 = bitcast float* %765 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %767, <8 x float>* %768, i32 4, <8 x i1> %756), !tbaa !12, !alias.scope !72, !llvm.access.group !21 - %769 = getelementptr inbounds float, float* %1, i64 %759 - %770 = bitcast float* %769 to <8 x float>* - %wide.masked.load342.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %770, i32 4, <8 x i1> %756, <8 x float> undef), !tbaa !12, !alias.scope !74, !noalias !72 - %771 = fmul <8 x float> %broadcast.splat344, %wide.masked.load342.2 - %772 = fdiv <8 x float> %767, %771, !fpmath !26 - %773 = bitcast float* %765 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %772, <8 x float>* %773, i32 4, <8 x i1> %756), !tbaa !12, !alias.scope !72, !llvm.access.group !21 - %774 = or <8 x i64> %broadcast.splat337, - %775 = trunc <8 x i64> %774 to <8 x i32> - %776 = icmp sgt <8 x i32> %broadcast.splat339, %775 - %777 = extractelement <8 x i64> %774, i32 0 - %778 = shl i64 %777, 32 - %779 = ashr exact i64 %778, 32 - %780 = getelementptr inbounds float, float* %0, i64 %779 - %781 = bitcast float* %780 to <8 x float>* - %wide.masked.load340.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %781, i32 4, <8 x i1> %776, <8 x float> undef), !tbaa !12, !alias.scope !69, !noalias !72 - %782 = extractelement <8 x i32> %775, i32 0 - %783 = add nsw i32 %mul.i.7, %782 - %784 = sext i32 %783 to i64 - %785 = getelementptr inbounds float, float* %2, i64 %784 - %786 = bitcast float* %785 to <8 x float>* - %wide.masked.load341.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %786, i32 4, <8 x i1> %776, <8 x float> undef), !tbaa !12, !alias.scope !72 - %787 = fsub <8 x float> %wide.masked.load341.3, %wide.masked.load340.3 - %788 = bitcast float* %785 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %787, <8 x float>* %788, i32 4, <8 x i1> %776), !tbaa !12, !alias.scope !72, !llvm.access.group !21 - %789 = getelementptr inbounds float, float* %1, i64 %779 - %790 = bitcast float* %789 to <8 x float>* - %wide.masked.load342.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %790, i32 4, <8 x i1> %776, <8 x float> undef), !tbaa !12, !alias.scope !74, !noalias !72 - %791 = fmul <8 x float> %broadcast.splat344, %wide.masked.load342.3 - %792 = fdiv <8 x float> %787, %791, !fpmath !26 - %793 = bitcast float* %785 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %792, <8 x float>* %793, i32 4, <8 x i1> %776), !tbaa !12, !alias.scope !72, !llvm.access.group !21 - br label %pregion_for_end.i.7 - -pregion_for_entry.entry.i.us.7: ; preds = %if.end.r_exit.i.us.7.1, %pregion_for_entry.entry.i.us.7.preheader - %_local_id_x.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.us.7.preheader ], [ %801, %if.end.r_exit.i.us.7.1 ] - %add1.i.i.us.7 = add nuw nsw i64 %_local_id_x.0.us.7, %mul.i.i - %conv.i.us.7 = trunc i64 %add1.i.i.us.7 to i32 - %cmp4.i.us.7 = icmp slt i32 %conv.i.us.7, %4 - br i1 %cmp4.i.us.7, label %if.then.i.us.7, label %if.end.r_exit.i.us.7 - -if.then.i.us.7: ; preds = %pregion_for_entry.entry.i.us.7 - %sext.i.us.7 = shl i64 %add1.i.i.us.7, 32 - %idxprom.i.us.7 = ashr exact i64 %sext.i.us.7, 32 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.7 - %794 = load float, float* %arrayidx.i.us.7, align 4, !tbaa !12 - %add.i.us.7 = add nsw i32 %mul.i.7, %conv.i.us.7 - %idxprom6.i.us.7 = sext i32 %add.i.us.7 to i64 - %arrayidx7.i.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.7 - %795 = load float, float* %arrayidx7.i.us.7, align 4, !tbaa !12 - %sub.i.us.7 = fsub float %795, %794 - store float %sub.i.us.7, float* %arrayidx7.i.us.7, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.7 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.7 - %796 = load float, float* %arrayidx10.i.us.7, align 4, !tbaa !12 - %mul11.i.us.7 = fmul float %10, %796 - %div.i.us.7 = fdiv float %sub.i.us.7, %mul11.i.us.7, !fpmath !26 - store float %div.i.us.7, float* %arrayidx7.i.us.7, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.7 - -if.end.r_exit.i.us.7: ; preds = %if.then.i.us.7, %pregion_for_entry.entry.i.us.7 - %797 = or i64 %_local_id_x.0.us.7, 1 - %add1.i.i.us.7.1 = add nuw nsw i64 %797, %mul.i.i - %conv.i.us.7.1 = trunc i64 %add1.i.i.us.7.1 to i32 - %cmp4.i.us.7.1 = icmp slt i32 %conv.i.us.7.1, %4 - br i1 %cmp4.i.us.7.1, label %if.then.i.us.7.1, label %if.end.r_exit.i.us.7.1 - -pregion_for_end.i.7.loopexit: ; preds = %if.end.r_exit.i.us.7.1 - br label %pregion_for_end.i.7 - -pregion_for_end.i.7: ; preds = %pregion_for_end.i.7.loopexit, %vector.ph329, %pregion_for_end.i.6 - ret void - -if.then.i.us.7.1: ; preds = %if.end.r_exit.i.us.7 - %sext.i.us.7.1 = shl i64 %add1.i.i.us.7.1, 32 - %idxprom.i.us.7.1 = ashr exact i64 %sext.i.us.7.1, 32 - %arrayidx.i.us.7.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.7.1 - %798 = load float, float* %arrayidx.i.us.7.1, align 4, !tbaa !12 - %add.i.us.7.1 = add nsw i32 %mul.i.7, %conv.i.us.7.1 - %idxprom6.i.us.7.1 = sext i32 %add.i.us.7.1 to i64 - %arrayidx7.i.us.7.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.7.1 - %799 = load float, float* %arrayidx7.i.us.7.1, align 4, !tbaa !12 - %sub.i.us.7.1 = fsub float %799, %798 - store float %sub.i.us.7.1, float* %arrayidx7.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.7.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.7.1 - %800 = load float, float* %arrayidx10.i.us.7.1, align 4, !tbaa !12 - %mul11.i.us.7.1 = fmul float %10, %800 - %div.i.us.7.1 = fdiv float %sub.i.us.7.1, %mul11.i.us.7.1, !fpmath !26 - store float %div.i.us.7.1, float* %arrayidx7.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.7.1 - -if.end.r_exit.i.us.7.1: ; preds = %if.then.i.us.7.1, %if.end.r_exit.i.us.7 - %801 = add nuw nsw i64 %_local_id_x.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %801, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.7.loopexit, label %pregion_for_entry.entry.i.us.7, !llvm.loop !76 - -if.then.i.us.6.1: ; preds = %if.end.r_exit.i.us.6 - %sext.i.us.6.1 = shl i64 %add1.i.i.us.6.1, 32 - %idxprom.i.us.6.1 = ashr exact i64 %sext.i.us.6.1, 32 - %arrayidx.i.us.6.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.6.1 - %802 = load float, float* %arrayidx.i.us.6.1, align 4, !tbaa !12 - %add.i.us.6.1 = add nsw i32 %mul.i.6, %conv.i.us.6.1 - %idxprom6.i.us.6.1 = sext i32 %add.i.us.6.1 to i64 - %arrayidx7.i.us.6.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.6.1 - %803 = load float, float* %arrayidx7.i.us.6.1, align 4, !tbaa !12 - %sub.i.us.6.1 = fsub float %803, %802 - store float %sub.i.us.6.1, float* %arrayidx7.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.6.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.6.1 - %804 = load float, float* %arrayidx10.i.us.6.1, align 4, !tbaa !12 - %mul11.i.us.6.1 = fmul float %10, %804 - %div.i.us.6.1 = fdiv float %sub.i.us.6.1, %mul11.i.us.6.1, !fpmath !26 - store float %div.i.us.6.1, float* %arrayidx7.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.6.1 - -if.end.r_exit.i.us.6.1: ; preds = %if.then.i.us.6.1, %if.end.r_exit.i.us.6 - %805 = add nuw nsw i64 %_local_id_x.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %805, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.6.loopexit, label %pregion_for_entry.entry.i.us.6, !llvm.loop !79 - -if.then.i.us.5.1: ; preds = %if.end.r_exit.i.us.5 - %sext.i.us.5.1 = shl i64 %add1.i.i.us.5.1, 32 - %idxprom.i.us.5.1 = ashr exact i64 %sext.i.us.5.1, 32 - %arrayidx.i.us.5.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.5.1 - %806 = load float, float* %arrayidx.i.us.5.1, align 4, !tbaa !12 - %add.i.us.5.1 = add nsw i32 %mul.i.5, %conv.i.us.5.1 - %idxprom6.i.us.5.1 = sext i32 %add.i.us.5.1 to i64 - %arrayidx7.i.us.5.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.5.1 - %807 = load float, float* %arrayidx7.i.us.5.1, align 4, !tbaa !12 - %sub.i.us.5.1 = fsub float %807, %806 - store float %sub.i.us.5.1, float* %arrayidx7.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.5.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.5.1 - %808 = load float, float* %arrayidx10.i.us.5.1, align 4, !tbaa !12 - %mul11.i.us.5.1 = fmul float %10, %808 - %div.i.us.5.1 = fdiv float %sub.i.us.5.1, %mul11.i.us.5.1, !fpmath !26 - store float %div.i.us.5.1, float* %arrayidx7.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.5.1 - -if.end.r_exit.i.us.5.1: ; preds = %if.then.i.us.5.1, %if.end.r_exit.i.us.5 - %809 = add nuw nsw i64 %_local_id_x.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %809, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.5.loopexit, label %pregion_for_entry.entry.i.us.5, !llvm.loop !80 - -if.then.i.us.4.1: ; preds = %if.end.r_exit.i.us.4 - %sext.i.us.4.1 = shl i64 %add1.i.i.us.4.1, 32 - %idxprom.i.us.4.1 = ashr exact i64 %sext.i.us.4.1, 32 - %arrayidx.i.us.4.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.4.1 - %810 = load float, float* %arrayidx.i.us.4.1, align 4, !tbaa !12 - %add.i.us.4.1 = add nsw i32 %mul.i.4, %conv.i.us.4.1 - %idxprom6.i.us.4.1 = sext i32 %add.i.us.4.1 to i64 - %arrayidx7.i.us.4.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.4.1 - %811 = load float, float* %arrayidx7.i.us.4.1, align 4, !tbaa !12 - %sub.i.us.4.1 = fsub float %811, %810 - store float %sub.i.us.4.1, float* %arrayidx7.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.4.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.4.1 - %812 = load float, float* %arrayidx10.i.us.4.1, align 4, !tbaa !12 - %mul11.i.us.4.1 = fmul float %10, %812 - %div.i.us.4.1 = fdiv float %sub.i.us.4.1, %mul11.i.us.4.1, !fpmath !26 - store float %div.i.us.4.1, float* %arrayidx7.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.4.1 - -if.end.r_exit.i.us.4.1: ; preds = %if.then.i.us.4.1, %if.end.r_exit.i.us.4 - %813 = add nuw nsw i64 %_local_id_x.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %813, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.4.loopexit, label %pregion_for_entry.entry.i.us.4, !llvm.loop !81 - -if.then.i.us.3.1: ; preds = %if.end.r_exit.i.us.3 - %sext.i.us.3.1 = shl i64 %add1.i.i.us.3.1, 32 - %idxprom.i.us.3.1 = ashr exact i64 %sext.i.us.3.1, 32 - %arrayidx.i.us.3.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.3.1 - %814 = load float, float* %arrayidx.i.us.3.1, align 4, !tbaa !12 - %add.i.us.3.1 = add nsw i32 %mul.i.3, %conv.i.us.3.1 - %idxprom6.i.us.3.1 = sext i32 %add.i.us.3.1 to i64 - %arrayidx7.i.us.3.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.3.1 - %815 = load float, float* %arrayidx7.i.us.3.1, align 4, !tbaa !12 - %sub.i.us.3.1 = fsub float %815, %814 - store float %sub.i.us.3.1, float* %arrayidx7.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.3.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.3.1 - %816 = load float, float* %arrayidx10.i.us.3.1, align 4, !tbaa !12 - %mul11.i.us.3.1 = fmul float %10, %816 - %div.i.us.3.1 = fdiv float %sub.i.us.3.1, %mul11.i.us.3.1, !fpmath !26 - store float %div.i.us.3.1, float* %arrayidx7.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.3.1 - -if.end.r_exit.i.us.3.1: ; preds = %if.then.i.us.3.1, %if.end.r_exit.i.us.3 - %817 = add nuw nsw i64 %_local_id_x.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %817, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.3.loopexit, label %pregion_for_entry.entry.i.us.3, !llvm.loop !82 - -if.then.i.us.2.1: ; preds = %if.end.r_exit.i.us.2 - %sext.i.us.2.1 = shl i64 %add1.i.i.us.2.1, 32 - %idxprom.i.us.2.1 = ashr exact i64 %sext.i.us.2.1, 32 - %arrayidx.i.us.2.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.2.1 - %818 = load float, float* %arrayidx.i.us.2.1, align 4, !tbaa !12 - %add.i.us.2.1 = add nsw i32 %mul.i.2, %conv.i.us.2.1 - %idxprom6.i.us.2.1 = sext i32 %add.i.us.2.1 to i64 - %arrayidx7.i.us.2.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.2.1 - %819 = load float, float* %arrayidx7.i.us.2.1, align 4, !tbaa !12 - %sub.i.us.2.1 = fsub float %819, %818 - store float %sub.i.us.2.1, float* %arrayidx7.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.2.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.2.1 - %820 = load float, float* %arrayidx10.i.us.2.1, align 4, !tbaa !12 - %mul11.i.us.2.1 = fmul float %10, %820 - %div.i.us.2.1 = fdiv float %sub.i.us.2.1, %mul11.i.us.2.1, !fpmath !26 - store float %div.i.us.2.1, float* %arrayidx7.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.2.1 - -if.end.r_exit.i.us.2.1: ; preds = %if.then.i.us.2.1, %if.end.r_exit.i.us.2 - %821 = add nuw nsw i64 %_local_id_x.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %821, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.2.loopexit, label %pregion_for_entry.entry.i.us.2, !llvm.loop !83 - -if.then.i.us.1.1: ; preds = %if.end.r_exit.i.us.1 - %sext.i.us.1.1 = shl i64 %add1.i.i.us.1.1, 32 - %idxprom.i.us.1.1 = ashr exact i64 %sext.i.us.1.1, 32 - %arrayidx.i.us.1.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1.1 - %822 = load float, float* %arrayidx.i.us.1.1, align 4, !tbaa !12 - %add.i.us.1.1 = add nsw i32 %mul.i.1, %conv.i.us.1.1 - %idxprom6.i.us.1.1 = sext i32 %add.i.us.1.1 to i64 - %arrayidx7.i.us.1.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.1.1 - %823 = load float, float* %arrayidx7.i.us.1.1, align 4, !tbaa !12 - %sub.i.us.1.1 = fsub float %823, %822 - store float %sub.i.us.1.1, float* %arrayidx7.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.1.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.1.1 - %824 = load float, float* %arrayidx10.i.us.1.1, align 4, !tbaa !12 - %mul11.i.us.1.1 = fmul float %10, %824 - %div.i.us.1.1 = fdiv float %sub.i.us.1.1, %mul11.i.us.1.1, !fpmath !26 - store float %div.i.us.1.1, float* %arrayidx7.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.1.1 - -if.end.r_exit.i.us.1.1: ; preds = %if.then.i.us.1.1, %if.end.r_exit.i.us.1 - %825 = add nuw nsw i64 %_local_id_x.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %825, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.1.loopexit, label %pregion_for_entry.entry.i.us.1, !llvm.loop !84 - -if.then.i.us.1368: ; preds = %if.end.r_exit.i.us - %sext.i.us.1358 = shl i64 %add1.i.i.us.1354, 32 - %idxprom.i.us.1359 = ashr exact i64 %sext.i.us.1358, 32 - %arrayidx.i.us.1360 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1359 - %826 = load float, float* %arrayidx.i.us.1360, align 4, !tbaa !12 - %add.i.us.1361 = add nsw i32 %mul.i, %conv.i.us.1355 - %idxprom6.i.us.1362 = sext i32 %add.i.us.1361 to i64 - %arrayidx7.i.us.1363 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.1362 - %827 = load float, float* %arrayidx7.i.us.1363, align 4, !tbaa !12 - %sub.i.us.1364 = fsub float %827, %826 - store float %sub.i.us.1364, float* %arrayidx7.i.us.1363, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.us.1365 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.1359 - %828 = load float, float* %arrayidx10.i.us.1365, align 4, !tbaa !12 - %mul11.i.us.1366 = fmul float %10, %828 - %div.i.us.1367 = fdiv float %sub.i.us.1364, %mul11.i.us.1366, !fpmath !26 - store float %div.i.us.1367, float* %arrayidx7.i.us.1363, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.1369 - -if.end.r_exit.i.us.1369: ; preds = %if.then.i.us.1368, %if.end.r_exit.i.us - %829 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %829, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !85 -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_reduce_kernel_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { -pregion_for_entry.pregion_for_init.i.i: - %5 = bitcast i8** %0 to float*** - %6 = load float**, float*** %5, align 8 - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float*** - %10 = load float**, float*** %9, align 8 - %11 = load float*, float** %10, align 8 - %12 = getelementptr i8*, i8** %0, i64 2 - %13 = bitcast i8** %12 to float*** - %14 = load float**, float*** %13, align 8 - %15 = load float*, float** %14, align 8 - %16 = getelementptr i8*, i8** %0, i64 3 - %17 = bitcast i8** %16 to float** - %18 = load float*, float** %17, align 8 - %19 = load float, float* %18, align 4 - %20 = getelementptr i8*, i8** %0, i64 4 - %21 = bitcast i8** %20 to i32** - %22 = load i32*, i32** %21, align 8 - %23 = load i32, i32* %22, align 4 - %24 = getelementptr i8*, i8** %0, i64 5 - %25 = bitcast i8** %24 to i32** - %26 = load i32*, i32** %25, align 8 - %27 = load i32, i32* %26, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %28 = tail call float @llvm.sqrt.f32(float %19) #5 - %conv2.i.i = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %27, %conv2.i.i - %mul.i.i = mul nsw i32 %23, %conv2.i.i - br i1 %cmp.i.i, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %29 = trunc i64 %3 to i32 - %30 = mul i32 %23, %29 - %31 = shl i32 %30, 3 - %32 = trunc i64 %2 to i32 - %33 = shl i32 %32, 5 - %34 = add i32 %31, %33 - %35 = icmp sgt i32 %34, 2147483616 - br i1 %35, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %36 = trunc i64 %2 to i32 - %37 = shl i32 %36, 5 - %38 = sext i32 %37 to i64 - %scevgep = getelementptr float, float* %7, i64 %38 - %39 = add nsw i64 %38, 32 - %scevgep4 = getelementptr float, float* %7, i64 %39 - %40 = trunc i64 %3 to i32 - %41 = mul i32 %23, %40 - %42 = shl i32 %41, 3 - %43 = add i32 %42, %37 - %44 = sext i32 %43 to i64 - %scevgep6 = getelementptr float, float* %15, i64 %44 - %45 = add nsw i64 %44, 32 - %scevgep8 = getelementptr float, float* %15, i64 %45 - %scevgep10 = getelementptr float, float* %11, i64 %38 - %scevgep12 = getelementptr float, float* %11, i64 %39 - %bound0 = icmp ult float* %scevgep, %scevgep8 - %bound1 = icmp ult float* %scevgep6, %scevgep4 - %found.conflict = and i1 %bound0, %bound1 - %bound014 = icmp ult float* %scevgep10, %scevgep8 - %bound115 = icmp ult float* %scevgep6, %scevgep12 - %found.conflict16 = and i1 %bound014, %bound115 - %conflict.rdx = or i1 %found.conflict, %found.conflict16 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert17 = insertelement <8 x i32> undef, i32 %23, i32 0 - %broadcast.splat18 = shufflevector <8 x i32> %broadcast.splatinsert17, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert21 = insertelement <8 x float> undef, float %28, i32 0 - %broadcast.splat22 = shufflevector <8 x float> %broadcast.splatinsert21, <8 x float> undef, <8 x i32> zeroinitializer - %46 = or <8 x i64> %broadcast.splat, - %47 = trunc <8 x i64> %46 to <8 x i32> - %48 = icmp sgt <8 x i32> %broadcast.splat18, %47 - %49 = extractelement <8 x i64> %46, i32 0 - %50 = shl i64 %49, 32 - %51 = ashr exact i64 %50, 32 - %52 = getelementptr inbounds float, float* %7, i64 %51 - %53 = bitcast float* %52 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %53, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !86, !noalias !89 - %54 = extractelement <8 x i32> %47, i32 0 - %55 = add nsw i32 %mul.i.i, %54 - %56 = sext i32 %55 to i64 - %57 = getelementptr inbounds float, float* %15, i64 %56 - %58 = bitcast float* %57 to <8 x float>* - %wide.masked.load19 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %58, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !89 - %59 = fsub <8 x float> %wide.masked.load19, %wide.masked.load - %60 = bitcast float* %57 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %59, <8 x float>* %60, i32 4, <8 x i1> %48), !tbaa !12, !alias.scope !89, !llvm.access.group !21 - %61 = getelementptr inbounds float, float* %11, i64 %51 - %62 = bitcast float* %61 to <8 x float>* - %wide.masked.load20 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %62, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !91, !noalias !89 - %63 = fmul <8 x float> %broadcast.splat22, %wide.masked.load20 - %64 = fdiv <8 x float> %59, %63, !fpmath !26 - %65 = bitcast float* %57 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %64, <8 x float>* %65, i32 4, <8 x i1> %48), !tbaa !12, !alias.scope !89, !llvm.access.group !21 - %66 = or <8 x i64> %broadcast.splat, - %67 = trunc <8 x i64> %66 to <8 x i32> - %68 = icmp sgt <8 x i32> %broadcast.splat18, %67 - %69 = extractelement <8 x i64> %66, i32 0 - %70 = shl i64 %69, 32 - %71 = ashr exact i64 %70, 32 - %72 = getelementptr inbounds float, float* %7, i64 %71 - %73 = bitcast float* %72 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %73, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !86, !noalias !89 - %74 = extractelement <8 x i32> %67, i32 0 - %75 = add nsw i32 %mul.i.i, %74 - %76 = sext i32 %75 to i64 - %77 = getelementptr inbounds float, float* %15, i64 %76 - %78 = bitcast float* %77 to <8 x float>* - %wide.masked.load19.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %78, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !89 - %79 = fsub <8 x float> %wide.masked.load19.1, %wide.masked.load.1 - %80 = bitcast float* %77 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %79, <8 x float>* %80, i32 4, <8 x i1> %68), !tbaa !12, !alias.scope !89, !llvm.access.group !21 - %81 = getelementptr inbounds float, float* %11, i64 %71 - %82 = bitcast float* %81 to <8 x float>* - %wide.masked.load20.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %82, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !91, !noalias !89 - %83 = fmul <8 x float> %broadcast.splat22, %wide.masked.load20.1 - %84 = fdiv <8 x float> %79, %83, !fpmath !26 - %85 = bitcast float* %77 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %84, <8 x float>* %85, i32 4, <8 x i1> %68), !tbaa !12, !alias.scope !89, !llvm.access.group !21 - %86 = or <8 x i64> %broadcast.splat, - %87 = trunc <8 x i64> %86 to <8 x i32> - %88 = icmp sgt <8 x i32> %broadcast.splat18, %87 - %89 = extractelement <8 x i64> %86, i32 0 - %90 = shl i64 %89, 32 - %91 = ashr exact i64 %90, 32 - %92 = getelementptr inbounds float, float* %7, i64 %91 - %93 = bitcast float* %92 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %93, i32 4, <8 x i1> %88, <8 x float> undef), !tbaa !12, !alias.scope !86, !noalias !89 - %94 = extractelement <8 x i32> %87, i32 0 - %95 = add nsw i32 %mul.i.i, %94 - %96 = sext i32 %95 to i64 - %97 = getelementptr inbounds float, float* %15, i64 %96 - %98 = bitcast float* %97 to <8 x float>* - %wide.masked.load19.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %98, i32 4, <8 x i1> %88, <8 x float> undef), !tbaa !12, !alias.scope !89 - %99 = fsub <8 x float> %wide.masked.load19.2, %wide.masked.load.2 - %100 = bitcast float* %97 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %99, <8 x float>* %100, i32 4, <8 x i1> %88), !tbaa !12, !alias.scope !89, !llvm.access.group !21 - %101 = getelementptr inbounds float, float* %11, i64 %91 - %102 = bitcast float* %101 to <8 x float>* - %wide.masked.load20.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %102, i32 4, <8 x i1> %88, <8 x float> undef), !tbaa !12, !alias.scope !91, !noalias !89 - %103 = fmul <8 x float> %broadcast.splat22, %wide.masked.load20.2 - %104 = fdiv <8 x float> %99, %103, !fpmath !26 - %105 = bitcast float* %97 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %104, <8 x float>* %105, i32 4, <8 x i1> %88), !tbaa !12, !alias.scope !89, !llvm.access.group !21 - %106 = or <8 x i64> %broadcast.splat, - %107 = trunc <8 x i64> %106 to <8 x i32> - %108 = icmp sgt <8 x i32> %broadcast.splat18, %107 - %109 = extractelement <8 x i64> %106, i32 0 - %110 = shl i64 %109, 32 - %111 = ashr exact i64 %110, 32 - %112 = getelementptr inbounds float, float* %7, i64 %111 - %113 = bitcast float* %112 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %113, i32 4, <8 x i1> %108, <8 x float> undef), !tbaa !12, !alias.scope !86, !noalias !89 - %114 = extractelement <8 x i32> %107, i32 0 - %115 = add nsw i32 %mul.i.i, %114 - %116 = sext i32 %115 to i64 - %117 = getelementptr inbounds float, float* %15, i64 %116 - %118 = bitcast float* %117 to <8 x float>* - %wide.masked.load19.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %118, i32 4, <8 x i1> %108, <8 x float> undef), !tbaa !12, !alias.scope !89 - %119 = fsub <8 x float> %wide.masked.load19.3, %wide.masked.load.3 - %120 = bitcast float* %117 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %119, <8 x float>* %120, i32 4, <8 x i1> %108), !tbaa !12, !alias.scope !89, !llvm.access.group !21 - %121 = getelementptr inbounds float, float* %11, i64 %111 - %122 = bitcast float* %121 to <8 x float>* - %wide.masked.load20.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %122, i32 4, <8 x i1> %108, <8 x float> undef), !tbaa !12, !alias.scope !91, !noalias !89 - %123 = fmul <8 x float> %broadcast.splat22, %wide.masked.load20.3 - %124 = fdiv <8 x float> %119, %123, !fpmath !26 - %125 = bitcast float* %117 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %124, <8 x float>* %125, i32 4, <8 x i1> %108), !tbaa !12, !alias.scope !89, !llvm.access.group !21 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1369, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.preheader ], [ %847, %if.end.r_exit.i.i.us.1369 ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %23, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us - %126 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom6.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx7.i.i.us = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us - %127 = load float, float* %arrayidx7.i.i.us, align 4, !tbaa !12 - %sub.i.i.us = fsub float %127, %126 - store float %sub.i.i.us, float* %arrayidx7.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us - %128 = load float, float* %arrayidx10.i.i.us, align 4, !tbaa !12 - %mul11.i.i.us = fmul float %28, %128 - %div.i.i.us = fdiv float %sub.i.i.us, %mul11.i.i.us, !fpmath !26 - store float %div.i.i.us, float* %arrayidx7.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %129 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1354 = add nuw nsw i64 %129, %mul.i.i.i - %conv.i.i.us.1355 = trunc i64 %add1.i.i.i.us.1354 to i32 - %cmp4.i.i.us.1356 = icmp sgt i32 %23, %conv.i.i.us.1355 - br i1 %cmp4.i.i.us.1356, label %if.then.i.i.us.1368, label %if.end.r_exit.i.i.us.1369 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.r_exit.i.i.us.1369 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i - %130 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.1 = or i32 %130, 1 - %cmp.i.i.1 = icmp sgt i32 %27, %conv2.i.i.1 - %mul.i.i.1 = mul nsw i32 %23, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck30, label %pregion_for_end.i.i.1 - -vector.scevcheck30: ; preds = %pregion_for_end.i.i - %131 = mul i32 %23, %conv2.i.i.1 - %132 = trunc i64 %2 to i32 - %133 = shl i32 %132, 5 - %134 = add i32 %131, %133 - %135 = icmp sgt i32 %134, 2147483616 - br i1 %135, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.memcheck52 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.memcheck52, %vector.scevcheck30 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.memcheck52: ; preds = %vector.scevcheck30 - %136 = trunc i64 %2 to i32 - %137 = shl i32 %136, 5 - %138 = sext i32 %137 to i64 - %scevgep32 = getelementptr float, float* %7, i64 %138 - %139 = add nsw i64 %138, 32 - %scevgep34 = getelementptr float, float* %7, i64 %139 - %140 = mul i32 %23, %conv2.i.i.1 - %141 = add i32 %140, %137 - %142 = sext i32 %141 to i64 - %scevgep36 = getelementptr float, float* %15, i64 %142 - %143 = add nsw i64 %142, 32 - %scevgep38 = getelementptr float, float* %15, i64 %143 - %scevgep40 = getelementptr float, float* %11, i64 %138 - %scevgep42 = getelementptr float, float* %11, i64 %139 - %bound044 = icmp ult float* %scevgep32, %scevgep38 - %bound145 = icmp ult float* %scevgep36, %scevgep34 - %found.conflict46 = and i1 %bound044, %bound145 - %bound047 = icmp ult float* %scevgep40, %scevgep38 - %bound148 = icmp ult float* %scevgep36, %scevgep42 - %found.conflict49 = and i1 %bound047, %bound148 - %conflict.rdx50 = or i1 %found.conflict46, %found.conflict49 - br i1 %conflict.rdx50, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph53 - -vector.ph53: ; preds = %vector.memcheck52 - %broadcast.splatinsert60 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat61 = shufflevector <8 x i64> %broadcast.splatinsert60, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert62 = insertelement <8 x i32> undef, i32 %23, i32 0 - %broadcast.splat63 = shufflevector <8 x i32> %broadcast.splatinsert62, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert67 = insertelement <8 x float> undef, float %28, i32 0 - %broadcast.splat68 = shufflevector <8 x float> %broadcast.splatinsert67, <8 x float> undef, <8 x i32> zeroinitializer - %144 = or <8 x i64> %broadcast.splat61, - %145 = trunc <8 x i64> %144 to <8 x i32> - %146 = icmp sgt <8 x i32> %broadcast.splat63, %145 - %147 = extractelement <8 x i64> %144, i32 0 - %148 = shl i64 %147, 32 - %149 = ashr exact i64 %148, 32 - %150 = getelementptr inbounds float, float* %7, i64 %149 - %151 = bitcast float* %150 to <8 x float>* - %wide.masked.load64 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %151, i32 4, <8 x i1> %146, <8 x float> undef), !tbaa !12, !alias.scope !93, !noalias !96 - %152 = extractelement <8 x i32> %145, i32 0 - %153 = add nsw i32 %mul.i.i.1, %152 - %154 = sext i32 %153 to i64 - %155 = getelementptr inbounds float, float* %15, i64 %154 - %156 = bitcast float* %155 to <8 x float>* - %wide.masked.load65 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %156, i32 4, <8 x i1> %146, <8 x float> undef), !tbaa !12, !alias.scope !96 - %157 = fsub <8 x float> %wide.masked.load65, %wide.masked.load64 - %158 = bitcast float* %155 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %157, <8 x float>* %158, i32 4, <8 x i1> %146), !tbaa !12, !alias.scope !96, !llvm.access.group !21 - %159 = getelementptr inbounds float, float* %11, i64 %149 - %160 = bitcast float* %159 to <8 x float>* - %wide.masked.load66 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %160, i32 4, <8 x i1> %146, <8 x float> undef), !tbaa !12, !alias.scope !98, !noalias !96 - %161 = fmul <8 x float> %broadcast.splat68, %wide.masked.load66 - %162 = fdiv <8 x float> %157, %161, !fpmath !26 - %163 = bitcast float* %155 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %162, <8 x float>* %163, i32 4, <8 x i1> %146), !tbaa !12, !alias.scope !96, !llvm.access.group !21 - %164 = or <8 x i64> %broadcast.splat61, - %165 = trunc <8 x i64> %164 to <8 x i32> - %166 = icmp sgt <8 x i32> %broadcast.splat63, %165 - %167 = extractelement <8 x i64> %164, i32 0 - %168 = shl i64 %167, 32 - %169 = ashr exact i64 %168, 32 - %170 = getelementptr inbounds float, float* %7, i64 %169 - %171 = bitcast float* %170 to <8 x float>* - %wide.masked.load64.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %171, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12, !alias.scope !93, !noalias !96 - %172 = extractelement <8 x i32> %165, i32 0 - %173 = add nsw i32 %mul.i.i.1, %172 - %174 = sext i32 %173 to i64 - %175 = getelementptr inbounds float, float* %15, i64 %174 - %176 = bitcast float* %175 to <8 x float>* - %wide.masked.load65.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %176, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12, !alias.scope !96 - %177 = fsub <8 x float> %wide.masked.load65.1, %wide.masked.load64.1 - %178 = bitcast float* %175 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %177, <8 x float>* %178, i32 4, <8 x i1> %166), !tbaa !12, !alias.scope !96, !llvm.access.group !21 - %179 = getelementptr inbounds float, float* %11, i64 %169 - %180 = bitcast float* %179 to <8 x float>* - %wide.masked.load66.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %180, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12, !alias.scope !98, !noalias !96 - %181 = fmul <8 x float> %broadcast.splat68, %wide.masked.load66.1 - %182 = fdiv <8 x float> %177, %181, !fpmath !26 - %183 = bitcast float* %175 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %182, <8 x float>* %183, i32 4, <8 x i1> %166), !tbaa !12, !alias.scope !96, !llvm.access.group !21 - %184 = or <8 x i64> %broadcast.splat61, - %185 = trunc <8 x i64> %184 to <8 x i32> - %186 = icmp sgt <8 x i32> %broadcast.splat63, %185 - %187 = extractelement <8 x i64> %184, i32 0 - %188 = shl i64 %187, 32 - %189 = ashr exact i64 %188, 32 - %190 = getelementptr inbounds float, float* %7, i64 %189 - %191 = bitcast float* %190 to <8 x float>* - %wide.masked.load64.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %191, i32 4, <8 x i1> %186, <8 x float> undef), !tbaa !12, !alias.scope !93, !noalias !96 - %192 = extractelement <8 x i32> %185, i32 0 - %193 = add nsw i32 %mul.i.i.1, %192 - %194 = sext i32 %193 to i64 - %195 = getelementptr inbounds float, float* %15, i64 %194 - %196 = bitcast float* %195 to <8 x float>* - %wide.masked.load65.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %196, i32 4, <8 x i1> %186, <8 x float> undef), !tbaa !12, !alias.scope !96 - %197 = fsub <8 x float> %wide.masked.load65.2, %wide.masked.load64.2 - %198 = bitcast float* %195 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %197, <8 x float>* %198, i32 4, <8 x i1> %186), !tbaa !12, !alias.scope !96, !llvm.access.group !21 - %199 = getelementptr inbounds float, float* %11, i64 %189 - %200 = bitcast float* %199 to <8 x float>* - %wide.masked.load66.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %200, i32 4, <8 x i1> %186, <8 x float> undef), !tbaa !12, !alias.scope !98, !noalias !96 - %201 = fmul <8 x float> %broadcast.splat68, %wide.masked.load66.2 - %202 = fdiv <8 x float> %197, %201, !fpmath !26 - %203 = bitcast float* %195 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %202, <8 x float>* %203, i32 4, <8 x i1> %186), !tbaa !12, !alias.scope !96, !llvm.access.group !21 - %204 = or <8 x i64> %broadcast.splat61, - %205 = trunc <8 x i64> %204 to <8 x i32> - %206 = icmp sgt <8 x i32> %broadcast.splat63, %205 - %207 = extractelement <8 x i64> %204, i32 0 - %208 = shl i64 %207, 32 - %209 = ashr exact i64 %208, 32 - %210 = getelementptr inbounds float, float* %7, i64 %209 - %211 = bitcast float* %210 to <8 x float>* - %wide.masked.load64.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %211, i32 4, <8 x i1> %206, <8 x float> undef), !tbaa !12, !alias.scope !93, !noalias !96 - %212 = extractelement <8 x i32> %205, i32 0 - %213 = add nsw i32 %mul.i.i.1, %212 - %214 = sext i32 %213 to i64 - %215 = getelementptr inbounds float, float* %15, i64 %214 - %216 = bitcast float* %215 to <8 x float>* - %wide.masked.load65.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %216, i32 4, <8 x i1> %206, <8 x float> undef), !tbaa !12, !alias.scope !96 - %217 = fsub <8 x float> %wide.masked.load65.3, %wide.masked.load64.3 - %218 = bitcast float* %215 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %217, <8 x float>* %218, i32 4, <8 x i1> %206), !tbaa !12, !alias.scope !96, !llvm.access.group !21 - %219 = getelementptr inbounds float, float* %11, i64 %209 - %220 = bitcast float* %219 to <8 x float>* - %wide.masked.load66.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %220, i32 4, <8 x i1> %206, <8 x float> undef), !tbaa !12, !alias.scope !98, !noalias !96 - %221 = fmul <8 x float> %broadcast.splat68, %wide.masked.load66.3 - %222 = fdiv <8 x float> %217, %221, !fpmath !26 - %223 = bitcast float* %215 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %222, <8 x float>* %223, i32 4, <8 x i1> %206), !tbaa !12, !alias.scope !96, !llvm.access.group !21 - br label %pregion_for_end.i.i.1 - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.1, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ], [ %843, %if.end.r_exit.i.i.us.1.1 ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %23, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1 - %224 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %add.i.i.us.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom6.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx7.i.i.us.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.1 - %225 = load float, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12 - %sub.i.i.us.1 = fsub float %225, %224 - store float %sub.i.i.us.1, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.1 - %226 = load float, float* %arrayidx10.i.i.us.1, align 4, !tbaa !12 - %mul11.i.i.us.1 = fmul float %28, %226 - %div.i.i.us.1 = fdiv float %sub.i.i.us.1, %mul11.i.i.us.1, !fpmath !26 - store float %div.i.i.us.1, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %227 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %227, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %23, %conv.i.i.us.1.1 - br i1 %cmp4.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.r_exit.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.r_exit.i.i.us.1.1 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph53, %pregion_for_end.i.i - %228 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.2 = or i32 %228, 2 - %cmp.i.i.2 = icmp sgt i32 %27, %conv2.i.i.2 - %mul.i.i.2 = mul nsw i32 %23, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck76, label %pregion_for_end.i.i.2 - -vector.scevcheck76: ; preds = %pregion_for_end.i.i.1 - %229 = mul i32 %23, %conv2.i.i.2 - %230 = trunc i64 %2 to i32 - %231 = shl i32 %230, 5 - %232 = add i32 %229, %231 - %233 = icmp sgt i32 %232, 2147483616 - br i1 %233, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.memcheck98 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.memcheck98, %vector.scevcheck76 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.memcheck98: ; preds = %vector.scevcheck76 - %234 = trunc i64 %2 to i32 - %235 = shl i32 %234, 5 - %236 = sext i32 %235 to i64 - %scevgep78 = getelementptr float, float* %7, i64 %236 - %237 = add nsw i64 %236, 32 - %scevgep80 = getelementptr float, float* %7, i64 %237 - %238 = mul i32 %23, %conv2.i.i.2 - %239 = add i32 %238, %235 - %240 = sext i32 %239 to i64 - %scevgep82 = getelementptr float, float* %15, i64 %240 - %241 = add nsw i64 %240, 32 - %scevgep84 = getelementptr float, float* %15, i64 %241 - %scevgep86 = getelementptr float, float* %11, i64 %236 - %scevgep88 = getelementptr float, float* %11, i64 %237 - %bound090 = icmp ult float* %scevgep78, %scevgep84 - %bound191 = icmp ult float* %scevgep82, %scevgep80 - %found.conflict92 = and i1 %bound090, %bound191 - %bound093 = icmp ult float* %scevgep86, %scevgep84 - %bound194 = icmp ult float* %scevgep82, %scevgep88 - %found.conflict95 = and i1 %bound093, %bound194 - %conflict.rdx96 = or i1 %found.conflict92, %found.conflict95 - br i1 %conflict.rdx96, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph99 - -vector.ph99: ; preds = %vector.memcheck98 - %broadcast.splatinsert106 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat107 = shufflevector <8 x i64> %broadcast.splatinsert106, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert108 = insertelement <8 x i32> undef, i32 %23, i32 0 - %broadcast.splat109 = shufflevector <8 x i32> %broadcast.splatinsert108, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert113 = insertelement <8 x float> undef, float %28, i32 0 - %broadcast.splat114 = shufflevector <8 x float> %broadcast.splatinsert113, <8 x float> undef, <8 x i32> zeroinitializer - %242 = or <8 x i64> %broadcast.splat107, - %243 = trunc <8 x i64> %242 to <8 x i32> - %244 = icmp sgt <8 x i32> %broadcast.splat109, %243 - %245 = extractelement <8 x i64> %242, i32 0 - %246 = shl i64 %245, 32 - %247 = ashr exact i64 %246, 32 - %248 = getelementptr inbounds float, float* %7, i64 %247 - %249 = bitcast float* %248 to <8 x float>* - %wide.masked.load110 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %249, i32 4, <8 x i1> %244, <8 x float> undef), !tbaa !12, !alias.scope !100, !noalias !103 - %250 = extractelement <8 x i32> %243, i32 0 - %251 = add nsw i32 %mul.i.i.2, %250 - %252 = sext i32 %251 to i64 - %253 = getelementptr inbounds float, float* %15, i64 %252 - %254 = bitcast float* %253 to <8 x float>* - %wide.masked.load111 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %254, i32 4, <8 x i1> %244, <8 x float> undef), !tbaa !12, !alias.scope !103 - %255 = fsub <8 x float> %wide.masked.load111, %wide.masked.load110 - %256 = bitcast float* %253 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %255, <8 x float>* %256, i32 4, <8 x i1> %244), !tbaa !12, !alias.scope !103, !llvm.access.group !21 - %257 = getelementptr inbounds float, float* %11, i64 %247 - %258 = bitcast float* %257 to <8 x float>* - %wide.masked.load112 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %258, i32 4, <8 x i1> %244, <8 x float> undef), !tbaa !12, !alias.scope !105, !noalias !103 - %259 = fmul <8 x float> %broadcast.splat114, %wide.masked.load112 - %260 = fdiv <8 x float> %255, %259, !fpmath !26 - %261 = bitcast float* %253 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %260, <8 x float>* %261, i32 4, <8 x i1> %244), !tbaa !12, !alias.scope !103, !llvm.access.group !21 - %262 = or <8 x i64> %broadcast.splat107, - %263 = trunc <8 x i64> %262 to <8 x i32> - %264 = icmp sgt <8 x i32> %broadcast.splat109, %263 - %265 = extractelement <8 x i64> %262, i32 0 - %266 = shl i64 %265, 32 - %267 = ashr exact i64 %266, 32 - %268 = getelementptr inbounds float, float* %7, i64 %267 - %269 = bitcast float* %268 to <8 x float>* - %wide.masked.load110.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %269, i32 4, <8 x i1> %264, <8 x float> undef), !tbaa !12, !alias.scope !100, !noalias !103 - %270 = extractelement <8 x i32> %263, i32 0 - %271 = add nsw i32 %mul.i.i.2, %270 - %272 = sext i32 %271 to i64 - %273 = getelementptr inbounds float, float* %15, i64 %272 - %274 = bitcast float* %273 to <8 x float>* - %wide.masked.load111.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %274, i32 4, <8 x i1> %264, <8 x float> undef), !tbaa !12, !alias.scope !103 - %275 = fsub <8 x float> %wide.masked.load111.1, %wide.masked.load110.1 - %276 = bitcast float* %273 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %275, <8 x float>* %276, i32 4, <8 x i1> %264), !tbaa !12, !alias.scope !103, !llvm.access.group !21 - %277 = getelementptr inbounds float, float* %11, i64 %267 - %278 = bitcast float* %277 to <8 x float>* - %wide.masked.load112.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %278, i32 4, <8 x i1> %264, <8 x float> undef), !tbaa !12, !alias.scope !105, !noalias !103 - %279 = fmul <8 x float> %broadcast.splat114, %wide.masked.load112.1 - %280 = fdiv <8 x float> %275, %279, !fpmath !26 - %281 = bitcast float* %273 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %280, <8 x float>* %281, i32 4, <8 x i1> %264), !tbaa !12, !alias.scope !103, !llvm.access.group !21 - %282 = or <8 x i64> %broadcast.splat107, - %283 = trunc <8 x i64> %282 to <8 x i32> - %284 = icmp sgt <8 x i32> %broadcast.splat109, %283 - %285 = extractelement <8 x i64> %282, i32 0 - %286 = shl i64 %285, 32 - %287 = ashr exact i64 %286, 32 - %288 = getelementptr inbounds float, float* %7, i64 %287 - %289 = bitcast float* %288 to <8 x float>* - %wide.masked.load110.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %289, i32 4, <8 x i1> %284, <8 x float> undef), !tbaa !12, !alias.scope !100, !noalias !103 - %290 = extractelement <8 x i32> %283, i32 0 - %291 = add nsw i32 %mul.i.i.2, %290 - %292 = sext i32 %291 to i64 - %293 = getelementptr inbounds float, float* %15, i64 %292 - %294 = bitcast float* %293 to <8 x float>* - %wide.masked.load111.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %294, i32 4, <8 x i1> %284, <8 x float> undef), !tbaa !12, !alias.scope !103 - %295 = fsub <8 x float> %wide.masked.load111.2, %wide.masked.load110.2 - %296 = bitcast float* %293 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %295, <8 x float>* %296, i32 4, <8 x i1> %284), !tbaa !12, !alias.scope !103, !llvm.access.group !21 - %297 = getelementptr inbounds float, float* %11, i64 %287 - %298 = bitcast float* %297 to <8 x float>* - %wide.masked.load112.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %298, i32 4, <8 x i1> %284, <8 x float> undef), !tbaa !12, !alias.scope !105, !noalias !103 - %299 = fmul <8 x float> %broadcast.splat114, %wide.masked.load112.2 - %300 = fdiv <8 x float> %295, %299, !fpmath !26 - %301 = bitcast float* %293 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %300, <8 x float>* %301, i32 4, <8 x i1> %284), !tbaa !12, !alias.scope !103, !llvm.access.group !21 - %302 = or <8 x i64> %broadcast.splat107, - %303 = trunc <8 x i64> %302 to <8 x i32> - %304 = icmp sgt <8 x i32> %broadcast.splat109, %303 - %305 = extractelement <8 x i64> %302, i32 0 - %306 = shl i64 %305, 32 - %307 = ashr exact i64 %306, 32 - %308 = getelementptr inbounds float, float* %7, i64 %307 - %309 = bitcast float* %308 to <8 x float>* - %wide.masked.load110.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %309, i32 4, <8 x i1> %304, <8 x float> undef), !tbaa !12, !alias.scope !100, !noalias !103 - %310 = extractelement <8 x i32> %303, i32 0 - %311 = add nsw i32 %mul.i.i.2, %310 - %312 = sext i32 %311 to i64 - %313 = getelementptr inbounds float, float* %15, i64 %312 - %314 = bitcast float* %313 to <8 x float>* - %wide.masked.load111.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %314, i32 4, <8 x i1> %304, <8 x float> undef), !tbaa !12, !alias.scope !103 - %315 = fsub <8 x float> %wide.masked.load111.3, %wide.masked.load110.3 - %316 = bitcast float* %313 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %315, <8 x float>* %316, i32 4, <8 x i1> %304), !tbaa !12, !alias.scope !103, !llvm.access.group !21 - %317 = getelementptr inbounds float, float* %11, i64 %307 - %318 = bitcast float* %317 to <8 x float>* - %wide.masked.load112.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %318, i32 4, <8 x i1> %304, <8 x float> undef), !tbaa !12, !alias.scope !105, !noalias !103 - %319 = fmul <8 x float> %broadcast.splat114, %wide.masked.load112.3 - %320 = fdiv <8 x float> %315, %319, !fpmath !26 - %321 = bitcast float* %313 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %320, <8 x float>* %321, i32 4, <8 x i1> %304), !tbaa !12, !alias.scope !103, !llvm.access.group !21 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.r_exit.i.i.us.2.1, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ], [ %839, %if.end.r_exit.i.i.us.2.1 ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %23, %conv.i.i.us.2 - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.r_exit.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %sext.i.i.us.2 = shl i64 %add1.i.i.i.us.2, 32 - %idxprom.i.i.us.2 = ashr exact i64 %sext.i.i.us.2, 32 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.2 - %322 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %add.i.i.us.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom6.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx7.i.i.us.2 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.2 - %323 = load float, float* %arrayidx7.i.i.us.2, align 4, !tbaa !12 - %sub.i.i.us.2 = fsub float %323, %322 - store float %sub.i.i.us.2, float* %arrayidx7.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.2 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.2 - %324 = load float, float* %arrayidx10.i.i.us.2, align 4, !tbaa !12 - %mul11.i.i.us.2 = fmul float %28, %324 - %div.i.i.us.2 = fdiv float %sub.i.i.us.2, %mul11.i.i.us.2, !fpmath !26 - store float %div.i.i.us.2, float* %arrayidx7.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.2 - -if.end.r_exit.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %325 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %325, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %23, %conv.i.i.us.2.1 - br i1 %cmp4.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.r_exit.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.r_exit.i.i.us.2.1 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph99, %pregion_for_end.i.i.1 - %326 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.3 = or i32 %326, 3 - %cmp.i.i.3 = icmp sgt i32 %27, %conv2.i.i.3 - %mul.i.i.3 = mul nsw i32 %23, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck122, label %pregion_for_end.i.i.3 - -vector.scevcheck122: ; preds = %pregion_for_end.i.i.2 - %327 = mul i32 %23, %conv2.i.i.3 - %328 = trunc i64 %2 to i32 - %329 = shl i32 %328, 5 - %330 = add i32 %327, %329 - %331 = icmp sgt i32 %330, 2147483616 - br i1 %331, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.memcheck144 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.memcheck144, %vector.scevcheck122 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.memcheck144: ; preds = %vector.scevcheck122 - %332 = trunc i64 %2 to i32 - %333 = shl i32 %332, 5 - %334 = sext i32 %333 to i64 - %scevgep124 = getelementptr float, float* %7, i64 %334 - %335 = add nsw i64 %334, 32 - %scevgep126 = getelementptr float, float* %7, i64 %335 - %336 = mul i32 %23, %conv2.i.i.3 - %337 = add i32 %336, %333 - %338 = sext i32 %337 to i64 - %scevgep128 = getelementptr float, float* %15, i64 %338 - %339 = add nsw i64 %338, 32 - %scevgep130 = getelementptr float, float* %15, i64 %339 - %scevgep132 = getelementptr float, float* %11, i64 %334 - %scevgep134 = getelementptr float, float* %11, i64 %335 - %bound0136 = icmp ult float* %scevgep124, %scevgep130 - %bound1137 = icmp ult float* %scevgep128, %scevgep126 - %found.conflict138 = and i1 %bound0136, %bound1137 - %bound0139 = icmp ult float* %scevgep132, %scevgep130 - %bound1140 = icmp ult float* %scevgep128, %scevgep134 - %found.conflict141 = and i1 %bound0139, %bound1140 - %conflict.rdx142 = or i1 %found.conflict138, %found.conflict141 - br i1 %conflict.rdx142, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph145 - -vector.ph145: ; preds = %vector.memcheck144 - %broadcast.splatinsert152 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat153 = shufflevector <8 x i64> %broadcast.splatinsert152, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert154 = insertelement <8 x i32> undef, i32 %23, i32 0 - %broadcast.splat155 = shufflevector <8 x i32> %broadcast.splatinsert154, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert159 = insertelement <8 x float> undef, float %28, i32 0 - %broadcast.splat160 = shufflevector <8 x float> %broadcast.splatinsert159, <8 x float> undef, <8 x i32> zeroinitializer - %340 = or <8 x i64> %broadcast.splat153, - %341 = trunc <8 x i64> %340 to <8 x i32> - %342 = icmp sgt <8 x i32> %broadcast.splat155, %341 - %343 = extractelement <8 x i64> %340, i32 0 - %344 = shl i64 %343, 32 - %345 = ashr exact i64 %344, 32 - %346 = getelementptr inbounds float, float* %7, i64 %345 - %347 = bitcast float* %346 to <8 x float>* - %wide.masked.load156 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %347, i32 4, <8 x i1> %342, <8 x float> undef), !tbaa !12, !alias.scope !107, !noalias !110 - %348 = extractelement <8 x i32> %341, i32 0 - %349 = add nsw i32 %mul.i.i.3, %348 - %350 = sext i32 %349 to i64 - %351 = getelementptr inbounds float, float* %15, i64 %350 - %352 = bitcast float* %351 to <8 x float>* - %wide.masked.load157 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %352, i32 4, <8 x i1> %342, <8 x float> undef), !tbaa !12, !alias.scope !110 - %353 = fsub <8 x float> %wide.masked.load157, %wide.masked.load156 - %354 = bitcast float* %351 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %353, <8 x float>* %354, i32 4, <8 x i1> %342), !tbaa !12, !alias.scope !110, !llvm.access.group !21 - %355 = getelementptr inbounds float, float* %11, i64 %345 - %356 = bitcast float* %355 to <8 x float>* - %wide.masked.load158 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %356, i32 4, <8 x i1> %342, <8 x float> undef), !tbaa !12, !alias.scope !112, !noalias !110 - %357 = fmul <8 x float> %broadcast.splat160, %wide.masked.load158 - %358 = fdiv <8 x float> %353, %357, !fpmath !26 - %359 = bitcast float* %351 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %358, <8 x float>* %359, i32 4, <8 x i1> %342), !tbaa !12, !alias.scope !110, !llvm.access.group !21 - %360 = or <8 x i64> %broadcast.splat153, - %361 = trunc <8 x i64> %360 to <8 x i32> - %362 = icmp sgt <8 x i32> %broadcast.splat155, %361 - %363 = extractelement <8 x i64> %360, i32 0 - %364 = shl i64 %363, 32 - %365 = ashr exact i64 %364, 32 - %366 = getelementptr inbounds float, float* %7, i64 %365 - %367 = bitcast float* %366 to <8 x float>* - %wide.masked.load156.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %367, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !107, !noalias !110 - %368 = extractelement <8 x i32> %361, i32 0 - %369 = add nsw i32 %mul.i.i.3, %368 - %370 = sext i32 %369 to i64 - %371 = getelementptr inbounds float, float* %15, i64 %370 - %372 = bitcast float* %371 to <8 x float>* - %wide.masked.load157.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %372, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !110 - %373 = fsub <8 x float> %wide.masked.load157.1, %wide.masked.load156.1 - %374 = bitcast float* %371 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %373, <8 x float>* %374, i32 4, <8 x i1> %362), !tbaa !12, !alias.scope !110, !llvm.access.group !21 - %375 = getelementptr inbounds float, float* %11, i64 %365 - %376 = bitcast float* %375 to <8 x float>* - %wide.masked.load158.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %376, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !112, !noalias !110 - %377 = fmul <8 x float> %broadcast.splat160, %wide.masked.load158.1 - %378 = fdiv <8 x float> %373, %377, !fpmath !26 - %379 = bitcast float* %371 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %378, <8 x float>* %379, i32 4, <8 x i1> %362), !tbaa !12, !alias.scope !110, !llvm.access.group !21 - %380 = or <8 x i64> %broadcast.splat153, - %381 = trunc <8 x i64> %380 to <8 x i32> - %382 = icmp sgt <8 x i32> %broadcast.splat155, %381 - %383 = extractelement <8 x i64> %380, i32 0 - %384 = shl i64 %383, 32 - %385 = ashr exact i64 %384, 32 - %386 = getelementptr inbounds float, float* %7, i64 %385 - %387 = bitcast float* %386 to <8 x float>* - %wide.masked.load156.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %387, i32 4, <8 x i1> %382, <8 x float> undef), !tbaa !12, !alias.scope !107, !noalias !110 - %388 = extractelement <8 x i32> %381, i32 0 - %389 = add nsw i32 %mul.i.i.3, %388 - %390 = sext i32 %389 to i64 - %391 = getelementptr inbounds float, float* %15, i64 %390 - %392 = bitcast float* %391 to <8 x float>* - %wide.masked.load157.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %392, i32 4, <8 x i1> %382, <8 x float> undef), !tbaa !12, !alias.scope !110 - %393 = fsub <8 x float> %wide.masked.load157.2, %wide.masked.load156.2 - %394 = bitcast float* %391 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %393, <8 x float>* %394, i32 4, <8 x i1> %382), !tbaa !12, !alias.scope !110, !llvm.access.group !21 - %395 = getelementptr inbounds float, float* %11, i64 %385 - %396 = bitcast float* %395 to <8 x float>* - %wide.masked.load158.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %396, i32 4, <8 x i1> %382, <8 x float> undef), !tbaa !12, !alias.scope !112, !noalias !110 - %397 = fmul <8 x float> %broadcast.splat160, %wide.masked.load158.2 - %398 = fdiv <8 x float> %393, %397, !fpmath !26 - %399 = bitcast float* %391 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %398, <8 x float>* %399, i32 4, <8 x i1> %382), !tbaa !12, !alias.scope !110, !llvm.access.group !21 - %400 = or <8 x i64> %broadcast.splat153, - %401 = trunc <8 x i64> %400 to <8 x i32> - %402 = icmp sgt <8 x i32> %broadcast.splat155, %401 - %403 = extractelement <8 x i64> %400, i32 0 - %404 = shl i64 %403, 32 - %405 = ashr exact i64 %404, 32 - %406 = getelementptr inbounds float, float* %7, i64 %405 - %407 = bitcast float* %406 to <8 x float>* - %wide.masked.load156.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %407, i32 4, <8 x i1> %402, <8 x float> undef), !tbaa !12, !alias.scope !107, !noalias !110 - %408 = extractelement <8 x i32> %401, i32 0 - %409 = add nsw i32 %mul.i.i.3, %408 - %410 = sext i32 %409 to i64 - %411 = getelementptr inbounds float, float* %15, i64 %410 - %412 = bitcast float* %411 to <8 x float>* - %wide.masked.load157.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %412, i32 4, <8 x i1> %402, <8 x float> undef), !tbaa !12, !alias.scope !110 - %413 = fsub <8 x float> %wide.masked.load157.3, %wide.masked.load156.3 - %414 = bitcast float* %411 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %413, <8 x float>* %414, i32 4, <8 x i1> %402), !tbaa !12, !alias.scope !110, !llvm.access.group !21 - %415 = getelementptr inbounds float, float* %11, i64 %405 - %416 = bitcast float* %415 to <8 x float>* - %wide.masked.load158.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %416, i32 4, <8 x i1> %402, <8 x float> undef), !tbaa !12, !alias.scope !112, !noalias !110 - %417 = fmul <8 x float> %broadcast.splat160, %wide.masked.load158.3 - %418 = fdiv <8 x float> %413, %417, !fpmath !26 - %419 = bitcast float* %411 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %418, <8 x float>* %419, i32 4, <8 x i1> %402), !tbaa !12, !alias.scope !110, !llvm.access.group !21 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.r_exit.i.i.us.3.1, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ], [ %835, %if.end.r_exit.i.i.us.3.1 ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %23, %conv.i.i.us.3 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.r_exit.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %sext.i.i.us.3 = shl i64 %add1.i.i.i.us.3, 32 - %idxprom.i.i.us.3 = ashr exact i64 %sext.i.i.us.3, 32 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.3 - %420 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %add.i.i.us.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom6.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx7.i.i.us.3 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.3 - %421 = load float, float* %arrayidx7.i.i.us.3, align 4, !tbaa !12 - %sub.i.i.us.3 = fsub float %421, %420 - store float %sub.i.i.us.3, float* %arrayidx7.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.3 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.3 - %422 = load float, float* %arrayidx10.i.i.us.3, align 4, !tbaa !12 - %mul11.i.i.us.3 = fmul float %28, %422 - %div.i.i.us.3 = fdiv float %sub.i.i.us.3, %mul11.i.i.us.3, !fpmath !26 - store float %div.i.i.us.3, float* %arrayidx7.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.3 - -if.end.r_exit.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %423 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %423, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %23, %conv.i.i.us.3.1 - br i1 %cmp4.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.r_exit.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.r_exit.i.i.us.3.1 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph145, %pregion_for_end.i.i.2 - %424 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.4 = or i32 %424, 4 - %cmp.i.i.4 = icmp sgt i32 %27, %conv2.i.i.4 - %mul.i.i.4 = mul nsw i32 %23, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck168, label %pregion_for_end.i.i.4 - -vector.scevcheck168: ; preds = %pregion_for_end.i.i.3 - %425 = mul i32 %23, %conv2.i.i.4 - %426 = trunc i64 %2 to i32 - %427 = shl i32 %426, 5 - %428 = add i32 %425, %427 - %429 = icmp sgt i32 %428, 2147483616 - br i1 %429, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.memcheck190 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.memcheck190, %vector.scevcheck168 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.memcheck190: ; preds = %vector.scevcheck168 - %430 = trunc i64 %2 to i32 - %431 = shl i32 %430, 5 - %432 = sext i32 %431 to i64 - %scevgep170 = getelementptr float, float* %7, i64 %432 - %433 = add nsw i64 %432, 32 - %scevgep172 = getelementptr float, float* %7, i64 %433 - %434 = mul i32 %23, %conv2.i.i.4 - %435 = add i32 %434, %431 - %436 = sext i32 %435 to i64 - %scevgep174 = getelementptr float, float* %15, i64 %436 - %437 = add nsw i64 %436, 32 - %scevgep176 = getelementptr float, float* %15, i64 %437 - %scevgep178 = getelementptr float, float* %11, i64 %432 - %scevgep180 = getelementptr float, float* %11, i64 %433 - %bound0182 = icmp ult float* %scevgep170, %scevgep176 - %bound1183 = icmp ult float* %scevgep174, %scevgep172 - %found.conflict184 = and i1 %bound0182, %bound1183 - %bound0185 = icmp ult float* %scevgep178, %scevgep176 - %bound1186 = icmp ult float* %scevgep174, %scevgep180 - %found.conflict187 = and i1 %bound0185, %bound1186 - %conflict.rdx188 = or i1 %found.conflict184, %found.conflict187 - br i1 %conflict.rdx188, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph191 - -vector.ph191: ; preds = %vector.memcheck190 - %broadcast.splatinsert198 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat199 = shufflevector <8 x i64> %broadcast.splatinsert198, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert200 = insertelement <8 x i32> undef, i32 %23, i32 0 - %broadcast.splat201 = shufflevector <8 x i32> %broadcast.splatinsert200, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert205 = insertelement <8 x float> undef, float %28, i32 0 - %broadcast.splat206 = shufflevector <8 x float> %broadcast.splatinsert205, <8 x float> undef, <8 x i32> zeroinitializer - %438 = or <8 x i64> %broadcast.splat199, - %439 = trunc <8 x i64> %438 to <8 x i32> - %440 = icmp sgt <8 x i32> %broadcast.splat201, %439 - %441 = extractelement <8 x i64> %438, i32 0 - %442 = shl i64 %441, 32 - %443 = ashr exact i64 %442, 32 - %444 = getelementptr inbounds float, float* %7, i64 %443 - %445 = bitcast float* %444 to <8 x float>* - %wide.masked.load202 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %445, i32 4, <8 x i1> %440, <8 x float> undef), !tbaa !12, !alias.scope !114, !noalias !117 - %446 = extractelement <8 x i32> %439, i32 0 - %447 = add nsw i32 %mul.i.i.4, %446 - %448 = sext i32 %447 to i64 - %449 = getelementptr inbounds float, float* %15, i64 %448 - %450 = bitcast float* %449 to <8 x float>* - %wide.masked.load203 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %450, i32 4, <8 x i1> %440, <8 x float> undef), !tbaa !12, !alias.scope !117 - %451 = fsub <8 x float> %wide.masked.load203, %wide.masked.load202 - %452 = bitcast float* %449 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %451, <8 x float>* %452, i32 4, <8 x i1> %440), !tbaa !12, !alias.scope !117, !llvm.access.group !21 - %453 = getelementptr inbounds float, float* %11, i64 %443 - %454 = bitcast float* %453 to <8 x float>* - %wide.masked.load204 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %454, i32 4, <8 x i1> %440, <8 x float> undef), !tbaa !12, !alias.scope !119, !noalias !117 - %455 = fmul <8 x float> %broadcast.splat206, %wide.masked.load204 - %456 = fdiv <8 x float> %451, %455, !fpmath !26 - %457 = bitcast float* %449 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %456, <8 x float>* %457, i32 4, <8 x i1> %440), !tbaa !12, !alias.scope !117, !llvm.access.group !21 - %458 = or <8 x i64> %broadcast.splat199, - %459 = trunc <8 x i64> %458 to <8 x i32> - %460 = icmp sgt <8 x i32> %broadcast.splat201, %459 - %461 = extractelement <8 x i64> %458, i32 0 - %462 = shl i64 %461, 32 - %463 = ashr exact i64 %462, 32 - %464 = getelementptr inbounds float, float* %7, i64 %463 - %465 = bitcast float* %464 to <8 x float>* - %wide.masked.load202.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %465, i32 4, <8 x i1> %460, <8 x float> undef), !tbaa !12, !alias.scope !114, !noalias !117 - %466 = extractelement <8 x i32> %459, i32 0 - %467 = add nsw i32 %mul.i.i.4, %466 - %468 = sext i32 %467 to i64 - %469 = getelementptr inbounds float, float* %15, i64 %468 - %470 = bitcast float* %469 to <8 x float>* - %wide.masked.load203.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %470, i32 4, <8 x i1> %460, <8 x float> undef), !tbaa !12, !alias.scope !117 - %471 = fsub <8 x float> %wide.masked.load203.1, %wide.masked.load202.1 - %472 = bitcast float* %469 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %471, <8 x float>* %472, i32 4, <8 x i1> %460), !tbaa !12, !alias.scope !117, !llvm.access.group !21 - %473 = getelementptr inbounds float, float* %11, i64 %463 - %474 = bitcast float* %473 to <8 x float>* - %wide.masked.load204.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %474, i32 4, <8 x i1> %460, <8 x float> undef), !tbaa !12, !alias.scope !119, !noalias !117 - %475 = fmul <8 x float> %broadcast.splat206, %wide.masked.load204.1 - %476 = fdiv <8 x float> %471, %475, !fpmath !26 - %477 = bitcast float* %469 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %476, <8 x float>* %477, i32 4, <8 x i1> %460), !tbaa !12, !alias.scope !117, !llvm.access.group !21 - %478 = or <8 x i64> %broadcast.splat199, - %479 = trunc <8 x i64> %478 to <8 x i32> - %480 = icmp sgt <8 x i32> %broadcast.splat201, %479 - %481 = extractelement <8 x i64> %478, i32 0 - %482 = shl i64 %481, 32 - %483 = ashr exact i64 %482, 32 - %484 = getelementptr inbounds float, float* %7, i64 %483 - %485 = bitcast float* %484 to <8 x float>* - %wide.masked.load202.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %485, i32 4, <8 x i1> %480, <8 x float> undef), !tbaa !12, !alias.scope !114, !noalias !117 - %486 = extractelement <8 x i32> %479, i32 0 - %487 = add nsw i32 %mul.i.i.4, %486 - %488 = sext i32 %487 to i64 - %489 = getelementptr inbounds float, float* %15, i64 %488 - %490 = bitcast float* %489 to <8 x float>* - %wide.masked.load203.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %490, i32 4, <8 x i1> %480, <8 x float> undef), !tbaa !12, !alias.scope !117 - %491 = fsub <8 x float> %wide.masked.load203.2, %wide.masked.load202.2 - %492 = bitcast float* %489 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %491, <8 x float>* %492, i32 4, <8 x i1> %480), !tbaa !12, !alias.scope !117, !llvm.access.group !21 - %493 = getelementptr inbounds float, float* %11, i64 %483 - %494 = bitcast float* %493 to <8 x float>* - %wide.masked.load204.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %494, i32 4, <8 x i1> %480, <8 x float> undef), !tbaa !12, !alias.scope !119, !noalias !117 - %495 = fmul <8 x float> %broadcast.splat206, %wide.masked.load204.2 - %496 = fdiv <8 x float> %491, %495, !fpmath !26 - %497 = bitcast float* %489 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %496, <8 x float>* %497, i32 4, <8 x i1> %480), !tbaa !12, !alias.scope !117, !llvm.access.group !21 - %498 = or <8 x i64> %broadcast.splat199, - %499 = trunc <8 x i64> %498 to <8 x i32> - %500 = icmp sgt <8 x i32> %broadcast.splat201, %499 - %501 = extractelement <8 x i64> %498, i32 0 - %502 = shl i64 %501, 32 - %503 = ashr exact i64 %502, 32 - %504 = getelementptr inbounds float, float* %7, i64 %503 - %505 = bitcast float* %504 to <8 x float>* - %wide.masked.load202.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %505, i32 4, <8 x i1> %500, <8 x float> undef), !tbaa !12, !alias.scope !114, !noalias !117 - %506 = extractelement <8 x i32> %499, i32 0 - %507 = add nsw i32 %mul.i.i.4, %506 - %508 = sext i32 %507 to i64 - %509 = getelementptr inbounds float, float* %15, i64 %508 - %510 = bitcast float* %509 to <8 x float>* - %wide.masked.load203.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %510, i32 4, <8 x i1> %500, <8 x float> undef), !tbaa !12, !alias.scope !117 - %511 = fsub <8 x float> %wide.masked.load203.3, %wide.masked.load202.3 - %512 = bitcast float* %509 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %511, <8 x float>* %512, i32 4, <8 x i1> %500), !tbaa !12, !alias.scope !117, !llvm.access.group !21 - %513 = getelementptr inbounds float, float* %11, i64 %503 - %514 = bitcast float* %513 to <8 x float>* - %wide.masked.load204.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %514, i32 4, <8 x i1> %500, <8 x float> undef), !tbaa !12, !alias.scope !119, !noalias !117 - %515 = fmul <8 x float> %broadcast.splat206, %wide.masked.load204.3 - %516 = fdiv <8 x float> %511, %515, !fpmath !26 - %517 = bitcast float* %509 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %516, <8 x float>* %517, i32 4, <8 x i1> %500), !tbaa !12, !alias.scope !117, !llvm.access.group !21 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.r_exit.i.i.us.4.1, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ], [ %831, %if.end.r_exit.i.i.us.4.1 ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %23, %conv.i.i.us.4 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.r_exit.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %sext.i.i.us.4 = shl i64 %add1.i.i.i.us.4, 32 - %idxprom.i.i.us.4 = ashr exact i64 %sext.i.i.us.4, 32 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.4 - %518 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %add.i.i.us.4 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom6.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx7.i.i.us.4 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.4 - %519 = load float, float* %arrayidx7.i.i.us.4, align 4, !tbaa !12 - %sub.i.i.us.4 = fsub float %519, %518 - store float %sub.i.i.us.4, float* %arrayidx7.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.4 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.4 - %520 = load float, float* %arrayidx10.i.i.us.4, align 4, !tbaa !12 - %mul11.i.i.us.4 = fmul float %28, %520 - %div.i.i.us.4 = fdiv float %sub.i.i.us.4, %mul11.i.i.us.4, !fpmath !26 - store float %div.i.i.us.4, float* %arrayidx7.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.4 - -if.end.r_exit.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %521 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %521, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %23, %conv.i.i.us.4.1 - br i1 %cmp4.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.r_exit.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.r_exit.i.i.us.4.1 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph191, %pregion_for_end.i.i.3 - %522 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.5 = or i32 %522, 5 - %cmp.i.i.5 = icmp sgt i32 %27, %conv2.i.i.5 - %mul.i.i.5 = mul nsw i32 %23, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck214, label %pregion_for_end.i.i.5 - -vector.scevcheck214: ; preds = %pregion_for_end.i.i.4 - %523 = mul i32 %23, %conv2.i.i.5 - %524 = trunc i64 %2 to i32 - %525 = shl i32 %524, 5 - %526 = add i32 %523, %525 - %527 = icmp sgt i32 %526, 2147483616 - br i1 %527, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.memcheck236 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.memcheck236, %vector.scevcheck214 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.memcheck236: ; preds = %vector.scevcheck214 - %528 = trunc i64 %2 to i32 - %529 = shl i32 %528, 5 - %530 = sext i32 %529 to i64 - %scevgep216 = getelementptr float, float* %7, i64 %530 - %531 = add nsw i64 %530, 32 - %scevgep218 = getelementptr float, float* %7, i64 %531 - %532 = mul i32 %23, %conv2.i.i.5 - %533 = add i32 %532, %529 - %534 = sext i32 %533 to i64 - %scevgep220 = getelementptr float, float* %15, i64 %534 - %535 = add nsw i64 %534, 32 - %scevgep222 = getelementptr float, float* %15, i64 %535 - %scevgep224 = getelementptr float, float* %11, i64 %530 - %scevgep226 = getelementptr float, float* %11, i64 %531 - %bound0228 = icmp ult float* %scevgep216, %scevgep222 - %bound1229 = icmp ult float* %scevgep220, %scevgep218 - %found.conflict230 = and i1 %bound0228, %bound1229 - %bound0231 = icmp ult float* %scevgep224, %scevgep222 - %bound1232 = icmp ult float* %scevgep220, %scevgep226 - %found.conflict233 = and i1 %bound0231, %bound1232 - %conflict.rdx234 = or i1 %found.conflict230, %found.conflict233 - br i1 %conflict.rdx234, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph237 - -vector.ph237: ; preds = %vector.memcheck236 - %broadcast.splatinsert244 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat245 = shufflevector <8 x i64> %broadcast.splatinsert244, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert246 = insertelement <8 x i32> undef, i32 %23, i32 0 - %broadcast.splat247 = shufflevector <8 x i32> %broadcast.splatinsert246, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert251 = insertelement <8 x float> undef, float %28, i32 0 - %broadcast.splat252 = shufflevector <8 x float> %broadcast.splatinsert251, <8 x float> undef, <8 x i32> zeroinitializer - %536 = or <8 x i64> %broadcast.splat245, - %537 = trunc <8 x i64> %536 to <8 x i32> - %538 = icmp sgt <8 x i32> %broadcast.splat247, %537 - %539 = extractelement <8 x i64> %536, i32 0 - %540 = shl i64 %539, 32 - %541 = ashr exact i64 %540, 32 - %542 = getelementptr inbounds float, float* %7, i64 %541 - %543 = bitcast float* %542 to <8 x float>* - %wide.masked.load248 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %543, i32 4, <8 x i1> %538, <8 x float> undef), !tbaa !12, !alias.scope !121, !noalias !124 - %544 = extractelement <8 x i32> %537, i32 0 - %545 = add nsw i32 %mul.i.i.5, %544 - %546 = sext i32 %545 to i64 - %547 = getelementptr inbounds float, float* %15, i64 %546 - %548 = bitcast float* %547 to <8 x float>* - %wide.masked.load249 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %548, i32 4, <8 x i1> %538, <8 x float> undef), !tbaa !12, !alias.scope !124 - %549 = fsub <8 x float> %wide.masked.load249, %wide.masked.load248 - %550 = bitcast float* %547 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %549, <8 x float>* %550, i32 4, <8 x i1> %538), !tbaa !12, !alias.scope !124, !llvm.access.group !21 - %551 = getelementptr inbounds float, float* %11, i64 %541 - %552 = bitcast float* %551 to <8 x float>* - %wide.masked.load250 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %552, i32 4, <8 x i1> %538, <8 x float> undef), !tbaa !12, !alias.scope !126, !noalias !124 - %553 = fmul <8 x float> %broadcast.splat252, %wide.masked.load250 - %554 = fdiv <8 x float> %549, %553, !fpmath !26 - %555 = bitcast float* %547 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %554, <8 x float>* %555, i32 4, <8 x i1> %538), !tbaa !12, !alias.scope !124, !llvm.access.group !21 - %556 = or <8 x i64> %broadcast.splat245, - %557 = trunc <8 x i64> %556 to <8 x i32> - %558 = icmp sgt <8 x i32> %broadcast.splat247, %557 - %559 = extractelement <8 x i64> %556, i32 0 - %560 = shl i64 %559, 32 - %561 = ashr exact i64 %560, 32 - %562 = getelementptr inbounds float, float* %7, i64 %561 - %563 = bitcast float* %562 to <8 x float>* - %wide.masked.load248.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %563, i32 4, <8 x i1> %558, <8 x float> undef), !tbaa !12, !alias.scope !121, !noalias !124 - %564 = extractelement <8 x i32> %557, i32 0 - %565 = add nsw i32 %mul.i.i.5, %564 - %566 = sext i32 %565 to i64 - %567 = getelementptr inbounds float, float* %15, i64 %566 - %568 = bitcast float* %567 to <8 x float>* - %wide.masked.load249.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %568, i32 4, <8 x i1> %558, <8 x float> undef), !tbaa !12, !alias.scope !124 - %569 = fsub <8 x float> %wide.masked.load249.1, %wide.masked.load248.1 - %570 = bitcast float* %567 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %569, <8 x float>* %570, i32 4, <8 x i1> %558), !tbaa !12, !alias.scope !124, !llvm.access.group !21 - %571 = getelementptr inbounds float, float* %11, i64 %561 - %572 = bitcast float* %571 to <8 x float>* - %wide.masked.load250.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %572, i32 4, <8 x i1> %558, <8 x float> undef), !tbaa !12, !alias.scope !126, !noalias !124 - %573 = fmul <8 x float> %broadcast.splat252, %wide.masked.load250.1 - %574 = fdiv <8 x float> %569, %573, !fpmath !26 - %575 = bitcast float* %567 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %574, <8 x float>* %575, i32 4, <8 x i1> %558), !tbaa !12, !alias.scope !124, !llvm.access.group !21 - %576 = or <8 x i64> %broadcast.splat245, - %577 = trunc <8 x i64> %576 to <8 x i32> - %578 = icmp sgt <8 x i32> %broadcast.splat247, %577 - %579 = extractelement <8 x i64> %576, i32 0 - %580 = shl i64 %579, 32 - %581 = ashr exact i64 %580, 32 - %582 = getelementptr inbounds float, float* %7, i64 %581 - %583 = bitcast float* %582 to <8 x float>* - %wide.masked.load248.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %583, i32 4, <8 x i1> %578, <8 x float> undef), !tbaa !12, !alias.scope !121, !noalias !124 - %584 = extractelement <8 x i32> %577, i32 0 - %585 = add nsw i32 %mul.i.i.5, %584 - %586 = sext i32 %585 to i64 - %587 = getelementptr inbounds float, float* %15, i64 %586 - %588 = bitcast float* %587 to <8 x float>* - %wide.masked.load249.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %588, i32 4, <8 x i1> %578, <8 x float> undef), !tbaa !12, !alias.scope !124 - %589 = fsub <8 x float> %wide.masked.load249.2, %wide.masked.load248.2 - %590 = bitcast float* %587 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %589, <8 x float>* %590, i32 4, <8 x i1> %578), !tbaa !12, !alias.scope !124, !llvm.access.group !21 - %591 = getelementptr inbounds float, float* %11, i64 %581 - %592 = bitcast float* %591 to <8 x float>* - %wide.masked.load250.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %592, i32 4, <8 x i1> %578, <8 x float> undef), !tbaa !12, !alias.scope !126, !noalias !124 - %593 = fmul <8 x float> %broadcast.splat252, %wide.masked.load250.2 - %594 = fdiv <8 x float> %589, %593, !fpmath !26 - %595 = bitcast float* %587 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %594, <8 x float>* %595, i32 4, <8 x i1> %578), !tbaa !12, !alias.scope !124, !llvm.access.group !21 - %596 = or <8 x i64> %broadcast.splat245, - %597 = trunc <8 x i64> %596 to <8 x i32> - %598 = icmp sgt <8 x i32> %broadcast.splat247, %597 - %599 = extractelement <8 x i64> %596, i32 0 - %600 = shl i64 %599, 32 - %601 = ashr exact i64 %600, 32 - %602 = getelementptr inbounds float, float* %7, i64 %601 - %603 = bitcast float* %602 to <8 x float>* - %wide.masked.load248.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %603, i32 4, <8 x i1> %598, <8 x float> undef), !tbaa !12, !alias.scope !121, !noalias !124 - %604 = extractelement <8 x i32> %597, i32 0 - %605 = add nsw i32 %mul.i.i.5, %604 - %606 = sext i32 %605 to i64 - %607 = getelementptr inbounds float, float* %15, i64 %606 - %608 = bitcast float* %607 to <8 x float>* - %wide.masked.load249.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %608, i32 4, <8 x i1> %598, <8 x float> undef), !tbaa !12, !alias.scope !124 - %609 = fsub <8 x float> %wide.masked.load249.3, %wide.masked.load248.3 - %610 = bitcast float* %607 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %609, <8 x float>* %610, i32 4, <8 x i1> %598), !tbaa !12, !alias.scope !124, !llvm.access.group !21 - %611 = getelementptr inbounds float, float* %11, i64 %601 - %612 = bitcast float* %611 to <8 x float>* - %wide.masked.load250.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %612, i32 4, <8 x i1> %598, <8 x float> undef), !tbaa !12, !alias.scope !126, !noalias !124 - %613 = fmul <8 x float> %broadcast.splat252, %wide.masked.load250.3 - %614 = fdiv <8 x float> %609, %613, !fpmath !26 - %615 = bitcast float* %607 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %614, <8 x float>* %615, i32 4, <8 x i1> %598), !tbaa !12, !alias.scope !124, !llvm.access.group !21 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.r_exit.i.i.us.5.1, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ], [ %827, %if.end.r_exit.i.i.us.5.1 ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %23, %conv.i.i.us.5 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.r_exit.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %sext.i.i.us.5 = shl i64 %add1.i.i.i.us.5, 32 - %idxprom.i.i.us.5 = ashr exact i64 %sext.i.i.us.5, 32 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.5 - %616 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %add.i.i.us.5 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom6.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx7.i.i.us.5 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.5 - %617 = load float, float* %arrayidx7.i.i.us.5, align 4, !tbaa !12 - %sub.i.i.us.5 = fsub float %617, %616 - store float %sub.i.i.us.5, float* %arrayidx7.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.5 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.5 - %618 = load float, float* %arrayidx10.i.i.us.5, align 4, !tbaa !12 - %mul11.i.i.us.5 = fmul float %28, %618 - %div.i.i.us.5 = fdiv float %sub.i.i.us.5, %mul11.i.i.us.5, !fpmath !26 - store float %div.i.i.us.5, float* %arrayidx7.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.5 - -if.end.r_exit.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %619 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %619, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %23, %conv.i.i.us.5.1 - br i1 %cmp4.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.r_exit.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.r_exit.i.i.us.5.1 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph237, %pregion_for_end.i.i.4 - %620 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.6 = or i32 %620, 6 - %cmp.i.i.6 = icmp sgt i32 %27, %conv2.i.i.6 - %mul.i.i.6 = mul nsw i32 %23, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck260, label %pregion_for_end.i.i.6 - -vector.scevcheck260: ; preds = %pregion_for_end.i.i.5 - %621 = mul i32 %23, %conv2.i.i.6 - %622 = trunc i64 %2 to i32 - %623 = shl i32 %622, 5 - %624 = add i32 %621, %623 - %625 = icmp sgt i32 %624, 2147483616 - br i1 %625, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.memcheck282 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.memcheck282, %vector.scevcheck260 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.memcheck282: ; preds = %vector.scevcheck260 - %626 = trunc i64 %2 to i32 - %627 = shl i32 %626, 5 - %628 = sext i32 %627 to i64 - %scevgep262 = getelementptr float, float* %7, i64 %628 - %629 = add nsw i64 %628, 32 - %scevgep264 = getelementptr float, float* %7, i64 %629 - %630 = mul i32 %23, %conv2.i.i.6 - %631 = add i32 %630, %627 - %632 = sext i32 %631 to i64 - %scevgep266 = getelementptr float, float* %15, i64 %632 - %633 = add nsw i64 %632, 32 - %scevgep268 = getelementptr float, float* %15, i64 %633 - %scevgep270 = getelementptr float, float* %11, i64 %628 - %scevgep272 = getelementptr float, float* %11, i64 %629 - %bound0274 = icmp ult float* %scevgep262, %scevgep268 - %bound1275 = icmp ult float* %scevgep266, %scevgep264 - %found.conflict276 = and i1 %bound0274, %bound1275 - %bound0277 = icmp ult float* %scevgep270, %scevgep268 - %bound1278 = icmp ult float* %scevgep266, %scevgep272 - %found.conflict279 = and i1 %bound0277, %bound1278 - %conflict.rdx280 = or i1 %found.conflict276, %found.conflict279 - br i1 %conflict.rdx280, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph283 - -vector.ph283: ; preds = %vector.memcheck282 - %broadcast.splatinsert290 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat291 = shufflevector <8 x i64> %broadcast.splatinsert290, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert292 = insertelement <8 x i32> undef, i32 %23, i32 0 - %broadcast.splat293 = shufflevector <8 x i32> %broadcast.splatinsert292, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert297 = insertelement <8 x float> undef, float %28, i32 0 - %broadcast.splat298 = shufflevector <8 x float> %broadcast.splatinsert297, <8 x float> undef, <8 x i32> zeroinitializer - %634 = or <8 x i64> %broadcast.splat291, - %635 = trunc <8 x i64> %634 to <8 x i32> - %636 = icmp sgt <8 x i32> %broadcast.splat293, %635 - %637 = extractelement <8 x i64> %634, i32 0 - %638 = shl i64 %637, 32 - %639 = ashr exact i64 %638, 32 - %640 = getelementptr inbounds float, float* %7, i64 %639 - %641 = bitcast float* %640 to <8 x float>* - %wide.masked.load294 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %641, i32 4, <8 x i1> %636, <8 x float> undef), !tbaa !12, !alias.scope !128, !noalias !131 - %642 = extractelement <8 x i32> %635, i32 0 - %643 = add nsw i32 %mul.i.i.6, %642 - %644 = sext i32 %643 to i64 - %645 = getelementptr inbounds float, float* %15, i64 %644 - %646 = bitcast float* %645 to <8 x float>* - %wide.masked.load295 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %646, i32 4, <8 x i1> %636, <8 x float> undef), !tbaa !12, !alias.scope !131 - %647 = fsub <8 x float> %wide.masked.load295, %wide.masked.load294 - %648 = bitcast float* %645 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %647, <8 x float>* %648, i32 4, <8 x i1> %636), !tbaa !12, !alias.scope !131, !llvm.access.group !21 - %649 = getelementptr inbounds float, float* %11, i64 %639 - %650 = bitcast float* %649 to <8 x float>* - %wide.masked.load296 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %650, i32 4, <8 x i1> %636, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !131 - %651 = fmul <8 x float> %broadcast.splat298, %wide.masked.load296 - %652 = fdiv <8 x float> %647, %651, !fpmath !26 - %653 = bitcast float* %645 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %652, <8 x float>* %653, i32 4, <8 x i1> %636), !tbaa !12, !alias.scope !131, !llvm.access.group !21 - %654 = or <8 x i64> %broadcast.splat291, - %655 = trunc <8 x i64> %654 to <8 x i32> - %656 = icmp sgt <8 x i32> %broadcast.splat293, %655 - %657 = extractelement <8 x i64> %654, i32 0 - %658 = shl i64 %657, 32 - %659 = ashr exact i64 %658, 32 - %660 = getelementptr inbounds float, float* %7, i64 %659 - %661 = bitcast float* %660 to <8 x float>* - %wide.masked.load294.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %661, i32 4, <8 x i1> %656, <8 x float> undef), !tbaa !12, !alias.scope !128, !noalias !131 - %662 = extractelement <8 x i32> %655, i32 0 - %663 = add nsw i32 %mul.i.i.6, %662 - %664 = sext i32 %663 to i64 - %665 = getelementptr inbounds float, float* %15, i64 %664 - %666 = bitcast float* %665 to <8 x float>* - %wide.masked.load295.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %666, i32 4, <8 x i1> %656, <8 x float> undef), !tbaa !12, !alias.scope !131 - %667 = fsub <8 x float> %wide.masked.load295.1, %wide.masked.load294.1 - %668 = bitcast float* %665 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %667, <8 x float>* %668, i32 4, <8 x i1> %656), !tbaa !12, !alias.scope !131, !llvm.access.group !21 - %669 = getelementptr inbounds float, float* %11, i64 %659 - %670 = bitcast float* %669 to <8 x float>* - %wide.masked.load296.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %670, i32 4, <8 x i1> %656, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !131 - %671 = fmul <8 x float> %broadcast.splat298, %wide.masked.load296.1 - %672 = fdiv <8 x float> %667, %671, !fpmath !26 - %673 = bitcast float* %665 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %672, <8 x float>* %673, i32 4, <8 x i1> %656), !tbaa !12, !alias.scope !131, !llvm.access.group !21 - %674 = or <8 x i64> %broadcast.splat291, - %675 = trunc <8 x i64> %674 to <8 x i32> - %676 = icmp sgt <8 x i32> %broadcast.splat293, %675 - %677 = extractelement <8 x i64> %674, i32 0 - %678 = shl i64 %677, 32 - %679 = ashr exact i64 %678, 32 - %680 = getelementptr inbounds float, float* %7, i64 %679 - %681 = bitcast float* %680 to <8 x float>* - %wide.masked.load294.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %681, i32 4, <8 x i1> %676, <8 x float> undef), !tbaa !12, !alias.scope !128, !noalias !131 - %682 = extractelement <8 x i32> %675, i32 0 - %683 = add nsw i32 %mul.i.i.6, %682 - %684 = sext i32 %683 to i64 - %685 = getelementptr inbounds float, float* %15, i64 %684 - %686 = bitcast float* %685 to <8 x float>* - %wide.masked.load295.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %686, i32 4, <8 x i1> %676, <8 x float> undef), !tbaa !12, !alias.scope !131 - %687 = fsub <8 x float> %wide.masked.load295.2, %wide.masked.load294.2 - %688 = bitcast float* %685 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %687, <8 x float>* %688, i32 4, <8 x i1> %676), !tbaa !12, !alias.scope !131, !llvm.access.group !21 - %689 = getelementptr inbounds float, float* %11, i64 %679 - %690 = bitcast float* %689 to <8 x float>* - %wide.masked.load296.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %690, i32 4, <8 x i1> %676, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !131 - %691 = fmul <8 x float> %broadcast.splat298, %wide.masked.load296.2 - %692 = fdiv <8 x float> %687, %691, !fpmath !26 - %693 = bitcast float* %685 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %692, <8 x float>* %693, i32 4, <8 x i1> %676), !tbaa !12, !alias.scope !131, !llvm.access.group !21 - %694 = or <8 x i64> %broadcast.splat291, - %695 = trunc <8 x i64> %694 to <8 x i32> - %696 = icmp sgt <8 x i32> %broadcast.splat293, %695 - %697 = extractelement <8 x i64> %694, i32 0 - %698 = shl i64 %697, 32 - %699 = ashr exact i64 %698, 32 - %700 = getelementptr inbounds float, float* %7, i64 %699 - %701 = bitcast float* %700 to <8 x float>* - %wide.masked.load294.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %701, i32 4, <8 x i1> %696, <8 x float> undef), !tbaa !12, !alias.scope !128, !noalias !131 - %702 = extractelement <8 x i32> %695, i32 0 - %703 = add nsw i32 %mul.i.i.6, %702 - %704 = sext i32 %703 to i64 - %705 = getelementptr inbounds float, float* %15, i64 %704 - %706 = bitcast float* %705 to <8 x float>* - %wide.masked.load295.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %706, i32 4, <8 x i1> %696, <8 x float> undef), !tbaa !12, !alias.scope !131 - %707 = fsub <8 x float> %wide.masked.load295.3, %wide.masked.load294.3 - %708 = bitcast float* %705 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %707, <8 x float>* %708, i32 4, <8 x i1> %696), !tbaa !12, !alias.scope !131, !llvm.access.group !21 - %709 = getelementptr inbounds float, float* %11, i64 %699 - %710 = bitcast float* %709 to <8 x float>* - %wide.masked.load296.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %710, i32 4, <8 x i1> %696, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !131 - %711 = fmul <8 x float> %broadcast.splat298, %wide.masked.load296.3 - %712 = fdiv <8 x float> %707, %711, !fpmath !26 - %713 = bitcast float* %705 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %712, <8 x float>* %713, i32 4, <8 x i1> %696), !tbaa !12, !alias.scope !131, !llvm.access.group !21 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.r_exit.i.i.us.6.1, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ], [ %823, %if.end.r_exit.i.i.us.6.1 ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %23, %conv.i.i.us.6 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.r_exit.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %sext.i.i.us.6 = shl i64 %add1.i.i.i.us.6, 32 - %idxprom.i.i.us.6 = ashr exact i64 %sext.i.i.us.6, 32 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.6 - %714 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %add.i.i.us.6 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom6.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx7.i.i.us.6 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.6 - %715 = load float, float* %arrayidx7.i.i.us.6, align 4, !tbaa !12 - %sub.i.i.us.6 = fsub float %715, %714 - store float %sub.i.i.us.6, float* %arrayidx7.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.6 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.6 - %716 = load float, float* %arrayidx10.i.i.us.6, align 4, !tbaa !12 - %mul11.i.i.us.6 = fmul float %28, %716 - %div.i.i.us.6 = fdiv float %sub.i.i.us.6, %mul11.i.i.us.6, !fpmath !26 - store float %div.i.i.us.6, float* %arrayidx7.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.6 - -if.end.r_exit.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %717 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %717, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %23, %conv.i.i.us.6.1 - br i1 %cmp4.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.r_exit.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.r_exit.i.i.us.6.1 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph283, %pregion_for_end.i.i.5 - %718 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.7 = or i32 %718, 7 - %cmp.i.i.7 = icmp sgt i32 %27, %conv2.i.i.7 - %mul.i.i.7 = mul nsw i32 %23, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck306, label %pregion_for_end.i.i.7 - -vector.scevcheck306: ; preds = %pregion_for_end.i.i.6 - %719 = mul i32 %23, %conv2.i.i.7 - %720 = trunc i64 %2 to i32 - %721 = shl i32 %720, 5 - %722 = add i32 %719, %721 - %723 = icmp sgt i32 %722, 2147483616 - br i1 %723, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.memcheck328 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.memcheck328, %vector.scevcheck306 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.memcheck328: ; preds = %vector.scevcheck306 - %724 = trunc i64 %2 to i32 - %725 = shl i32 %724, 5 - %726 = sext i32 %725 to i64 - %scevgep308 = getelementptr float, float* %7, i64 %726 - %727 = add nsw i64 %726, 32 - %scevgep310 = getelementptr float, float* %7, i64 %727 - %728 = mul i32 %23, %conv2.i.i.7 - %729 = add i32 %728, %725 - %730 = sext i32 %729 to i64 - %scevgep312 = getelementptr float, float* %15, i64 %730 - %731 = add nsw i64 %730, 32 - %scevgep314 = getelementptr float, float* %15, i64 %731 - %scevgep316 = getelementptr float, float* %11, i64 %726 - %scevgep318 = getelementptr float, float* %11, i64 %727 - %bound0320 = icmp ult float* %scevgep308, %scevgep314 - %bound1321 = icmp ult float* %scevgep312, %scevgep310 - %found.conflict322 = and i1 %bound0320, %bound1321 - %bound0323 = icmp ult float* %scevgep316, %scevgep314 - %bound1324 = icmp ult float* %scevgep312, %scevgep318 - %found.conflict325 = and i1 %bound0323, %bound1324 - %conflict.rdx326 = or i1 %found.conflict322, %found.conflict325 - br i1 %conflict.rdx326, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph329 - -vector.ph329: ; preds = %vector.memcheck328 - %broadcast.splatinsert336 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat337 = shufflevector <8 x i64> %broadcast.splatinsert336, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert338 = insertelement <8 x i32> undef, i32 %23, i32 0 - %broadcast.splat339 = shufflevector <8 x i32> %broadcast.splatinsert338, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert343 = insertelement <8 x float> undef, float %28, i32 0 - %broadcast.splat344 = shufflevector <8 x float> %broadcast.splatinsert343, <8 x float> undef, <8 x i32> zeroinitializer - %732 = or <8 x i64> %broadcast.splat337, - %733 = trunc <8 x i64> %732 to <8 x i32> - %734 = icmp sgt <8 x i32> %broadcast.splat339, %733 - %735 = extractelement <8 x i64> %732, i32 0 - %736 = shl i64 %735, 32 - %737 = ashr exact i64 %736, 32 - %738 = getelementptr inbounds float, float* %7, i64 %737 - %739 = bitcast float* %738 to <8 x float>* - %wide.masked.load340 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %739, i32 4, <8 x i1> %734, <8 x float> undef), !tbaa !12, !alias.scope !135, !noalias !138 - %740 = extractelement <8 x i32> %733, i32 0 - %741 = add nsw i32 %mul.i.i.7, %740 - %742 = sext i32 %741 to i64 - %743 = getelementptr inbounds float, float* %15, i64 %742 - %744 = bitcast float* %743 to <8 x float>* - %wide.masked.load341 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %744, i32 4, <8 x i1> %734, <8 x float> undef), !tbaa !12, !alias.scope !138 - %745 = fsub <8 x float> %wide.masked.load341, %wide.masked.load340 - %746 = bitcast float* %743 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %745, <8 x float>* %746, i32 4, <8 x i1> %734), !tbaa !12, !alias.scope !138, !llvm.access.group !21 - %747 = getelementptr inbounds float, float* %11, i64 %737 - %748 = bitcast float* %747 to <8 x float>* - %wide.masked.load342 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %748, i32 4, <8 x i1> %734, <8 x float> undef), !tbaa !12, !alias.scope !140, !noalias !138 - %749 = fmul <8 x float> %broadcast.splat344, %wide.masked.load342 - %750 = fdiv <8 x float> %745, %749, !fpmath !26 - %751 = bitcast float* %743 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %750, <8 x float>* %751, i32 4, <8 x i1> %734), !tbaa !12, !alias.scope !138, !llvm.access.group !21 - %752 = or <8 x i64> %broadcast.splat337, - %753 = trunc <8 x i64> %752 to <8 x i32> - %754 = icmp sgt <8 x i32> %broadcast.splat339, %753 - %755 = extractelement <8 x i64> %752, i32 0 - %756 = shl i64 %755, 32 - %757 = ashr exact i64 %756, 32 - %758 = getelementptr inbounds float, float* %7, i64 %757 - %759 = bitcast float* %758 to <8 x float>* - %wide.masked.load340.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %759, i32 4, <8 x i1> %754, <8 x float> undef), !tbaa !12, !alias.scope !135, !noalias !138 - %760 = extractelement <8 x i32> %753, i32 0 - %761 = add nsw i32 %mul.i.i.7, %760 - %762 = sext i32 %761 to i64 - %763 = getelementptr inbounds float, float* %15, i64 %762 - %764 = bitcast float* %763 to <8 x float>* - %wide.masked.load341.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %764, i32 4, <8 x i1> %754, <8 x float> undef), !tbaa !12, !alias.scope !138 - %765 = fsub <8 x float> %wide.masked.load341.1, %wide.masked.load340.1 - %766 = bitcast float* %763 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %765, <8 x float>* %766, i32 4, <8 x i1> %754), !tbaa !12, !alias.scope !138, !llvm.access.group !21 - %767 = getelementptr inbounds float, float* %11, i64 %757 - %768 = bitcast float* %767 to <8 x float>* - %wide.masked.load342.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %768, i32 4, <8 x i1> %754, <8 x float> undef), !tbaa !12, !alias.scope !140, !noalias !138 - %769 = fmul <8 x float> %broadcast.splat344, %wide.masked.load342.1 - %770 = fdiv <8 x float> %765, %769, !fpmath !26 - %771 = bitcast float* %763 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %770, <8 x float>* %771, i32 4, <8 x i1> %754), !tbaa !12, !alias.scope !138, !llvm.access.group !21 - %772 = or <8 x i64> %broadcast.splat337, - %773 = trunc <8 x i64> %772 to <8 x i32> - %774 = icmp sgt <8 x i32> %broadcast.splat339, %773 - %775 = extractelement <8 x i64> %772, i32 0 - %776 = shl i64 %775, 32 - %777 = ashr exact i64 %776, 32 - %778 = getelementptr inbounds float, float* %7, i64 %777 - %779 = bitcast float* %778 to <8 x float>* - %wide.masked.load340.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %779, i32 4, <8 x i1> %774, <8 x float> undef), !tbaa !12, !alias.scope !135, !noalias !138 - %780 = extractelement <8 x i32> %773, i32 0 - %781 = add nsw i32 %mul.i.i.7, %780 - %782 = sext i32 %781 to i64 - %783 = getelementptr inbounds float, float* %15, i64 %782 - %784 = bitcast float* %783 to <8 x float>* - %wide.masked.load341.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %784, i32 4, <8 x i1> %774, <8 x float> undef), !tbaa !12, !alias.scope !138 - %785 = fsub <8 x float> %wide.masked.load341.2, %wide.masked.load340.2 - %786 = bitcast float* %783 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %785, <8 x float>* %786, i32 4, <8 x i1> %774), !tbaa !12, !alias.scope !138, !llvm.access.group !21 - %787 = getelementptr inbounds float, float* %11, i64 %777 - %788 = bitcast float* %787 to <8 x float>* - %wide.masked.load342.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %788, i32 4, <8 x i1> %774, <8 x float> undef), !tbaa !12, !alias.scope !140, !noalias !138 - %789 = fmul <8 x float> %broadcast.splat344, %wide.masked.load342.2 - %790 = fdiv <8 x float> %785, %789, !fpmath !26 - %791 = bitcast float* %783 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %790, <8 x float>* %791, i32 4, <8 x i1> %774), !tbaa !12, !alias.scope !138, !llvm.access.group !21 - %792 = or <8 x i64> %broadcast.splat337, - %793 = trunc <8 x i64> %792 to <8 x i32> - %794 = icmp sgt <8 x i32> %broadcast.splat339, %793 - %795 = extractelement <8 x i64> %792, i32 0 - %796 = shl i64 %795, 32 - %797 = ashr exact i64 %796, 32 - %798 = getelementptr inbounds float, float* %7, i64 %797 - %799 = bitcast float* %798 to <8 x float>* - %wide.masked.load340.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %799, i32 4, <8 x i1> %794, <8 x float> undef), !tbaa !12, !alias.scope !135, !noalias !138 - %800 = extractelement <8 x i32> %793, i32 0 - %801 = add nsw i32 %mul.i.i.7, %800 - %802 = sext i32 %801 to i64 - %803 = getelementptr inbounds float, float* %15, i64 %802 - %804 = bitcast float* %803 to <8 x float>* - %wide.masked.load341.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %804, i32 4, <8 x i1> %794, <8 x float> undef), !tbaa !12, !alias.scope !138 - %805 = fsub <8 x float> %wide.masked.load341.3, %wide.masked.load340.3 - %806 = bitcast float* %803 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %805, <8 x float>* %806, i32 4, <8 x i1> %794), !tbaa !12, !alias.scope !138, !llvm.access.group !21 - %807 = getelementptr inbounds float, float* %11, i64 %797 - %808 = bitcast float* %807 to <8 x float>* - %wide.masked.load342.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %808, i32 4, <8 x i1> %794, <8 x float> undef), !tbaa !12, !alias.scope !140, !noalias !138 - %809 = fmul <8 x float> %broadcast.splat344, %wide.masked.load342.3 - %810 = fdiv <8 x float> %805, %809, !fpmath !26 - %811 = bitcast float* %803 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %810, <8 x float>* %811, i32 4, <8 x i1> %794), !tbaa !12, !alias.scope !138, !llvm.access.group !21 - br label %pregion_for_end.i.i.7 - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.r_exit.i.i.us.7.1, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ], [ %819, %if.end.r_exit.i.i.us.7.1 ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %23, %conv.i.i.us.7 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.r_exit.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %sext.i.i.us.7 = shl i64 %add1.i.i.i.us.7, 32 - %idxprom.i.i.us.7 = ashr exact i64 %sext.i.i.us.7, 32 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.7 - %812 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %add.i.i.us.7 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom6.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx7.i.i.us.7 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.7 - %813 = load float, float* %arrayidx7.i.i.us.7, align 4, !tbaa !12 - %sub.i.i.us.7 = fsub float %813, %812 - store float %sub.i.i.us.7, float* %arrayidx7.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.7 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.7 - %814 = load float, float* %arrayidx10.i.i.us.7, align 4, !tbaa !12 - %mul11.i.i.us.7 = fmul float %28, %814 - %div.i.i.us.7 = fdiv float %sub.i.i.us.7, %mul11.i.i.us.7, !fpmath !26 - store float %div.i.i.us.7, float* %arrayidx7.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.7 - -if.end.r_exit.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %815 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %815, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %23, %conv.i.i.us.7.1 - br i1 %cmp4.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.r_exit.i.i.us.7.1 - -pregion_for_end.i.i.7.loopexit: ; preds = %if.end.r_exit.i.i.us.7.1 - br label %pregion_for_end.i.i.7 - -pregion_for_end.i.i.7: ; preds = %pregion_for_end.i.i.7.loopexit, %vector.ph329, %pregion_for_end.i.i.6 - ret void - -if.then.i.i.us.7.1: ; preds = %if.end.r_exit.i.i.us.7 - %sext.i.i.us.7.1 = shl i64 %add1.i.i.i.us.7.1, 32 - %idxprom.i.i.us.7.1 = ashr exact i64 %sext.i.i.us.7.1, 32 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.7.1 - %816 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %add.i.i.us.7.1 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom6.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx7.i.i.us.7.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.7.1 - %817 = load float, float* %arrayidx7.i.i.us.7.1, align 4, !tbaa !12 - %sub.i.i.us.7.1 = fsub float %817, %816 - store float %sub.i.i.us.7.1, float* %arrayidx7.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.7.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.7.1 - %818 = load float, float* %arrayidx10.i.i.us.7.1, align 4, !tbaa !12 - %mul11.i.i.us.7.1 = fmul float %28, %818 - %div.i.i.us.7.1 = fdiv float %sub.i.i.us.7.1, %mul11.i.i.us.7.1, !fpmath !26 - store float %div.i.i.us.7.1, float* %arrayidx7.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.7.1 - -if.end.r_exit.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.r_exit.i.i.us.7 - %819 = add nuw nsw i64 %_local_id_x.i.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %819, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.i.7.loopexit, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !142 - -if.then.i.i.us.6.1: ; preds = %if.end.r_exit.i.i.us.6 - %sext.i.i.us.6.1 = shl i64 %add1.i.i.i.us.6.1, 32 - %idxprom.i.i.us.6.1 = ashr exact i64 %sext.i.i.us.6.1, 32 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.6.1 - %820 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %add.i.i.us.6.1 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom6.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx7.i.i.us.6.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.6.1 - %821 = load float, float* %arrayidx7.i.i.us.6.1, align 4, !tbaa !12 - %sub.i.i.us.6.1 = fsub float %821, %820 - store float %sub.i.i.us.6.1, float* %arrayidx7.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.6.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.6.1 - %822 = load float, float* %arrayidx10.i.i.us.6.1, align 4, !tbaa !12 - %mul11.i.i.us.6.1 = fmul float %28, %822 - %div.i.i.us.6.1 = fdiv float %sub.i.i.us.6.1, %mul11.i.i.us.6.1, !fpmath !26 - store float %div.i.i.us.6.1, float* %arrayidx7.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.6.1 - -if.end.r_exit.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.r_exit.i.i.us.6 - %823 = add nuw nsw i64 %_local_id_x.i.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %823, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !143 - -if.then.i.i.us.5.1: ; preds = %if.end.r_exit.i.i.us.5 - %sext.i.i.us.5.1 = shl i64 %add1.i.i.i.us.5.1, 32 - %idxprom.i.i.us.5.1 = ashr exact i64 %sext.i.i.us.5.1, 32 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.5.1 - %824 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %add.i.i.us.5.1 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom6.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx7.i.i.us.5.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.5.1 - %825 = load float, float* %arrayidx7.i.i.us.5.1, align 4, !tbaa !12 - %sub.i.i.us.5.1 = fsub float %825, %824 - store float %sub.i.i.us.5.1, float* %arrayidx7.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.5.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.5.1 - %826 = load float, float* %arrayidx10.i.i.us.5.1, align 4, !tbaa !12 - %mul11.i.i.us.5.1 = fmul float %28, %826 - %div.i.i.us.5.1 = fdiv float %sub.i.i.us.5.1, %mul11.i.i.us.5.1, !fpmath !26 - store float %div.i.i.us.5.1, float* %arrayidx7.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.5.1 - -if.end.r_exit.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.r_exit.i.i.us.5 - %827 = add nuw nsw i64 %_local_id_x.i.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %827, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !144 - -if.then.i.i.us.4.1: ; preds = %if.end.r_exit.i.i.us.4 - %sext.i.i.us.4.1 = shl i64 %add1.i.i.i.us.4.1, 32 - %idxprom.i.i.us.4.1 = ashr exact i64 %sext.i.i.us.4.1, 32 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.4.1 - %828 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %add.i.i.us.4.1 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom6.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx7.i.i.us.4.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.4.1 - %829 = load float, float* %arrayidx7.i.i.us.4.1, align 4, !tbaa !12 - %sub.i.i.us.4.1 = fsub float %829, %828 - store float %sub.i.i.us.4.1, float* %arrayidx7.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.4.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.4.1 - %830 = load float, float* %arrayidx10.i.i.us.4.1, align 4, !tbaa !12 - %mul11.i.i.us.4.1 = fmul float %28, %830 - %div.i.i.us.4.1 = fdiv float %sub.i.i.us.4.1, %mul11.i.i.us.4.1, !fpmath !26 - store float %div.i.i.us.4.1, float* %arrayidx7.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.4.1 - -if.end.r_exit.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.r_exit.i.i.us.4 - %831 = add nuw nsw i64 %_local_id_x.i.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %831, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !145 - -if.then.i.i.us.3.1: ; preds = %if.end.r_exit.i.i.us.3 - %sext.i.i.us.3.1 = shl i64 %add1.i.i.i.us.3.1, 32 - %idxprom.i.i.us.3.1 = ashr exact i64 %sext.i.i.us.3.1, 32 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.3.1 - %832 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %add.i.i.us.3.1 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom6.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx7.i.i.us.3.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.3.1 - %833 = load float, float* %arrayidx7.i.i.us.3.1, align 4, !tbaa !12 - %sub.i.i.us.3.1 = fsub float %833, %832 - store float %sub.i.i.us.3.1, float* %arrayidx7.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.3.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.3.1 - %834 = load float, float* %arrayidx10.i.i.us.3.1, align 4, !tbaa !12 - %mul11.i.i.us.3.1 = fmul float %28, %834 - %div.i.i.us.3.1 = fdiv float %sub.i.i.us.3.1, %mul11.i.i.us.3.1, !fpmath !26 - store float %div.i.i.us.3.1, float* %arrayidx7.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.3.1 - -if.end.r_exit.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.r_exit.i.i.us.3 - %835 = add nuw nsw i64 %_local_id_x.i.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %835, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !146 - -if.then.i.i.us.2.1: ; preds = %if.end.r_exit.i.i.us.2 - %sext.i.i.us.2.1 = shl i64 %add1.i.i.i.us.2.1, 32 - %idxprom.i.i.us.2.1 = ashr exact i64 %sext.i.i.us.2.1, 32 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.2.1 - %836 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %add.i.i.us.2.1 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom6.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx7.i.i.us.2.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.2.1 - %837 = load float, float* %arrayidx7.i.i.us.2.1, align 4, !tbaa !12 - %sub.i.i.us.2.1 = fsub float %837, %836 - store float %sub.i.i.us.2.1, float* %arrayidx7.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.2.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.2.1 - %838 = load float, float* %arrayidx10.i.i.us.2.1, align 4, !tbaa !12 - %mul11.i.i.us.2.1 = fmul float %28, %838 - %div.i.i.us.2.1 = fdiv float %sub.i.i.us.2.1, %mul11.i.i.us.2.1, !fpmath !26 - store float %div.i.i.us.2.1, float* %arrayidx7.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.2.1 - -if.end.r_exit.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.r_exit.i.i.us.2 - %839 = add nuw nsw i64 %_local_id_x.i.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %839, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !147 - -if.then.i.i.us.1.1: ; preds = %if.end.r_exit.i.i.us.1 - %sext.i.i.us.1.1 = shl i64 %add1.i.i.i.us.1.1, 32 - %idxprom.i.i.us.1.1 = ashr exact i64 %sext.i.i.us.1.1, 32 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1.1 - %840 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %add.i.i.us.1.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom6.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx7.i.i.us.1.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.1.1 - %841 = load float, float* %arrayidx7.i.i.us.1.1, align 4, !tbaa !12 - %sub.i.i.us.1.1 = fsub float %841, %840 - store float %sub.i.i.us.1.1, float* %arrayidx7.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.1.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.1.1 - %842 = load float, float* %arrayidx10.i.i.us.1.1, align 4, !tbaa !12 - %mul11.i.i.us.1.1 = fmul float %28, %842 - %div.i.i.us.1.1 = fdiv float %sub.i.i.us.1.1, %mul11.i.i.us.1.1, !fpmath !26 - store float %div.i.i.us.1.1, float* %arrayidx7.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1.1 - -if.end.r_exit.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.r_exit.i.i.us.1 - %843 = add nuw nsw i64 %_local_id_x.i.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %843, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !148 - -if.then.i.i.us.1368: ; preds = %if.end.r_exit.i.i.us - %sext.i.i.us.1358 = shl i64 %add1.i.i.i.us.1354, 32 - %idxprom.i.i.us.1359 = ashr exact i64 %sext.i.i.us.1358, 32 - %arrayidx.i.i.us.1360 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1359 - %844 = load float, float* %arrayidx.i.i.us.1360, align 4, !tbaa !12 - %add.i.i.us.1361 = add nsw i32 %mul.i.i, %conv.i.i.us.1355 - %idxprom6.i.i.us.1362 = sext i32 %add.i.i.us.1361 to i64 - %arrayidx7.i.i.us.1363 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.1362 - %845 = load float, float* %arrayidx7.i.i.us.1363, align 4, !tbaa !12 - %sub.i.i.us.1364 = fsub float %845, %844 - store float %sub.i.i.us.1364, float* %arrayidx7.i.i.us.1363, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.1365 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.1359 - %846 = load float, float* %arrayidx10.i.i.us.1365, align 4, !tbaa !12 - %mul11.i.i.us.1366 = fmul float %28, %846 - %div.i.i.us.1367 = fdiv float %sub.i.i.us.1364, %mul11.i.i.us.1366, !fpmath !26 - store float %div.i.i.us.1367, float* %arrayidx7.i.i.us.1363, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1369 - -if.end.r_exit.i.i.us.1369: ; preds = %if.then.i.i.us.1368, %if.end.r_exit.i.i.us - %847 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %847, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !149 -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_reduce_kernel_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { -pregion_for_entry.pregion_for_init.i.i: - %5 = bitcast i8** %0 to float** - %6 = load float*, float** %5, align 8 - %7 = getelementptr i8*, i8** %0, i64 1 - %8 = bitcast i8** %7 to float** - %9 = load float*, float** %8, align 8 - %10 = getelementptr i8*, i8** %0, i64 2 - %11 = bitcast i8** %10 to float** - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 3 - %14 = bitcast i8** %13 to float** - %15 = load float*, float** %14, align 8 - %16 = load float, float* %15, align 4 - %17 = getelementptr i8*, i8** %0, i64 4 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 5 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %25 = tail call float @llvm.sqrt.f32(float %16) #5 - %conv2.i.i = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %24, %conv2.i.i - %mul.i.i = mul nsw i32 %20, %conv2.i.i - br i1 %cmp.i.i, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %26 = trunc i64 %3 to i32 - %27 = mul i32 %20, %26 - %28 = shl i32 %27, 3 - %29 = trunc i64 %2 to i32 - %30 = shl i32 %29, 5 - %31 = add i32 %28, %30 - %32 = icmp sgt i32 %31, 2147483616 - br i1 %32, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %33 = trunc i64 %2 to i32 - %34 = shl i32 %33, 5 - %35 = sext i32 %34 to i64 - %scevgep = getelementptr float, float* %6, i64 %35 - %36 = add nsw i64 %35, 32 - %scevgep4 = getelementptr float, float* %6, i64 %36 - %37 = trunc i64 %3 to i32 - %38 = mul i32 %20, %37 - %39 = shl i32 %38, 3 - %40 = add i32 %39, %34 - %41 = sext i32 %40 to i64 - %scevgep6 = getelementptr float, float* %12, i64 %41 - %42 = add nsw i64 %41, 32 - %scevgep8 = getelementptr float, float* %12, i64 %42 - %scevgep10 = getelementptr float, float* %9, i64 %35 - %scevgep12 = getelementptr float, float* %9, i64 %36 - %bound0 = icmp ult float* %scevgep, %scevgep8 - %bound1 = icmp ult float* %scevgep6, %scevgep4 - %found.conflict = and i1 %bound0, %bound1 - %bound014 = icmp ult float* %scevgep10, %scevgep8 - %bound115 = icmp ult float* %scevgep6, %scevgep12 - %found.conflict16 = and i1 %bound014, %bound115 - %conflict.rdx = or i1 %found.conflict, %found.conflict16 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert17 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat18 = shufflevector <8 x i32> %broadcast.splatinsert17, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert21 = insertelement <8 x float> undef, float %25, i32 0 - %broadcast.splat22 = shufflevector <8 x float> %broadcast.splatinsert21, <8 x float> undef, <8 x i32> zeroinitializer - %43 = or <8 x i64> %broadcast.splat, - %44 = trunc <8 x i64> %43 to <8 x i32> - %45 = icmp sgt <8 x i32> %broadcast.splat18, %44 - %46 = extractelement <8 x i64> %43, i32 0 - %47 = shl i64 %46, 32 - %48 = ashr exact i64 %47, 32 - %49 = getelementptr inbounds float, float* %6, i64 %48 - %50 = bitcast float* %49 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %50, i32 4, <8 x i1> %45, <8 x float> undef), !tbaa !12, !alias.scope !150, !noalias !153 - %51 = extractelement <8 x i32> %44, i32 0 - %52 = add nsw i32 %mul.i.i, %51 - %53 = sext i32 %52 to i64 - %54 = getelementptr inbounds float, float* %12, i64 %53 - %55 = bitcast float* %54 to <8 x float>* - %wide.masked.load19 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %55, i32 4, <8 x i1> %45, <8 x float> undef), !tbaa !12, !alias.scope !153 - %56 = fsub <8 x float> %wide.masked.load19, %wide.masked.load - %57 = bitcast float* %54 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %56, <8 x float>* %57, i32 4, <8 x i1> %45), !tbaa !12, !alias.scope !153, !llvm.access.group !21 - %58 = getelementptr inbounds float, float* %9, i64 %48 - %59 = bitcast float* %58 to <8 x float>* - %wide.masked.load20 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %59, i32 4, <8 x i1> %45, <8 x float> undef), !tbaa !12, !alias.scope !155, !noalias !153 - %60 = fmul <8 x float> %broadcast.splat22, %wide.masked.load20 - %61 = fdiv <8 x float> %56, %60, !fpmath !26 - %62 = bitcast float* %54 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %61, <8 x float>* %62, i32 4, <8 x i1> %45), !tbaa !12, !alias.scope !153, !llvm.access.group !21 - %63 = or <8 x i64> %broadcast.splat, - %64 = trunc <8 x i64> %63 to <8 x i32> - %65 = icmp sgt <8 x i32> %broadcast.splat18, %64 - %66 = extractelement <8 x i64> %63, i32 0 - %67 = shl i64 %66, 32 - %68 = ashr exact i64 %67, 32 - %69 = getelementptr inbounds float, float* %6, i64 %68 - %70 = bitcast float* %69 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %70, i32 4, <8 x i1> %65, <8 x float> undef), !tbaa !12, !alias.scope !150, !noalias !153 - %71 = extractelement <8 x i32> %64, i32 0 - %72 = add nsw i32 %mul.i.i, %71 - %73 = sext i32 %72 to i64 - %74 = getelementptr inbounds float, float* %12, i64 %73 - %75 = bitcast float* %74 to <8 x float>* - %wide.masked.load19.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %75, i32 4, <8 x i1> %65, <8 x float> undef), !tbaa !12, !alias.scope !153 - %76 = fsub <8 x float> %wide.masked.load19.1, %wide.masked.load.1 - %77 = bitcast float* %74 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %76, <8 x float>* %77, i32 4, <8 x i1> %65), !tbaa !12, !alias.scope !153, !llvm.access.group !21 - %78 = getelementptr inbounds float, float* %9, i64 %68 - %79 = bitcast float* %78 to <8 x float>* - %wide.masked.load20.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %79, i32 4, <8 x i1> %65, <8 x float> undef), !tbaa !12, !alias.scope !155, !noalias !153 - %80 = fmul <8 x float> %broadcast.splat22, %wide.masked.load20.1 - %81 = fdiv <8 x float> %76, %80, !fpmath !26 - %82 = bitcast float* %74 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %81, <8 x float>* %82, i32 4, <8 x i1> %65), !tbaa !12, !alias.scope !153, !llvm.access.group !21 - %83 = or <8 x i64> %broadcast.splat, - %84 = trunc <8 x i64> %83 to <8 x i32> - %85 = icmp sgt <8 x i32> %broadcast.splat18, %84 - %86 = extractelement <8 x i64> %83, i32 0 - %87 = shl i64 %86, 32 - %88 = ashr exact i64 %87, 32 - %89 = getelementptr inbounds float, float* %6, i64 %88 - %90 = bitcast float* %89 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %90, i32 4, <8 x i1> %85, <8 x float> undef), !tbaa !12, !alias.scope !150, !noalias !153 - %91 = extractelement <8 x i32> %84, i32 0 - %92 = add nsw i32 %mul.i.i, %91 - %93 = sext i32 %92 to i64 - %94 = getelementptr inbounds float, float* %12, i64 %93 - %95 = bitcast float* %94 to <8 x float>* - %wide.masked.load19.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %95, i32 4, <8 x i1> %85, <8 x float> undef), !tbaa !12, !alias.scope !153 - %96 = fsub <8 x float> %wide.masked.load19.2, %wide.masked.load.2 - %97 = bitcast float* %94 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %96, <8 x float>* %97, i32 4, <8 x i1> %85), !tbaa !12, !alias.scope !153, !llvm.access.group !21 - %98 = getelementptr inbounds float, float* %9, i64 %88 - %99 = bitcast float* %98 to <8 x float>* - %wide.masked.load20.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %99, i32 4, <8 x i1> %85, <8 x float> undef), !tbaa !12, !alias.scope !155, !noalias !153 - %100 = fmul <8 x float> %broadcast.splat22, %wide.masked.load20.2 - %101 = fdiv <8 x float> %96, %100, !fpmath !26 - %102 = bitcast float* %94 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %101, <8 x float>* %102, i32 4, <8 x i1> %85), !tbaa !12, !alias.scope !153, !llvm.access.group !21 - %103 = or <8 x i64> %broadcast.splat, - %104 = trunc <8 x i64> %103 to <8 x i32> - %105 = icmp sgt <8 x i32> %broadcast.splat18, %104 - %106 = extractelement <8 x i64> %103, i32 0 - %107 = shl i64 %106, 32 - %108 = ashr exact i64 %107, 32 - %109 = getelementptr inbounds float, float* %6, i64 %108 - %110 = bitcast float* %109 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %110, i32 4, <8 x i1> %105, <8 x float> undef), !tbaa !12, !alias.scope !150, !noalias !153 - %111 = extractelement <8 x i32> %104, i32 0 - %112 = add nsw i32 %mul.i.i, %111 - %113 = sext i32 %112 to i64 - %114 = getelementptr inbounds float, float* %12, i64 %113 - %115 = bitcast float* %114 to <8 x float>* - %wide.masked.load19.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %115, i32 4, <8 x i1> %105, <8 x float> undef), !tbaa !12, !alias.scope !153 - %116 = fsub <8 x float> %wide.masked.load19.3, %wide.masked.load.3 - %117 = bitcast float* %114 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %116, <8 x float>* %117, i32 4, <8 x i1> %105), !tbaa !12, !alias.scope !153, !llvm.access.group !21 - %118 = getelementptr inbounds float, float* %9, i64 %108 - %119 = bitcast float* %118 to <8 x float>* - %wide.masked.load20.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %119, i32 4, <8 x i1> %105, <8 x float> undef), !tbaa !12, !alias.scope !155, !noalias !153 - %120 = fmul <8 x float> %broadcast.splat22, %wide.masked.load20.3 - %121 = fdiv <8 x float> %116, %120, !fpmath !26 - %122 = bitcast float* %114 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %121, <8 x float>* %122, i32 4, <8 x i1> %105), !tbaa !12, !alias.scope !153, !llvm.access.group !21 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1369, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.preheader ], [ %844, %if.end.r_exit.i.i.us.1369 ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %20, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us - %123 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom6.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx7.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us - %124 = load float, float* %arrayidx7.i.i.us, align 4, !tbaa !12 - %sub.i.i.us = fsub float %124, %123 - store float %sub.i.i.us, float* %arrayidx7.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us - %125 = load float, float* %arrayidx10.i.i.us, align 4, !tbaa !12 - %mul11.i.i.us = fmul float %25, %125 - %div.i.i.us = fdiv float %sub.i.i.us, %mul11.i.i.us, !fpmath !26 - store float %div.i.i.us, float* %arrayidx7.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %126 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1354 = add nuw nsw i64 %126, %mul.i.i.i - %conv.i.i.us.1355 = trunc i64 %add1.i.i.i.us.1354 to i32 - %cmp4.i.i.us.1356 = icmp sgt i32 %20, %conv.i.i.us.1355 - br i1 %cmp4.i.i.us.1356, label %if.then.i.i.us.1368, label %if.end.r_exit.i.i.us.1369 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.r_exit.i.i.us.1369 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i - %127 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.1 = or i32 %127, 1 - %cmp.i.i.1 = icmp sgt i32 %24, %conv2.i.i.1 - %mul.i.i.1 = mul nsw i32 %20, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck30, label %pregion_for_end.i.i.1 - -vector.scevcheck30: ; preds = %pregion_for_end.i.i - %128 = mul i32 %20, %conv2.i.i.1 - %129 = trunc i64 %2 to i32 - %130 = shl i32 %129, 5 - %131 = add i32 %128, %130 - %132 = icmp sgt i32 %131, 2147483616 - br i1 %132, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.memcheck52 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.memcheck52, %vector.scevcheck30 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.memcheck52: ; preds = %vector.scevcheck30 - %133 = trunc i64 %2 to i32 - %134 = shl i32 %133, 5 - %135 = sext i32 %134 to i64 - %scevgep32 = getelementptr float, float* %6, i64 %135 - %136 = add nsw i64 %135, 32 - %scevgep34 = getelementptr float, float* %6, i64 %136 - %137 = mul i32 %20, %conv2.i.i.1 - %138 = add i32 %137, %134 - %139 = sext i32 %138 to i64 - %scevgep36 = getelementptr float, float* %12, i64 %139 - %140 = add nsw i64 %139, 32 - %scevgep38 = getelementptr float, float* %12, i64 %140 - %scevgep40 = getelementptr float, float* %9, i64 %135 - %scevgep42 = getelementptr float, float* %9, i64 %136 - %bound044 = icmp ult float* %scevgep32, %scevgep38 - %bound145 = icmp ult float* %scevgep36, %scevgep34 - %found.conflict46 = and i1 %bound044, %bound145 - %bound047 = icmp ult float* %scevgep40, %scevgep38 - %bound148 = icmp ult float* %scevgep36, %scevgep42 - %found.conflict49 = and i1 %bound047, %bound148 - %conflict.rdx50 = or i1 %found.conflict46, %found.conflict49 - br i1 %conflict.rdx50, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph53 - -vector.ph53: ; preds = %vector.memcheck52 - %broadcast.splatinsert60 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat61 = shufflevector <8 x i64> %broadcast.splatinsert60, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert62 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat63 = shufflevector <8 x i32> %broadcast.splatinsert62, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert67 = insertelement <8 x float> undef, float %25, i32 0 - %broadcast.splat68 = shufflevector <8 x float> %broadcast.splatinsert67, <8 x float> undef, <8 x i32> zeroinitializer - %141 = or <8 x i64> %broadcast.splat61, - %142 = trunc <8 x i64> %141 to <8 x i32> - %143 = icmp sgt <8 x i32> %broadcast.splat63, %142 - %144 = extractelement <8 x i64> %141, i32 0 - %145 = shl i64 %144, 32 - %146 = ashr exact i64 %145, 32 - %147 = getelementptr inbounds float, float* %6, i64 %146 - %148 = bitcast float* %147 to <8 x float>* - %wide.masked.load64 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %148, i32 4, <8 x i1> %143, <8 x float> undef), !tbaa !12, !alias.scope !157, !noalias !160 - %149 = extractelement <8 x i32> %142, i32 0 - %150 = add nsw i32 %mul.i.i.1, %149 - %151 = sext i32 %150 to i64 - %152 = getelementptr inbounds float, float* %12, i64 %151 - %153 = bitcast float* %152 to <8 x float>* - %wide.masked.load65 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %153, i32 4, <8 x i1> %143, <8 x float> undef), !tbaa !12, !alias.scope !160 - %154 = fsub <8 x float> %wide.masked.load65, %wide.masked.load64 - %155 = bitcast float* %152 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %154, <8 x float>* %155, i32 4, <8 x i1> %143), !tbaa !12, !alias.scope !160, !llvm.access.group !21 - %156 = getelementptr inbounds float, float* %9, i64 %146 - %157 = bitcast float* %156 to <8 x float>* - %wide.masked.load66 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %157, i32 4, <8 x i1> %143, <8 x float> undef), !tbaa !12, !alias.scope !162, !noalias !160 - %158 = fmul <8 x float> %broadcast.splat68, %wide.masked.load66 - %159 = fdiv <8 x float> %154, %158, !fpmath !26 - %160 = bitcast float* %152 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %159, <8 x float>* %160, i32 4, <8 x i1> %143), !tbaa !12, !alias.scope !160, !llvm.access.group !21 - %161 = or <8 x i64> %broadcast.splat61, - %162 = trunc <8 x i64> %161 to <8 x i32> - %163 = icmp sgt <8 x i32> %broadcast.splat63, %162 - %164 = extractelement <8 x i64> %161, i32 0 - %165 = shl i64 %164, 32 - %166 = ashr exact i64 %165, 32 - %167 = getelementptr inbounds float, float* %6, i64 %166 - %168 = bitcast float* %167 to <8 x float>* - %wide.masked.load64.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %168, i32 4, <8 x i1> %163, <8 x float> undef), !tbaa !12, !alias.scope !157, !noalias !160 - %169 = extractelement <8 x i32> %162, i32 0 - %170 = add nsw i32 %mul.i.i.1, %169 - %171 = sext i32 %170 to i64 - %172 = getelementptr inbounds float, float* %12, i64 %171 - %173 = bitcast float* %172 to <8 x float>* - %wide.masked.load65.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %173, i32 4, <8 x i1> %163, <8 x float> undef), !tbaa !12, !alias.scope !160 - %174 = fsub <8 x float> %wide.masked.load65.1, %wide.masked.load64.1 - %175 = bitcast float* %172 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %174, <8 x float>* %175, i32 4, <8 x i1> %163), !tbaa !12, !alias.scope !160, !llvm.access.group !21 - %176 = getelementptr inbounds float, float* %9, i64 %166 - %177 = bitcast float* %176 to <8 x float>* - %wide.masked.load66.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %177, i32 4, <8 x i1> %163, <8 x float> undef), !tbaa !12, !alias.scope !162, !noalias !160 - %178 = fmul <8 x float> %broadcast.splat68, %wide.masked.load66.1 - %179 = fdiv <8 x float> %174, %178, !fpmath !26 - %180 = bitcast float* %172 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %179, <8 x float>* %180, i32 4, <8 x i1> %163), !tbaa !12, !alias.scope !160, !llvm.access.group !21 - %181 = or <8 x i64> %broadcast.splat61, - %182 = trunc <8 x i64> %181 to <8 x i32> - %183 = icmp sgt <8 x i32> %broadcast.splat63, %182 - %184 = extractelement <8 x i64> %181, i32 0 - %185 = shl i64 %184, 32 - %186 = ashr exact i64 %185, 32 - %187 = getelementptr inbounds float, float* %6, i64 %186 - %188 = bitcast float* %187 to <8 x float>* - %wide.masked.load64.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %188, i32 4, <8 x i1> %183, <8 x float> undef), !tbaa !12, !alias.scope !157, !noalias !160 - %189 = extractelement <8 x i32> %182, i32 0 - %190 = add nsw i32 %mul.i.i.1, %189 - %191 = sext i32 %190 to i64 - %192 = getelementptr inbounds float, float* %12, i64 %191 - %193 = bitcast float* %192 to <8 x float>* - %wide.masked.load65.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %193, i32 4, <8 x i1> %183, <8 x float> undef), !tbaa !12, !alias.scope !160 - %194 = fsub <8 x float> %wide.masked.load65.2, %wide.masked.load64.2 - %195 = bitcast float* %192 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %194, <8 x float>* %195, i32 4, <8 x i1> %183), !tbaa !12, !alias.scope !160, !llvm.access.group !21 - %196 = getelementptr inbounds float, float* %9, i64 %186 - %197 = bitcast float* %196 to <8 x float>* - %wide.masked.load66.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %197, i32 4, <8 x i1> %183, <8 x float> undef), !tbaa !12, !alias.scope !162, !noalias !160 - %198 = fmul <8 x float> %broadcast.splat68, %wide.masked.load66.2 - %199 = fdiv <8 x float> %194, %198, !fpmath !26 - %200 = bitcast float* %192 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %199, <8 x float>* %200, i32 4, <8 x i1> %183), !tbaa !12, !alias.scope !160, !llvm.access.group !21 - %201 = or <8 x i64> %broadcast.splat61, - %202 = trunc <8 x i64> %201 to <8 x i32> - %203 = icmp sgt <8 x i32> %broadcast.splat63, %202 - %204 = extractelement <8 x i64> %201, i32 0 - %205 = shl i64 %204, 32 - %206 = ashr exact i64 %205, 32 - %207 = getelementptr inbounds float, float* %6, i64 %206 - %208 = bitcast float* %207 to <8 x float>* - %wide.masked.load64.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %208, i32 4, <8 x i1> %203, <8 x float> undef), !tbaa !12, !alias.scope !157, !noalias !160 - %209 = extractelement <8 x i32> %202, i32 0 - %210 = add nsw i32 %mul.i.i.1, %209 - %211 = sext i32 %210 to i64 - %212 = getelementptr inbounds float, float* %12, i64 %211 - %213 = bitcast float* %212 to <8 x float>* - %wide.masked.load65.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %213, i32 4, <8 x i1> %203, <8 x float> undef), !tbaa !12, !alias.scope !160 - %214 = fsub <8 x float> %wide.masked.load65.3, %wide.masked.load64.3 - %215 = bitcast float* %212 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %214, <8 x float>* %215, i32 4, <8 x i1> %203), !tbaa !12, !alias.scope !160, !llvm.access.group !21 - %216 = getelementptr inbounds float, float* %9, i64 %206 - %217 = bitcast float* %216 to <8 x float>* - %wide.masked.load66.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %217, i32 4, <8 x i1> %203, <8 x float> undef), !tbaa !12, !alias.scope !162, !noalias !160 - %218 = fmul <8 x float> %broadcast.splat68, %wide.masked.load66.3 - %219 = fdiv <8 x float> %214, %218, !fpmath !26 - %220 = bitcast float* %212 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %219, <8 x float>* %220, i32 4, <8 x i1> %203), !tbaa !12, !alias.scope !160, !llvm.access.group !21 - br label %pregion_for_end.i.i.1 - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.1, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ], [ %840, %if.end.r_exit.i.i.us.1.1 ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %20, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.1 - %221 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %add.i.i.us.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom6.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx7.i.i.us.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.1 - %222 = load float, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12 - %sub.i.i.us.1 = fsub float %222, %221 - store float %sub.i.i.us.1, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.1 - %223 = load float, float* %arrayidx10.i.i.us.1, align 4, !tbaa !12 - %mul11.i.i.us.1 = fmul float %25, %223 - %div.i.i.us.1 = fdiv float %sub.i.i.us.1, %mul11.i.i.us.1, !fpmath !26 - store float %div.i.i.us.1, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %224 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %224, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %20, %conv.i.i.us.1.1 - br i1 %cmp4.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.r_exit.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.r_exit.i.i.us.1.1 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph53, %pregion_for_end.i.i - %225 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.2 = or i32 %225, 2 - %cmp.i.i.2 = icmp sgt i32 %24, %conv2.i.i.2 - %mul.i.i.2 = mul nsw i32 %20, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck76, label %pregion_for_end.i.i.2 - -vector.scevcheck76: ; preds = %pregion_for_end.i.i.1 - %226 = mul i32 %20, %conv2.i.i.2 - %227 = trunc i64 %2 to i32 - %228 = shl i32 %227, 5 - %229 = add i32 %226, %228 - %230 = icmp sgt i32 %229, 2147483616 - br i1 %230, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.memcheck98 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.memcheck98, %vector.scevcheck76 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.memcheck98: ; preds = %vector.scevcheck76 - %231 = trunc i64 %2 to i32 - %232 = shl i32 %231, 5 - %233 = sext i32 %232 to i64 - %scevgep78 = getelementptr float, float* %6, i64 %233 - %234 = add nsw i64 %233, 32 - %scevgep80 = getelementptr float, float* %6, i64 %234 - %235 = mul i32 %20, %conv2.i.i.2 - %236 = add i32 %235, %232 - %237 = sext i32 %236 to i64 - %scevgep82 = getelementptr float, float* %12, i64 %237 - %238 = add nsw i64 %237, 32 - %scevgep84 = getelementptr float, float* %12, i64 %238 - %scevgep86 = getelementptr float, float* %9, i64 %233 - %scevgep88 = getelementptr float, float* %9, i64 %234 - %bound090 = icmp ult float* %scevgep78, %scevgep84 - %bound191 = icmp ult float* %scevgep82, %scevgep80 - %found.conflict92 = and i1 %bound090, %bound191 - %bound093 = icmp ult float* %scevgep86, %scevgep84 - %bound194 = icmp ult float* %scevgep82, %scevgep88 - %found.conflict95 = and i1 %bound093, %bound194 - %conflict.rdx96 = or i1 %found.conflict92, %found.conflict95 - br i1 %conflict.rdx96, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph99 - -vector.ph99: ; preds = %vector.memcheck98 - %broadcast.splatinsert106 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat107 = shufflevector <8 x i64> %broadcast.splatinsert106, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert108 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat109 = shufflevector <8 x i32> %broadcast.splatinsert108, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert113 = insertelement <8 x float> undef, float %25, i32 0 - %broadcast.splat114 = shufflevector <8 x float> %broadcast.splatinsert113, <8 x float> undef, <8 x i32> zeroinitializer - %239 = or <8 x i64> %broadcast.splat107, - %240 = trunc <8 x i64> %239 to <8 x i32> - %241 = icmp sgt <8 x i32> %broadcast.splat109, %240 - %242 = extractelement <8 x i64> %239, i32 0 - %243 = shl i64 %242, 32 - %244 = ashr exact i64 %243, 32 - %245 = getelementptr inbounds float, float* %6, i64 %244 - %246 = bitcast float* %245 to <8 x float>* - %wide.masked.load110 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %246, i32 4, <8 x i1> %241, <8 x float> undef), !tbaa !12, !alias.scope !164, !noalias !167 - %247 = extractelement <8 x i32> %240, i32 0 - %248 = add nsw i32 %mul.i.i.2, %247 - %249 = sext i32 %248 to i64 - %250 = getelementptr inbounds float, float* %12, i64 %249 - %251 = bitcast float* %250 to <8 x float>* - %wide.masked.load111 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %251, i32 4, <8 x i1> %241, <8 x float> undef), !tbaa !12, !alias.scope !167 - %252 = fsub <8 x float> %wide.masked.load111, %wide.masked.load110 - %253 = bitcast float* %250 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %252, <8 x float>* %253, i32 4, <8 x i1> %241), !tbaa !12, !alias.scope !167, !llvm.access.group !21 - %254 = getelementptr inbounds float, float* %9, i64 %244 - %255 = bitcast float* %254 to <8 x float>* - %wide.masked.load112 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %255, i32 4, <8 x i1> %241, <8 x float> undef), !tbaa !12, !alias.scope !169, !noalias !167 - %256 = fmul <8 x float> %broadcast.splat114, %wide.masked.load112 - %257 = fdiv <8 x float> %252, %256, !fpmath !26 - %258 = bitcast float* %250 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %257, <8 x float>* %258, i32 4, <8 x i1> %241), !tbaa !12, !alias.scope !167, !llvm.access.group !21 - %259 = or <8 x i64> %broadcast.splat107, - %260 = trunc <8 x i64> %259 to <8 x i32> - %261 = icmp sgt <8 x i32> %broadcast.splat109, %260 - %262 = extractelement <8 x i64> %259, i32 0 - %263 = shl i64 %262, 32 - %264 = ashr exact i64 %263, 32 - %265 = getelementptr inbounds float, float* %6, i64 %264 - %266 = bitcast float* %265 to <8 x float>* - %wide.masked.load110.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %266, i32 4, <8 x i1> %261, <8 x float> undef), !tbaa !12, !alias.scope !164, !noalias !167 - %267 = extractelement <8 x i32> %260, i32 0 - %268 = add nsw i32 %mul.i.i.2, %267 - %269 = sext i32 %268 to i64 - %270 = getelementptr inbounds float, float* %12, i64 %269 - %271 = bitcast float* %270 to <8 x float>* - %wide.masked.load111.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %271, i32 4, <8 x i1> %261, <8 x float> undef), !tbaa !12, !alias.scope !167 - %272 = fsub <8 x float> %wide.masked.load111.1, %wide.masked.load110.1 - %273 = bitcast float* %270 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %272, <8 x float>* %273, i32 4, <8 x i1> %261), !tbaa !12, !alias.scope !167, !llvm.access.group !21 - %274 = getelementptr inbounds float, float* %9, i64 %264 - %275 = bitcast float* %274 to <8 x float>* - %wide.masked.load112.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %275, i32 4, <8 x i1> %261, <8 x float> undef), !tbaa !12, !alias.scope !169, !noalias !167 - %276 = fmul <8 x float> %broadcast.splat114, %wide.masked.load112.1 - %277 = fdiv <8 x float> %272, %276, !fpmath !26 - %278 = bitcast float* %270 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %277, <8 x float>* %278, i32 4, <8 x i1> %261), !tbaa !12, !alias.scope !167, !llvm.access.group !21 - %279 = or <8 x i64> %broadcast.splat107, - %280 = trunc <8 x i64> %279 to <8 x i32> - %281 = icmp sgt <8 x i32> %broadcast.splat109, %280 - %282 = extractelement <8 x i64> %279, i32 0 - %283 = shl i64 %282, 32 - %284 = ashr exact i64 %283, 32 - %285 = getelementptr inbounds float, float* %6, i64 %284 - %286 = bitcast float* %285 to <8 x float>* - %wide.masked.load110.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %286, i32 4, <8 x i1> %281, <8 x float> undef), !tbaa !12, !alias.scope !164, !noalias !167 - %287 = extractelement <8 x i32> %280, i32 0 - %288 = add nsw i32 %mul.i.i.2, %287 - %289 = sext i32 %288 to i64 - %290 = getelementptr inbounds float, float* %12, i64 %289 - %291 = bitcast float* %290 to <8 x float>* - %wide.masked.load111.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %291, i32 4, <8 x i1> %281, <8 x float> undef), !tbaa !12, !alias.scope !167 - %292 = fsub <8 x float> %wide.masked.load111.2, %wide.masked.load110.2 - %293 = bitcast float* %290 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %292, <8 x float>* %293, i32 4, <8 x i1> %281), !tbaa !12, !alias.scope !167, !llvm.access.group !21 - %294 = getelementptr inbounds float, float* %9, i64 %284 - %295 = bitcast float* %294 to <8 x float>* - %wide.masked.load112.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %295, i32 4, <8 x i1> %281, <8 x float> undef), !tbaa !12, !alias.scope !169, !noalias !167 - %296 = fmul <8 x float> %broadcast.splat114, %wide.masked.load112.2 - %297 = fdiv <8 x float> %292, %296, !fpmath !26 - %298 = bitcast float* %290 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %297, <8 x float>* %298, i32 4, <8 x i1> %281), !tbaa !12, !alias.scope !167, !llvm.access.group !21 - %299 = or <8 x i64> %broadcast.splat107, - %300 = trunc <8 x i64> %299 to <8 x i32> - %301 = icmp sgt <8 x i32> %broadcast.splat109, %300 - %302 = extractelement <8 x i64> %299, i32 0 - %303 = shl i64 %302, 32 - %304 = ashr exact i64 %303, 32 - %305 = getelementptr inbounds float, float* %6, i64 %304 - %306 = bitcast float* %305 to <8 x float>* - %wide.masked.load110.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %306, i32 4, <8 x i1> %301, <8 x float> undef), !tbaa !12, !alias.scope !164, !noalias !167 - %307 = extractelement <8 x i32> %300, i32 0 - %308 = add nsw i32 %mul.i.i.2, %307 - %309 = sext i32 %308 to i64 - %310 = getelementptr inbounds float, float* %12, i64 %309 - %311 = bitcast float* %310 to <8 x float>* - %wide.masked.load111.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %311, i32 4, <8 x i1> %301, <8 x float> undef), !tbaa !12, !alias.scope !167 - %312 = fsub <8 x float> %wide.masked.load111.3, %wide.masked.load110.3 - %313 = bitcast float* %310 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %312, <8 x float>* %313, i32 4, <8 x i1> %301), !tbaa !12, !alias.scope !167, !llvm.access.group !21 - %314 = getelementptr inbounds float, float* %9, i64 %304 - %315 = bitcast float* %314 to <8 x float>* - %wide.masked.load112.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %315, i32 4, <8 x i1> %301, <8 x float> undef), !tbaa !12, !alias.scope !169, !noalias !167 - %316 = fmul <8 x float> %broadcast.splat114, %wide.masked.load112.3 - %317 = fdiv <8 x float> %312, %316, !fpmath !26 - %318 = bitcast float* %310 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %317, <8 x float>* %318, i32 4, <8 x i1> %301), !tbaa !12, !alias.scope !167, !llvm.access.group !21 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.r_exit.i.i.us.2.1, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ], [ %836, %if.end.r_exit.i.i.us.2.1 ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %20, %conv.i.i.us.2 - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.r_exit.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %sext.i.i.us.2 = shl i64 %add1.i.i.i.us.2, 32 - %idxprom.i.i.us.2 = ashr exact i64 %sext.i.i.us.2, 32 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.2 - %319 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %add.i.i.us.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom6.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx7.i.i.us.2 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.2 - %320 = load float, float* %arrayidx7.i.i.us.2, align 4, !tbaa !12 - %sub.i.i.us.2 = fsub float %320, %319 - store float %sub.i.i.us.2, float* %arrayidx7.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.2 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.2 - %321 = load float, float* %arrayidx10.i.i.us.2, align 4, !tbaa !12 - %mul11.i.i.us.2 = fmul float %25, %321 - %div.i.i.us.2 = fdiv float %sub.i.i.us.2, %mul11.i.i.us.2, !fpmath !26 - store float %div.i.i.us.2, float* %arrayidx7.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.2 - -if.end.r_exit.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %322 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %322, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %20, %conv.i.i.us.2.1 - br i1 %cmp4.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.r_exit.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.r_exit.i.i.us.2.1 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph99, %pregion_for_end.i.i.1 - %323 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.3 = or i32 %323, 3 - %cmp.i.i.3 = icmp sgt i32 %24, %conv2.i.i.3 - %mul.i.i.3 = mul nsw i32 %20, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck122, label %pregion_for_end.i.i.3 - -vector.scevcheck122: ; preds = %pregion_for_end.i.i.2 - %324 = mul i32 %20, %conv2.i.i.3 - %325 = trunc i64 %2 to i32 - %326 = shl i32 %325, 5 - %327 = add i32 %324, %326 - %328 = icmp sgt i32 %327, 2147483616 - br i1 %328, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.memcheck144 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.memcheck144, %vector.scevcheck122 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.memcheck144: ; preds = %vector.scevcheck122 - %329 = trunc i64 %2 to i32 - %330 = shl i32 %329, 5 - %331 = sext i32 %330 to i64 - %scevgep124 = getelementptr float, float* %6, i64 %331 - %332 = add nsw i64 %331, 32 - %scevgep126 = getelementptr float, float* %6, i64 %332 - %333 = mul i32 %20, %conv2.i.i.3 - %334 = add i32 %333, %330 - %335 = sext i32 %334 to i64 - %scevgep128 = getelementptr float, float* %12, i64 %335 - %336 = add nsw i64 %335, 32 - %scevgep130 = getelementptr float, float* %12, i64 %336 - %scevgep132 = getelementptr float, float* %9, i64 %331 - %scevgep134 = getelementptr float, float* %9, i64 %332 - %bound0136 = icmp ult float* %scevgep124, %scevgep130 - %bound1137 = icmp ult float* %scevgep128, %scevgep126 - %found.conflict138 = and i1 %bound0136, %bound1137 - %bound0139 = icmp ult float* %scevgep132, %scevgep130 - %bound1140 = icmp ult float* %scevgep128, %scevgep134 - %found.conflict141 = and i1 %bound0139, %bound1140 - %conflict.rdx142 = or i1 %found.conflict138, %found.conflict141 - br i1 %conflict.rdx142, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph145 - -vector.ph145: ; preds = %vector.memcheck144 - %broadcast.splatinsert152 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat153 = shufflevector <8 x i64> %broadcast.splatinsert152, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert154 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat155 = shufflevector <8 x i32> %broadcast.splatinsert154, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert159 = insertelement <8 x float> undef, float %25, i32 0 - %broadcast.splat160 = shufflevector <8 x float> %broadcast.splatinsert159, <8 x float> undef, <8 x i32> zeroinitializer - %337 = or <8 x i64> %broadcast.splat153, - %338 = trunc <8 x i64> %337 to <8 x i32> - %339 = icmp sgt <8 x i32> %broadcast.splat155, %338 - %340 = extractelement <8 x i64> %337, i32 0 - %341 = shl i64 %340, 32 - %342 = ashr exact i64 %341, 32 - %343 = getelementptr inbounds float, float* %6, i64 %342 - %344 = bitcast float* %343 to <8 x float>* - %wide.masked.load156 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %344, i32 4, <8 x i1> %339, <8 x float> undef), !tbaa !12, !alias.scope !171, !noalias !174 - %345 = extractelement <8 x i32> %338, i32 0 - %346 = add nsw i32 %mul.i.i.3, %345 - %347 = sext i32 %346 to i64 - %348 = getelementptr inbounds float, float* %12, i64 %347 - %349 = bitcast float* %348 to <8 x float>* - %wide.masked.load157 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %349, i32 4, <8 x i1> %339, <8 x float> undef), !tbaa !12, !alias.scope !174 - %350 = fsub <8 x float> %wide.masked.load157, %wide.masked.load156 - %351 = bitcast float* %348 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %350, <8 x float>* %351, i32 4, <8 x i1> %339), !tbaa !12, !alias.scope !174, !llvm.access.group !21 - %352 = getelementptr inbounds float, float* %9, i64 %342 - %353 = bitcast float* %352 to <8 x float>* - %wide.masked.load158 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %353, i32 4, <8 x i1> %339, <8 x float> undef), !tbaa !12, !alias.scope !176, !noalias !174 - %354 = fmul <8 x float> %broadcast.splat160, %wide.masked.load158 - %355 = fdiv <8 x float> %350, %354, !fpmath !26 - %356 = bitcast float* %348 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %355, <8 x float>* %356, i32 4, <8 x i1> %339), !tbaa !12, !alias.scope !174, !llvm.access.group !21 - %357 = or <8 x i64> %broadcast.splat153, - %358 = trunc <8 x i64> %357 to <8 x i32> - %359 = icmp sgt <8 x i32> %broadcast.splat155, %358 - %360 = extractelement <8 x i64> %357, i32 0 - %361 = shl i64 %360, 32 - %362 = ashr exact i64 %361, 32 - %363 = getelementptr inbounds float, float* %6, i64 %362 - %364 = bitcast float* %363 to <8 x float>* - %wide.masked.load156.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %364, i32 4, <8 x i1> %359, <8 x float> undef), !tbaa !12, !alias.scope !171, !noalias !174 - %365 = extractelement <8 x i32> %358, i32 0 - %366 = add nsw i32 %mul.i.i.3, %365 - %367 = sext i32 %366 to i64 - %368 = getelementptr inbounds float, float* %12, i64 %367 - %369 = bitcast float* %368 to <8 x float>* - %wide.masked.load157.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %369, i32 4, <8 x i1> %359, <8 x float> undef), !tbaa !12, !alias.scope !174 - %370 = fsub <8 x float> %wide.masked.load157.1, %wide.masked.load156.1 - %371 = bitcast float* %368 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %370, <8 x float>* %371, i32 4, <8 x i1> %359), !tbaa !12, !alias.scope !174, !llvm.access.group !21 - %372 = getelementptr inbounds float, float* %9, i64 %362 - %373 = bitcast float* %372 to <8 x float>* - %wide.masked.load158.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %373, i32 4, <8 x i1> %359, <8 x float> undef), !tbaa !12, !alias.scope !176, !noalias !174 - %374 = fmul <8 x float> %broadcast.splat160, %wide.masked.load158.1 - %375 = fdiv <8 x float> %370, %374, !fpmath !26 - %376 = bitcast float* %368 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %375, <8 x float>* %376, i32 4, <8 x i1> %359), !tbaa !12, !alias.scope !174, !llvm.access.group !21 - %377 = or <8 x i64> %broadcast.splat153, - %378 = trunc <8 x i64> %377 to <8 x i32> - %379 = icmp sgt <8 x i32> %broadcast.splat155, %378 - %380 = extractelement <8 x i64> %377, i32 0 - %381 = shl i64 %380, 32 - %382 = ashr exact i64 %381, 32 - %383 = getelementptr inbounds float, float* %6, i64 %382 - %384 = bitcast float* %383 to <8 x float>* - %wide.masked.load156.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %384, i32 4, <8 x i1> %379, <8 x float> undef), !tbaa !12, !alias.scope !171, !noalias !174 - %385 = extractelement <8 x i32> %378, i32 0 - %386 = add nsw i32 %mul.i.i.3, %385 - %387 = sext i32 %386 to i64 - %388 = getelementptr inbounds float, float* %12, i64 %387 - %389 = bitcast float* %388 to <8 x float>* - %wide.masked.load157.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %389, i32 4, <8 x i1> %379, <8 x float> undef), !tbaa !12, !alias.scope !174 - %390 = fsub <8 x float> %wide.masked.load157.2, %wide.masked.load156.2 - %391 = bitcast float* %388 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %390, <8 x float>* %391, i32 4, <8 x i1> %379), !tbaa !12, !alias.scope !174, !llvm.access.group !21 - %392 = getelementptr inbounds float, float* %9, i64 %382 - %393 = bitcast float* %392 to <8 x float>* - %wide.masked.load158.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %393, i32 4, <8 x i1> %379, <8 x float> undef), !tbaa !12, !alias.scope !176, !noalias !174 - %394 = fmul <8 x float> %broadcast.splat160, %wide.masked.load158.2 - %395 = fdiv <8 x float> %390, %394, !fpmath !26 - %396 = bitcast float* %388 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %395, <8 x float>* %396, i32 4, <8 x i1> %379), !tbaa !12, !alias.scope !174, !llvm.access.group !21 - %397 = or <8 x i64> %broadcast.splat153, - %398 = trunc <8 x i64> %397 to <8 x i32> - %399 = icmp sgt <8 x i32> %broadcast.splat155, %398 - %400 = extractelement <8 x i64> %397, i32 0 - %401 = shl i64 %400, 32 - %402 = ashr exact i64 %401, 32 - %403 = getelementptr inbounds float, float* %6, i64 %402 - %404 = bitcast float* %403 to <8 x float>* - %wide.masked.load156.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %404, i32 4, <8 x i1> %399, <8 x float> undef), !tbaa !12, !alias.scope !171, !noalias !174 - %405 = extractelement <8 x i32> %398, i32 0 - %406 = add nsw i32 %mul.i.i.3, %405 - %407 = sext i32 %406 to i64 - %408 = getelementptr inbounds float, float* %12, i64 %407 - %409 = bitcast float* %408 to <8 x float>* - %wide.masked.load157.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %409, i32 4, <8 x i1> %399, <8 x float> undef), !tbaa !12, !alias.scope !174 - %410 = fsub <8 x float> %wide.masked.load157.3, %wide.masked.load156.3 - %411 = bitcast float* %408 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %410, <8 x float>* %411, i32 4, <8 x i1> %399), !tbaa !12, !alias.scope !174, !llvm.access.group !21 - %412 = getelementptr inbounds float, float* %9, i64 %402 - %413 = bitcast float* %412 to <8 x float>* - %wide.masked.load158.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %413, i32 4, <8 x i1> %399, <8 x float> undef), !tbaa !12, !alias.scope !176, !noalias !174 - %414 = fmul <8 x float> %broadcast.splat160, %wide.masked.load158.3 - %415 = fdiv <8 x float> %410, %414, !fpmath !26 - %416 = bitcast float* %408 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %415, <8 x float>* %416, i32 4, <8 x i1> %399), !tbaa !12, !alias.scope !174, !llvm.access.group !21 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.r_exit.i.i.us.3.1, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ], [ %832, %if.end.r_exit.i.i.us.3.1 ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %20, %conv.i.i.us.3 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.r_exit.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %sext.i.i.us.3 = shl i64 %add1.i.i.i.us.3, 32 - %idxprom.i.i.us.3 = ashr exact i64 %sext.i.i.us.3, 32 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.3 - %417 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %add.i.i.us.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom6.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx7.i.i.us.3 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.3 - %418 = load float, float* %arrayidx7.i.i.us.3, align 4, !tbaa !12 - %sub.i.i.us.3 = fsub float %418, %417 - store float %sub.i.i.us.3, float* %arrayidx7.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.3 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.3 - %419 = load float, float* %arrayidx10.i.i.us.3, align 4, !tbaa !12 - %mul11.i.i.us.3 = fmul float %25, %419 - %div.i.i.us.3 = fdiv float %sub.i.i.us.3, %mul11.i.i.us.3, !fpmath !26 - store float %div.i.i.us.3, float* %arrayidx7.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.3 - -if.end.r_exit.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %420 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %420, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %20, %conv.i.i.us.3.1 - br i1 %cmp4.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.r_exit.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.r_exit.i.i.us.3.1 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph145, %pregion_for_end.i.i.2 - %421 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.4 = or i32 %421, 4 - %cmp.i.i.4 = icmp sgt i32 %24, %conv2.i.i.4 - %mul.i.i.4 = mul nsw i32 %20, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck168, label %pregion_for_end.i.i.4 - -vector.scevcheck168: ; preds = %pregion_for_end.i.i.3 - %422 = mul i32 %20, %conv2.i.i.4 - %423 = trunc i64 %2 to i32 - %424 = shl i32 %423, 5 - %425 = add i32 %422, %424 - %426 = icmp sgt i32 %425, 2147483616 - br i1 %426, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.memcheck190 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.memcheck190, %vector.scevcheck168 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.memcheck190: ; preds = %vector.scevcheck168 - %427 = trunc i64 %2 to i32 - %428 = shl i32 %427, 5 - %429 = sext i32 %428 to i64 - %scevgep170 = getelementptr float, float* %6, i64 %429 - %430 = add nsw i64 %429, 32 - %scevgep172 = getelementptr float, float* %6, i64 %430 - %431 = mul i32 %20, %conv2.i.i.4 - %432 = add i32 %431, %428 - %433 = sext i32 %432 to i64 - %scevgep174 = getelementptr float, float* %12, i64 %433 - %434 = add nsw i64 %433, 32 - %scevgep176 = getelementptr float, float* %12, i64 %434 - %scevgep178 = getelementptr float, float* %9, i64 %429 - %scevgep180 = getelementptr float, float* %9, i64 %430 - %bound0182 = icmp ult float* %scevgep170, %scevgep176 - %bound1183 = icmp ult float* %scevgep174, %scevgep172 - %found.conflict184 = and i1 %bound0182, %bound1183 - %bound0185 = icmp ult float* %scevgep178, %scevgep176 - %bound1186 = icmp ult float* %scevgep174, %scevgep180 - %found.conflict187 = and i1 %bound0185, %bound1186 - %conflict.rdx188 = or i1 %found.conflict184, %found.conflict187 - br i1 %conflict.rdx188, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph191 - -vector.ph191: ; preds = %vector.memcheck190 - %broadcast.splatinsert198 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat199 = shufflevector <8 x i64> %broadcast.splatinsert198, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert200 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat201 = shufflevector <8 x i32> %broadcast.splatinsert200, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert205 = insertelement <8 x float> undef, float %25, i32 0 - %broadcast.splat206 = shufflevector <8 x float> %broadcast.splatinsert205, <8 x float> undef, <8 x i32> zeroinitializer - %435 = or <8 x i64> %broadcast.splat199, - %436 = trunc <8 x i64> %435 to <8 x i32> - %437 = icmp sgt <8 x i32> %broadcast.splat201, %436 - %438 = extractelement <8 x i64> %435, i32 0 - %439 = shl i64 %438, 32 - %440 = ashr exact i64 %439, 32 - %441 = getelementptr inbounds float, float* %6, i64 %440 - %442 = bitcast float* %441 to <8 x float>* - %wide.masked.load202 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %442, i32 4, <8 x i1> %437, <8 x float> undef), !tbaa !12, !alias.scope !178, !noalias !181 - %443 = extractelement <8 x i32> %436, i32 0 - %444 = add nsw i32 %mul.i.i.4, %443 - %445 = sext i32 %444 to i64 - %446 = getelementptr inbounds float, float* %12, i64 %445 - %447 = bitcast float* %446 to <8 x float>* - %wide.masked.load203 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %447, i32 4, <8 x i1> %437, <8 x float> undef), !tbaa !12, !alias.scope !181 - %448 = fsub <8 x float> %wide.masked.load203, %wide.masked.load202 - %449 = bitcast float* %446 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %448, <8 x float>* %449, i32 4, <8 x i1> %437), !tbaa !12, !alias.scope !181, !llvm.access.group !21 - %450 = getelementptr inbounds float, float* %9, i64 %440 - %451 = bitcast float* %450 to <8 x float>* - %wide.masked.load204 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %451, i32 4, <8 x i1> %437, <8 x float> undef), !tbaa !12, !alias.scope !183, !noalias !181 - %452 = fmul <8 x float> %broadcast.splat206, %wide.masked.load204 - %453 = fdiv <8 x float> %448, %452, !fpmath !26 - %454 = bitcast float* %446 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %453, <8 x float>* %454, i32 4, <8 x i1> %437), !tbaa !12, !alias.scope !181, !llvm.access.group !21 - %455 = or <8 x i64> %broadcast.splat199, - %456 = trunc <8 x i64> %455 to <8 x i32> - %457 = icmp sgt <8 x i32> %broadcast.splat201, %456 - %458 = extractelement <8 x i64> %455, i32 0 - %459 = shl i64 %458, 32 - %460 = ashr exact i64 %459, 32 - %461 = getelementptr inbounds float, float* %6, i64 %460 - %462 = bitcast float* %461 to <8 x float>* - %wide.masked.load202.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %462, i32 4, <8 x i1> %457, <8 x float> undef), !tbaa !12, !alias.scope !178, !noalias !181 - %463 = extractelement <8 x i32> %456, i32 0 - %464 = add nsw i32 %mul.i.i.4, %463 - %465 = sext i32 %464 to i64 - %466 = getelementptr inbounds float, float* %12, i64 %465 - %467 = bitcast float* %466 to <8 x float>* - %wide.masked.load203.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %467, i32 4, <8 x i1> %457, <8 x float> undef), !tbaa !12, !alias.scope !181 - %468 = fsub <8 x float> %wide.masked.load203.1, %wide.masked.load202.1 - %469 = bitcast float* %466 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %468, <8 x float>* %469, i32 4, <8 x i1> %457), !tbaa !12, !alias.scope !181, !llvm.access.group !21 - %470 = getelementptr inbounds float, float* %9, i64 %460 - %471 = bitcast float* %470 to <8 x float>* - %wide.masked.load204.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %471, i32 4, <8 x i1> %457, <8 x float> undef), !tbaa !12, !alias.scope !183, !noalias !181 - %472 = fmul <8 x float> %broadcast.splat206, %wide.masked.load204.1 - %473 = fdiv <8 x float> %468, %472, !fpmath !26 - %474 = bitcast float* %466 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %473, <8 x float>* %474, i32 4, <8 x i1> %457), !tbaa !12, !alias.scope !181, !llvm.access.group !21 - %475 = or <8 x i64> %broadcast.splat199, - %476 = trunc <8 x i64> %475 to <8 x i32> - %477 = icmp sgt <8 x i32> %broadcast.splat201, %476 - %478 = extractelement <8 x i64> %475, i32 0 - %479 = shl i64 %478, 32 - %480 = ashr exact i64 %479, 32 - %481 = getelementptr inbounds float, float* %6, i64 %480 - %482 = bitcast float* %481 to <8 x float>* - %wide.masked.load202.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %482, i32 4, <8 x i1> %477, <8 x float> undef), !tbaa !12, !alias.scope !178, !noalias !181 - %483 = extractelement <8 x i32> %476, i32 0 - %484 = add nsw i32 %mul.i.i.4, %483 - %485 = sext i32 %484 to i64 - %486 = getelementptr inbounds float, float* %12, i64 %485 - %487 = bitcast float* %486 to <8 x float>* - %wide.masked.load203.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %487, i32 4, <8 x i1> %477, <8 x float> undef), !tbaa !12, !alias.scope !181 - %488 = fsub <8 x float> %wide.masked.load203.2, %wide.masked.load202.2 - %489 = bitcast float* %486 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %488, <8 x float>* %489, i32 4, <8 x i1> %477), !tbaa !12, !alias.scope !181, !llvm.access.group !21 - %490 = getelementptr inbounds float, float* %9, i64 %480 - %491 = bitcast float* %490 to <8 x float>* - %wide.masked.load204.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %491, i32 4, <8 x i1> %477, <8 x float> undef), !tbaa !12, !alias.scope !183, !noalias !181 - %492 = fmul <8 x float> %broadcast.splat206, %wide.masked.load204.2 - %493 = fdiv <8 x float> %488, %492, !fpmath !26 - %494 = bitcast float* %486 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %493, <8 x float>* %494, i32 4, <8 x i1> %477), !tbaa !12, !alias.scope !181, !llvm.access.group !21 - %495 = or <8 x i64> %broadcast.splat199, - %496 = trunc <8 x i64> %495 to <8 x i32> - %497 = icmp sgt <8 x i32> %broadcast.splat201, %496 - %498 = extractelement <8 x i64> %495, i32 0 - %499 = shl i64 %498, 32 - %500 = ashr exact i64 %499, 32 - %501 = getelementptr inbounds float, float* %6, i64 %500 - %502 = bitcast float* %501 to <8 x float>* - %wide.masked.load202.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %502, i32 4, <8 x i1> %497, <8 x float> undef), !tbaa !12, !alias.scope !178, !noalias !181 - %503 = extractelement <8 x i32> %496, i32 0 - %504 = add nsw i32 %mul.i.i.4, %503 - %505 = sext i32 %504 to i64 - %506 = getelementptr inbounds float, float* %12, i64 %505 - %507 = bitcast float* %506 to <8 x float>* - %wide.masked.load203.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %507, i32 4, <8 x i1> %497, <8 x float> undef), !tbaa !12, !alias.scope !181 - %508 = fsub <8 x float> %wide.masked.load203.3, %wide.masked.load202.3 - %509 = bitcast float* %506 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %508, <8 x float>* %509, i32 4, <8 x i1> %497), !tbaa !12, !alias.scope !181, !llvm.access.group !21 - %510 = getelementptr inbounds float, float* %9, i64 %500 - %511 = bitcast float* %510 to <8 x float>* - %wide.masked.load204.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %511, i32 4, <8 x i1> %497, <8 x float> undef), !tbaa !12, !alias.scope !183, !noalias !181 - %512 = fmul <8 x float> %broadcast.splat206, %wide.masked.load204.3 - %513 = fdiv <8 x float> %508, %512, !fpmath !26 - %514 = bitcast float* %506 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %513, <8 x float>* %514, i32 4, <8 x i1> %497), !tbaa !12, !alias.scope !181, !llvm.access.group !21 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.r_exit.i.i.us.4.1, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ], [ %828, %if.end.r_exit.i.i.us.4.1 ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %20, %conv.i.i.us.4 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.r_exit.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %sext.i.i.us.4 = shl i64 %add1.i.i.i.us.4, 32 - %idxprom.i.i.us.4 = ashr exact i64 %sext.i.i.us.4, 32 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.4 - %515 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %add.i.i.us.4 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom6.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx7.i.i.us.4 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.4 - %516 = load float, float* %arrayidx7.i.i.us.4, align 4, !tbaa !12 - %sub.i.i.us.4 = fsub float %516, %515 - store float %sub.i.i.us.4, float* %arrayidx7.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.4 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.4 - %517 = load float, float* %arrayidx10.i.i.us.4, align 4, !tbaa !12 - %mul11.i.i.us.4 = fmul float %25, %517 - %div.i.i.us.4 = fdiv float %sub.i.i.us.4, %mul11.i.i.us.4, !fpmath !26 - store float %div.i.i.us.4, float* %arrayidx7.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.4 - -if.end.r_exit.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %518 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %518, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %20, %conv.i.i.us.4.1 - br i1 %cmp4.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.r_exit.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.r_exit.i.i.us.4.1 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph191, %pregion_for_end.i.i.3 - %519 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.5 = or i32 %519, 5 - %cmp.i.i.5 = icmp sgt i32 %24, %conv2.i.i.5 - %mul.i.i.5 = mul nsw i32 %20, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck214, label %pregion_for_end.i.i.5 - -vector.scevcheck214: ; preds = %pregion_for_end.i.i.4 - %520 = mul i32 %20, %conv2.i.i.5 - %521 = trunc i64 %2 to i32 - %522 = shl i32 %521, 5 - %523 = add i32 %520, %522 - %524 = icmp sgt i32 %523, 2147483616 - br i1 %524, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.memcheck236 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.memcheck236, %vector.scevcheck214 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.memcheck236: ; preds = %vector.scevcheck214 - %525 = trunc i64 %2 to i32 - %526 = shl i32 %525, 5 - %527 = sext i32 %526 to i64 - %scevgep216 = getelementptr float, float* %6, i64 %527 - %528 = add nsw i64 %527, 32 - %scevgep218 = getelementptr float, float* %6, i64 %528 - %529 = mul i32 %20, %conv2.i.i.5 - %530 = add i32 %529, %526 - %531 = sext i32 %530 to i64 - %scevgep220 = getelementptr float, float* %12, i64 %531 - %532 = add nsw i64 %531, 32 - %scevgep222 = getelementptr float, float* %12, i64 %532 - %scevgep224 = getelementptr float, float* %9, i64 %527 - %scevgep226 = getelementptr float, float* %9, i64 %528 - %bound0228 = icmp ult float* %scevgep216, %scevgep222 - %bound1229 = icmp ult float* %scevgep220, %scevgep218 - %found.conflict230 = and i1 %bound0228, %bound1229 - %bound0231 = icmp ult float* %scevgep224, %scevgep222 - %bound1232 = icmp ult float* %scevgep220, %scevgep226 - %found.conflict233 = and i1 %bound0231, %bound1232 - %conflict.rdx234 = or i1 %found.conflict230, %found.conflict233 - br i1 %conflict.rdx234, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph237 - -vector.ph237: ; preds = %vector.memcheck236 - %broadcast.splatinsert244 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat245 = shufflevector <8 x i64> %broadcast.splatinsert244, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert246 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat247 = shufflevector <8 x i32> %broadcast.splatinsert246, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert251 = insertelement <8 x float> undef, float %25, i32 0 - %broadcast.splat252 = shufflevector <8 x float> %broadcast.splatinsert251, <8 x float> undef, <8 x i32> zeroinitializer - %533 = or <8 x i64> %broadcast.splat245, - %534 = trunc <8 x i64> %533 to <8 x i32> - %535 = icmp sgt <8 x i32> %broadcast.splat247, %534 - %536 = extractelement <8 x i64> %533, i32 0 - %537 = shl i64 %536, 32 - %538 = ashr exact i64 %537, 32 - %539 = getelementptr inbounds float, float* %6, i64 %538 - %540 = bitcast float* %539 to <8 x float>* - %wide.masked.load248 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %540, i32 4, <8 x i1> %535, <8 x float> undef), !tbaa !12, !alias.scope !185, !noalias !188 - %541 = extractelement <8 x i32> %534, i32 0 - %542 = add nsw i32 %mul.i.i.5, %541 - %543 = sext i32 %542 to i64 - %544 = getelementptr inbounds float, float* %12, i64 %543 - %545 = bitcast float* %544 to <8 x float>* - %wide.masked.load249 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %545, i32 4, <8 x i1> %535, <8 x float> undef), !tbaa !12, !alias.scope !188 - %546 = fsub <8 x float> %wide.masked.load249, %wide.masked.load248 - %547 = bitcast float* %544 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %546, <8 x float>* %547, i32 4, <8 x i1> %535), !tbaa !12, !alias.scope !188, !llvm.access.group !21 - %548 = getelementptr inbounds float, float* %9, i64 %538 - %549 = bitcast float* %548 to <8 x float>* - %wide.masked.load250 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %549, i32 4, <8 x i1> %535, <8 x float> undef), !tbaa !12, !alias.scope !190, !noalias !188 - %550 = fmul <8 x float> %broadcast.splat252, %wide.masked.load250 - %551 = fdiv <8 x float> %546, %550, !fpmath !26 - %552 = bitcast float* %544 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %551, <8 x float>* %552, i32 4, <8 x i1> %535), !tbaa !12, !alias.scope !188, !llvm.access.group !21 - %553 = or <8 x i64> %broadcast.splat245, - %554 = trunc <8 x i64> %553 to <8 x i32> - %555 = icmp sgt <8 x i32> %broadcast.splat247, %554 - %556 = extractelement <8 x i64> %553, i32 0 - %557 = shl i64 %556, 32 - %558 = ashr exact i64 %557, 32 - %559 = getelementptr inbounds float, float* %6, i64 %558 - %560 = bitcast float* %559 to <8 x float>* - %wide.masked.load248.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %560, i32 4, <8 x i1> %555, <8 x float> undef), !tbaa !12, !alias.scope !185, !noalias !188 - %561 = extractelement <8 x i32> %554, i32 0 - %562 = add nsw i32 %mul.i.i.5, %561 - %563 = sext i32 %562 to i64 - %564 = getelementptr inbounds float, float* %12, i64 %563 - %565 = bitcast float* %564 to <8 x float>* - %wide.masked.load249.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %565, i32 4, <8 x i1> %555, <8 x float> undef), !tbaa !12, !alias.scope !188 - %566 = fsub <8 x float> %wide.masked.load249.1, %wide.masked.load248.1 - %567 = bitcast float* %564 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %566, <8 x float>* %567, i32 4, <8 x i1> %555), !tbaa !12, !alias.scope !188, !llvm.access.group !21 - %568 = getelementptr inbounds float, float* %9, i64 %558 - %569 = bitcast float* %568 to <8 x float>* - %wide.masked.load250.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %569, i32 4, <8 x i1> %555, <8 x float> undef), !tbaa !12, !alias.scope !190, !noalias !188 - %570 = fmul <8 x float> %broadcast.splat252, %wide.masked.load250.1 - %571 = fdiv <8 x float> %566, %570, !fpmath !26 - %572 = bitcast float* %564 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %571, <8 x float>* %572, i32 4, <8 x i1> %555), !tbaa !12, !alias.scope !188, !llvm.access.group !21 - %573 = or <8 x i64> %broadcast.splat245, - %574 = trunc <8 x i64> %573 to <8 x i32> - %575 = icmp sgt <8 x i32> %broadcast.splat247, %574 - %576 = extractelement <8 x i64> %573, i32 0 - %577 = shl i64 %576, 32 - %578 = ashr exact i64 %577, 32 - %579 = getelementptr inbounds float, float* %6, i64 %578 - %580 = bitcast float* %579 to <8 x float>* - %wide.masked.load248.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %580, i32 4, <8 x i1> %575, <8 x float> undef), !tbaa !12, !alias.scope !185, !noalias !188 - %581 = extractelement <8 x i32> %574, i32 0 - %582 = add nsw i32 %mul.i.i.5, %581 - %583 = sext i32 %582 to i64 - %584 = getelementptr inbounds float, float* %12, i64 %583 - %585 = bitcast float* %584 to <8 x float>* - %wide.masked.load249.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %585, i32 4, <8 x i1> %575, <8 x float> undef), !tbaa !12, !alias.scope !188 - %586 = fsub <8 x float> %wide.masked.load249.2, %wide.masked.load248.2 - %587 = bitcast float* %584 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %586, <8 x float>* %587, i32 4, <8 x i1> %575), !tbaa !12, !alias.scope !188, !llvm.access.group !21 - %588 = getelementptr inbounds float, float* %9, i64 %578 - %589 = bitcast float* %588 to <8 x float>* - %wide.masked.load250.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %589, i32 4, <8 x i1> %575, <8 x float> undef), !tbaa !12, !alias.scope !190, !noalias !188 - %590 = fmul <8 x float> %broadcast.splat252, %wide.masked.load250.2 - %591 = fdiv <8 x float> %586, %590, !fpmath !26 - %592 = bitcast float* %584 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %591, <8 x float>* %592, i32 4, <8 x i1> %575), !tbaa !12, !alias.scope !188, !llvm.access.group !21 - %593 = or <8 x i64> %broadcast.splat245, - %594 = trunc <8 x i64> %593 to <8 x i32> - %595 = icmp sgt <8 x i32> %broadcast.splat247, %594 - %596 = extractelement <8 x i64> %593, i32 0 - %597 = shl i64 %596, 32 - %598 = ashr exact i64 %597, 32 - %599 = getelementptr inbounds float, float* %6, i64 %598 - %600 = bitcast float* %599 to <8 x float>* - %wide.masked.load248.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %600, i32 4, <8 x i1> %595, <8 x float> undef), !tbaa !12, !alias.scope !185, !noalias !188 - %601 = extractelement <8 x i32> %594, i32 0 - %602 = add nsw i32 %mul.i.i.5, %601 - %603 = sext i32 %602 to i64 - %604 = getelementptr inbounds float, float* %12, i64 %603 - %605 = bitcast float* %604 to <8 x float>* - %wide.masked.load249.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %605, i32 4, <8 x i1> %595, <8 x float> undef), !tbaa !12, !alias.scope !188 - %606 = fsub <8 x float> %wide.masked.load249.3, %wide.masked.load248.3 - %607 = bitcast float* %604 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %606, <8 x float>* %607, i32 4, <8 x i1> %595), !tbaa !12, !alias.scope !188, !llvm.access.group !21 - %608 = getelementptr inbounds float, float* %9, i64 %598 - %609 = bitcast float* %608 to <8 x float>* - %wide.masked.load250.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %609, i32 4, <8 x i1> %595, <8 x float> undef), !tbaa !12, !alias.scope !190, !noalias !188 - %610 = fmul <8 x float> %broadcast.splat252, %wide.masked.load250.3 - %611 = fdiv <8 x float> %606, %610, !fpmath !26 - %612 = bitcast float* %604 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %611, <8 x float>* %612, i32 4, <8 x i1> %595), !tbaa !12, !alias.scope !188, !llvm.access.group !21 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.r_exit.i.i.us.5.1, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ], [ %824, %if.end.r_exit.i.i.us.5.1 ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %20, %conv.i.i.us.5 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.r_exit.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %sext.i.i.us.5 = shl i64 %add1.i.i.i.us.5, 32 - %idxprom.i.i.us.5 = ashr exact i64 %sext.i.i.us.5, 32 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.5 - %613 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %add.i.i.us.5 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom6.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx7.i.i.us.5 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.5 - %614 = load float, float* %arrayidx7.i.i.us.5, align 4, !tbaa !12 - %sub.i.i.us.5 = fsub float %614, %613 - store float %sub.i.i.us.5, float* %arrayidx7.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.5 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.5 - %615 = load float, float* %arrayidx10.i.i.us.5, align 4, !tbaa !12 - %mul11.i.i.us.5 = fmul float %25, %615 - %div.i.i.us.5 = fdiv float %sub.i.i.us.5, %mul11.i.i.us.5, !fpmath !26 - store float %div.i.i.us.5, float* %arrayidx7.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.5 - -if.end.r_exit.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %616 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %616, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %20, %conv.i.i.us.5.1 - br i1 %cmp4.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.r_exit.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.r_exit.i.i.us.5.1 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph237, %pregion_for_end.i.i.4 - %617 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.6 = or i32 %617, 6 - %cmp.i.i.6 = icmp sgt i32 %24, %conv2.i.i.6 - %mul.i.i.6 = mul nsw i32 %20, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck260, label %pregion_for_end.i.i.6 - -vector.scevcheck260: ; preds = %pregion_for_end.i.i.5 - %618 = mul i32 %20, %conv2.i.i.6 - %619 = trunc i64 %2 to i32 - %620 = shl i32 %619, 5 - %621 = add i32 %618, %620 - %622 = icmp sgt i32 %621, 2147483616 - br i1 %622, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.memcheck282 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.memcheck282, %vector.scevcheck260 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.memcheck282: ; preds = %vector.scevcheck260 - %623 = trunc i64 %2 to i32 - %624 = shl i32 %623, 5 - %625 = sext i32 %624 to i64 - %scevgep262 = getelementptr float, float* %6, i64 %625 - %626 = add nsw i64 %625, 32 - %scevgep264 = getelementptr float, float* %6, i64 %626 - %627 = mul i32 %20, %conv2.i.i.6 - %628 = add i32 %627, %624 - %629 = sext i32 %628 to i64 - %scevgep266 = getelementptr float, float* %12, i64 %629 - %630 = add nsw i64 %629, 32 - %scevgep268 = getelementptr float, float* %12, i64 %630 - %scevgep270 = getelementptr float, float* %9, i64 %625 - %scevgep272 = getelementptr float, float* %9, i64 %626 - %bound0274 = icmp ult float* %scevgep262, %scevgep268 - %bound1275 = icmp ult float* %scevgep266, %scevgep264 - %found.conflict276 = and i1 %bound0274, %bound1275 - %bound0277 = icmp ult float* %scevgep270, %scevgep268 - %bound1278 = icmp ult float* %scevgep266, %scevgep272 - %found.conflict279 = and i1 %bound0277, %bound1278 - %conflict.rdx280 = or i1 %found.conflict276, %found.conflict279 - br i1 %conflict.rdx280, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph283 - -vector.ph283: ; preds = %vector.memcheck282 - %broadcast.splatinsert290 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat291 = shufflevector <8 x i64> %broadcast.splatinsert290, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert292 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat293 = shufflevector <8 x i32> %broadcast.splatinsert292, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert297 = insertelement <8 x float> undef, float %25, i32 0 - %broadcast.splat298 = shufflevector <8 x float> %broadcast.splatinsert297, <8 x float> undef, <8 x i32> zeroinitializer - %631 = or <8 x i64> %broadcast.splat291, - %632 = trunc <8 x i64> %631 to <8 x i32> - %633 = icmp sgt <8 x i32> %broadcast.splat293, %632 - %634 = extractelement <8 x i64> %631, i32 0 - %635 = shl i64 %634, 32 - %636 = ashr exact i64 %635, 32 - %637 = getelementptr inbounds float, float* %6, i64 %636 - %638 = bitcast float* %637 to <8 x float>* - %wide.masked.load294 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %638, i32 4, <8 x i1> %633, <8 x float> undef), !tbaa !12, !alias.scope !192, !noalias !195 - %639 = extractelement <8 x i32> %632, i32 0 - %640 = add nsw i32 %mul.i.i.6, %639 - %641 = sext i32 %640 to i64 - %642 = getelementptr inbounds float, float* %12, i64 %641 - %643 = bitcast float* %642 to <8 x float>* - %wide.masked.load295 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %643, i32 4, <8 x i1> %633, <8 x float> undef), !tbaa !12, !alias.scope !195 - %644 = fsub <8 x float> %wide.masked.load295, %wide.masked.load294 - %645 = bitcast float* %642 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %644, <8 x float>* %645, i32 4, <8 x i1> %633), !tbaa !12, !alias.scope !195, !llvm.access.group !21 - %646 = getelementptr inbounds float, float* %9, i64 %636 - %647 = bitcast float* %646 to <8 x float>* - %wide.masked.load296 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %647, i32 4, <8 x i1> %633, <8 x float> undef), !tbaa !12, !alias.scope !197, !noalias !195 - %648 = fmul <8 x float> %broadcast.splat298, %wide.masked.load296 - %649 = fdiv <8 x float> %644, %648, !fpmath !26 - %650 = bitcast float* %642 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %649, <8 x float>* %650, i32 4, <8 x i1> %633), !tbaa !12, !alias.scope !195, !llvm.access.group !21 - %651 = or <8 x i64> %broadcast.splat291, - %652 = trunc <8 x i64> %651 to <8 x i32> - %653 = icmp sgt <8 x i32> %broadcast.splat293, %652 - %654 = extractelement <8 x i64> %651, i32 0 - %655 = shl i64 %654, 32 - %656 = ashr exact i64 %655, 32 - %657 = getelementptr inbounds float, float* %6, i64 %656 - %658 = bitcast float* %657 to <8 x float>* - %wide.masked.load294.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %658, i32 4, <8 x i1> %653, <8 x float> undef), !tbaa !12, !alias.scope !192, !noalias !195 - %659 = extractelement <8 x i32> %652, i32 0 - %660 = add nsw i32 %mul.i.i.6, %659 - %661 = sext i32 %660 to i64 - %662 = getelementptr inbounds float, float* %12, i64 %661 - %663 = bitcast float* %662 to <8 x float>* - %wide.masked.load295.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %663, i32 4, <8 x i1> %653, <8 x float> undef), !tbaa !12, !alias.scope !195 - %664 = fsub <8 x float> %wide.masked.load295.1, %wide.masked.load294.1 - %665 = bitcast float* %662 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %664, <8 x float>* %665, i32 4, <8 x i1> %653), !tbaa !12, !alias.scope !195, !llvm.access.group !21 - %666 = getelementptr inbounds float, float* %9, i64 %656 - %667 = bitcast float* %666 to <8 x float>* - %wide.masked.load296.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %667, i32 4, <8 x i1> %653, <8 x float> undef), !tbaa !12, !alias.scope !197, !noalias !195 - %668 = fmul <8 x float> %broadcast.splat298, %wide.masked.load296.1 - %669 = fdiv <8 x float> %664, %668, !fpmath !26 - %670 = bitcast float* %662 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %669, <8 x float>* %670, i32 4, <8 x i1> %653), !tbaa !12, !alias.scope !195, !llvm.access.group !21 - %671 = or <8 x i64> %broadcast.splat291, - %672 = trunc <8 x i64> %671 to <8 x i32> - %673 = icmp sgt <8 x i32> %broadcast.splat293, %672 - %674 = extractelement <8 x i64> %671, i32 0 - %675 = shl i64 %674, 32 - %676 = ashr exact i64 %675, 32 - %677 = getelementptr inbounds float, float* %6, i64 %676 - %678 = bitcast float* %677 to <8 x float>* - %wide.masked.load294.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %678, i32 4, <8 x i1> %673, <8 x float> undef), !tbaa !12, !alias.scope !192, !noalias !195 - %679 = extractelement <8 x i32> %672, i32 0 - %680 = add nsw i32 %mul.i.i.6, %679 - %681 = sext i32 %680 to i64 - %682 = getelementptr inbounds float, float* %12, i64 %681 - %683 = bitcast float* %682 to <8 x float>* - %wide.masked.load295.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %683, i32 4, <8 x i1> %673, <8 x float> undef), !tbaa !12, !alias.scope !195 - %684 = fsub <8 x float> %wide.masked.load295.2, %wide.masked.load294.2 - %685 = bitcast float* %682 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %684, <8 x float>* %685, i32 4, <8 x i1> %673), !tbaa !12, !alias.scope !195, !llvm.access.group !21 - %686 = getelementptr inbounds float, float* %9, i64 %676 - %687 = bitcast float* %686 to <8 x float>* - %wide.masked.load296.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %687, i32 4, <8 x i1> %673, <8 x float> undef), !tbaa !12, !alias.scope !197, !noalias !195 - %688 = fmul <8 x float> %broadcast.splat298, %wide.masked.load296.2 - %689 = fdiv <8 x float> %684, %688, !fpmath !26 - %690 = bitcast float* %682 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %689, <8 x float>* %690, i32 4, <8 x i1> %673), !tbaa !12, !alias.scope !195, !llvm.access.group !21 - %691 = or <8 x i64> %broadcast.splat291, - %692 = trunc <8 x i64> %691 to <8 x i32> - %693 = icmp sgt <8 x i32> %broadcast.splat293, %692 - %694 = extractelement <8 x i64> %691, i32 0 - %695 = shl i64 %694, 32 - %696 = ashr exact i64 %695, 32 - %697 = getelementptr inbounds float, float* %6, i64 %696 - %698 = bitcast float* %697 to <8 x float>* - %wide.masked.load294.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %698, i32 4, <8 x i1> %693, <8 x float> undef), !tbaa !12, !alias.scope !192, !noalias !195 - %699 = extractelement <8 x i32> %692, i32 0 - %700 = add nsw i32 %mul.i.i.6, %699 - %701 = sext i32 %700 to i64 - %702 = getelementptr inbounds float, float* %12, i64 %701 - %703 = bitcast float* %702 to <8 x float>* - %wide.masked.load295.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %703, i32 4, <8 x i1> %693, <8 x float> undef), !tbaa !12, !alias.scope !195 - %704 = fsub <8 x float> %wide.masked.load295.3, %wide.masked.load294.3 - %705 = bitcast float* %702 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %704, <8 x float>* %705, i32 4, <8 x i1> %693), !tbaa !12, !alias.scope !195, !llvm.access.group !21 - %706 = getelementptr inbounds float, float* %9, i64 %696 - %707 = bitcast float* %706 to <8 x float>* - %wide.masked.load296.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %707, i32 4, <8 x i1> %693, <8 x float> undef), !tbaa !12, !alias.scope !197, !noalias !195 - %708 = fmul <8 x float> %broadcast.splat298, %wide.masked.load296.3 - %709 = fdiv <8 x float> %704, %708, !fpmath !26 - %710 = bitcast float* %702 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %709, <8 x float>* %710, i32 4, <8 x i1> %693), !tbaa !12, !alias.scope !195, !llvm.access.group !21 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.r_exit.i.i.us.6.1, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ], [ %820, %if.end.r_exit.i.i.us.6.1 ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %20, %conv.i.i.us.6 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.r_exit.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %sext.i.i.us.6 = shl i64 %add1.i.i.i.us.6, 32 - %idxprom.i.i.us.6 = ashr exact i64 %sext.i.i.us.6, 32 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.6 - %711 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %add.i.i.us.6 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom6.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx7.i.i.us.6 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.6 - %712 = load float, float* %arrayidx7.i.i.us.6, align 4, !tbaa !12 - %sub.i.i.us.6 = fsub float %712, %711 - store float %sub.i.i.us.6, float* %arrayidx7.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.6 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.6 - %713 = load float, float* %arrayidx10.i.i.us.6, align 4, !tbaa !12 - %mul11.i.i.us.6 = fmul float %25, %713 - %div.i.i.us.6 = fdiv float %sub.i.i.us.6, %mul11.i.i.us.6, !fpmath !26 - store float %div.i.i.us.6, float* %arrayidx7.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.6 - -if.end.r_exit.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %714 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %714, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %20, %conv.i.i.us.6.1 - br i1 %cmp4.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.r_exit.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.r_exit.i.i.us.6.1 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph283, %pregion_for_end.i.i.5 - %715 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.7 = or i32 %715, 7 - %cmp.i.i.7 = icmp sgt i32 %24, %conv2.i.i.7 - %mul.i.i.7 = mul nsw i32 %20, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck306, label %pregion_for_end.i.i.7 - -vector.scevcheck306: ; preds = %pregion_for_end.i.i.6 - %716 = mul i32 %20, %conv2.i.i.7 - %717 = trunc i64 %2 to i32 - %718 = shl i32 %717, 5 - %719 = add i32 %716, %718 - %720 = icmp sgt i32 %719, 2147483616 - br i1 %720, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.memcheck328 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.memcheck328, %vector.scevcheck306 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.memcheck328: ; preds = %vector.scevcheck306 - %721 = trunc i64 %2 to i32 - %722 = shl i32 %721, 5 - %723 = sext i32 %722 to i64 - %scevgep308 = getelementptr float, float* %6, i64 %723 - %724 = add nsw i64 %723, 32 - %scevgep310 = getelementptr float, float* %6, i64 %724 - %725 = mul i32 %20, %conv2.i.i.7 - %726 = add i32 %725, %722 - %727 = sext i32 %726 to i64 - %scevgep312 = getelementptr float, float* %12, i64 %727 - %728 = add nsw i64 %727, 32 - %scevgep314 = getelementptr float, float* %12, i64 %728 - %scevgep316 = getelementptr float, float* %9, i64 %723 - %scevgep318 = getelementptr float, float* %9, i64 %724 - %bound0320 = icmp ult float* %scevgep308, %scevgep314 - %bound1321 = icmp ult float* %scevgep312, %scevgep310 - %found.conflict322 = and i1 %bound0320, %bound1321 - %bound0323 = icmp ult float* %scevgep316, %scevgep314 - %bound1324 = icmp ult float* %scevgep312, %scevgep318 - %found.conflict325 = and i1 %bound0323, %bound1324 - %conflict.rdx326 = or i1 %found.conflict322, %found.conflict325 - br i1 %conflict.rdx326, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph329 - -vector.ph329: ; preds = %vector.memcheck328 - %broadcast.splatinsert336 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat337 = shufflevector <8 x i64> %broadcast.splatinsert336, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert338 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat339 = shufflevector <8 x i32> %broadcast.splatinsert338, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert343 = insertelement <8 x float> undef, float %25, i32 0 - %broadcast.splat344 = shufflevector <8 x float> %broadcast.splatinsert343, <8 x float> undef, <8 x i32> zeroinitializer - %729 = or <8 x i64> %broadcast.splat337, - %730 = trunc <8 x i64> %729 to <8 x i32> - %731 = icmp sgt <8 x i32> %broadcast.splat339, %730 - %732 = extractelement <8 x i64> %729, i32 0 - %733 = shl i64 %732, 32 - %734 = ashr exact i64 %733, 32 - %735 = getelementptr inbounds float, float* %6, i64 %734 - %736 = bitcast float* %735 to <8 x float>* - %wide.masked.load340 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %736, i32 4, <8 x i1> %731, <8 x float> undef), !tbaa !12, !alias.scope !199, !noalias !202 - %737 = extractelement <8 x i32> %730, i32 0 - %738 = add nsw i32 %mul.i.i.7, %737 - %739 = sext i32 %738 to i64 - %740 = getelementptr inbounds float, float* %12, i64 %739 - %741 = bitcast float* %740 to <8 x float>* - %wide.masked.load341 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %741, i32 4, <8 x i1> %731, <8 x float> undef), !tbaa !12, !alias.scope !202 - %742 = fsub <8 x float> %wide.masked.load341, %wide.masked.load340 - %743 = bitcast float* %740 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %742, <8 x float>* %743, i32 4, <8 x i1> %731), !tbaa !12, !alias.scope !202, !llvm.access.group !21 - %744 = getelementptr inbounds float, float* %9, i64 %734 - %745 = bitcast float* %744 to <8 x float>* - %wide.masked.load342 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %745, i32 4, <8 x i1> %731, <8 x float> undef), !tbaa !12, !alias.scope !204, !noalias !202 - %746 = fmul <8 x float> %broadcast.splat344, %wide.masked.load342 - %747 = fdiv <8 x float> %742, %746, !fpmath !26 - %748 = bitcast float* %740 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %747, <8 x float>* %748, i32 4, <8 x i1> %731), !tbaa !12, !alias.scope !202, !llvm.access.group !21 - %749 = or <8 x i64> %broadcast.splat337, - %750 = trunc <8 x i64> %749 to <8 x i32> - %751 = icmp sgt <8 x i32> %broadcast.splat339, %750 - %752 = extractelement <8 x i64> %749, i32 0 - %753 = shl i64 %752, 32 - %754 = ashr exact i64 %753, 32 - %755 = getelementptr inbounds float, float* %6, i64 %754 - %756 = bitcast float* %755 to <8 x float>* - %wide.masked.load340.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %756, i32 4, <8 x i1> %751, <8 x float> undef), !tbaa !12, !alias.scope !199, !noalias !202 - %757 = extractelement <8 x i32> %750, i32 0 - %758 = add nsw i32 %mul.i.i.7, %757 - %759 = sext i32 %758 to i64 - %760 = getelementptr inbounds float, float* %12, i64 %759 - %761 = bitcast float* %760 to <8 x float>* - %wide.masked.load341.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %761, i32 4, <8 x i1> %751, <8 x float> undef), !tbaa !12, !alias.scope !202 - %762 = fsub <8 x float> %wide.masked.load341.1, %wide.masked.load340.1 - %763 = bitcast float* %760 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %762, <8 x float>* %763, i32 4, <8 x i1> %751), !tbaa !12, !alias.scope !202, !llvm.access.group !21 - %764 = getelementptr inbounds float, float* %9, i64 %754 - %765 = bitcast float* %764 to <8 x float>* - %wide.masked.load342.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %765, i32 4, <8 x i1> %751, <8 x float> undef), !tbaa !12, !alias.scope !204, !noalias !202 - %766 = fmul <8 x float> %broadcast.splat344, %wide.masked.load342.1 - %767 = fdiv <8 x float> %762, %766, !fpmath !26 - %768 = bitcast float* %760 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %767, <8 x float>* %768, i32 4, <8 x i1> %751), !tbaa !12, !alias.scope !202, !llvm.access.group !21 - %769 = or <8 x i64> %broadcast.splat337, - %770 = trunc <8 x i64> %769 to <8 x i32> - %771 = icmp sgt <8 x i32> %broadcast.splat339, %770 - %772 = extractelement <8 x i64> %769, i32 0 - %773 = shl i64 %772, 32 - %774 = ashr exact i64 %773, 32 - %775 = getelementptr inbounds float, float* %6, i64 %774 - %776 = bitcast float* %775 to <8 x float>* - %wide.masked.load340.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %776, i32 4, <8 x i1> %771, <8 x float> undef), !tbaa !12, !alias.scope !199, !noalias !202 - %777 = extractelement <8 x i32> %770, i32 0 - %778 = add nsw i32 %mul.i.i.7, %777 - %779 = sext i32 %778 to i64 - %780 = getelementptr inbounds float, float* %12, i64 %779 - %781 = bitcast float* %780 to <8 x float>* - %wide.masked.load341.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %781, i32 4, <8 x i1> %771, <8 x float> undef), !tbaa !12, !alias.scope !202 - %782 = fsub <8 x float> %wide.masked.load341.2, %wide.masked.load340.2 - %783 = bitcast float* %780 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %782, <8 x float>* %783, i32 4, <8 x i1> %771), !tbaa !12, !alias.scope !202, !llvm.access.group !21 - %784 = getelementptr inbounds float, float* %9, i64 %774 - %785 = bitcast float* %784 to <8 x float>* - %wide.masked.load342.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %785, i32 4, <8 x i1> %771, <8 x float> undef), !tbaa !12, !alias.scope !204, !noalias !202 - %786 = fmul <8 x float> %broadcast.splat344, %wide.masked.load342.2 - %787 = fdiv <8 x float> %782, %786, !fpmath !26 - %788 = bitcast float* %780 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %787, <8 x float>* %788, i32 4, <8 x i1> %771), !tbaa !12, !alias.scope !202, !llvm.access.group !21 - %789 = or <8 x i64> %broadcast.splat337, - %790 = trunc <8 x i64> %789 to <8 x i32> - %791 = icmp sgt <8 x i32> %broadcast.splat339, %790 - %792 = extractelement <8 x i64> %789, i32 0 - %793 = shl i64 %792, 32 - %794 = ashr exact i64 %793, 32 - %795 = getelementptr inbounds float, float* %6, i64 %794 - %796 = bitcast float* %795 to <8 x float>* - %wide.masked.load340.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %796, i32 4, <8 x i1> %791, <8 x float> undef), !tbaa !12, !alias.scope !199, !noalias !202 - %797 = extractelement <8 x i32> %790, i32 0 - %798 = add nsw i32 %mul.i.i.7, %797 - %799 = sext i32 %798 to i64 - %800 = getelementptr inbounds float, float* %12, i64 %799 - %801 = bitcast float* %800 to <8 x float>* - %wide.masked.load341.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %801, i32 4, <8 x i1> %791, <8 x float> undef), !tbaa !12, !alias.scope !202 - %802 = fsub <8 x float> %wide.masked.load341.3, %wide.masked.load340.3 - %803 = bitcast float* %800 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %802, <8 x float>* %803, i32 4, <8 x i1> %791), !tbaa !12, !alias.scope !202, !llvm.access.group !21 - %804 = getelementptr inbounds float, float* %9, i64 %794 - %805 = bitcast float* %804 to <8 x float>* - %wide.masked.load342.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %805, i32 4, <8 x i1> %791, <8 x float> undef), !tbaa !12, !alias.scope !204, !noalias !202 - %806 = fmul <8 x float> %broadcast.splat344, %wide.masked.load342.3 - %807 = fdiv <8 x float> %802, %806, !fpmath !26 - %808 = bitcast float* %800 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %807, <8 x float>* %808, i32 4, <8 x i1> %791), !tbaa !12, !alias.scope !202, !llvm.access.group !21 - br label %pregion_for_end.i.i.7 - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.r_exit.i.i.us.7.1, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ], [ %816, %if.end.r_exit.i.i.us.7.1 ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %20, %conv.i.i.us.7 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.r_exit.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %sext.i.i.us.7 = shl i64 %add1.i.i.i.us.7, 32 - %idxprom.i.i.us.7 = ashr exact i64 %sext.i.i.us.7, 32 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.7 - %809 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %add.i.i.us.7 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom6.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx7.i.i.us.7 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.7 - %810 = load float, float* %arrayidx7.i.i.us.7, align 4, !tbaa !12 - %sub.i.i.us.7 = fsub float %810, %809 - store float %sub.i.i.us.7, float* %arrayidx7.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.7 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.7 - %811 = load float, float* %arrayidx10.i.i.us.7, align 4, !tbaa !12 - %mul11.i.i.us.7 = fmul float %25, %811 - %div.i.i.us.7 = fdiv float %sub.i.i.us.7, %mul11.i.i.us.7, !fpmath !26 - store float %div.i.i.us.7, float* %arrayidx7.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.7 - -if.end.r_exit.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %812 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %812, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %20, %conv.i.i.us.7.1 - br i1 %cmp4.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.r_exit.i.i.us.7.1 - -pregion_for_end.i.i.7.loopexit: ; preds = %if.end.r_exit.i.i.us.7.1 - br label %pregion_for_end.i.i.7 - -pregion_for_end.i.i.7: ; preds = %pregion_for_end.i.i.7.loopexit, %vector.ph329, %pregion_for_end.i.i.6 - ret void - -if.then.i.i.us.7.1: ; preds = %if.end.r_exit.i.i.us.7 - %sext.i.i.us.7.1 = shl i64 %add1.i.i.i.us.7.1, 32 - %idxprom.i.i.us.7.1 = ashr exact i64 %sext.i.i.us.7.1, 32 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.7.1 - %813 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %add.i.i.us.7.1 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom6.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx7.i.i.us.7.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.7.1 - %814 = load float, float* %arrayidx7.i.i.us.7.1, align 4, !tbaa !12 - %sub.i.i.us.7.1 = fsub float %814, %813 - store float %sub.i.i.us.7.1, float* %arrayidx7.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.7.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.7.1 - %815 = load float, float* %arrayidx10.i.i.us.7.1, align 4, !tbaa !12 - %mul11.i.i.us.7.1 = fmul float %25, %815 - %div.i.i.us.7.1 = fdiv float %sub.i.i.us.7.1, %mul11.i.i.us.7.1, !fpmath !26 - store float %div.i.i.us.7.1, float* %arrayidx7.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.7.1 - -if.end.r_exit.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.r_exit.i.i.us.7 - %816 = add nuw nsw i64 %_local_id_x.i.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %816, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.i.7.loopexit, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !206 - -if.then.i.i.us.6.1: ; preds = %if.end.r_exit.i.i.us.6 - %sext.i.i.us.6.1 = shl i64 %add1.i.i.i.us.6.1, 32 - %idxprom.i.i.us.6.1 = ashr exact i64 %sext.i.i.us.6.1, 32 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.6.1 - %817 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %add.i.i.us.6.1 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom6.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx7.i.i.us.6.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.6.1 - %818 = load float, float* %arrayidx7.i.i.us.6.1, align 4, !tbaa !12 - %sub.i.i.us.6.1 = fsub float %818, %817 - store float %sub.i.i.us.6.1, float* %arrayidx7.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.6.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.6.1 - %819 = load float, float* %arrayidx10.i.i.us.6.1, align 4, !tbaa !12 - %mul11.i.i.us.6.1 = fmul float %25, %819 - %div.i.i.us.6.1 = fdiv float %sub.i.i.us.6.1, %mul11.i.i.us.6.1, !fpmath !26 - store float %div.i.i.us.6.1, float* %arrayidx7.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.6.1 - -if.end.r_exit.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.r_exit.i.i.us.6 - %820 = add nuw nsw i64 %_local_id_x.i.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %820, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !207 - -if.then.i.i.us.5.1: ; preds = %if.end.r_exit.i.i.us.5 - %sext.i.i.us.5.1 = shl i64 %add1.i.i.i.us.5.1, 32 - %idxprom.i.i.us.5.1 = ashr exact i64 %sext.i.i.us.5.1, 32 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.5.1 - %821 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %add.i.i.us.5.1 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom6.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx7.i.i.us.5.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.5.1 - %822 = load float, float* %arrayidx7.i.i.us.5.1, align 4, !tbaa !12 - %sub.i.i.us.5.1 = fsub float %822, %821 - store float %sub.i.i.us.5.1, float* %arrayidx7.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.5.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.5.1 - %823 = load float, float* %arrayidx10.i.i.us.5.1, align 4, !tbaa !12 - %mul11.i.i.us.5.1 = fmul float %25, %823 - %div.i.i.us.5.1 = fdiv float %sub.i.i.us.5.1, %mul11.i.i.us.5.1, !fpmath !26 - store float %div.i.i.us.5.1, float* %arrayidx7.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.5.1 - -if.end.r_exit.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.r_exit.i.i.us.5 - %824 = add nuw nsw i64 %_local_id_x.i.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %824, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !208 - -if.then.i.i.us.4.1: ; preds = %if.end.r_exit.i.i.us.4 - %sext.i.i.us.4.1 = shl i64 %add1.i.i.i.us.4.1, 32 - %idxprom.i.i.us.4.1 = ashr exact i64 %sext.i.i.us.4.1, 32 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.4.1 - %825 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %add.i.i.us.4.1 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom6.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx7.i.i.us.4.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.4.1 - %826 = load float, float* %arrayidx7.i.i.us.4.1, align 4, !tbaa !12 - %sub.i.i.us.4.1 = fsub float %826, %825 - store float %sub.i.i.us.4.1, float* %arrayidx7.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.4.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.4.1 - %827 = load float, float* %arrayidx10.i.i.us.4.1, align 4, !tbaa !12 - %mul11.i.i.us.4.1 = fmul float %25, %827 - %div.i.i.us.4.1 = fdiv float %sub.i.i.us.4.1, %mul11.i.i.us.4.1, !fpmath !26 - store float %div.i.i.us.4.1, float* %arrayidx7.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.4.1 - -if.end.r_exit.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.r_exit.i.i.us.4 - %828 = add nuw nsw i64 %_local_id_x.i.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %828, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !209 - -if.then.i.i.us.3.1: ; preds = %if.end.r_exit.i.i.us.3 - %sext.i.i.us.3.1 = shl i64 %add1.i.i.i.us.3.1, 32 - %idxprom.i.i.us.3.1 = ashr exact i64 %sext.i.i.us.3.1, 32 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.3.1 - %829 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %add.i.i.us.3.1 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom6.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx7.i.i.us.3.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.3.1 - %830 = load float, float* %arrayidx7.i.i.us.3.1, align 4, !tbaa !12 - %sub.i.i.us.3.1 = fsub float %830, %829 - store float %sub.i.i.us.3.1, float* %arrayidx7.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.3.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.3.1 - %831 = load float, float* %arrayidx10.i.i.us.3.1, align 4, !tbaa !12 - %mul11.i.i.us.3.1 = fmul float %25, %831 - %div.i.i.us.3.1 = fdiv float %sub.i.i.us.3.1, %mul11.i.i.us.3.1, !fpmath !26 - store float %div.i.i.us.3.1, float* %arrayidx7.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.3.1 - -if.end.r_exit.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.r_exit.i.i.us.3 - %832 = add nuw nsw i64 %_local_id_x.i.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %832, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !210 - -if.then.i.i.us.2.1: ; preds = %if.end.r_exit.i.i.us.2 - %sext.i.i.us.2.1 = shl i64 %add1.i.i.i.us.2.1, 32 - %idxprom.i.i.us.2.1 = ashr exact i64 %sext.i.i.us.2.1, 32 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.2.1 - %833 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %add.i.i.us.2.1 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom6.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx7.i.i.us.2.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.2.1 - %834 = load float, float* %arrayidx7.i.i.us.2.1, align 4, !tbaa !12 - %sub.i.i.us.2.1 = fsub float %834, %833 - store float %sub.i.i.us.2.1, float* %arrayidx7.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.2.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.2.1 - %835 = load float, float* %arrayidx10.i.i.us.2.1, align 4, !tbaa !12 - %mul11.i.i.us.2.1 = fmul float %25, %835 - %div.i.i.us.2.1 = fdiv float %sub.i.i.us.2.1, %mul11.i.i.us.2.1, !fpmath !26 - store float %div.i.i.us.2.1, float* %arrayidx7.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.2.1 - -if.end.r_exit.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.r_exit.i.i.us.2 - %836 = add nuw nsw i64 %_local_id_x.i.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %836, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !211 - -if.then.i.i.us.1.1: ; preds = %if.end.r_exit.i.i.us.1 - %sext.i.i.us.1.1 = shl i64 %add1.i.i.i.us.1.1, 32 - %idxprom.i.i.us.1.1 = ashr exact i64 %sext.i.i.us.1.1, 32 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.1.1 - %837 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %add.i.i.us.1.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom6.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx7.i.i.us.1.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.1.1 - %838 = load float, float* %arrayidx7.i.i.us.1.1, align 4, !tbaa !12 - %sub.i.i.us.1.1 = fsub float %838, %837 - store float %sub.i.i.us.1.1, float* %arrayidx7.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.1.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.1.1 - %839 = load float, float* %arrayidx10.i.i.us.1.1, align 4, !tbaa !12 - %mul11.i.i.us.1.1 = fmul float %25, %839 - %div.i.i.us.1.1 = fdiv float %sub.i.i.us.1.1, %mul11.i.i.us.1.1, !fpmath !26 - store float %div.i.i.us.1.1, float* %arrayidx7.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1.1 - -if.end.r_exit.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.r_exit.i.i.us.1 - %840 = add nuw nsw i64 %_local_id_x.i.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %840, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !212 - -if.then.i.i.us.1368: ; preds = %if.end.r_exit.i.i.us - %sext.i.i.us.1358 = shl i64 %add1.i.i.i.us.1354, 32 - %idxprom.i.i.us.1359 = ashr exact i64 %sext.i.i.us.1358, 32 - %arrayidx.i.i.us.1360 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.1359 - %841 = load float, float* %arrayidx.i.i.us.1360, align 4, !tbaa !12 - %add.i.i.us.1361 = add nsw i32 %mul.i.i, %conv.i.i.us.1355 - %idxprom6.i.i.us.1362 = sext i32 %add.i.i.us.1361 to i64 - %arrayidx7.i.i.us.1363 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.1362 - %842 = load float, float* %arrayidx7.i.i.us.1363, align 4, !tbaa !12 - %sub.i.i.us.1364 = fsub float %842, %841 - store float %sub.i.i.us.1364, float* %arrayidx7.i.i.us.1363, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx10.i.i.us.1365 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.1359 - %843 = load float, float* %arrayidx10.i.i.us.1365, align 4, !tbaa !12 - %mul11.i.i.us.1366 = fmul float %25, %843 - %div.i.i.us.1367 = fdiv float %sub.i.i.us.1364, %mul11.i.i.us.1366, !fpmath !26 - store float %div.i.i.us.1367, float* %arrayidx7.i.i.us.1363, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1369 - -if.end.r_exit.i.i.us.1369: ; preds = %if.then.i.i.us.1368, %if.end.r_exit.i.i.us - %844 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %844, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !213 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #4 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nofree nounwind } -attributes #3 = { argmemonly nounwind readonly willreturn } -attributes #4 = { argmemonly nounwind willreturn } -attributes #5 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"float", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !""} -!10 = !{!"mean", !"std", !"data", !"float_n", !"m", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20} -!20 = distinct !{!20, !18} -!21 = !{!22, !23} -!22 = distinct !{} -!23 = distinct !{} -!24 = !{!25} -!25 = distinct !{!25, !18} -!26 = !{float 2.500000e+00} -!27 = !{!28} -!28 = distinct !{!28, !29} -!29 = distinct !{!29, !"LVerDomain"} -!30 = !{!31} -!31 = distinct !{!31, !29} -!32 = !{!33} -!33 = distinct !{!33, !29} -!34 = !{!35} -!35 = distinct !{!35, !36} -!36 = distinct !{!36, !"LVerDomain"} -!37 = !{!38} -!38 = distinct !{!38, !36} -!39 = !{!40} -!40 = distinct !{!40, !36} -!41 = !{!42} -!42 = distinct !{!42, !43} -!43 = distinct !{!43, !"LVerDomain"} -!44 = !{!45} -!45 = distinct !{!45, !43} -!46 = !{!47} -!47 = distinct !{!47, !43} -!48 = !{!49} -!49 = distinct !{!49, !50} -!50 = distinct !{!50, !"LVerDomain"} -!51 = !{!52} -!52 = distinct !{!52, !50} -!53 = !{!54} -!54 = distinct !{!54, !50} -!55 = !{!56} -!56 = distinct !{!56, !57} -!57 = distinct !{!57, !"LVerDomain"} -!58 = !{!59} -!59 = distinct !{!59, !57} -!60 = !{!61} -!61 = distinct !{!61, !57} -!62 = !{!63} -!63 = distinct !{!63, !64} -!64 = distinct !{!64, !"LVerDomain"} -!65 = !{!66} -!66 = distinct !{!66, !64} -!67 = !{!68} -!68 = distinct !{!68, !64} -!69 = !{!70} -!70 = distinct !{!70, !71} -!71 = distinct !{!71, !"LVerDomain"} -!72 = !{!73} -!73 = distinct !{!73, !71} -!74 = !{!75} -!75 = distinct !{!75, !71} -!76 = distinct !{!76, !77, !78} -!77 = !{!"llvm.loop.parallel_accesses", !22} -!78 = !{!"llvm.loop.isvectorized", i32 1} -!79 = distinct !{!79, !77, !78} -!80 = distinct !{!80, !77, !78} -!81 = distinct !{!81, !77, !78} -!82 = distinct !{!82, !77, !78} -!83 = distinct !{!83, !77, !78} -!84 = distinct !{!84, !77, !78} -!85 = distinct !{!85, !77, !78} -!86 = !{!87} -!87 = distinct !{!87, !88} -!88 = distinct !{!88, !"LVerDomain"} -!89 = !{!90} -!90 = distinct !{!90, !88} -!91 = !{!92} -!92 = distinct !{!92, !88} -!93 = !{!94} -!94 = distinct !{!94, !95} -!95 = distinct !{!95, !"LVerDomain"} -!96 = !{!97} -!97 = distinct !{!97, !95} -!98 = !{!99} -!99 = distinct !{!99, !95} -!100 = !{!101} -!101 = distinct !{!101, !102} -!102 = distinct !{!102, !"LVerDomain"} -!103 = !{!104} -!104 = distinct !{!104, !102} -!105 = !{!106} -!106 = distinct !{!106, !102} -!107 = !{!108} -!108 = distinct !{!108, !109} -!109 = distinct !{!109, !"LVerDomain"} -!110 = !{!111} -!111 = distinct !{!111, !109} -!112 = !{!113} -!113 = distinct !{!113, !109} -!114 = !{!115} -!115 = distinct !{!115, !116} -!116 = distinct !{!116, !"LVerDomain"} -!117 = !{!118} -!118 = distinct !{!118, !116} -!119 = !{!120} -!120 = distinct !{!120, !116} -!121 = !{!122} -!122 = distinct !{!122, !123} -!123 = distinct !{!123, !"LVerDomain"} -!124 = !{!125} -!125 = distinct !{!125, !123} -!126 = !{!127} -!127 = distinct !{!127, !123} -!128 = !{!129} -!129 = distinct !{!129, !130} -!130 = distinct !{!130, !"LVerDomain"} -!131 = !{!132} -!132 = distinct !{!132, !130} -!133 = !{!134} -!134 = distinct !{!134, !130} -!135 = !{!136} -!136 = distinct !{!136, !137} -!137 = distinct !{!137, !"LVerDomain"} -!138 = !{!139} -!139 = distinct !{!139, !137} -!140 = !{!141} -!141 = distinct !{!141, !137} -!142 = distinct !{!142, !77, !78} -!143 = distinct !{!143, !77, !78} -!144 = distinct !{!144, !77, !78} -!145 = distinct !{!145, !77, !78} -!146 = distinct !{!146, !77, !78} -!147 = distinct !{!147, !77, !78} -!148 = distinct !{!148, !77, !78} -!149 = distinct !{!149, !77, !78} -!150 = !{!151} -!151 = distinct !{!151, !152} -!152 = distinct !{!152, !"LVerDomain"} -!153 = !{!154} -!154 = distinct !{!154, !152} -!155 = !{!156} -!156 = distinct !{!156, !152} -!157 = !{!158} -!158 = distinct !{!158, !159} -!159 = distinct !{!159, !"LVerDomain"} -!160 = !{!161} -!161 = distinct !{!161, !159} -!162 = !{!163} -!163 = distinct !{!163, !159} -!164 = !{!165} -!165 = distinct !{!165, !166} -!166 = distinct !{!166, !"LVerDomain"} -!167 = !{!168} -!168 = distinct !{!168, !166} -!169 = !{!170} -!170 = distinct !{!170, !166} -!171 = !{!172} -!172 = distinct !{!172, !173} -!173 = distinct !{!173, !"LVerDomain"} -!174 = !{!175} -!175 = distinct !{!175, !173} -!176 = !{!177} -!177 = distinct !{!177, !173} -!178 = !{!179} -!179 = distinct !{!179, !180} -!180 = distinct !{!180, !"LVerDomain"} -!181 = !{!182} -!182 = distinct !{!182, !180} -!183 = !{!184} -!184 = distinct !{!184, !180} -!185 = !{!186} -!186 = distinct !{!186, !187} -!187 = distinct !{!187, !"LVerDomain"} -!188 = !{!189} -!189 = distinct !{!189, !187} -!190 = !{!191} -!191 = distinct !{!191, !187} -!192 = !{!193} -!193 = distinct !{!193, !194} -!194 = distinct !{!194, !"LVerDomain"} -!195 = !{!196} -!196 = distinct !{!196, !194} -!197 = !{!198} -!198 = distinct !{!198, !194} -!199 = !{!200} -!200 = distinct !{!200, !201} -!201 = distinct !{!201, !"LVerDomain"} -!202 = !{!203} -!203 = distinct !{!203, !201} -!204 = !{!205} -!205 = distinct !{!205, !201} -!206 = distinct !{!206, !77, !78} -!207 = distinct !{!207, !77, !78} -!208 = distinct !{!208, !77, !78} -!209 = distinct !{!209, !77, !78} -!210 = distinct !{!210, !77, !78} -!211 = distinct !{!211, !77, !78} -!212 = distinct !{!212, !77, !78} -!213 = distinct !{!213, !77, !78} diff --git a/pocl_irs/correlation_std.ll b/pocl_irs/correlation_std.ll deleted file mode 100644 index 68c50df..0000000 --- a/pocl_irs/correlation_std.ll +++ /dev/null @@ -1,1199 +0,0 @@ -; ModuleID = './KK/KIGILFKIOCMFGODNJKKCPJJIDHNBMKHCFFGPF/std_kernel/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_std_kernel(float* nocapture readonly %0, float* nocapture %1, float* nocapture readonly %2, float %3, float %4, i32 %5, i32 %6, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %7, i64 %8, i64 %9, i64 %10) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %8, 8 - %cmp259.i = icmp sgt i32 %6, 0 - %12 = sext i32 %5 to i64 - %wide.trip.count.i = zext i32 %6 to i64 - br i1 %cmp259.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_entry.entry.i.preheader - -pregion_for_entry.entry.i.us.preheader: ; preds = %11 - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.preheader: ; preds = %11 - %div.i = fdiv float 0.000000e+00, %3 - %13 = tail call float @llvm.sqrt.f32(float %div.i) #2 - %cmp27.i = fcmp ugt float %13, %4 - %storemerge.i = select i1 %cmp27.i, float %13, float 1.000000e+00 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert12 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat13 = shufflevector <8 x i32> %broadcast.splatinsert12, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert14 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat15 = shufflevector <8 x i32> %broadcast.splatinsert14, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert16 = insertelement <8 x float> undef, float %storemerge.i, i32 0 - %broadcast.splat17 = shufflevector <8 x float> %broadcast.splatinsert16, <8 x float> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <8 x float> undef, float %storemerge.i, i32 0 - %broadcast.splat19 = shufflevector <8 x float> %broadcast.splatinsert18, <8 x float> undef, <8 x i32> zeroinitializer - %14 = or <8 x i64> %broadcast.splat, - %15 = trunc <8 x i64> %14 to <8 x i32> - %16 = trunc i64 %mul.i.i to i32 - %17 = or i32 %16, 8 - %18 = insertelement <8 x i32> undef, i32 %17, i64 0 - %19 = shufflevector <8 x i32> %18, <8 x i32> undef, <8 x i32> zeroinitializer - %20 = or <8 x i32> %19, - %21 = icmp sgt <8 x i32> %broadcast.splat13, %15 - %22 = icmp sgt <8 x i32> %broadcast.splat15, %20 - %23 = extractelement <8 x i64> %14, i32 0 - %24 = shl i64 %23, 32 - %25 = ashr exact i64 %24, 32 - %26 = getelementptr inbounds float, float* %1, i64 %25 - %27 = bitcast float* %26 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %27, i32 4, <8 x i1> %21), !tbaa !12, !llvm.access.group !16 - %28 = getelementptr inbounds float, float* %26, i64 8 - %29 = bitcast float* %28 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %29, i32 4, <8 x i1> %22), !tbaa !12, !llvm.access.group !16 - %30 = or <8 x i64> %broadcast.splat, - %31 = trunc <8 x i64> %30 to <8 x i32> - %32 = trunc i64 %mul.i.i to i32 - %33 = or i32 %32, 8 - %34 = insertelement <8 x i32> undef, i32 %33, i64 0 - %35 = shufflevector <8 x i32> %34, <8 x i32> undef, <8 x i32> zeroinitializer - %36 = or <8 x i32> %35, - %37 = icmp sgt <8 x i32> %broadcast.splat13, %31 - %38 = icmp sgt <8 x i32> %broadcast.splat15, %36 - %39 = extractelement <8 x i64> %30, i32 0 - %40 = shl i64 %39, 32 - %41 = ashr exact i64 %40, 32 - %42 = getelementptr inbounds float, float* %1, i64 %41 - %43 = bitcast float* %42 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %43, i32 4, <8 x i1> %37), !tbaa !12, !llvm.access.group !16 - %44 = getelementptr inbounds float, float* %42, i64 8 - %45 = bitcast float* %44 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %45, i32 4, <8 x i1> %38), !tbaa !12, !llvm.access.group !16 - %46 = or <8 x i64> %broadcast.splat, - %47 = trunc <8 x i64> %46 to <8 x i32> - %48 = trunc i64 %mul.i.i to i32 - %49 = or i32 %48, 8 - %50 = insertelement <8 x i32> undef, i32 %49, i64 0 - %51 = shufflevector <8 x i32> %50, <8 x i32> undef, <8 x i32> zeroinitializer - %52 = or <8 x i32> %51, - %53 = icmp sgt <8 x i32> %broadcast.splat13, %47 - %54 = icmp sgt <8 x i32> %broadcast.splat15, %52 - %55 = extractelement <8 x i64> %46, i32 0 - %56 = shl i64 %55, 32 - %57 = ashr exact i64 %56, 32 - %58 = getelementptr inbounds float, float* %1, i64 %57 - %59 = bitcast float* %58 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %59, i32 4, <8 x i1> %53), !tbaa !12, !llvm.access.group !16 - %60 = getelementptr inbounds float, float* %58, i64 8 - %61 = bitcast float* %60 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %61, i32 4, <8 x i1> %54), !tbaa !12, !llvm.access.group !16 - %62 = or <8 x i64> %broadcast.splat, - %63 = trunc <8 x i64> %62 to <8 x i32> - %64 = trunc i64 %mul.i.i to i32 - %65 = or i32 %64, 8 - %66 = insertelement <8 x i32> undef, i32 %65, i64 0 - %67 = shufflevector <8 x i32> %66, <8 x i32> undef, <8 x i32> zeroinitializer - %68 = or <8 x i32> %67, - %69 = icmp sgt <8 x i32> %broadcast.splat13, %63 - %70 = icmp sgt <8 x i32> %broadcast.splat15, %68 - %71 = extractelement <8 x i64> %62, i32 0 - %72 = shl i64 %71, 32 - %73 = ashr exact i64 %72, 32 - %74 = getelementptr inbounds float, float* %1, i64 %73 - %75 = bitcast float* %74 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %75, i32 4, <8 x i1> %69), !tbaa !12, !llvm.access.group !16 - %76 = getelementptr inbounds float, float* %74, i64 8 - %77 = bitcast float* %76 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %77, i32 4, <8 x i1> %70), !tbaa !12, !llvm.access.group !16 - %78 = or <8 x i64> %broadcast.splat, - %79 = trunc <8 x i64> %78 to <8 x i32> - %80 = trunc i64 %mul.i.i to i32 - %81 = or i32 %80, 8 - %82 = insertelement <8 x i32> undef, i32 %81, i64 0 - %83 = shufflevector <8 x i32> %82, <8 x i32> undef, <8 x i32> zeroinitializer - %84 = or <8 x i32> %83, - %85 = icmp sgt <8 x i32> %broadcast.splat13, %79 - %86 = icmp sgt <8 x i32> %broadcast.splat15, %84 - %87 = extractelement <8 x i64> %78, i32 0 - %88 = shl i64 %87, 32 - %89 = ashr exact i64 %88, 32 - %90 = getelementptr inbounds float, float* %1, i64 %89 - %91 = bitcast float* %90 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %91, i32 4, <8 x i1> %85), !tbaa !12, !llvm.access.group !16 - %92 = getelementptr inbounds float, float* %90, i64 8 - %93 = bitcast float* %92 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %93, i32 4, <8 x i1> %86), !tbaa !12, !llvm.access.group !16 - %94 = or <8 x i64> %broadcast.splat, - %95 = trunc <8 x i64> %94 to <8 x i32> - %96 = trunc i64 %mul.i.i to i32 - %97 = or i32 %96, 8 - %98 = insertelement <8 x i32> undef, i32 %97, i64 0 - %99 = shufflevector <8 x i32> %98, <8 x i32> undef, <8 x i32> zeroinitializer - %100 = or <8 x i32> %99, - %101 = icmp sgt <8 x i32> %broadcast.splat13, %95 - %102 = icmp sgt <8 x i32> %broadcast.splat15, %100 - %103 = extractelement <8 x i64> %94, i32 0 - %104 = shl i64 %103, 32 - %105 = ashr exact i64 %104, 32 - %106 = getelementptr inbounds float, float* %1, i64 %105 - %107 = bitcast float* %106 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %107, i32 4, <8 x i1> %101), !tbaa !12, !llvm.access.group !16 - %108 = getelementptr inbounds float, float* %106, i64 8 - %109 = bitcast float* %108 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %109, i32 4, <8 x i1> %102), !tbaa !12, !llvm.access.group !16 - %110 = or <8 x i64> %broadcast.splat, - %111 = trunc <8 x i64> %110 to <8 x i32> - %112 = trunc i64 %mul.i.i to i32 - %113 = or i32 %112, 8 - %114 = insertelement <8 x i32> undef, i32 %113, i64 0 - %115 = shufflevector <8 x i32> %114, <8 x i32> undef, <8 x i32> zeroinitializer - %116 = or <8 x i32> %115, - %117 = icmp sgt <8 x i32> %broadcast.splat13, %111 - %118 = icmp sgt <8 x i32> %broadcast.splat15, %116 - %119 = extractelement <8 x i64> %110, i32 0 - %120 = shl i64 %119, 32 - %121 = ashr exact i64 %120, 32 - %122 = getelementptr inbounds float, float* %1, i64 %121 - %123 = bitcast float* %122 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %123, i32 4, <8 x i1> %117), !tbaa !12, !llvm.access.group !16 - %124 = getelementptr inbounds float, float* %122, i64 8 - %125 = bitcast float* %124 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %125, i32 4, <8 x i1> %118), !tbaa !12, !llvm.access.group !16 - %126 = or <8 x i64> %broadcast.splat, - %127 = trunc <8 x i64> %126 to <8 x i32> - %128 = trunc i64 %mul.i.i to i32 - %129 = or i32 %128, 8 - %130 = insertelement <8 x i32> undef, i32 %129, i64 0 - %131 = shufflevector <8 x i32> %130, <8 x i32> undef, <8 x i32> zeroinitializer - %132 = or <8 x i32> %131, - %133 = icmp sgt <8 x i32> %broadcast.splat13, %127 - %134 = icmp sgt <8 x i32> %broadcast.splat15, %132 - %135 = extractelement <8 x i64> %126, i32 0 - %136 = shl i64 %135, 32 - %137 = ashr exact i64 %136, 32 - %138 = getelementptr inbounds float, float* %1, i64 %137 - %139 = bitcast float* %138 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %139, i32 4, <8 x i1> %133), !tbaa !12, !llvm.access.group !16 - %140 = getelementptr inbounds float, float* %138, i64 8 - %141 = bitcast float* %140 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %141, i32 4, <8 x i1> %134), !tbaa !12, !llvm.access.group !16 - %142 = or <8 x i64> %broadcast.splat, - %143 = trunc <8 x i64> %142 to <8 x i32> - %144 = trunc i64 %mul.i.i to i32 - %145 = or i32 %144, 8 - %146 = insertelement <8 x i32> undef, i32 %145, i64 0 - %147 = shufflevector <8 x i32> %146, <8 x i32> undef, <8 x i32> zeroinitializer - %148 = or <8 x i32> %147, - %149 = icmp sgt <8 x i32> %broadcast.splat13, %143 - %150 = icmp sgt <8 x i32> %broadcast.splat15, %148 - %151 = extractelement <8 x i64> %142, i32 0 - %152 = shl i64 %151, 32 - %153 = ashr exact i64 %152, 32 - %154 = getelementptr inbounds float, float* %1, i64 %153 - %155 = bitcast float* %154 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %155, i32 4, <8 x i1> %149), !tbaa !12, !llvm.access.group !16 - %156 = getelementptr inbounds float, float* %154, i64 8 - %157 = bitcast float* %156 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %157, i32 4, <8 x i1> %150), !tbaa !12, !llvm.access.group !16 - %158 = or <8 x i64> %broadcast.splat, - %159 = trunc <8 x i64> %158 to <8 x i32> - %160 = trunc i64 %mul.i.i to i32 - %161 = or i32 %160, 8 - %162 = insertelement <8 x i32> undef, i32 %161, i64 0 - %163 = shufflevector <8 x i32> %162, <8 x i32> undef, <8 x i32> zeroinitializer - %164 = or <8 x i32> %163, - %165 = icmp sgt <8 x i32> %broadcast.splat13, %159 - %166 = icmp sgt <8 x i32> %broadcast.splat15, %164 - %167 = extractelement <8 x i64> %158, i32 0 - %168 = shl i64 %167, 32 - %169 = ashr exact i64 %168, 32 - %170 = getelementptr inbounds float, float* %1, i64 %169 - %171 = bitcast float* %170 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %171, i32 4, <8 x i1> %165), !tbaa !12, !llvm.access.group !16 - %172 = getelementptr inbounds float, float* %170, i64 8 - %173 = bitcast float* %172 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %173, i32 4, <8 x i1> %166), !tbaa !12, !llvm.access.group !16 - %174 = or <8 x i64> %broadcast.splat, - %175 = trunc <8 x i64> %174 to <8 x i32> - %176 = trunc i64 %mul.i.i to i32 - %177 = or i32 %176, 8 - %178 = insertelement <8 x i32> undef, i32 %177, i64 0 - %179 = shufflevector <8 x i32> %178, <8 x i32> undef, <8 x i32> zeroinitializer - %180 = or <8 x i32> %179, - %181 = icmp sgt <8 x i32> %broadcast.splat13, %175 - %182 = icmp sgt <8 x i32> %broadcast.splat15, %180 - %183 = extractelement <8 x i64> %174, i32 0 - %184 = shl i64 %183, 32 - %185 = ashr exact i64 %184, 32 - %186 = getelementptr inbounds float, float* %1, i64 %185 - %187 = bitcast float* %186 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %187, i32 4, <8 x i1> %181), !tbaa !12, !llvm.access.group !16 - %188 = getelementptr inbounds float, float* %186, i64 8 - %189 = bitcast float* %188 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %189, i32 4, <8 x i1> %182), !tbaa !12, !llvm.access.group !16 - %190 = or <8 x i64> %broadcast.splat, - %191 = trunc <8 x i64> %190 to <8 x i32> - %192 = trunc i64 %mul.i.i to i32 - %193 = or i32 %192, 8 - %194 = insertelement <8 x i32> undef, i32 %193, i64 0 - %195 = shufflevector <8 x i32> %194, <8 x i32> undef, <8 x i32> zeroinitializer - %196 = or <8 x i32> %195, - %197 = icmp sgt <8 x i32> %broadcast.splat13, %191 - %198 = icmp sgt <8 x i32> %broadcast.splat15, %196 - %199 = extractelement <8 x i64> %190, i32 0 - %200 = shl i64 %199, 32 - %201 = ashr exact i64 %200, 32 - %202 = getelementptr inbounds float, float* %1, i64 %201 - %203 = bitcast float* %202 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %203, i32 4, <8 x i1> %197), !tbaa !12, !llvm.access.group !16 - %204 = getelementptr inbounds float, float* %202, i64 8 - %205 = bitcast float* %204 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %205, i32 4, <8 x i1> %198), !tbaa !12, !llvm.access.group !16 - %206 = or <8 x i64> %broadcast.splat, - %207 = trunc <8 x i64> %206 to <8 x i32> - %208 = trunc i64 %mul.i.i to i32 - %209 = or i32 %208, 8 - %210 = insertelement <8 x i32> undef, i32 %209, i64 0 - %211 = shufflevector <8 x i32> %210, <8 x i32> undef, <8 x i32> zeroinitializer - %212 = or <8 x i32> %211, - %213 = icmp sgt <8 x i32> %broadcast.splat13, %207 - %214 = icmp sgt <8 x i32> %broadcast.splat15, %212 - %215 = extractelement <8 x i64> %206, i32 0 - %216 = shl i64 %215, 32 - %217 = ashr exact i64 %216, 32 - %218 = getelementptr inbounds float, float* %1, i64 %217 - %219 = bitcast float* %218 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %219, i32 4, <8 x i1> %213), !tbaa !12, !llvm.access.group !16 - %220 = getelementptr inbounds float, float* %218, i64 8 - %221 = bitcast float* %220 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %221, i32 4, <8 x i1> %214), !tbaa !12, !llvm.access.group !16 - %222 = or <8 x i64> %broadcast.splat, - %223 = trunc <8 x i64> %222 to <8 x i32> - %224 = trunc i64 %mul.i.i to i32 - %225 = or i32 %224, 8 - %226 = insertelement <8 x i32> undef, i32 %225, i64 0 - %227 = shufflevector <8 x i32> %226, <8 x i32> undef, <8 x i32> zeroinitializer - %228 = or <8 x i32> %227, - %229 = icmp sgt <8 x i32> %broadcast.splat13, %223 - %230 = icmp sgt <8 x i32> %broadcast.splat15, %228 - %231 = extractelement <8 x i64> %222, i32 0 - %232 = shl i64 %231, 32 - %233 = ashr exact i64 %232, 32 - %234 = getelementptr inbounds float, float* %1, i64 %233 - %235 = bitcast float* %234 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %235, i32 4, <8 x i1> %229), !tbaa !12, !llvm.access.group !16 - %236 = getelementptr inbounds float, float* %234, i64 8 - %237 = bitcast float* %236 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %237, i32 4, <8 x i1> %230), !tbaa !12, !llvm.access.group !16 - %238 = or <8 x i64> %broadcast.splat, - %239 = trunc <8 x i64> %238 to <8 x i32> - %240 = trunc i64 %mul.i.i to i32 - %241 = or i32 %240, 8 - %242 = insertelement <8 x i32> undef, i32 %241, i64 0 - %243 = shufflevector <8 x i32> %242, <8 x i32> undef, <8 x i32> zeroinitializer - %244 = or <8 x i32> %243, - %245 = icmp sgt <8 x i32> %broadcast.splat13, %239 - %246 = icmp sgt <8 x i32> %broadcast.splat15, %244 - %247 = extractelement <8 x i64> %238, i32 0 - %248 = shl i64 %247, 32 - %249 = ashr exact i64 %248, 32 - %250 = getelementptr inbounds float, float* %1, i64 %249 - %251 = bitcast float* %250 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %251, i32 4, <8 x i1> %245), !tbaa !12, !llvm.access.group !16 - %252 = getelementptr inbounds float, float* %250, i64 8 - %253 = bitcast float* %252 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %253, i32 4, <8 x i1> %246), !tbaa !12, !llvm.access.group !16 - %254 = or <8 x i64> %broadcast.splat, - %255 = trunc <8 x i64> %254 to <8 x i32> - %256 = trunc i64 %mul.i.i to i32 - %257 = or i32 %256, 8 - %258 = insertelement <8 x i32> undef, i32 %257, i64 0 - %259 = shufflevector <8 x i32> %258, <8 x i32> undef, <8 x i32> zeroinitializer - %260 = or <8 x i32> %259, - %261 = icmp sgt <8 x i32> %broadcast.splat13, %255 - %262 = icmp sgt <8 x i32> %broadcast.splat15, %260 - %263 = extractelement <8 x i64> %254, i32 0 - %264 = shl i64 %263, 32 - %265 = ashr exact i64 %264, 32 - %266 = getelementptr inbounds float, float* %1, i64 %265 - %267 = bitcast float* %266 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %267, i32 4, <8 x i1> %261), !tbaa !12, !llvm.access.group !16 - %268 = getelementptr inbounds float, float* %266, i64 8 - %269 = bitcast float* %268 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %269, i32 4, <8 x i1> %262), !tbaa !12, !llvm.access.group !16 - br label %std_kernel.exit - -pregion_for_entry.entry.i.us: ; preds = %if.end32.r_exit.i.us, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %277, %if.end32.r_exit.i.us ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %5 - br i1 %cmp.i.us, label %if.then.i.us, label %if.end32.r_exit.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %sext.i.us = shl i64 %add1.i.i.us, 32 - %idxprom.i.us = ashr exact i64 %sext.i.us, 32 - %arrayidx.i.us = getelementptr inbounds float, float* %1, i64 %idxprom.i.us - store float 0.000000e+00, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %arrayidx7.i.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %if.then.i.us - %indvars.iv.next.i4.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %if.then.i.us ] - %270 = phi float [ %275, %for.body.i.us ], [ 0.000000e+00, %if.then.i.us ] - %271 = mul nsw i64 %indvars.iv.next.i4.us, %12 - %272 = add nsw i64 %271, %idxprom.i.us - %arrayidx5.i.us = getelementptr inbounds float, float* %2, i64 %272 - %273 = load float, float* %arrayidx5.i.us, align 4, !tbaa !12 - %274 = load float, float* %arrayidx7.i.us, align 4, !tbaa !12 - %sub.i.us = fsub float %273, %274 - %275 = tail call float @llvm.fmuladd.f32(float %sub.i.us, float %sub.i.us, float %270) #2 - store float %275, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i4.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %for.end.loopexit.i.us, label %for.body.i.us, !llvm.loop !18 - -for.end.loopexit.i.us: ; preds = %for.body.i.us - %.lcssa = phi float [ %275, %for.body.i.us ] - %div.i.us = fdiv float %.lcssa, %3, !fpmath !20 - %276 = tail call float @llvm.sqrt.f32(float %div.i.us) #2 - %cmp27.i.us = fcmp ugt float %276, %4 - %storemerge.i.us = select i1 %cmp27.i.us, float %276, float 1.000000e+00 - store float %storemerge.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end32.r_exit.i.us - -if.end32.r_exit.i.us: ; preds = %for.end.loopexit.i.us, %pregion_for_entry.entry.i.us - %277 = add nuw nsw i64 %_local_id_x.0.us, 1 - %exitcond.not = icmp eq i64 %277, 256 - br i1 %exitcond.not, label %std_kernel.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !21 - -std_kernel.exit.loopexit: ; preds = %if.end32.r_exit.i.us - br label %std_kernel.exit - -std_kernel.exit: ; preds = %std_kernel.exit.loopexit, %pregion_for_entry.entry.i.preheader - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_std_kernel_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to float** - %19 = load float*, float** %18, align 8 - %20 = load float, float* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to float** - %23 = load float*, float** %22, align 8 - %24 = load float, float* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 5 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %29 = getelementptr i8*, i8** %0, i64 6 - %30 = bitcast i8** %29 to i32** - %31 = load i32*, i32** %30, align 8 - %32 = load i32, i32* %31, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp259.i.i = icmp sgt i32 %32, 0 - %33 = sext i32 %28 to i64 - %wide.trip.count.i.i = zext i32 %32 to i64 - br i1 %cmp259.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_entry.entry.i.i.preheader - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.preheader: ; preds = %5 - %div.i.i = fdiv float 0.000000e+00, %20 - %34 = tail call float @llvm.sqrt.f32(float %div.i.i) #2 - %cmp27.i.i = fcmp ugt float %34, %24 - %storemerge.i.i = select i1 %cmp27.i.i, float %34, float 1.000000e+00 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert12 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat13 = shufflevector <8 x i32> %broadcast.splatinsert12, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert14 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat15 = shufflevector <8 x i32> %broadcast.splatinsert14, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert16 = insertelement <8 x float> undef, float %storemerge.i.i, i32 0 - %broadcast.splat17 = shufflevector <8 x float> %broadcast.splatinsert16, <8 x float> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <8 x float> undef, float %storemerge.i.i, i32 0 - %broadcast.splat19 = shufflevector <8 x float> %broadcast.splatinsert18, <8 x float> undef, <8 x i32> zeroinitializer - %35 = or <8 x i64> %broadcast.splat, - %36 = trunc <8 x i64> %35 to <8 x i32> - %37 = trunc i64 %mul.i.i.i to i32 - %38 = or i32 %37, 8 - %39 = insertelement <8 x i32> undef, i32 %38, i64 0 - %40 = shufflevector <8 x i32> %39, <8 x i32> undef, <8 x i32> zeroinitializer - %41 = or <8 x i32> %40, - %42 = icmp sgt <8 x i32> %broadcast.splat13, %36 - %43 = icmp sgt <8 x i32> %broadcast.splat15, %41 - %44 = extractelement <8 x i64> %35, i32 0 - %45 = shl i64 %44, 32 - %46 = ashr exact i64 %45, 32 - %47 = getelementptr inbounds float, float* %12, i64 %46 - %48 = bitcast float* %47 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %48, i32 4, <8 x i1> %42), !tbaa !12, !llvm.access.group !16 - %49 = getelementptr inbounds float, float* %47, i64 8 - %50 = bitcast float* %49 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %50, i32 4, <8 x i1> %43), !tbaa !12, !llvm.access.group !16 - %51 = or <8 x i64> %broadcast.splat, - %52 = trunc <8 x i64> %51 to <8 x i32> - %53 = trunc i64 %mul.i.i.i to i32 - %54 = or i32 %53, 8 - %55 = insertelement <8 x i32> undef, i32 %54, i64 0 - %56 = shufflevector <8 x i32> %55, <8 x i32> undef, <8 x i32> zeroinitializer - %57 = or <8 x i32> %56, - %58 = icmp sgt <8 x i32> %broadcast.splat13, %52 - %59 = icmp sgt <8 x i32> %broadcast.splat15, %57 - %60 = extractelement <8 x i64> %51, i32 0 - %61 = shl i64 %60, 32 - %62 = ashr exact i64 %61, 32 - %63 = getelementptr inbounds float, float* %12, i64 %62 - %64 = bitcast float* %63 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %64, i32 4, <8 x i1> %58), !tbaa !12, !llvm.access.group !16 - %65 = getelementptr inbounds float, float* %63, i64 8 - %66 = bitcast float* %65 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %66, i32 4, <8 x i1> %59), !tbaa !12, !llvm.access.group !16 - %67 = or <8 x i64> %broadcast.splat, - %68 = trunc <8 x i64> %67 to <8 x i32> - %69 = trunc i64 %mul.i.i.i to i32 - %70 = or i32 %69, 8 - %71 = insertelement <8 x i32> undef, i32 %70, i64 0 - %72 = shufflevector <8 x i32> %71, <8 x i32> undef, <8 x i32> zeroinitializer - %73 = or <8 x i32> %72, - %74 = icmp sgt <8 x i32> %broadcast.splat13, %68 - %75 = icmp sgt <8 x i32> %broadcast.splat15, %73 - %76 = extractelement <8 x i64> %67, i32 0 - %77 = shl i64 %76, 32 - %78 = ashr exact i64 %77, 32 - %79 = getelementptr inbounds float, float* %12, i64 %78 - %80 = bitcast float* %79 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %80, i32 4, <8 x i1> %74), !tbaa !12, !llvm.access.group !16 - %81 = getelementptr inbounds float, float* %79, i64 8 - %82 = bitcast float* %81 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %82, i32 4, <8 x i1> %75), !tbaa !12, !llvm.access.group !16 - %83 = or <8 x i64> %broadcast.splat, - %84 = trunc <8 x i64> %83 to <8 x i32> - %85 = trunc i64 %mul.i.i.i to i32 - %86 = or i32 %85, 8 - %87 = insertelement <8 x i32> undef, i32 %86, i64 0 - %88 = shufflevector <8 x i32> %87, <8 x i32> undef, <8 x i32> zeroinitializer - %89 = or <8 x i32> %88, - %90 = icmp sgt <8 x i32> %broadcast.splat13, %84 - %91 = icmp sgt <8 x i32> %broadcast.splat15, %89 - %92 = extractelement <8 x i64> %83, i32 0 - %93 = shl i64 %92, 32 - %94 = ashr exact i64 %93, 32 - %95 = getelementptr inbounds float, float* %12, i64 %94 - %96 = bitcast float* %95 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %96, i32 4, <8 x i1> %90), !tbaa !12, !llvm.access.group !16 - %97 = getelementptr inbounds float, float* %95, i64 8 - %98 = bitcast float* %97 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %98, i32 4, <8 x i1> %91), !tbaa !12, !llvm.access.group !16 - %99 = or <8 x i64> %broadcast.splat, - %100 = trunc <8 x i64> %99 to <8 x i32> - %101 = trunc i64 %mul.i.i.i to i32 - %102 = or i32 %101, 8 - %103 = insertelement <8 x i32> undef, i32 %102, i64 0 - %104 = shufflevector <8 x i32> %103, <8 x i32> undef, <8 x i32> zeroinitializer - %105 = or <8 x i32> %104, - %106 = icmp sgt <8 x i32> %broadcast.splat13, %100 - %107 = icmp sgt <8 x i32> %broadcast.splat15, %105 - %108 = extractelement <8 x i64> %99, i32 0 - %109 = shl i64 %108, 32 - %110 = ashr exact i64 %109, 32 - %111 = getelementptr inbounds float, float* %12, i64 %110 - %112 = bitcast float* %111 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %112, i32 4, <8 x i1> %106), !tbaa !12, !llvm.access.group !16 - %113 = getelementptr inbounds float, float* %111, i64 8 - %114 = bitcast float* %113 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %114, i32 4, <8 x i1> %107), !tbaa !12, !llvm.access.group !16 - %115 = or <8 x i64> %broadcast.splat, - %116 = trunc <8 x i64> %115 to <8 x i32> - %117 = trunc i64 %mul.i.i.i to i32 - %118 = or i32 %117, 8 - %119 = insertelement <8 x i32> undef, i32 %118, i64 0 - %120 = shufflevector <8 x i32> %119, <8 x i32> undef, <8 x i32> zeroinitializer - %121 = or <8 x i32> %120, - %122 = icmp sgt <8 x i32> %broadcast.splat13, %116 - %123 = icmp sgt <8 x i32> %broadcast.splat15, %121 - %124 = extractelement <8 x i64> %115, i32 0 - %125 = shl i64 %124, 32 - %126 = ashr exact i64 %125, 32 - %127 = getelementptr inbounds float, float* %12, i64 %126 - %128 = bitcast float* %127 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %128, i32 4, <8 x i1> %122), !tbaa !12, !llvm.access.group !16 - %129 = getelementptr inbounds float, float* %127, i64 8 - %130 = bitcast float* %129 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %130, i32 4, <8 x i1> %123), !tbaa !12, !llvm.access.group !16 - %131 = or <8 x i64> %broadcast.splat, - %132 = trunc <8 x i64> %131 to <8 x i32> - %133 = trunc i64 %mul.i.i.i to i32 - %134 = or i32 %133, 8 - %135 = insertelement <8 x i32> undef, i32 %134, i64 0 - %136 = shufflevector <8 x i32> %135, <8 x i32> undef, <8 x i32> zeroinitializer - %137 = or <8 x i32> %136, - %138 = icmp sgt <8 x i32> %broadcast.splat13, %132 - %139 = icmp sgt <8 x i32> %broadcast.splat15, %137 - %140 = extractelement <8 x i64> %131, i32 0 - %141 = shl i64 %140, 32 - %142 = ashr exact i64 %141, 32 - %143 = getelementptr inbounds float, float* %12, i64 %142 - %144 = bitcast float* %143 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %144, i32 4, <8 x i1> %138), !tbaa !12, !llvm.access.group !16 - %145 = getelementptr inbounds float, float* %143, i64 8 - %146 = bitcast float* %145 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %146, i32 4, <8 x i1> %139), !tbaa !12, !llvm.access.group !16 - %147 = or <8 x i64> %broadcast.splat, - %148 = trunc <8 x i64> %147 to <8 x i32> - %149 = trunc i64 %mul.i.i.i to i32 - %150 = or i32 %149, 8 - %151 = insertelement <8 x i32> undef, i32 %150, i64 0 - %152 = shufflevector <8 x i32> %151, <8 x i32> undef, <8 x i32> zeroinitializer - %153 = or <8 x i32> %152, - %154 = icmp sgt <8 x i32> %broadcast.splat13, %148 - %155 = icmp sgt <8 x i32> %broadcast.splat15, %153 - %156 = extractelement <8 x i64> %147, i32 0 - %157 = shl i64 %156, 32 - %158 = ashr exact i64 %157, 32 - %159 = getelementptr inbounds float, float* %12, i64 %158 - %160 = bitcast float* %159 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %160, i32 4, <8 x i1> %154), !tbaa !12, !llvm.access.group !16 - %161 = getelementptr inbounds float, float* %159, i64 8 - %162 = bitcast float* %161 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %162, i32 4, <8 x i1> %155), !tbaa !12, !llvm.access.group !16 - %163 = or <8 x i64> %broadcast.splat, - %164 = trunc <8 x i64> %163 to <8 x i32> - %165 = trunc i64 %mul.i.i.i to i32 - %166 = or i32 %165, 8 - %167 = insertelement <8 x i32> undef, i32 %166, i64 0 - %168 = shufflevector <8 x i32> %167, <8 x i32> undef, <8 x i32> zeroinitializer - %169 = or <8 x i32> %168, - %170 = icmp sgt <8 x i32> %broadcast.splat13, %164 - %171 = icmp sgt <8 x i32> %broadcast.splat15, %169 - %172 = extractelement <8 x i64> %163, i32 0 - %173 = shl i64 %172, 32 - %174 = ashr exact i64 %173, 32 - %175 = getelementptr inbounds float, float* %12, i64 %174 - %176 = bitcast float* %175 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %176, i32 4, <8 x i1> %170), !tbaa !12, !llvm.access.group !16 - %177 = getelementptr inbounds float, float* %175, i64 8 - %178 = bitcast float* %177 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %178, i32 4, <8 x i1> %171), !tbaa !12, !llvm.access.group !16 - %179 = or <8 x i64> %broadcast.splat, - %180 = trunc <8 x i64> %179 to <8 x i32> - %181 = trunc i64 %mul.i.i.i to i32 - %182 = or i32 %181, 8 - %183 = insertelement <8 x i32> undef, i32 %182, i64 0 - %184 = shufflevector <8 x i32> %183, <8 x i32> undef, <8 x i32> zeroinitializer - %185 = or <8 x i32> %184, - %186 = icmp sgt <8 x i32> %broadcast.splat13, %180 - %187 = icmp sgt <8 x i32> %broadcast.splat15, %185 - %188 = extractelement <8 x i64> %179, i32 0 - %189 = shl i64 %188, 32 - %190 = ashr exact i64 %189, 32 - %191 = getelementptr inbounds float, float* %12, i64 %190 - %192 = bitcast float* %191 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %192, i32 4, <8 x i1> %186), !tbaa !12, !llvm.access.group !16 - %193 = getelementptr inbounds float, float* %191, i64 8 - %194 = bitcast float* %193 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %194, i32 4, <8 x i1> %187), !tbaa !12, !llvm.access.group !16 - %195 = or <8 x i64> %broadcast.splat, - %196 = trunc <8 x i64> %195 to <8 x i32> - %197 = trunc i64 %mul.i.i.i to i32 - %198 = or i32 %197, 8 - %199 = insertelement <8 x i32> undef, i32 %198, i64 0 - %200 = shufflevector <8 x i32> %199, <8 x i32> undef, <8 x i32> zeroinitializer - %201 = or <8 x i32> %200, - %202 = icmp sgt <8 x i32> %broadcast.splat13, %196 - %203 = icmp sgt <8 x i32> %broadcast.splat15, %201 - %204 = extractelement <8 x i64> %195, i32 0 - %205 = shl i64 %204, 32 - %206 = ashr exact i64 %205, 32 - %207 = getelementptr inbounds float, float* %12, i64 %206 - %208 = bitcast float* %207 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %208, i32 4, <8 x i1> %202), !tbaa !12, !llvm.access.group !16 - %209 = getelementptr inbounds float, float* %207, i64 8 - %210 = bitcast float* %209 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %210, i32 4, <8 x i1> %203), !tbaa !12, !llvm.access.group !16 - %211 = or <8 x i64> %broadcast.splat, - %212 = trunc <8 x i64> %211 to <8 x i32> - %213 = trunc i64 %mul.i.i.i to i32 - %214 = or i32 %213, 8 - %215 = insertelement <8 x i32> undef, i32 %214, i64 0 - %216 = shufflevector <8 x i32> %215, <8 x i32> undef, <8 x i32> zeroinitializer - %217 = or <8 x i32> %216, - %218 = icmp sgt <8 x i32> %broadcast.splat13, %212 - %219 = icmp sgt <8 x i32> %broadcast.splat15, %217 - %220 = extractelement <8 x i64> %211, i32 0 - %221 = shl i64 %220, 32 - %222 = ashr exact i64 %221, 32 - %223 = getelementptr inbounds float, float* %12, i64 %222 - %224 = bitcast float* %223 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %224, i32 4, <8 x i1> %218), !tbaa !12, !llvm.access.group !16 - %225 = getelementptr inbounds float, float* %223, i64 8 - %226 = bitcast float* %225 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %226, i32 4, <8 x i1> %219), !tbaa !12, !llvm.access.group !16 - %227 = or <8 x i64> %broadcast.splat, - %228 = trunc <8 x i64> %227 to <8 x i32> - %229 = trunc i64 %mul.i.i.i to i32 - %230 = or i32 %229, 8 - %231 = insertelement <8 x i32> undef, i32 %230, i64 0 - %232 = shufflevector <8 x i32> %231, <8 x i32> undef, <8 x i32> zeroinitializer - %233 = or <8 x i32> %232, - %234 = icmp sgt <8 x i32> %broadcast.splat13, %228 - %235 = icmp sgt <8 x i32> %broadcast.splat15, %233 - %236 = extractelement <8 x i64> %227, i32 0 - %237 = shl i64 %236, 32 - %238 = ashr exact i64 %237, 32 - %239 = getelementptr inbounds float, float* %12, i64 %238 - %240 = bitcast float* %239 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %240, i32 4, <8 x i1> %234), !tbaa !12, !llvm.access.group !16 - %241 = getelementptr inbounds float, float* %239, i64 8 - %242 = bitcast float* %241 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %242, i32 4, <8 x i1> %235), !tbaa !12, !llvm.access.group !16 - %243 = or <8 x i64> %broadcast.splat, - %244 = trunc <8 x i64> %243 to <8 x i32> - %245 = trunc i64 %mul.i.i.i to i32 - %246 = or i32 %245, 8 - %247 = insertelement <8 x i32> undef, i32 %246, i64 0 - %248 = shufflevector <8 x i32> %247, <8 x i32> undef, <8 x i32> zeroinitializer - %249 = or <8 x i32> %248, - %250 = icmp sgt <8 x i32> %broadcast.splat13, %244 - %251 = icmp sgt <8 x i32> %broadcast.splat15, %249 - %252 = extractelement <8 x i64> %243, i32 0 - %253 = shl i64 %252, 32 - %254 = ashr exact i64 %253, 32 - %255 = getelementptr inbounds float, float* %12, i64 %254 - %256 = bitcast float* %255 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %256, i32 4, <8 x i1> %250), !tbaa !12, !llvm.access.group !16 - %257 = getelementptr inbounds float, float* %255, i64 8 - %258 = bitcast float* %257 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %258, i32 4, <8 x i1> %251), !tbaa !12, !llvm.access.group !16 - %259 = or <8 x i64> %broadcast.splat, - %260 = trunc <8 x i64> %259 to <8 x i32> - %261 = trunc i64 %mul.i.i.i to i32 - %262 = or i32 %261, 8 - %263 = insertelement <8 x i32> undef, i32 %262, i64 0 - %264 = shufflevector <8 x i32> %263, <8 x i32> undef, <8 x i32> zeroinitializer - %265 = or <8 x i32> %264, - %266 = icmp sgt <8 x i32> %broadcast.splat13, %260 - %267 = icmp sgt <8 x i32> %broadcast.splat15, %265 - %268 = extractelement <8 x i64> %259, i32 0 - %269 = shl i64 %268, 32 - %270 = ashr exact i64 %269, 32 - %271 = getelementptr inbounds float, float* %12, i64 %270 - %272 = bitcast float* %271 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %272, i32 4, <8 x i1> %266), !tbaa !12, !llvm.access.group !16 - %273 = getelementptr inbounds float, float* %271, i64 8 - %274 = bitcast float* %273 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %274, i32 4, <8 x i1> %267), !tbaa !12, !llvm.access.group !16 - %275 = or <8 x i64> %broadcast.splat, - %276 = trunc <8 x i64> %275 to <8 x i32> - %277 = trunc i64 %mul.i.i.i to i32 - %278 = or i32 %277, 8 - %279 = insertelement <8 x i32> undef, i32 %278, i64 0 - %280 = shufflevector <8 x i32> %279, <8 x i32> undef, <8 x i32> zeroinitializer - %281 = or <8 x i32> %280, - %282 = icmp sgt <8 x i32> %broadcast.splat13, %276 - %283 = icmp sgt <8 x i32> %broadcast.splat15, %281 - %284 = extractelement <8 x i64> %275, i32 0 - %285 = shl i64 %284, 32 - %286 = ashr exact i64 %285, 32 - %287 = getelementptr inbounds float, float* %12, i64 %286 - %288 = bitcast float* %287 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %288, i32 4, <8 x i1> %282), !tbaa !12, !llvm.access.group !16 - %289 = getelementptr inbounds float, float* %287, i64 8 - %290 = bitcast float* %289 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %290, i32 4, <8 x i1> %283), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_std_kernel.exit - -pregion_for_entry.entry.i.i.us: ; preds = %if.end32.r_exit.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %298, %if.end32.r_exit.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %28, %conv.i.i.us - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end32.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %arrayidx7.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %if.then.i.i.us - %indvars.iv.next.i.i4.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %if.then.i.i.us ] - %291 = phi float [ %296, %for.body.i.i.us ], [ 0.000000e+00, %if.then.i.i.us ] - %292 = mul nsw i64 %indvars.iv.next.i.i4.us, %33 - %293 = add nsw i64 %292, %idxprom.i.i.us - %arrayidx5.i.i.us = getelementptr inbounds float, float* %16, i64 %293 - %294 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %295 = load float, float* %arrayidx7.i.i.us, align 4, !tbaa !12 - %sub.i.i.us = fsub float %294, %295 - %296 = tail call float @llvm.fmuladd.f32(float %sub.i.i.us, float %sub.i.i.us, float %291) #2 - store float %296, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i4.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %for.end.loopexit.i.i.us, label %for.body.i.i.us, !llvm.loop !18 - -for.end.loopexit.i.i.us: ; preds = %for.body.i.i.us - %.lcssa = phi float [ %296, %for.body.i.i.us ] - %div.i.i.us = fdiv float %.lcssa, %20, !fpmath !20 - %297 = tail call float @llvm.sqrt.f32(float %div.i.i.us) #2 - %cmp27.i.i.us = fcmp ugt float %297, %24 - %storemerge.i.i.us = select i1 %cmp27.i.i.us, float %297, float 1.000000e+00 - store float %storemerge.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end32.r_exit.i.i.us - -if.end32.r_exit.i.i.us: ; preds = %for.end.loopexit.i.i.us, %pregion_for_entry.entry.i.i.us - %298 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %298, 256 - br i1 %exitcond.not, label %_pocl_kernel_std_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -_pocl_kernel_std_kernel.exit.loopexit: ; preds = %if.end32.r_exit.i.i.us - br label %_pocl_kernel_std_kernel.exit - -_pocl_kernel_std_kernel.exit: ; preds = %_pocl_kernel_std_kernel.exit.loopexit, %pregion_for_entry.entry.i.i.preheader - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_std_kernel_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to float** - %16 = load float*, float** %15, align 8 - %17 = load float, float* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to float** - %20 = load float*, float** %19, align 8 - %21 = load float, float* %20, align 4 - %22 = getelementptr i8*, i8** %0, i64 5 - %23 = bitcast i8** %22 to i32** - %24 = load i32*, i32** %23, align 8 - %25 = load i32, i32* %24, align 4 - %26 = getelementptr i8*, i8** %0, i64 6 - %27 = bitcast i8** %26 to i32** - %28 = load i32*, i32** %27, align 8 - %29 = load i32, i32* %28, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp259.i.i = icmp sgt i32 %29, 0 - %30 = sext i32 %25 to i64 - %wide.trip.count.i.i = zext i32 %29 to i64 - br i1 %cmp259.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_entry.entry.i.i.preheader - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.preheader: ; preds = %5 - %div.i.i = fdiv float 0.000000e+00, %17 - %31 = tail call float @llvm.sqrt.f32(float %div.i.i) #2 - %cmp27.i.i = fcmp ugt float %31, %21 - %storemerge.i.i = select i1 %cmp27.i.i, float %31, float 1.000000e+00 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert12 = insertelement <8 x i32> undef, i32 %25, i32 0 - %broadcast.splat13 = shufflevector <8 x i32> %broadcast.splatinsert12, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert14 = insertelement <8 x i32> undef, i32 %25, i32 0 - %broadcast.splat15 = shufflevector <8 x i32> %broadcast.splatinsert14, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert16 = insertelement <8 x float> undef, float %storemerge.i.i, i32 0 - %broadcast.splat17 = shufflevector <8 x float> %broadcast.splatinsert16, <8 x float> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <8 x float> undef, float %storemerge.i.i, i32 0 - %broadcast.splat19 = shufflevector <8 x float> %broadcast.splatinsert18, <8 x float> undef, <8 x i32> zeroinitializer - %32 = or <8 x i64> %broadcast.splat, - %33 = trunc <8 x i64> %32 to <8 x i32> - %34 = trunc i64 %mul.i.i.i to i32 - %35 = or i32 %34, 8 - %36 = insertelement <8 x i32> undef, i32 %35, i64 0 - %37 = shufflevector <8 x i32> %36, <8 x i32> undef, <8 x i32> zeroinitializer - %38 = or <8 x i32> %37, - %39 = icmp sgt <8 x i32> %broadcast.splat13, %33 - %40 = icmp sgt <8 x i32> %broadcast.splat15, %38 - %41 = extractelement <8 x i64> %32, i32 0 - %42 = shl i64 %41, 32 - %43 = ashr exact i64 %42, 32 - %44 = getelementptr inbounds float, float* %10, i64 %43 - %45 = bitcast float* %44 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %45, i32 4, <8 x i1> %39), !tbaa !12, !llvm.access.group !16 - %46 = getelementptr inbounds float, float* %44, i64 8 - %47 = bitcast float* %46 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %47, i32 4, <8 x i1> %40), !tbaa !12, !llvm.access.group !16 - %48 = or <8 x i64> %broadcast.splat, - %49 = trunc <8 x i64> %48 to <8 x i32> - %50 = trunc i64 %mul.i.i.i to i32 - %51 = or i32 %50, 8 - %52 = insertelement <8 x i32> undef, i32 %51, i64 0 - %53 = shufflevector <8 x i32> %52, <8 x i32> undef, <8 x i32> zeroinitializer - %54 = or <8 x i32> %53, - %55 = icmp sgt <8 x i32> %broadcast.splat13, %49 - %56 = icmp sgt <8 x i32> %broadcast.splat15, %54 - %57 = extractelement <8 x i64> %48, i32 0 - %58 = shl i64 %57, 32 - %59 = ashr exact i64 %58, 32 - %60 = getelementptr inbounds float, float* %10, i64 %59 - %61 = bitcast float* %60 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %61, i32 4, <8 x i1> %55), !tbaa !12, !llvm.access.group !16 - %62 = getelementptr inbounds float, float* %60, i64 8 - %63 = bitcast float* %62 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %63, i32 4, <8 x i1> %56), !tbaa !12, !llvm.access.group !16 - %64 = or <8 x i64> %broadcast.splat, - %65 = trunc <8 x i64> %64 to <8 x i32> - %66 = trunc i64 %mul.i.i.i to i32 - %67 = or i32 %66, 8 - %68 = insertelement <8 x i32> undef, i32 %67, i64 0 - %69 = shufflevector <8 x i32> %68, <8 x i32> undef, <8 x i32> zeroinitializer - %70 = or <8 x i32> %69, - %71 = icmp sgt <8 x i32> %broadcast.splat13, %65 - %72 = icmp sgt <8 x i32> %broadcast.splat15, %70 - %73 = extractelement <8 x i64> %64, i32 0 - %74 = shl i64 %73, 32 - %75 = ashr exact i64 %74, 32 - %76 = getelementptr inbounds float, float* %10, i64 %75 - %77 = bitcast float* %76 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %77, i32 4, <8 x i1> %71), !tbaa !12, !llvm.access.group !16 - %78 = getelementptr inbounds float, float* %76, i64 8 - %79 = bitcast float* %78 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %79, i32 4, <8 x i1> %72), !tbaa !12, !llvm.access.group !16 - %80 = or <8 x i64> %broadcast.splat, - %81 = trunc <8 x i64> %80 to <8 x i32> - %82 = trunc i64 %mul.i.i.i to i32 - %83 = or i32 %82, 8 - %84 = insertelement <8 x i32> undef, i32 %83, i64 0 - %85 = shufflevector <8 x i32> %84, <8 x i32> undef, <8 x i32> zeroinitializer - %86 = or <8 x i32> %85, - %87 = icmp sgt <8 x i32> %broadcast.splat13, %81 - %88 = icmp sgt <8 x i32> %broadcast.splat15, %86 - %89 = extractelement <8 x i64> %80, i32 0 - %90 = shl i64 %89, 32 - %91 = ashr exact i64 %90, 32 - %92 = getelementptr inbounds float, float* %10, i64 %91 - %93 = bitcast float* %92 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %93, i32 4, <8 x i1> %87), !tbaa !12, !llvm.access.group !16 - %94 = getelementptr inbounds float, float* %92, i64 8 - %95 = bitcast float* %94 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %95, i32 4, <8 x i1> %88), !tbaa !12, !llvm.access.group !16 - %96 = or <8 x i64> %broadcast.splat, - %97 = trunc <8 x i64> %96 to <8 x i32> - %98 = trunc i64 %mul.i.i.i to i32 - %99 = or i32 %98, 8 - %100 = insertelement <8 x i32> undef, i32 %99, i64 0 - %101 = shufflevector <8 x i32> %100, <8 x i32> undef, <8 x i32> zeroinitializer - %102 = or <8 x i32> %101, - %103 = icmp sgt <8 x i32> %broadcast.splat13, %97 - %104 = icmp sgt <8 x i32> %broadcast.splat15, %102 - %105 = extractelement <8 x i64> %96, i32 0 - %106 = shl i64 %105, 32 - %107 = ashr exact i64 %106, 32 - %108 = getelementptr inbounds float, float* %10, i64 %107 - %109 = bitcast float* %108 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %109, i32 4, <8 x i1> %103), !tbaa !12, !llvm.access.group !16 - %110 = getelementptr inbounds float, float* %108, i64 8 - %111 = bitcast float* %110 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %111, i32 4, <8 x i1> %104), !tbaa !12, !llvm.access.group !16 - %112 = or <8 x i64> %broadcast.splat, - %113 = trunc <8 x i64> %112 to <8 x i32> - %114 = trunc i64 %mul.i.i.i to i32 - %115 = or i32 %114, 8 - %116 = insertelement <8 x i32> undef, i32 %115, i64 0 - %117 = shufflevector <8 x i32> %116, <8 x i32> undef, <8 x i32> zeroinitializer - %118 = or <8 x i32> %117, - %119 = icmp sgt <8 x i32> %broadcast.splat13, %113 - %120 = icmp sgt <8 x i32> %broadcast.splat15, %118 - %121 = extractelement <8 x i64> %112, i32 0 - %122 = shl i64 %121, 32 - %123 = ashr exact i64 %122, 32 - %124 = getelementptr inbounds float, float* %10, i64 %123 - %125 = bitcast float* %124 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %125, i32 4, <8 x i1> %119), !tbaa !12, !llvm.access.group !16 - %126 = getelementptr inbounds float, float* %124, i64 8 - %127 = bitcast float* %126 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %127, i32 4, <8 x i1> %120), !tbaa !12, !llvm.access.group !16 - %128 = or <8 x i64> %broadcast.splat, - %129 = trunc <8 x i64> %128 to <8 x i32> - %130 = trunc i64 %mul.i.i.i to i32 - %131 = or i32 %130, 8 - %132 = insertelement <8 x i32> undef, i32 %131, i64 0 - %133 = shufflevector <8 x i32> %132, <8 x i32> undef, <8 x i32> zeroinitializer - %134 = or <8 x i32> %133, - %135 = icmp sgt <8 x i32> %broadcast.splat13, %129 - %136 = icmp sgt <8 x i32> %broadcast.splat15, %134 - %137 = extractelement <8 x i64> %128, i32 0 - %138 = shl i64 %137, 32 - %139 = ashr exact i64 %138, 32 - %140 = getelementptr inbounds float, float* %10, i64 %139 - %141 = bitcast float* %140 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %141, i32 4, <8 x i1> %135), !tbaa !12, !llvm.access.group !16 - %142 = getelementptr inbounds float, float* %140, i64 8 - %143 = bitcast float* %142 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %143, i32 4, <8 x i1> %136), !tbaa !12, !llvm.access.group !16 - %144 = or <8 x i64> %broadcast.splat, - %145 = trunc <8 x i64> %144 to <8 x i32> - %146 = trunc i64 %mul.i.i.i to i32 - %147 = or i32 %146, 8 - %148 = insertelement <8 x i32> undef, i32 %147, i64 0 - %149 = shufflevector <8 x i32> %148, <8 x i32> undef, <8 x i32> zeroinitializer - %150 = or <8 x i32> %149, - %151 = icmp sgt <8 x i32> %broadcast.splat13, %145 - %152 = icmp sgt <8 x i32> %broadcast.splat15, %150 - %153 = extractelement <8 x i64> %144, i32 0 - %154 = shl i64 %153, 32 - %155 = ashr exact i64 %154, 32 - %156 = getelementptr inbounds float, float* %10, i64 %155 - %157 = bitcast float* %156 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %157, i32 4, <8 x i1> %151), !tbaa !12, !llvm.access.group !16 - %158 = getelementptr inbounds float, float* %156, i64 8 - %159 = bitcast float* %158 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %159, i32 4, <8 x i1> %152), !tbaa !12, !llvm.access.group !16 - %160 = or <8 x i64> %broadcast.splat, - %161 = trunc <8 x i64> %160 to <8 x i32> - %162 = trunc i64 %mul.i.i.i to i32 - %163 = or i32 %162, 8 - %164 = insertelement <8 x i32> undef, i32 %163, i64 0 - %165 = shufflevector <8 x i32> %164, <8 x i32> undef, <8 x i32> zeroinitializer - %166 = or <8 x i32> %165, - %167 = icmp sgt <8 x i32> %broadcast.splat13, %161 - %168 = icmp sgt <8 x i32> %broadcast.splat15, %166 - %169 = extractelement <8 x i64> %160, i32 0 - %170 = shl i64 %169, 32 - %171 = ashr exact i64 %170, 32 - %172 = getelementptr inbounds float, float* %10, i64 %171 - %173 = bitcast float* %172 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %173, i32 4, <8 x i1> %167), !tbaa !12, !llvm.access.group !16 - %174 = getelementptr inbounds float, float* %172, i64 8 - %175 = bitcast float* %174 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %175, i32 4, <8 x i1> %168), !tbaa !12, !llvm.access.group !16 - %176 = or <8 x i64> %broadcast.splat, - %177 = trunc <8 x i64> %176 to <8 x i32> - %178 = trunc i64 %mul.i.i.i to i32 - %179 = or i32 %178, 8 - %180 = insertelement <8 x i32> undef, i32 %179, i64 0 - %181 = shufflevector <8 x i32> %180, <8 x i32> undef, <8 x i32> zeroinitializer - %182 = or <8 x i32> %181, - %183 = icmp sgt <8 x i32> %broadcast.splat13, %177 - %184 = icmp sgt <8 x i32> %broadcast.splat15, %182 - %185 = extractelement <8 x i64> %176, i32 0 - %186 = shl i64 %185, 32 - %187 = ashr exact i64 %186, 32 - %188 = getelementptr inbounds float, float* %10, i64 %187 - %189 = bitcast float* %188 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %189, i32 4, <8 x i1> %183), !tbaa !12, !llvm.access.group !16 - %190 = getelementptr inbounds float, float* %188, i64 8 - %191 = bitcast float* %190 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %191, i32 4, <8 x i1> %184), !tbaa !12, !llvm.access.group !16 - %192 = or <8 x i64> %broadcast.splat, - %193 = trunc <8 x i64> %192 to <8 x i32> - %194 = trunc i64 %mul.i.i.i to i32 - %195 = or i32 %194, 8 - %196 = insertelement <8 x i32> undef, i32 %195, i64 0 - %197 = shufflevector <8 x i32> %196, <8 x i32> undef, <8 x i32> zeroinitializer - %198 = or <8 x i32> %197, - %199 = icmp sgt <8 x i32> %broadcast.splat13, %193 - %200 = icmp sgt <8 x i32> %broadcast.splat15, %198 - %201 = extractelement <8 x i64> %192, i32 0 - %202 = shl i64 %201, 32 - %203 = ashr exact i64 %202, 32 - %204 = getelementptr inbounds float, float* %10, i64 %203 - %205 = bitcast float* %204 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %205, i32 4, <8 x i1> %199), !tbaa !12, !llvm.access.group !16 - %206 = getelementptr inbounds float, float* %204, i64 8 - %207 = bitcast float* %206 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %207, i32 4, <8 x i1> %200), !tbaa !12, !llvm.access.group !16 - %208 = or <8 x i64> %broadcast.splat, - %209 = trunc <8 x i64> %208 to <8 x i32> - %210 = trunc i64 %mul.i.i.i to i32 - %211 = or i32 %210, 8 - %212 = insertelement <8 x i32> undef, i32 %211, i64 0 - %213 = shufflevector <8 x i32> %212, <8 x i32> undef, <8 x i32> zeroinitializer - %214 = or <8 x i32> %213, - %215 = icmp sgt <8 x i32> %broadcast.splat13, %209 - %216 = icmp sgt <8 x i32> %broadcast.splat15, %214 - %217 = extractelement <8 x i64> %208, i32 0 - %218 = shl i64 %217, 32 - %219 = ashr exact i64 %218, 32 - %220 = getelementptr inbounds float, float* %10, i64 %219 - %221 = bitcast float* %220 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %221, i32 4, <8 x i1> %215), !tbaa !12, !llvm.access.group !16 - %222 = getelementptr inbounds float, float* %220, i64 8 - %223 = bitcast float* %222 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %223, i32 4, <8 x i1> %216), !tbaa !12, !llvm.access.group !16 - %224 = or <8 x i64> %broadcast.splat, - %225 = trunc <8 x i64> %224 to <8 x i32> - %226 = trunc i64 %mul.i.i.i to i32 - %227 = or i32 %226, 8 - %228 = insertelement <8 x i32> undef, i32 %227, i64 0 - %229 = shufflevector <8 x i32> %228, <8 x i32> undef, <8 x i32> zeroinitializer - %230 = or <8 x i32> %229, - %231 = icmp sgt <8 x i32> %broadcast.splat13, %225 - %232 = icmp sgt <8 x i32> %broadcast.splat15, %230 - %233 = extractelement <8 x i64> %224, i32 0 - %234 = shl i64 %233, 32 - %235 = ashr exact i64 %234, 32 - %236 = getelementptr inbounds float, float* %10, i64 %235 - %237 = bitcast float* %236 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %237, i32 4, <8 x i1> %231), !tbaa !12, !llvm.access.group !16 - %238 = getelementptr inbounds float, float* %236, i64 8 - %239 = bitcast float* %238 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %239, i32 4, <8 x i1> %232), !tbaa !12, !llvm.access.group !16 - %240 = or <8 x i64> %broadcast.splat, - %241 = trunc <8 x i64> %240 to <8 x i32> - %242 = trunc i64 %mul.i.i.i to i32 - %243 = or i32 %242, 8 - %244 = insertelement <8 x i32> undef, i32 %243, i64 0 - %245 = shufflevector <8 x i32> %244, <8 x i32> undef, <8 x i32> zeroinitializer - %246 = or <8 x i32> %245, - %247 = icmp sgt <8 x i32> %broadcast.splat13, %241 - %248 = icmp sgt <8 x i32> %broadcast.splat15, %246 - %249 = extractelement <8 x i64> %240, i32 0 - %250 = shl i64 %249, 32 - %251 = ashr exact i64 %250, 32 - %252 = getelementptr inbounds float, float* %10, i64 %251 - %253 = bitcast float* %252 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %253, i32 4, <8 x i1> %247), !tbaa !12, !llvm.access.group !16 - %254 = getelementptr inbounds float, float* %252, i64 8 - %255 = bitcast float* %254 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %255, i32 4, <8 x i1> %248), !tbaa !12, !llvm.access.group !16 - %256 = or <8 x i64> %broadcast.splat, - %257 = trunc <8 x i64> %256 to <8 x i32> - %258 = trunc i64 %mul.i.i.i to i32 - %259 = or i32 %258, 8 - %260 = insertelement <8 x i32> undef, i32 %259, i64 0 - %261 = shufflevector <8 x i32> %260, <8 x i32> undef, <8 x i32> zeroinitializer - %262 = or <8 x i32> %261, - %263 = icmp sgt <8 x i32> %broadcast.splat13, %257 - %264 = icmp sgt <8 x i32> %broadcast.splat15, %262 - %265 = extractelement <8 x i64> %256, i32 0 - %266 = shl i64 %265, 32 - %267 = ashr exact i64 %266, 32 - %268 = getelementptr inbounds float, float* %10, i64 %267 - %269 = bitcast float* %268 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %269, i32 4, <8 x i1> %263), !tbaa !12, !llvm.access.group !16 - %270 = getelementptr inbounds float, float* %268, i64 8 - %271 = bitcast float* %270 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %271, i32 4, <8 x i1> %264), !tbaa !12, !llvm.access.group !16 - %272 = or <8 x i64> %broadcast.splat, - %273 = trunc <8 x i64> %272 to <8 x i32> - %274 = trunc i64 %mul.i.i.i to i32 - %275 = or i32 %274, 8 - %276 = insertelement <8 x i32> undef, i32 %275, i64 0 - %277 = shufflevector <8 x i32> %276, <8 x i32> undef, <8 x i32> zeroinitializer - %278 = or <8 x i32> %277, - %279 = icmp sgt <8 x i32> %broadcast.splat13, %273 - %280 = icmp sgt <8 x i32> %broadcast.splat15, %278 - %281 = extractelement <8 x i64> %272, i32 0 - %282 = shl i64 %281, 32 - %283 = ashr exact i64 %282, 32 - %284 = getelementptr inbounds float, float* %10, i64 %283 - %285 = bitcast float* %284 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat17, <8 x float>* %285, i32 4, <8 x i1> %279), !tbaa !12, !llvm.access.group !16 - %286 = getelementptr inbounds float, float* %284, i64 8 - %287 = bitcast float* %286 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat19, <8 x float>* %287, i32 4, <8 x i1> %280), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_std_kernel.exit - -pregion_for_entry.entry.i.i.us: ; preds = %if.end32.r_exit.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %295, %if.end32.r_exit.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %25, %conv.i.i.us - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end32.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %arrayidx7.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %if.then.i.i.us - %indvars.iv.next.i.i4.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %if.then.i.i.us ] - %288 = phi float [ %293, %for.body.i.i.us ], [ 0.000000e+00, %if.then.i.i.us ] - %289 = mul nsw i64 %indvars.iv.next.i.i4.us, %30 - %290 = add nsw i64 %289, %idxprom.i.i.us - %arrayidx5.i.i.us = getelementptr inbounds float, float* %13, i64 %290 - %291 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %292 = load float, float* %arrayidx7.i.i.us, align 4, !tbaa !12 - %sub.i.i.us = fsub float %291, %292 - %293 = tail call float @llvm.fmuladd.f32(float %sub.i.i.us, float %sub.i.i.us, float %288) #2 - store float %293, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i4.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %for.end.loopexit.i.i.us, label %for.body.i.i.us, !llvm.loop !18 - -for.end.loopexit.i.i.us: ; preds = %for.body.i.i.us - %.lcssa = phi float [ %293, %for.body.i.i.us ] - %div.i.i.us = fdiv float %.lcssa, %17, !fpmath !20 - %294 = tail call float @llvm.sqrt.f32(float %div.i.i.us) #2 - %cmp27.i.i.us = fcmp ugt float %294, %21 - %storemerge.i.i.us = select i1 %cmp27.i.i.us, float %294, float 1.000000e+00 - store float %storemerge.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end32.r_exit.i.i.us - -if.end32.r_exit.i.i.us: ; preds = %for.end.loopexit.i.i.us, %pregion_for_entry.entry.i.i.us - %295 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %295, 256 - br i1 %exitcond.not, label %_pocl_kernel_std_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -_pocl_kernel_std_kernel.exit.loopexit: ; preds = %if.end32.r_exit.i.i.us - br label %_pocl_kernel_std_kernel.exit - -_pocl_kernel_std_kernel.exit: ; preds = %_pocl_kernel_std_kernel.exit.loopexit, %pregion_for_entry.entry.i.i.preheader - ret void -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #3 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } -attributes #3 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE", !"DATA_TYPE", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"float", !"float", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !"", !""} -!10 = !{!"mean", !"std", !"data", !"float_n", !"eps", !"m", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = !{float 2.500000e+00} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/covariance_covar.ll b/pocl_irs/covariance_covar.ll deleted file mode 100644 index bcdb1c0..0000000 --- a/pocl_irs/covariance_covar.ll +++ /dev/null @@ -1,506 +0,0 @@ -; ModuleID = './NN/PIABNLMPBCCDKKLCJLPFFDMLPCEOMEAIPNGLL/covar_kernel/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_covar_kernel(float* nocapture %0, float* nocapture readonly %1, i32 %2, i32 %3, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %4, i64 %5, i64 %6, i64 %7) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %5, 8 - %cmp561.i = icmp sgt i32 %3, 0 - %9 = sext i32 %2 to i64 - %wide.trip.count.i = zext i32 %3 to i64 - br i1 %cmp561.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_entry.entry.i.preheader - -pregion_for_entry.entry.i.preheader: ; preds = %8 - br label %pregion_for_entry.entry.i - -pregion_for_entry.entry.i.us.preheader: ; preds = %8 - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.us: ; preds = %if.end.i.us, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %22, %if.end.i.us ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %2 - br i1 %cmp.i.us, label %for.body.lr.ph.i.us, label %if.end.i.us - -for.body.lr.ph.i.us: ; preds = %pregion_for_entry.entry.i.us - %mul.i.us = mul nsw i32 %conv.i.us, %2 - %sext.i.us = shl i64 %add1.i.i.us, 32 - %10 = ashr exact i64 %sext.i.us, 32 - %11 = sext i32 %mul.i.us to i64 - br label %for.body.us.i.us - -for.body.us.i.us: ; preds = %for.end.loopexit.us.i.us, %for.body.lr.ph.i.us - %indvars.iv.next70.us.i8.us = phi i64 [ %indvars.iv.next70.us.i.us, %for.end.loopexit.us.i.us ], [ %10, %for.body.lr.ph.i.us ] - %12 = add nsw i64 %indvars.iv.next70.us.i8.us, %11 - %arrayidx.us.i.us = getelementptr inbounds float, float* %0, i64 %12 - store float 0.000000e+00, float* %arrayidx.us.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body7.us.i.us - -for.body7.us.i.us: ; preds = %for.body7.us.i.us, %for.body.us.i.us - %indvars.iv.next.us.i6.us = phi i64 [ %indvars.iv.next.us.i.us, %for.body7.us.i.us ], [ 0, %for.body.us.i.us ] - %13 = phi float [ %19, %for.body7.us.i.us ], [ 0.000000e+00, %for.body.us.i.us ] - %14 = mul nsw i64 %indvars.iv.next.us.i6.us, %9 - %15 = add nsw i64 %14, %10 - %arrayidx11.us.i.us = getelementptr inbounds float, float* %1, i64 %15 - %16 = load float, float* %arrayidx11.us.i.us, align 4, !tbaa !12 - %17 = add nsw i64 %14, %indvars.iv.next70.us.i8.us - %arrayidx15.us.i.us = getelementptr inbounds float, float* %1, i64 %17 - %18 = load float, float* %arrayidx15.us.i.us, align 4, !tbaa !12 - %19 = tail call float @llvm.fmuladd.f32(float %16, float %18, float %13) #2 - store float %19, float* %arrayidx.us.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.us.i.us = add nuw nsw i64 %indvars.iv.next.us.i6.us, 1 - %exitcond.not.us.i.us = icmp eq i64 %indvars.iv.next.us.i.us, %wide.trip.count.i - br i1 %exitcond.not.us.i.us, label %for.end.loopexit.us.i.us, label %for.body7.us.i.us, !llvm.loop !18 - -for.end.loopexit.us.i.us: ; preds = %for.body7.us.i.us - %.lcssa = phi float [ %19, %for.body7.us.i.us ] - %20 = mul nsw i64 %indvars.iv.next70.us.i8.us, %9 - %21 = add nsw i64 %20, %10 - %arrayidx28.us.i.us = getelementptr inbounds float, float* %0, i64 %21 - store float %.lcssa, float* %arrayidx28.us.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next70.us.i.us = add nsw i64 %indvars.iv.next70.us.i8.us, 1 - %exitcond75.not.us.i.us = icmp eq i64 %indvars.iv.next70.us.i.us, %9 - br i1 %exitcond75.not.us.i.us, label %if.end.i.us.loopexit, label %for.body.us.i.us, !llvm.loop !20 - -if.end.i.us.loopexit: ; preds = %for.end.loopexit.us.i.us - br label %if.end.i.us - -if.end.i.us: ; preds = %if.end.i.us.loopexit, %pregion_for_entry.entry.i.us - %22 = add nuw nsw i64 %_local_id_x.0.us, 1 - %exitcond.not = icmp eq i64 %22, 256 - br i1 %exitcond.not, label %covar_kernel.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !21 - -pregion_for_entry.entry.i: ; preds = %if.end.i.1, %pregion_for_entry.entry.i.preheader - %_local_id_x.0 = phi i64 [ %36, %if.end.i.1 ], [ 0, %pregion_for_entry.entry.i.preheader ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %conv.i = trunc i64 %add1.i.i to i32 - %cmp.i = icmp slt i32 %conv.i, %2 - br i1 %cmp.i, label %for.body.lr.ph.i, label %if.end.i - -for.body.lr.ph.i: ; preds = %pregion_for_entry.entry.i - %mul.i = mul nsw i32 %conv.i, %2 - %sext.i = shl i64 %add1.i.i, 32 - %23 = ashr exact i64 %sext.i, 32 - %24 = sext i32 %mul.i to i64 - br label %for.body.i - -for.body.i: ; preds = %for.body.i, %for.body.lr.ph.i - %indvars.iv.next70.i2 = phi i64 [ %indvars.iv.next70.i, %for.body.i ], [ %23, %for.body.lr.ph.i ] - %25 = add nsw i64 %indvars.iv.next70.i2, %24 - %arrayidx.i = getelementptr inbounds float, float* %0, i64 %25 - store float 0.000000e+00, float* %arrayidx.i, align 4, !tbaa !12, !llvm.access.group !16 - %26 = mul nsw i64 %indvars.iv.next70.i2, %9 - %27 = add nsw i64 %26, %23 - %arrayidx28.i = getelementptr inbounds float, float* %0, i64 %27 - %28 = bitcast float* %arrayidx28.i to i32* - store i32 0, i32* %28, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next70.i = add nsw i64 %indvars.iv.next70.i2, 1 - %exitcond75.not.i = icmp eq i64 %indvars.iv.next70.i, %9 - br i1 %exitcond75.not.i, label %if.end.i.loopexit, label %for.body.i, !llvm.loop !20 - -if.end.i.loopexit: ; preds = %for.body.i - br label %if.end.i - -if.end.i: ; preds = %if.end.i.loopexit, %pregion_for_entry.entry.i - %29 = or i64 %_local_id_x.0, 1 - %add1.i.i.1 = add nuw nsw i64 %29, %mul.i.i - %conv.i.1 = trunc i64 %add1.i.i.1 to i32 - %cmp.i.1 = icmp slt i32 %conv.i.1, %2 - br i1 %cmp.i.1, label %for.body.lr.ph.i.1, label %if.end.i.1 - -covar_kernel.exit.loopexit: ; preds = %if.end.i.us - br label %covar_kernel.exit - -covar_kernel.exit.loopexit16: ; preds = %if.end.i.1 - br label %covar_kernel.exit - -covar_kernel.exit: ; preds = %covar_kernel.exit.loopexit16, %covar_kernel.exit.loopexit - ret void - -for.body.lr.ph.i.1: ; preds = %if.end.i - %mul.i.1 = mul nsw i32 %conv.i.1, %2 - %sext.i.1 = shl i64 %add1.i.i.1, 32 - %30 = ashr exact i64 %sext.i.1, 32 - %31 = sext i32 %mul.i.1 to i64 - br label %for.body.i.1 - -for.body.i.1: ; preds = %for.body.i.1, %for.body.lr.ph.i.1 - %indvars.iv.next70.i2.1 = phi i64 [ %indvars.iv.next70.i.1, %for.body.i.1 ], [ %30, %for.body.lr.ph.i.1 ] - %32 = add nsw i64 %indvars.iv.next70.i2.1, %31 - %arrayidx.i.1 = getelementptr inbounds float, float* %0, i64 %32 - store float 0.000000e+00, float* %arrayidx.i.1, align 4, !tbaa !12, !llvm.access.group !16 - %33 = mul nsw i64 %indvars.iv.next70.i2.1, %9 - %34 = add nsw i64 %33, %30 - %arrayidx28.i.1 = getelementptr inbounds float, float* %0, i64 %34 - %35 = bitcast float* %arrayidx28.i.1 to i32* - store i32 0, i32* %35, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next70.i.1 = add nsw i64 %indvars.iv.next70.i2.1, 1 - %exitcond75.not.i.1 = icmp eq i64 %indvars.iv.next70.i.1, %9 - br i1 %exitcond75.not.i.1, label %if.end.i.1.loopexit, label %for.body.i.1, !llvm.loop !20 - -if.end.i.1.loopexit: ; preds = %for.body.i.1 - br label %if.end.i.1 - -if.end.i.1: ; preds = %if.end.i.1.loopexit, %if.end.i - %36 = add nuw nsw i64 %_local_id_x.0, 2 - %exitcond12.not.1 = icmp eq i64 %36, 256 - br i1 %exitcond12.not.1, label %covar_kernel.exit.loopexit16, label %pregion_for_entry.entry.i, !llvm.loop !21 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_covar_kernel_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp561.i.i = icmp sgt i32 %20, 0 - %21 = sext i32 %16 to i64 - %wide.trip.count.i.i = zext i32 %20 to i64 - br i1 %cmp561.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_entry.entry.i.i.preheader - -pregion_for_entry.entry.i.i.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %34, %if.end.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %16, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %16, %conv.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %22 = ashr exact i64 %sext.i.i.us, 32 - %23 = sext i32 %mul.i.i.us to i64 - br label %for.body.us.i.i.us - -for.body.us.i.i.us: ; preds = %for.end.loopexit.us.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next70.us.i.i8.us = phi i64 [ %indvars.iv.next70.us.i.i.us, %for.end.loopexit.us.i.i.us ], [ %22, %for.body.lr.ph.i.i.us ] - %24 = add nsw i64 %indvars.iv.next70.us.i.i8.us, %23 - %arrayidx.us.i.i.us = getelementptr inbounds float, float* %8, i64 %24 - store float 0.000000e+00, float* %arrayidx.us.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body7.us.i.i.us - -for.body7.us.i.i.us: ; preds = %for.body7.us.i.i.us, %for.body.us.i.i.us - %indvars.iv.next.us.i.i6.us = phi i64 [ %indvars.iv.next.us.i.i.us, %for.body7.us.i.i.us ], [ 0, %for.body.us.i.i.us ] - %25 = phi float [ %31, %for.body7.us.i.i.us ], [ 0.000000e+00, %for.body.us.i.i.us ] - %26 = mul nsw i64 %indvars.iv.next.us.i.i6.us, %21 - %27 = add nsw i64 %26, %22 - %arrayidx11.us.i.i.us = getelementptr inbounds float, float* %12, i64 %27 - %28 = load float, float* %arrayidx11.us.i.i.us, align 4, !tbaa !12 - %29 = add nsw i64 %26, %indvars.iv.next70.us.i.i8.us - %arrayidx15.us.i.i.us = getelementptr inbounds float, float* %12, i64 %29 - %30 = load float, float* %arrayidx15.us.i.i.us, align 4, !tbaa !12 - %31 = tail call float @llvm.fmuladd.f32(float %28, float %30, float %25) #2 - store float %31, float* %arrayidx.us.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.us.i.i.us = add nuw nsw i64 %indvars.iv.next.us.i.i6.us, 1 - %exitcond.not.us.i.i.us = icmp eq i64 %indvars.iv.next.us.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.us.i.i.us, label %for.end.loopexit.us.i.i.us, label %for.body7.us.i.i.us, !llvm.loop !18 - -for.end.loopexit.us.i.i.us: ; preds = %for.body7.us.i.i.us - %.lcssa = phi float [ %31, %for.body7.us.i.i.us ] - %32 = mul nsw i64 %indvars.iv.next70.us.i.i8.us, %21 - %33 = add nsw i64 %32, %22 - %arrayidx28.us.i.i.us = getelementptr inbounds float, float* %8, i64 %33 - store float %.lcssa, float* %arrayidx28.us.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next70.us.i.i.us = add nsw i64 %indvars.iv.next70.us.i.i8.us, 1 - %exitcond75.not.us.i.i.us = icmp eq i64 %indvars.iv.next70.us.i.i.us, %21 - br i1 %exitcond75.not.us.i.i.us, label %if.end.i.i.us.loopexit, label %for.body.us.i.i.us, !llvm.loop !20 - -if.end.i.i.us.loopexit: ; preds = %for.end.loopexit.us.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.end.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %34 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %34, 256 - br i1 %exitcond.not, label %_pocl_kernel_covar_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -pregion_for_entry.entry.i.i: ; preds = %if.end.i.i.1, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ %48, %if.end.i.i.1 ], [ 0, %pregion_for_entry.entry.i.i.preheader ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %16, %conv.i.i - br i1 %cmp.i.i, label %for.body.lr.ph.i.i, label %if.end.i.i - -for.body.lr.ph.i.i: ; preds = %pregion_for_entry.entry.i.i - %mul.i.i = mul nsw i32 %16, %conv.i.i - %sext.i.i = shl i64 %add1.i.i.i, 32 - %35 = ashr exact i64 %sext.i.i, 32 - %36 = sext i32 %mul.i.i to i64 - br label %for.body.i.i - -for.body.i.i: ; preds = %for.body.i.i, %for.body.lr.ph.i.i - %indvars.iv.next70.i.i2 = phi i64 [ %indvars.iv.next70.i.i, %for.body.i.i ], [ %35, %for.body.lr.ph.i.i ] - %37 = add nsw i64 %indvars.iv.next70.i.i2, %36 - %arrayidx.i.i = getelementptr inbounds float, float* %8, i64 %37 - store float 0.000000e+00, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !16 - %38 = mul nsw i64 %indvars.iv.next70.i.i2, %21 - %39 = add nsw i64 %38, %35 - %arrayidx28.i.i = getelementptr inbounds float, float* %8, i64 %39 - %40 = bitcast float* %arrayidx28.i.i to i32* - store i32 0, i32* %40, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next70.i.i = add nsw i64 %indvars.iv.next70.i.i2, 1 - %exitcond75.not.i.i = icmp eq i64 %indvars.iv.next70.i.i, %21 - br i1 %exitcond75.not.i.i, label %if.end.i.i.loopexit, label %for.body.i.i, !llvm.loop !20 - -if.end.i.i.loopexit: ; preds = %for.body.i.i - br label %if.end.i.i - -if.end.i.i: ; preds = %if.end.i.i.loopexit, %pregion_for_entry.entry.i.i - %41 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %41, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %16, %conv.i.i.1 - br i1 %cmp.i.i.1, label %for.body.lr.ph.i.i.1, label %if.end.i.i.1 - -_pocl_kernel_covar_kernel.exit.loopexit: ; preds = %if.end.i.i.us - br label %_pocl_kernel_covar_kernel.exit - -_pocl_kernel_covar_kernel.exit.loopexit16: ; preds = %if.end.i.i.1 - br label %_pocl_kernel_covar_kernel.exit - -_pocl_kernel_covar_kernel.exit: ; preds = %_pocl_kernel_covar_kernel.exit.loopexit16, %_pocl_kernel_covar_kernel.exit.loopexit - ret void - -for.body.lr.ph.i.i.1: ; preds = %if.end.i.i - %mul.i.i.1 = mul nsw i32 %16, %conv.i.i.1 - %sext.i.i.1 = shl i64 %add1.i.i.i.1, 32 - %42 = ashr exact i64 %sext.i.i.1, 32 - %43 = sext i32 %mul.i.i.1 to i64 - br label %for.body.i.i.1 - -for.body.i.i.1: ; preds = %for.body.i.i.1, %for.body.lr.ph.i.i.1 - %indvars.iv.next70.i.i2.1 = phi i64 [ %indvars.iv.next70.i.i.1, %for.body.i.i.1 ], [ %42, %for.body.lr.ph.i.i.1 ] - %44 = add nsw i64 %indvars.iv.next70.i.i2.1, %43 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %8, i64 %44 - store float 0.000000e+00, float* %arrayidx.i.i.1, align 4, !tbaa !12, !llvm.access.group !16 - %45 = mul nsw i64 %indvars.iv.next70.i.i2.1, %21 - %46 = add nsw i64 %45, %42 - %arrayidx28.i.i.1 = getelementptr inbounds float, float* %8, i64 %46 - %47 = bitcast float* %arrayidx28.i.i.1 to i32* - store i32 0, i32* %47, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next70.i.i.1 = add nsw i64 %indvars.iv.next70.i.i2.1, 1 - %exitcond75.not.i.i.1 = icmp eq i64 %indvars.iv.next70.i.i.1, %21 - br i1 %exitcond75.not.i.i.1, label %if.end.i.i.1.loopexit, label %for.body.i.i.1, !llvm.loop !20 - -if.end.i.i.1.loopexit: ; preds = %for.body.i.i.1 - br label %if.end.i.i.1 - -if.end.i.i.1: ; preds = %if.end.i.i.1.loopexit, %if.end.i.i - %48 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond12.not.1 = icmp eq i64 %48, 256 - br i1 %exitcond12.not.1, label %_pocl_kernel_covar_kernel.exit.loopexit16, label %pregion_for_entry.entry.i.i, !llvm.loop !21 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_covar_kernel_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to i32** - %13 = load i32*, i32** %12, align 8 - %14 = load i32, i32* %13, align 4 - %15 = getelementptr i8*, i8** %0, i64 3 - %16 = bitcast i8** %15 to i32** - %17 = load i32*, i32** %16, align 8 - %18 = load i32, i32* %17, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp561.i.i = icmp sgt i32 %18, 0 - %19 = sext i32 %14 to i64 - %wide.trip.count.i.i = zext i32 %18 to i64 - br i1 %cmp561.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_entry.entry.i.i.preheader - -pregion_for_entry.entry.i.i.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %32, %if.end.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %14, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %14, %conv.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %20 = ashr exact i64 %sext.i.i.us, 32 - %21 = sext i32 %mul.i.i.us to i64 - br label %for.body.us.i.i.us - -for.body.us.i.i.us: ; preds = %for.end.loopexit.us.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next70.us.i.i8.us = phi i64 [ %indvars.iv.next70.us.i.i.us, %for.end.loopexit.us.i.i.us ], [ %20, %for.body.lr.ph.i.i.us ] - %22 = add nsw i64 %indvars.iv.next70.us.i.i8.us, %21 - %arrayidx.us.i.i.us = getelementptr inbounds float, float* %7, i64 %22 - store float 0.000000e+00, float* %arrayidx.us.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body7.us.i.i.us - -for.body7.us.i.i.us: ; preds = %for.body7.us.i.i.us, %for.body.us.i.i.us - %indvars.iv.next.us.i.i6.us = phi i64 [ %indvars.iv.next.us.i.i.us, %for.body7.us.i.i.us ], [ 0, %for.body.us.i.i.us ] - %23 = phi float [ %29, %for.body7.us.i.i.us ], [ 0.000000e+00, %for.body.us.i.i.us ] - %24 = mul nsw i64 %indvars.iv.next.us.i.i6.us, %19 - %25 = add nsw i64 %24, %20 - %arrayidx11.us.i.i.us = getelementptr inbounds float, float* %10, i64 %25 - %26 = load float, float* %arrayidx11.us.i.i.us, align 4, !tbaa !12 - %27 = add nsw i64 %24, %indvars.iv.next70.us.i.i8.us - %arrayidx15.us.i.i.us = getelementptr inbounds float, float* %10, i64 %27 - %28 = load float, float* %arrayidx15.us.i.i.us, align 4, !tbaa !12 - %29 = tail call float @llvm.fmuladd.f32(float %26, float %28, float %23) #2 - store float %29, float* %arrayidx.us.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.us.i.i.us = add nuw nsw i64 %indvars.iv.next.us.i.i6.us, 1 - %exitcond.not.us.i.i.us = icmp eq i64 %indvars.iv.next.us.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.us.i.i.us, label %for.end.loopexit.us.i.i.us, label %for.body7.us.i.i.us, !llvm.loop !18 - -for.end.loopexit.us.i.i.us: ; preds = %for.body7.us.i.i.us - %.lcssa = phi float [ %29, %for.body7.us.i.i.us ] - %30 = mul nsw i64 %indvars.iv.next70.us.i.i8.us, %19 - %31 = add nsw i64 %30, %20 - %arrayidx28.us.i.i.us = getelementptr inbounds float, float* %7, i64 %31 - store float %.lcssa, float* %arrayidx28.us.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next70.us.i.i.us = add nsw i64 %indvars.iv.next70.us.i.i8.us, 1 - %exitcond75.not.us.i.i.us = icmp eq i64 %indvars.iv.next70.us.i.i.us, %19 - br i1 %exitcond75.not.us.i.i.us, label %if.end.i.i.us.loopexit, label %for.body.us.i.i.us, !llvm.loop !20 - -if.end.i.i.us.loopexit: ; preds = %for.end.loopexit.us.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.end.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %32 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %32, 256 - br i1 %exitcond.not, label %_pocl_kernel_covar_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -pregion_for_entry.entry.i.i: ; preds = %if.end.i.i.1, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ %46, %if.end.i.i.1 ], [ 0, %pregion_for_entry.entry.i.i.preheader ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %14, %conv.i.i - br i1 %cmp.i.i, label %for.body.lr.ph.i.i, label %if.end.i.i - -for.body.lr.ph.i.i: ; preds = %pregion_for_entry.entry.i.i - %mul.i.i = mul nsw i32 %14, %conv.i.i - %sext.i.i = shl i64 %add1.i.i.i, 32 - %33 = ashr exact i64 %sext.i.i, 32 - %34 = sext i32 %mul.i.i to i64 - br label %for.body.i.i - -for.body.i.i: ; preds = %for.body.i.i, %for.body.lr.ph.i.i - %indvars.iv.next70.i.i2 = phi i64 [ %indvars.iv.next70.i.i, %for.body.i.i ], [ %33, %for.body.lr.ph.i.i ] - %35 = add nsw i64 %indvars.iv.next70.i.i2, %34 - %arrayidx.i.i = getelementptr inbounds float, float* %7, i64 %35 - store float 0.000000e+00, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !16 - %36 = mul nsw i64 %indvars.iv.next70.i.i2, %19 - %37 = add nsw i64 %36, %33 - %arrayidx28.i.i = getelementptr inbounds float, float* %7, i64 %37 - %38 = bitcast float* %arrayidx28.i.i to i32* - store i32 0, i32* %38, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next70.i.i = add nsw i64 %indvars.iv.next70.i.i2, 1 - %exitcond75.not.i.i = icmp eq i64 %indvars.iv.next70.i.i, %19 - br i1 %exitcond75.not.i.i, label %if.end.i.i.loopexit, label %for.body.i.i, !llvm.loop !20 - -if.end.i.i.loopexit: ; preds = %for.body.i.i - br label %if.end.i.i - -if.end.i.i: ; preds = %if.end.i.i.loopexit, %pregion_for_entry.entry.i.i - %39 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %39, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %14, %conv.i.i.1 - br i1 %cmp.i.i.1, label %for.body.lr.ph.i.i.1, label %if.end.i.i.1 - -_pocl_kernel_covar_kernel.exit.loopexit: ; preds = %if.end.i.i.us - br label %_pocl_kernel_covar_kernel.exit - -_pocl_kernel_covar_kernel.exit.loopexit16: ; preds = %if.end.i.i.1 - br label %_pocl_kernel_covar_kernel.exit - -_pocl_kernel_covar_kernel.exit: ; preds = %_pocl_kernel_covar_kernel.exit.loopexit16, %_pocl_kernel_covar_kernel.exit.loopexit - ret void - -for.body.lr.ph.i.i.1: ; preds = %if.end.i.i - %mul.i.i.1 = mul nsw i32 %14, %conv.i.i.1 - %sext.i.i.1 = shl i64 %add1.i.i.i.1, 32 - %40 = ashr exact i64 %sext.i.i.1, 32 - %41 = sext i32 %mul.i.i.1 to i64 - br label %for.body.i.i.1 - -for.body.i.i.1: ; preds = %for.body.i.i.1, %for.body.lr.ph.i.i.1 - %indvars.iv.next70.i.i2.1 = phi i64 [ %indvars.iv.next70.i.i.1, %for.body.i.i.1 ], [ %40, %for.body.lr.ph.i.i.1 ] - %42 = add nsw i64 %indvars.iv.next70.i.i2.1, %41 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %7, i64 %42 - store float 0.000000e+00, float* %arrayidx.i.i.1, align 4, !tbaa !12, !llvm.access.group !16 - %43 = mul nsw i64 %indvars.iv.next70.i.i2.1, %19 - %44 = add nsw i64 %43, %40 - %arrayidx28.i.i.1 = getelementptr inbounds float, float* %7, i64 %44 - %45 = bitcast float* %arrayidx28.i.i.1 to i32* - store i32 0, i32* %45, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next70.i.i.1 = add nsw i64 %indvars.iv.next70.i.i2.1, 1 - %exitcond75.not.i.i.1 = icmp eq i64 %indvars.iv.next70.i.i.1, %19 - br i1 %exitcond75.not.i.i.1, label %if.end.i.i.1.loopexit, label %for.body.i.i.1, !llvm.loop !20 - -if.end.i.i.1.loopexit: ; preds = %for.body.i.i.1 - br label %if.end.i.i.1 - -if.end.i.i.1: ; preds = %if.end.i.i.1.loopexit, %if.end.i.i - %46 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond12.not.1 = icmp eq i64 %46, 256 - br i1 %exitcond12.not.1, label %_pocl_kernel_covar_kernel.exit.loopexit16, label %pregion_for_entry.entry.i.i, !llvm.loop !21 -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"float*", !"int", !"int"} -!9 = !{!"", !"", !"", !""} -!10 = !{!"symmat", !"data", !"m", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = distinct !{!20, !19} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/covariance_mean.ll b/pocl_irs/covariance_mean.ll deleted file mode 100644 index 704cff7..0000000 --- a/pocl_irs/covariance_mean.ll +++ /dev/null @@ -1,1150 +0,0 @@ -; ModuleID = './NN/PIABNLMPBCCDKKLCJLPFFDMLPCEOMEAIPNGLL/mean_kernel/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_mean_kernel(float* nocapture %0, float* nocapture readonly %1, float %2, i32 %3, i32 %4, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %5, i64 %6, i64 %7, i64 %8) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %6, 8 - %cmp222.i = icmp sgt i32 %4, 0 - %10 = sext i32 %3 to i64 - %wide.trip.count.i = zext i32 %4 to i64 - br i1 %cmp222.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_entry.entry.i.preheader - -pregion_for_entry.entry.i.us.preheader: ; preds = %9 - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.preheader: ; preds = %9 - %div.i = fdiv float 0.000000e+00, %2 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert13 = insertelement <8 x i32> undef, i32 %3, i32 0 - %broadcast.splat14 = shufflevector <8 x i32> %broadcast.splatinsert13, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert15 = insertelement <8 x i32> undef, i32 %3, i32 0 - %broadcast.splat16 = shufflevector <8 x i32> %broadcast.splatinsert15, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert17 = insertelement <8 x float> undef, float %div.i, i32 0 - %broadcast.splat18 = shufflevector <8 x float> %broadcast.splatinsert17, <8 x float> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert19 = insertelement <8 x float> undef, float %div.i, i32 0 - %broadcast.splat20 = shufflevector <8 x float> %broadcast.splatinsert19, <8 x float> undef, <8 x i32> zeroinitializer - %11 = or <8 x i64> %broadcast.splat, - %12 = trunc <8 x i64> %11 to <8 x i32> - %13 = trunc i64 %mul.i.i to i32 - %14 = or i32 %13, 8 - %15 = insertelement <8 x i32> undef, i32 %14, i64 0 - %16 = shufflevector <8 x i32> %15, <8 x i32> undef, <8 x i32> zeroinitializer - %17 = or <8 x i32> %16, - %18 = icmp sgt <8 x i32> %broadcast.splat14, %12 - %19 = icmp sgt <8 x i32> %broadcast.splat16, %17 - %20 = extractelement <8 x i64> %11, i32 0 - %21 = shl i64 %20, 32 - %22 = ashr exact i64 %21, 32 - %23 = getelementptr inbounds float, float* %0, i64 %22 - %24 = bitcast float* %23 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %24, i32 4, <8 x i1> %18), !tbaa !12, !llvm.access.group !16 - %25 = getelementptr inbounds float, float* %23, i64 8 - %26 = bitcast float* %25 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %26, i32 4, <8 x i1> %19), !tbaa !12, !llvm.access.group !16 - %27 = or <8 x i64> %broadcast.splat, - %28 = trunc <8 x i64> %27 to <8 x i32> - %29 = trunc i64 %mul.i.i to i32 - %30 = or i32 %29, 8 - %31 = insertelement <8 x i32> undef, i32 %30, i64 0 - %32 = shufflevector <8 x i32> %31, <8 x i32> undef, <8 x i32> zeroinitializer - %33 = or <8 x i32> %32, - %34 = icmp sgt <8 x i32> %broadcast.splat14, %28 - %35 = icmp sgt <8 x i32> %broadcast.splat16, %33 - %36 = extractelement <8 x i64> %27, i32 0 - %37 = shl i64 %36, 32 - %38 = ashr exact i64 %37, 32 - %39 = getelementptr inbounds float, float* %0, i64 %38 - %40 = bitcast float* %39 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %40, i32 4, <8 x i1> %34), !tbaa !12, !llvm.access.group !16 - %41 = getelementptr inbounds float, float* %39, i64 8 - %42 = bitcast float* %41 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %42, i32 4, <8 x i1> %35), !tbaa !12, !llvm.access.group !16 - %43 = or <8 x i64> %broadcast.splat, - %44 = trunc <8 x i64> %43 to <8 x i32> - %45 = trunc i64 %mul.i.i to i32 - %46 = or i32 %45, 8 - %47 = insertelement <8 x i32> undef, i32 %46, i64 0 - %48 = shufflevector <8 x i32> %47, <8 x i32> undef, <8 x i32> zeroinitializer - %49 = or <8 x i32> %48, - %50 = icmp sgt <8 x i32> %broadcast.splat14, %44 - %51 = icmp sgt <8 x i32> %broadcast.splat16, %49 - %52 = extractelement <8 x i64> %43, i32 0 - %53 = shl i64 %52, 32 - %54 = ashr exact i64 %53, 32 - %55 = getelementptr inbounds float, float* %0, i64 %54 - %56 = bitcast float* %55 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %56, i32 4, <8 x i1> %50), !tbaa !12, !llvm.access.group !16 - %57 = getelementptr inbounds float, float* %55, i64 8 - %58 = bitcast float* %57 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %58, i32 4, <8 x i1> %51), !tbaa !12, !llvm.access.group !16 - %59 = or <8 x i64> %broadcast.splat, - %60 = trunc <8 x i64> %59 to <8 x i32> - %61 = trunc i64 %mul.i.i to i32 - %62 = or i32 %61, 8 - %63 = insertelement <8 x i32> undef, i32 %62, i64 0 - %64 = shufflevector <8 x i32> %63, <8 x i32> undef, <8 x i32> zeroinitializer - %65 = or <8 x i32> %64, - %66 = icmp sgt <8 x i32> %broadcast.splat14, %60 - %67 = icmp sgt <8 x i32> %broadcast.splat16, %65 - %68 = extractelement <8 x i64> %59, i32 0 - %69 = shl i64 %68, 32 - %70 = ashr exact i64 %69, 32 - %71 = getelementptr inbounds float, float* %0, i64 %70 - %72 = bitcast float* %71 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %72, i32 4, <8 x i1> %66), !tbaa !12, !llvm.access.group !16 - %73 = getelementptr inbounds float, float* %71, i64 8 - %74 = bitcast float* %73 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %74, i32 4, <8 x i1> %67), !tbaa !12, !llvm.access.group !16 - %75 = or <8 x i64> %broadcast.splat, - %76 = trunc <8 x i64> %75 to <8 x i32> - %77 = trunc i64 %mul.i.i to i32 - %78 = or i32 %77, 8 - %79 = insertelement <8 x i32> undef, i32 %78, i64 0 - %80 = shufflevector <8 x i32> %79, <8 x i32> undef, <8 x i32> zeroinitializer - %81 = or <8 x i32> %80, - %82 = icmp sgt <8 x i32> %broadcast.splat14, %76 - %83 = icmp sgt <8 x i32> %broadcast.splat16, %81 - %84 = extractelement <8 x i64> %75, i32 0 - %85 = shl i64 %84, 32 - %86 = ashr exact i64 %85, 32 - %87 = getelementptr inbounds float, float* %0, i64 %86 - %88 = bitcast float* %87 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %88, i32 4, <8 x i1> %82), !tbaa !12, !llvm.access.group !16 - %89 = getelementptr inbounds float, float* %87, i64 8 - %90 = bitcast float* %89 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %90, i32 4, <8 x i1> %83), !tbaa !12, !llvm.access.group !16 - %91 = or <8 x i64> %broadcast.splat, - %92 = trunc <8 x i64> %91 to <8 x i32> - %93 = trunc i64 %mul.i.i to i32 - %94 = or i32 %93, 8 - %95 = insertelement <8 x i32> undef, i32 %94, i64 0 - %96 = shufflevector <8 x i32> %95, <8 x i32> undef, <8 x i32> zeroinitializer - %97 = or <8 x i32> %96, - %98 = icmp sgt <8 x i32> %broadcast.splat14, %92 - %99 = icmp sgt <8 x i32> %broadcast.splat16, %97 - %100 = extractelement <8 x i64> %91, i32 0 - %101 = shl i64 %100, 32 - %102 = ashr exact i64 %101, 32 - %103 = getelementptr inbounds float, float* %0, i64 %102 - %104 = bitcast float* %103 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %104, i32 4, <8 x i1> %98), !tbaa !12, !llvm.access.group !16 - %105 = getelementptr inbounds float, float* %103, i64 8 - %106 = bitcast float* %105 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %106, i32 4, <8 x i1> %99), !tbaa !12, !llvm.access.group !16 - %107 = or <8 x i64> %broadcast.splat, - %108 = trunc <8 x i64> %107 to <8 x i32> - %109 = trunc i64 %mul.i.i to i32 - %110 = or i32 %109, 8 - %111 = insertelement <8 x i32> undef, i32 %110, i64 0 - %112 = shufflevector <8 x i32> %111, <8 x i32> undef, <8 x i32> zeroinitializer - %113 = or <8 x i32> %112, - %114 = icmp sgt <8 x i32> %broadcast.splat14, %108 - %115 = icmp sgt <8 x i32> %broadcast.splat16, %113 - %116 = extractelement <8 x i64> %107, i32 0 - %117 = shl i64 %116, 32 - %118 = ashr exact i64 %117, 32 - %119 = getelementptr inbounds float, float* %0, i64 %118 - %120 = bitcast float* %119 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %120, i32 4, <8 x i1> %114), !tbaa !12, !llvm.access.group !16 - %121 = getelementptr inbounds float, float* %119, i64 8 - %122 = bitcast float* %121 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %122, i32 4, <8 x i1> %115), !tbaa !12, !llvm.access.group !16 - %123 = or <8 x i64> %broadcast.splat, - %124 = trunc <8 x i64> %123 to <8 x i32> - %125 = trunc i64 %mul.i.i to i32 - %126 = or i32 %125, 8 - %127 = insertelement <8 x i32> undef, i32 %126, i64 0 - %128 = shufflevector <8 x i32> %127, <8 x i32> undef, <8 x i32> zeroinitializer - %129 = or <8 x i32> %128, - %130 = icmp sgt <8 x i32> %broadcast.splat14, %124 - %131 = icmp sgt <8 x i32> %broadcast.splat16, %129 - %132 = extractelement <8 x i64> %123, i32 0 - %133 = shl i64 %132, 32 - %134 = ashr exact i64 %133, 32 - %135 = getelementptr inbounds float, float* %0, i64 %134 - %136 = bitcast float* %135 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %136, i32 4, <8 x i1> %130), !tbaa !12, !llvm.access.group !16 - %137 = getelementptr inbounds float, float* %135, i64 8 - %138 = bitcast float* %137 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %138, i32 4, <8 x i1> %131), !tbaa !12, !llvm.access.group !16 - %139 = or <8 x i64> %broadcast.splat, - %140 = trunc <8 x i64> %139 to <8 x i32> - %141 = trunc i64 %mul.i.i to i32 - %142 = or i32 %141, 8 - %143 = insertelement <8 x i32> undef, i32 %142, i64 0 - %144 = shufflevector <8 x i32> %143, <8 x i32> undef, <8 x i32> zeroinitializer - %145 = or <8 x i32> %144, - %146 = icmp sgt <8 x i32> %broadcast.splat14, %140 - %147 = icmp sgt <8 x i32> %broadcast.splat16, %145 - %148 = extractelement <8 x i64> %139, i32 0 - %149 = shl i64 %148, 32 - %150 = ashr exact i64 %149, 32 - %151 = getelementptr inbounds float, float* %0, i64 %150 - %152 = bitcast float* %151 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %152, i32 4, <8 x i1> %146), !tbaa !12, !llvm.access.group !16 - %153 = getelementptr inbounds float, float* %151, i64 8 - %154 = bitcast float* %153 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %154, i32 4, <8 x i1> %147), !tbaa !12, !llvm.access.group !16 - %155 = or <8 x i64> %broadcast.splat, - %156 = trunc <8 x i64> %155 to <8 x i32> - %157 = trunc i64 %mul.i.i to i32 - %158 = or i32 %157, 8 - %159 = insertelement <8 x i32> undef, i32 %158, i64 0 - %160 = shufflevector <8 x i32> %159, <8 x i32> undef, <8 x i32> zeroinitializer - %161 = or <8 x i32> %160, - %162 = icmp sgt <8 x i32> %broadcast.splat14, %156 - %163 = icmp sgt <8 x i32> %broadcast.splat16, %161 - %164 = extractelement <8 x i64> %155, i32 0 - %165 = shl i64 %164, 32 - %166 = ashr exact i64 %165, 32 - %167 = getelementptr inbounds float, float* %0, i64 %166 - %168 = bitcast float* %167 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %168, i32 4, <8 x i1> %162), !tbaa !12, !llvm.access.group !16 - %169 = getelementptr inbounds float, float* %167, i64 8 - %170 = bitcast float* %169 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %170, i32 4, <8 x i1> %163), !tbaa !12, !llvm.access.group !16 - %171 = or <8 x i64> %broadcast.splat, - %172 = trunc <8 x i64> %171 to <8 x i32> - %173 = trunc i64 %mul.i.i to i32 - %174 = or i32 %173, 8 - %175 = insertelement <8 x i32> undef, i32 %174, i64 0 - %176 = shufflevector <8 x i32> %175, <8 x i32> undef, <8 x i32> zeroinitializer - %177 = or <8 x i32> %176, - %178 = icmp sgt <8 x i32> %broadcast.splat14, %172 - %179 = icmp sgt <8 x i32> %broadcast.splat16, %177 - %180 = extractelement <8 x i64> %171, i32 0 - %181 = shl i64 %180, 32 - %182 = ashr exact i64 %181, 32 - %183 = getelementptr inbounds float, float* %0, i64 %182 - %184 = bitcast float* %183 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %184, i32 4, <8 x i1> %178), !tbaa !12, !llvm.access.group !16 - %185 = getelementptr inbounds float, float* %183, i64 8 - %186 = bitcast float* %185 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %186, i32 4, <8 x i1> %179), !tbaa !12, !llvm.access.group !16 - %187 = or <8 x i64> %broadcast.splat, - %188 = trunc <8 x i64> %187 to <8 x i32> - %189 = trunc i64 %mul.i.i to i32 - %190 = or i32 %189, 8 - %191 = insertelement <8 x i32> undef, i32 %190, i64 0 - %192 = shufflevector <8 x i32> %191, <8 x i32> undef, <8 x i32> zeroinitializer - %193 = or <8 x i32> %192, - %194 = icmp sgt <8 x i32> %broadcast.splat14, %188 - %195 = icmp sgt <8 x i32> %broadcast.splat16, %193 - %196 = extractelement <8 x i64> %187, i32 0 - %197 = shl i64 %196, 32 - %198 = ashr exact i64 %197, 32 - %199 = getelementptr inbounds float, float* %0, i64 %198 - %200 = bitcast float* %199 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %200, i32 4, <8 x i1> %194), !tbaa !12, !llvm.access.group !16 - %201 = getelementptr inbounds float, float* %199, i64 8 - %202 = bitcast float* %201 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %202, i32 4, <8 x i1> %195), !tbaa !12, !llvm.access.group !16 - %203 = or <8 x i64> %broadcast.splat, - %204 = trunc <8 x i64> %203 to <8 x i32> - %205 = trunc i64 %mul.i.i to i32 - %206 = or i32 %205, 8 - %207 = insertelement <8 x i32> undef, i32 %206, i64 0 - %208 = shufflevector <8 x i32> %207, <8 x i32> undef, <8 x i32> zeroinitializer - %209 = or <8 x i32> %208, - %210 = icmp sgt <8 x i32> %broadcast.splat14, %204 - %211 = icmp sgt <8 x i32> %broadcast.splat16, %209 - %212 = extractelement <8 x i64> %203, i32 0 - %213 = shl i64 %212, 32 - %214 = ashr exact i64 %213, 32 - %215 = getelementptr inbounds float, float* %0, i64 %214 - %216 = bitcast float* %215 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %216, i32 4, <8 x i1> %210), !tbaa !12, !llvm.access.group !16 - %217 = getelementptr inbounds float, float* %215, i64 8 - %218 = bitcast float* %217 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %218, i32 4, <8 x i1> %211), !tbaa !12, !llvm.access.group !16 - %219 = or <8 x i64> %broadcast.splat, - %220 = trunc <8 x i64> %219 to <8 x i32> - %221 = trunc i64 %mul.i.i to i32 - %222 = or i32 %221, 8 - %223 = insertelement <8 x i32> undef, i32 %222, i64 0 - %224 = shufflevector <8 x i32> %223, <8 x i32> undef, <8 x i32> zeroinitializer - %225 = or <8 x i32> %224, - %226 = icmp sgt <8 x i32> %broadcast.splat14, %220 - %227 = icmp sgt <8 x i32> %broadcast.splat16, %225 - %228 = extractelement <8 x i64> %219, i32 0 - %229 = shl i64 %228, 32 - %230 = ashr exact i64 %229, 32 - %231 = getelementptr inbounds float, float* %0, i64 %230 - %232 = bitcast float* %231 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %232, i32 4, <8 x i1> %226), !tbaa !12, !llvm.access.group !16 - %233 = getelementptr inbounds float, float* %231, i64 8 - %234 = bitcast float* %233 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %234, i32 4, <8 x i1> %227), !tbaa !12, !llvm.access.group !16 - %235 = or <8 x i64> %broadcast.splat, - %236 = trunc <8 x i64> %235 to <8 x i32> - %237 = trunc i64 %mul.i.i to i32 - %238 = or i32 %237, 8 - %239 = insertelement <8 x i32> undef, i32 %238, i64 0 - %240 = shufflevector <8 x i32> %239, <8 x i32> undef, <8 x i32> zeroinitializer - %241 = or <8 x i32> %240, - %242 = icmp sgt <8 x i32> %broadcast.splat14, %236 - %243 = icmp sgt <8 x i32> %broadcast.splat16, %241 - %244 = extractelement <8 x i64> %235, i32 0 - %245 = shl i64 %244, 32 - %246 = ashr exact i64 %245, 32 - %247 = getelementptr inbounds float, float* %0, i64 %246 - %248 = bitcast float* %247 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %248, i32 4, <8 x i1> %242), !tbaa !12, !llvm.access.group !16 - %249 = getelementptr inbounds float, float* %247, i64 8 - %250 = bitcast float* %249 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %250, i32 4, <8 x i1> %243), !tbaa !12, !llvm.access.group !16 - %251 = or <8 x i64> %broadcast.splat, - %252 = trunc <8 x i64> %251 to <8 x i32> - %253 = trunc i64 %mul.i.i to i32 - %254 = or i32 %253, 8 - %255 = insertelement <8 x i32> undef, i32 %254, i64 0 - %256 = shufflevector <8 x i32> %255, <8 x i32> undef, <8 x i32> zeroinitializer - %257 = or <8 x i32> %256, - %258 = icmp sgt <8 x i32> %broadcast.splat14, %252 - %259 = icmp sgt <8 x i32> %broadcast.splat16, %257 - %260 = extractelement <8 x i64> %251, i32 0 - %261 = shl i64 %260, 32 - %262 = ashr exact i64 %261, 32 - %263 = getelementptr inbounds float, float* %0, i64 %262 - %264 = bitcast float* %263 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %264, i32 4, <8 x i1> %258), !tbaa !12, !llvm.access.group !16 - %265 = getelementptr inbounds float, float* %263, i64 8 - %266 = bitcast float* %265 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %266, i32 4, <8 x i1> %259), !tbaa !12, !llvm.access.group !16 - br label %mean_kernel.exit - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %270, %if.end.r_exit.i.us ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %3 - br i1 %cmp.i.us, label %if.then.i.us, label %if.end.r_exit.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %sext.i.us = shl i64 %add1.i.i.us, 32 - %idxprom.i.us = ashr exact i64 %sext.i.us, 32 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us - store float 0.000000e+00, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %if.then.i.us - %indvars.iv.next.i5.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %if.then.i.us ] - %add8.i2.us = phi float [ %add8.i.us, %for.body.i.us ], [ 0.000000e+00, %if.then.i.us ] - %267 = mul nsw i64 %indvars.iv.next.i5.us, %10 - %268 = add nsw i64 %267, %idxprom.i.us - %arrayidx5.i.us = getelementptr inbounds float, float* %1, i64 %268 - %269 = load float, float* %arrayidx5.i.us, align 4, !tbaa !12 - %add8.i.us = fadd float %add8.i2.us, %269 - store float %add8.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i5.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %for.end.loopexit.i.us, label %for.body.i.us, !llvm.loop !18 - -for.end.loopexit.i.us: ; preds = %for.body.i.us - %add8.i.us.lcssa = phi float [ %add8.i.us, %for.body.i.us ] - %div.i.us = fdiv float %add8.i.us.lcssa, %2, !fpmath !20 - store float %div.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %for.end.loopexit.i.us, %pregion_for_entry.entry.i.us - %270 = add nuw nsw i64 %_local_id_x.0.us, 1 - %exitcond.not = icmp eq i64 %270, 256 - br i1 %exitcond.not, label %mean_kernel.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !21 - -mean_kernel.exit.loopexit: ; preds = %if.end.r_exit.i.us - br label %mean_kernel.exit - -mean_kernel.exit: ; preds = %mean_kernel.exit.loopexit, %pregion_for_entry.entry.i.preheader - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mean_kernel_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float** - %15 = load float*, float** %14, align 8 - %16 = load float, float* %15, align 4 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp222.i.i = icmp sgt i32 %24, 0 - %25 = sext i32 %20 to i64 - %wide.trip.count.i.i = zext i32 %24 to i64 - br i1 %cmp222.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_entry.entry.i.i.preheader - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.preheader: ; preds = %5 - %div.i.i = fdiv float 0.000000e+00, %16 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert13 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat14 = shufflevector <8 x i32> %broadcast.splatinsert13, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert15 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat16 = shufflevector <8 x i32> %broadcast.splatinsert15, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert17 = insertelement <8 x float> undef, float %div.i.i, i32 0 - %broadcast.splat18 = shufflevector <8 x float> %broadcast.splatinsert17, <8 x float> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert19 = insertelement <8 x float> undef, float %div.i.i, i32 0 - %broadcast.splat20 = shufflevector <8 x float> %broadcast.splatinsert19, <8 x float> undef, <8 x i32> zeroinitializer - %26 = or <8 x i64> %broadcast.splat, - %27 = trunc <8 x i64> %26 to <8 x i32> - %28 = trunc i64 %mul.i.i.i to i32 - %29 = or i32 %28, 8 - %30 = insertelement <8 x i32> undef, i32 %29, i64 0 - %31 = shufflevector <8 x i32> %30, <8 x i32> undef, <8 x i32> zeroinitializer - %32 = or <8 x i32> %31, - %33 = icmp sgt <8 x i32> %broadcast.splat14, %27 - %34 = icmp sgt <8 x i32> %broadcast.splat16, %32 - %35 = extractelement <8 x i64> %26, i32 0 - %36 = shl i64 %35, 32 - %37 = ashr exact i64 %36, 32 - %38 = getelementptr inbounds float, float* %8, i64 %37 - %39 = bitcast float* %38 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %39, i32 4, <8 x i1> %33), !tbaa !12, !llvm.access.group !16 - %40 = getelementptr inbounds float, float* %38, i64 8 - %41 = bitcast float* %40 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %41, i32 4, <8 x i1> %34), !tbaa !12, !llvm.access.group !16 - %42 = or <8 x i64> %broadcast.splat, - %43 = trunc <8 x i64> %42 to <8 x i32> - %44 = trunc i64 %mul.i.i.i to i32 - %45 = or i32 %44, 8 - %46 = insertelement <8 x i32> undef, i32 %45, i64 0 - %47 = shufflevector <8 x i32> %46, <8 x i32> undef, <8 x i32> zeroinitializer - %48 = or <8 x i32> %47, - %49 = icmp sgt <8 x i32> %broadcast.splat14, %43 - %50 = icmp sgt <8 x i32> %broadcast.splat16, %48 - %51 = extractelement <8 x i64> %42, i32 0 - %52 = shl i64 %51, 32 - %53 = ashr exact i64 %52, 32 - %54 = getelementptr inbounds float, float* %8, i64 %53 - %55 = bitcast float* %54 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %55, i32 4, <8 x i1> %49), !tbaa !12, !llvm.access.group !16 - %56 = getelementptr inbounds float, float* %54, i64 8 - %57 = bitcast float* %56 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %57, i32 4, <8 x i1> %50), !tbaa !12, !llvm.access.group !16 - %58 = or <8 x i64> %broadcast.splat, - %59 = trunc <8 x i64> %58 to <8 x i32> - %60 = trunc i64 %mul.i.i.i to i32 - %61 = or i32 %60, 8 - %62 = insertelement <8 x i32> undef, i32 %61, i64 0 - %63 = shufflevector <8 x i32> %62, <8 x i32> undef, <8 x i32> zeroinitializer - %64 = or <8 x i32> %63, - %65 = icmp sgt <8 x i32> %broadcast.splat14, %59 - %66 = icmp sgt <8 x i32> %broadcast.splat16, %64 - %67 = extractelement <8 x i64> %58, i32 0 - %68 = shl i64 %67, 32 - %69 = ashr exact i64 %68, 32 - %70 = getelementptr inbounds float, float* %8, i64 %69 - %71 = bitcast float* %70 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %71, i32 4, <8 x i1> %65), !tbaa !12, !llvm.access.group !16 - %72 = getelementptr inbounds float, float* %70, i64 8 - %73 = bitcast float* %72 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %73, i32 4, <8 x i1> %66), !tbaa !12, !llvm.access.group !16 - %74 = or <8 x i64> %broadcast.splat, - %75 = trunc <8 x i64> %74 to <8 x i32> - %76 = trunc i64 %mul.i.i.i to i32 - %77 = or i32 %76, 8 - %78 = insertelement <8 x i32> undef, i32 %77, i64 0 - %79 = shufflevector <8 x i32> %78, <8 x i32> undef, <8 x i32> zeroinitializer - %80 = or <8 x i32> %79, - %81 = icmp sgt <8 x i32> %broadcast.splat14, %75 - %82 = icmp sgt <8 x i32> %broadcast.splat16, %80 - %83 = extractelement <8 x i64> %74, i32 0 - %84 = shl i64 %83, 32 - %85 = ashr exact i64 %84, 32 - %86 = getelementptr inbounds float, float* %8, i64 %85 - %87 = bitcast float* %86 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %87, i32 4, <8 x i1> %81), !tbaa !12, !llvm.access.group !16 - %88 = getelementptr inbounds float, float* %86, i64 8 - %89 = bitcast float* %88 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %89, i32 4, <8 x i1> %82), !tbaa !12, !llvm.access.group !16 - %90 = or <8 x i64> %broadcast.splat, - %91 = trunc <8 x i64> %90 to <8 x i32> - %92 = trunc i64 %mul.i.i.i to i32 - %93 = or i32 %92, 8 - %94 = insertelement <8 x i32> undef, i32 %93, i64 0 - %95 = shufflevector <8 x i32> %94, <8 x i32> undef, <8 x i32> zeroinitializer - %96 = or <8 x i32> %95, - %97 = icmp sgt <8 x i32> %broadcast.splat14, %91 - %98 = icmp sgt <8 x i32> %broadcast.splat16, %96 - %99 = extractelement <8 x i64> %90, i32 0 - %100 = shl i64 %99, 32 - %101 = ashr exact i64 %100, 32 - %102 = getelementptr inbounds float, float* %8, i64 %101 - %103 = bitcast float* %102 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %103, i32 4, <8 x i1> %97), !tbaa !12, !llvm.access.group !16 - %104 = getelementptr inbounds float, float* %102, i64 8 - %105 = bitcast float* %104 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %105, i32 4, <8 x i1> %98), !tbaa !12, !llvm.access.group !16 - %106 = or <8 x i64> %broadcast.splat, - %107 = trunc <8 x i64> %106 to <8 x i32> - %108 = trunc i64 %mul.i.i.i to i32 - %109 = or i32 %108, 8 - %110 = insertelement <8 x i32> undef, i32 %109, i64 0 - %111 = shufflevector <8 x i32> %110, <8 x i32> undef, <8 x i32> zeroinitializer - %112 = or <8 x i32> %111, - %113 = icmp sgt <8 x i32> %broadcast.splat14, %107 - %114 = icmp sgt <8 x i32> %broadcast.splat16, %112 - %115 = extractelement <8 x i64> %106, i32 0 - %116 = shl i64 %115, 32 - %117 = ashr exact i64 %116, 32 - %118 = getelementptr inbounds float, float* %8, i64 %117 - %119 = bitcast float* %118 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %119, i32 4, <8 x i1> %113), !tbaa !12, !llvm.access.group !16 - %120 = getelementptr inbounds float, float* %118, i64 8 - %121 = bitcast float* %120 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %121, i32 4, <8 x i1> %114), !tbaa !12, !llvm.access.group !16 - %122 = or <8 x i64> %broadcast.splat, - %123 = trunc <8 x i64> %122 to <8 x i32> - %124 = trunc i64 %mul.i.i.i to i32 - %125 = or i32 %124, 8 - %126 = insertelement <8 x i32> undef, i32 %125, i64 0 - %127 = shufflevector <8 x i32> %126, <8 x i32> undef, <8 x i32> zeroinitializer - %128 = or <8 x i32> %127, - %129 = icmp sgt <8 x i32> %broadcast.splat14, %123 - %130 = icmp sgt <8 x i32> %broadcast.splat16, %128 - %131 = extractelement <8 x i64> %122, i32 0 - %132 = shl i64 %131, 32 - %133 = ashr exact i64 %132, 32 - %134 = getelementptr inbounds float, float* %8, i64 %133 - %135 = bitcast float* %134 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %135, i32 4, <8 x i1> %129), !tbaa !12, !llvm.access.group !16 - %136 = getelementptr inbounds float, float* %134, i64 8 - %137 = bitcast float* %136 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %137, i32 4, <8 x i1> %130), !tbaa !12, !llvm.access.group !16 - %138 = or <8 x i64> %broadcast.splat, - %139 = trunc <8 x i64> %138 to <8 x i32> - %140 = trunc i64 %mul.i.i.i to i32 - %141 = or i32 %140, 8 - %142 = insertelement <8 x i32> undef, i32 %141, i64 0 - %143 = shufflevector <8 x i32> %142, <8 x i32> undef, <8 x i32> zeroinitializer - %144 = or <8 x i32> %143, - %145 = icmp sgt <8 x i32> %broadcast.splat14, %139 - %146 = icmp sgt <8 x i32> %broadcast.splat16, %144 - %147 = extractelement <8 x i64> %138, i32 0 - %148 = shl i64 %147, 32 - %149 = ashr exact i64 %148, 32 - %150 = getelementptr inbounds float, float* %8, i64 %149 - %151 = bitcast float* %150 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %151, i32 4, <8 x i1> %145), !tbaa !12, !llvm.access.group !16 - %152 = getelementptr inbounds float, float* %150, i64 8 - %153 = bitcast float* %152 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %153, i32 4, <8 x i1> %146), !tbaa !12, !llvm.access.group !16 - %154 = or <8 x i64> %broadcast.splat, - %155 = trunc <8 x i64> %154 to <8 x i32> - %156 = trunc i64 %mul.i.i.i to i32 - %157 = or i32 %156, 8 - %158 = insertelement <8 x i32> undef, i32 %157, i64 0 - %159 = shufflevector <8 x i32> %158, <8 x i32> undef, <8 x i32> zeroinitializer - %160 = or <8 x i32> %159, - %161 = icmp sgt <8 x i32> %broadcast.splat14, %155 - %162 = icmp sgt <8 x i32> %broadcast.splat16, %160 - %163 = extractelement <8 x i64> %154, i32 0 - %164 = shl i64 %163, 32 - %165 = ashr exact i64 %164, 32 - %166 = getelementptr inbounds float, float* %8, i64 %165 - %167 = bitcast float* %166 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %167, i32 4, <8 x i1> %161), !tbaa !12, !llvm.access.group !16 - %168 = getelementptr inbounds float, float* %166, i64 8 - %169 = bitcast float* %168 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %169, i32 4, <8 x i1> %162), !tbaa !12, !llvm.access.group !16 - %170 = or <8 x i64> %broadcast.splat, - %171 = trunc <8 x i64> %170 to <8 x i32> - %172 = trunc i64 %mul.i.i.i to i32 - %173 = or i32 %172, 8 - %174 = insertelement <8 x i32> undef, i32 %173, i64 0 - %175 = shufflevector <8 x i32> %174, <8 x i32> undef, <8 x i32> zeroinitializer - %176 = or <8 x i32> %175, - %177 = icmp sgt <8 x i32> %broadcast.splat14, %171 - %178 = icmp sgt <8 x i32> %broadcast.splat16, %176 - %179 = extractelement <8 x i64> %170, i32 0 - %180 = shl i64 %179, 32 - %181 = ashr exact i64 %180, 32 - %182 = getelementptr inbounds float, float* %8, i64 %181 - %183 = bitcast float* %182 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %183, i32 4, <8 x i1> %177), !tbaa !12, !llvm.access.group !16 - %184 = getelementptr inbounds float, float* %182, i64 8 - %185 = bitcast float* %184 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %185, i32 4, <8 x i1> %178), !tbaa !12, !llvm.access.group !16 - %186 = or <8 x i64> %broadcast.splat, - %187 = trunc <8 x i64> %186 to <8 x i32> - %188 = trunc i64 %mul.i.i.i to i32 - %189 = or i32 %188, 8 - %190 = insertelement <8 x i32> undef, i32 %189, i64 0 - %191 = shufflevector <8 x i32> %190, <8 x i32> undef, <8 x i32> zeroinitializer - %192 = or <8 x i32> %191, - %193 = icmp sgt <8 x i32> %broadcast.splat14, %187 - %194 = icmp sgt <8 x i32> %broadcast.splat16, %192 - %195 = extractelement <8 x i64> %186, i32 0 - %196 = shl i64 %195, 32 - %197 = ashr exact i64 %196, 32 - %198 = getelementptr inbounds float, float* %8, i64 %197 - %199 = bitcast float* %198 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %199, i32 4, <8 x i1> %193), !tbaa !12, !llvm.access.group !16 - %200 = getelementptr inbounds float, float* %198, i64 8 - %201 = bitcast float* %200 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %201, i32 4, <8 x i1> %194), !tbaa !12, !llvm.access.group !16 - %202 = or <8 x i64> %broadcast.splat, - %203 = trunc <8 x i64> %202 to <8 x i32> - %204 = trunc i64 %mul.i.i.i to i32 - %205 = or i32 %204, 8 - %206 = insertelement <8 x i32> undef, i32 %205, i64 0 - %207 = shufflevector <8 x i32> %206, <8 x i32> undef, <8 x i32> zeroinitializer - %208 = or <8 x i32> %207, - %209 = icmp sgt <8 x i32> %broadcast.splat14, %203 - %210 = icmp sgt <8 x i32> %broadcast.splat16, %208 - %211 = extractelement <8 x i64> %202, i32 0 - %212 = shl i64 %211, 32 - %213 = ashr exact i64 %212, 32 - %214 = getelementptr inbounds float, float* %8, i64 %213 - %215 = bitcast float* %214 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %215, i32 4, <8 x i1> %209), !tbaa !12, !llvm.access.group !16 - %216 = getelementptr inbounds float, float* %214, i64 8 - %217 = bitcast float* %216 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %217, i32 4, <8 x i1> %210), !tbaa !12, !llvm.access.group !16 - %218 = or <8 x i64> %broadcast.splat, - %219 = trunc <8 x i64> %218 to <8 x i32> - %220 = trunc i64 %mul.i.i.i to i32 - %221 = or i32 %220, 8 - %222 = insertelement <8 x i32> undef, i32 %221, i64 0 - %223 = shufflevector <8 x i32> %222, <8 x i32> undef, <8 x i32> zeroinitializer - %224 = or <8 x i32> %223, - %225 = icmp sgt <8 x i32> %broadcast.splat14, %219 - %226 = icmp sgt <8 x i32> %broadcast.splat16, %224 - %227 = extractelement <8 x i64> %218, i32 0 - %228 = shl i64 %227, 32 - %229 = ashr exact i64 %228, 32 - %230 = getelementptr inbounds float, float* %8, i64 %229 - %231 = bitcast float* %230 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %231, i32 4, <8 x i1> %225), !tbaa !12, !llvm.access.group !16 - %232 = getelementptr inbounds float, float* %230, i64 8 - %233 = bitcast float* %232 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %233, i32 4, <8 x i1> %226), !tbaa !12, !llvm.access.group !16 - %234 = or <8 x i64> %broadcast.splat, - %235 = trunc <8 x i64> %234 to <8 x i32> - %236 = trunc i64 %mul.i.i.i to i32 - %237 = or i32 %236, 8 - %238 = insertelement <8 x i32> undef, i32 %237, i64 0 - %239 = shufflevector <8 x i32> %238, <8 x i32> undef, <8 x i32> zeroinitializer - %240 = or <8 x i32> %239, - %241 = icmp sgt <8 x i32> %broadcast.splat14, %235 - %242 = icmp sgt <8 x i32> %broadcast.splat16, %240 - %243 = extractelement <8 x i64> %234, i32 0 - %244 = shl i64 %243, 32 - %245 = ashr exact i64 %244, 32 - %246 = getelementptr inbounds float, float* %8, i64 %245 - %247 = bitcast float* %246 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %247, i32 4, <8 x i1> %241), !tbaa !12, !llvm.access.group !16 - %248 = getelementptr inbounds float, float* %246, i64 8 - %249 = bitcast float* %248 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %249, i32 4, <8 x i1> %242), !tbaa !12, !llvm.access.group !16 - %250 = or <8 x i64> %broadcast.splat, - %251 = trunc <8 x i64> %250 to <8 x i32> - %252 = trunc i64 %mul.i.i.i to i32 - %253 = or i32 %252, 8 - %254 = insertelement <8 x i32> undef, i32 %253, i64 0 - %255 = shufflevector <8 x i32> %254, <8 x i32> undef, <8 x i32> zeroinitializer - %256 = or <8 x i32> %255, - %257 = icmp sgt <8 x i32> %broadcast.splat14, %251 - %258 = icmp sgt <8 x i32> %broadcast.splat16, %256 - %259 = extractelement <8 x i64> %250, i32 0 - %260 = shl i64 %259, 32 - %261 = ashr exact i64 %260, 32 - %262 = getelementptr inbounds float, float* %8, i64 %261 - %263 = bitcast float* %262 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %263, i32 4, <8 x i1> %257), !tbaa !12, !llvm.access.group !16 - %264 = getelementptr inbounds float, float* %262, i64 8 - %265 = bitcast float* %264 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %265, i32 4, <8 x i1> %258), !tbaa !12, !llvm.access.group !16 - %266 = or <8 x i64> %broadcast.splat, - %267 = trunc <8 x i64> %266 to <8 x i32> - %268 = trunc i64 %mul.i.i.i to i32 - %269 = or i32 %268, 8 - %270 = insertelement <8 x i32> undef, i32 %269, i64 0 - %271 = shufflevector <8 x i32> %270, <8 x i32> undef, <8 x i32> zeroinitializer - %272 = or <8 x i32> %271, - %273 = icmp sgt <8 x i32> %broadcast.splat14, %267 - %274 = icmp sgt <8 x i32> %broadcast.splat16, %272 - %275 = extractelement <8 x i64> %266, i32 0 - %276 = shl i64 %275, 32 - %277 = ashr exact i64 %276, 32 - %278 = getelementptr inbounds float, float* %8, i64 %277 - %279 = bitcast float* %278 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %279, i32 4, <8 x i1> %273), !tbaa !12, !llvm.access.group !16 - %280 = getelementptr inbounds float, float* %278, i64 8 - %281 = bitcast float* %280 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %281, i32 4, <8 x i1> %274), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_mean_kernel.exit - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %285, %if.end.r_exit.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %20, %conv.i.i.us - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %if.then.i.i.us - %indvars.iv.next.i.i5.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %if.then.i.i.us ] - %add8.i.i2.us = phi float [ %add8.i.i.us, %for.body.i.i.us ], [ 0.000000e+00, %if.then.i.i.us ] - %282 = mul nsw i64 %indvars.iv.next.i.i5.us, %25 - %283 = add nsw i64 %282, %idxprom.i.i.us - %arrayidx5.i.i.us = getelementptr inbounds float, float* %12, i64 %283 - %284 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %add8.i.i.us = fadd float %add8.i.i2.us, %284 - store float %add8.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i5.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %for.end.loopexit.i.i.us, label %for.body.i.i.us, !llvm.loop !18 - -for.end.loopexit.i.i.us: ; preds = %for.body.i.i.us - %add8.i.i.us.lcssa = phi float [ %add8.i.i.us, %for.body.i.i.us ] - %div.i.i.us = fdiv float %add8.i.i.us.lcssa, %16, !fpmath !20 - store float %div.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %for.end.loopexit.i.i.us, %pregion_for_entry.entry.i.i.us - %285 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %285, 256 - br i1 %exitcond.not, label %_pocl_kernel_mean_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -_pocl_kernel_mean_kernel.exit.loopexit: ; preds = %if.end.r_exit.i.i.us - br label %_pocl_kernel_mean_kernel.exit - -_pocl_kernel_mean_kernel.exit: ; preds = %_pocl_kernel_mean_kernel.exit.loopexit, %pregion_for_entry.entry.i.i.preheader - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mean_kernel_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = load float, float* %13, align 4 - %15 = getelementptr i8*, i8** %0, i64 3 - %16 = bitcast i8** %15 to i32** - %17 = load i32*, i32** %16, align 8 - %18 = load i32, i32* %17, align 4 - %19 = getelementptr i8*, i8** %0, i64 4 - %20 = bitcast i8** %19 to i32** - %21 = load i32*, i32** %20, align 8 - %22 = load i32, i32* %21, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp222.i.i = icmp sgt i32 %22, 0 - %23 = sext i32 %18 to i64 - %wide.trip.count.i.i = zext i32 %22 to i64 - br i1 %cmp222.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_entry.entry.i.i.preheader - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.preheader: ; preds = %5 - %div.i.i = fdiv float 0.000000e+00, %14 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert13 = insertelement <8 x i32> undef, i32 %18, i32 0 - %broadcast.splat14 = shufflevector <8 x i32> %broadcast.splatinsert13, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert15 = insertelement <8 x i32> undef, i32 %18, i32 0 - %broadcast.splat16 = shufflevector <8 x i32> %broadcast.splatinsert15, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert17 = insertelement <8 x float> undef, float %div.i.i, i32 0 - %broadcast.splat18 = shufflevector <8 x float> %broadcast.splatinsert17, <8 x float> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert19 = insertelement <8 x float> undef, float %div.i.i, i32 0 - %broadcast.splat20 = shufflevector <8 x float> %broadcast.splatinsert19, <8 x float> undef, <8 x i32> zeroinitializer - %24 = or <8 x i64> %broadcast.splat, - %25 = trunc <8 x i64> %24 to <8 x i32> - %26 = trunc i64 %mul.i.i.i to i32 - %27 = or i32 %26, 8 - %28 = insertelement <8 x i32> undef, i32 %27, i64 0 - %29 = shufflevector <8 x i32> %28, <8 x i32> undef, <8 x i32> zeroinitializer - %30 = or <8 x i32> %29, - %31 = icmp sgt <8 x i32> %broadcast.splat14, %25 - %32 = icmp sgt <8 x i32> %broadcast.splat16, %30 - %33 = extractelement <8 x i64> %24, i32 0 - %34 = shl i64 %33, 32 - %35 = ashr exact i64 %34, 32 - %36 = getelementptr inbounds float, float* %7, i64 %35 - %37 = bitcast float* %36 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %37, i32 4, <8 x i1> %31), !tbaa !12, !llvm.access.group !16 - %38 = getelementptr inbounds float, float* %36, i64 8 - %39 = bitcast float* %38 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %39, i32 4, <8 x i1> %32), !tbaa !12, !llvm.access.group !16 - %40 = or <8 x i64> %broadcast.splat, - %41 = trunc <8 x i64> %40 to <8 x i32> - %42 = trunc i64 %mul.i.i.i to i32 - %43 = or i32 %42, 8 - %44 = insertelement <8 x i32> undef, i32 %43, i64 0 - %45 = shufflevector <8 x i32> %44, <8 x i32> undef, <8 x i32> zeroinitializer - %46 = or <8 x i32> %45, - %47 = icmp sgt <8 x i32> %broadcast.splat14, %41 - %48 = icmp sgt <8 x i32> %broadcast.splat16, %46 - %49 = extractelement <8 x i64> %40, i32 0 - %50 = shl i64 %49, 32 - %51 = ashr exact i64 %50, 32 - %52 = getelementptr inbounds float, float* %7, i64 %51 - %53 = bitcast float* %52 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %53, i32 4, <8 x i1> %47), !tbaa !12, !llvm.access.group !16 - %54 = getelementptr inbounds float, float* %52, i64 8 - %55 = bitcast float* %54 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %55, i32 4, <8 x i1> %48), !tbaa !12, !llvm.access.group !16 - %56 = or <8 x i64> %broadcast.splat, - %57 = trunc <8 x i64> %56 to <8 x i32> - %58 = trunc i64 %mul.i.i.i to i32 - %59 = or i32 %58, 8 - %60 = insertelement <8 x i32> undef, i32 %59, i64 0 - %61 = shufflevector <8 x i32> %60, <8 x i32> undef, <8 x i32> zeroinitializer - %62 = or <8 x i32> %61, - %63 = icmp sgt <8 x i32> %broadcast.splat14, %57 - %64 = icmp sgt <8 x i32> %broadcast.splat16, %62 - %65 = extractelement <8 x i64> %56, i32 0 - %66 = shl i64 %65, 32 - %67 = ashr exact i64 %66, 32 - %68 = getelementptr inbounds float, float* %7, i64 %67 - %69 = bitcast float* %68 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %69, i32 4, <8 x i1> %63), !tbaa !12, !llvm.access.group !16 - %70 = getelementptr inbounds float, float* %68, i64 8 - %71 = bitcast float* %70 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %71, i32 4, <8 x i1> %64), !tbaa !12, !llvm.access.group !16 - %72 = or <8 x i64> %broadcast.splat, - %73 = trunc <8 x i64> %72 to <8 x i32> - %74 = trunc i64 %mul.i.i.i to i32 - %75 = or i32 %74, 8 - %76 = insertelement <8 x i32> undef, i32 %75, i64 0 - %77 = shufflevector <8 x i32> %76, <8 x i32> undef, <8 x i32> zeroinitializer - %78 = or <8 x i32> %77, - %79 = icmp sgt <8 x i32> %broadcast.splat14, %73 - %80 = icmp sgt <8 x i32> %broadcast.splat16, %78 - %81 = extractelement <8 x i64> %72, i32 0 - %82 = shl i64 %81, 32 - %83 = ashr exact i64 %82, 32 - %84 = getelementptr inbounds float, float* %7, i64 %83 - %85 = bitcast float* %84 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %85, i32 4, <8 x i1> %79), !tbaa !12, !llvm.access.group !16 - %86 = getelementptr inbounds float, float* %84, i64 8 - %87 = bitcast float* %86 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %87, i32 4, <8 x i1> %80), !tbaa !12, !llvm.access.group !16 - %88 = or <8 x i64> %broadcast.splat, - %89 = trunc <8 x i64> %88 to <8 x i32> - %90 = trunc i64 %mul.i.i.i to i32 - %91 = or i32 %90, 8 - %92 = insertelement <8 x i32> undef, i32 %91, i64 0 - %93 = shufflevector <8 x i32> %92, <8 x i32> undef, <8 x i32> zeroinitializer - %94 = or <8 x i32> %93, - %95 = icmp sgt <8 x i32> %broadcast.splat14, %89 - %96 = icmp sgt <8 x i32> %broadcast.splat16, %94 - %97 = extractelement <8 x i64> %88, i32 0 - %98 = shl i64 %97, 32 - %99 = ashr exact i64 %98, 32 - %100 = getelementptr inbounds float, float* %7, i64 %99 - %101 = bitcast float* %100 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %101, i32 4, <8 x i1> %95), !tbaa !12, !llvm.access.group !16 - %102 = getelementptr inbounds float, float* %100, i64 8 - %103 = bitcast float* %102 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %103, i32 4, <8 x i1> %96), !tbaa !12, !llvm.access.group !16 - %104 = or <8 x i64> %broadcast.splat, - %105 = trunc <8 x i64> %104 to <8 x i32> - %106 = trunc i64 %mul.i.i.i to i32 - %107 = or i32 %106, 8 - %108 = insertelement <8 x i32> undef, i32 %107, i64 0 - %109 = shufflevector <8 x i32> %108, <8 x i32> undef, <8 x i32> zeroinitializer - %110 = or <8 x i32> %109, - %111 = icmp sgt <8 x i32> %broadcast.splat14, %105 - %112 = icmp sgt <8 x i32> %broadcast.splat16, %110 - %113 = extractelement <8 x i64> %104, i32 0 - %114 = shl i64 %113, 32 - %115 = ashr exact i64 %114, 32 - %116 = getelementptr inbounds float, float* %7, i64 %115 - %117 = bitcast float* %116 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %117, i32 4, <8 x i1> %111), !tbaa !12, !llvm.access.group !16 - %118 = getelementptr inbounds float, float* %116, i64 8 - %119 = bitcast float* %118 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %119, i32 4, <8 x i1> %112), !tbaa !12, !llvm.access.group !16 - %120 = or <8 x i64> %broadcast.splat, - %121 = trunc <8 x i64> %120 to <8 x i32> - %122 = trunc i64 %mul.i.i.i to i32 - %123 = or i32 %122, 8 - %124 = insertelement <8 x i32> undef, i32 %123, i64 0 - %125 = shufflevector <8 x i32> %124, <8 x i32> undef, <8 x i32> zeroinitializer - %126 = or <8 x i32> %125, - %127 = icmp sgt <8 x i32> %broadcast.splat14, %121 - %128 = icmp sgt <8 x i32> %broadcast.splat16, %126 - %129 = extractelement <8 x i64> %120, i32 0 - %130 = shl i64 %129, 32 - %131 = ashr exact i64 %130, 32 - %132 = getelementptr inbounds float, float* %7, i64 %131 - %133 = bitcast float* %132 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %133, i32 4, <8 x i1> %127), !tbaa !12, !llvm.access.group !16 - %134 = getelementptr inbounds float, float* %132, i64 8 - %135 = bitcast float* %134 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %135, i32 4, <8 x i1> %128), !tbaa !12, !llvm.access.group !16 - %136 = or <8 x i64> %broadcast.splat, - %137 = trunc <8 x i64> %136 to <8 x i32> - %138 = trunc i64 %mul.i.i.i to i32 - %139 = or i32 %138, 8 - %140 = insertelement <8 x i32> undef, i32 %139, i64 0 - %141 = shufflevector <8 x i32> %140, <8 x i32> undef, <8 x i32> zeroinitializer - %142 = or <8 x i32> %141, - %143 = icmp sgt <8 x i32> %broadcast.splat14, %137 - %144 = icmp sgt <8 x i32> %broadcast.splat16, %142 - %145 = extractelement <8 x i64> %136, i32 0 - %146 = shl i64 %145, 32 - %147 = ashr exact i64 %146, 32 - %148 = getelementptr inbounds float, float* %7, i64 %147 - %149 = bitcast float* %148 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %149, i32 4, <8 x i1> %143), !tbaa !12, !llvm.access.group !16 - %150 = getelementptr inbounds float, float* %148, i64 8 - %151 = bitcast float* %150 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %151, i32 4, <8 x i1> %144), !tbaa !12, !llvm.access.group !16 - %152 = or <8 x i64> %broadcast.splat, - %153 = trunc <8 x i64> %152 to <8 x i32> - %154 = trunc i64 %mul.i.i.i to i32 - %155 = or i32 %154, 8 - %156 = insertelement <8 x i32> undef, i32 %155, i64 0 - %157 = shufflevector <8 x i32> %156, <8 x i32> undef, <8 x i32> zeroinitializer - %158 = or <8 x i32> %157, - %159 = icmp sgt <8 x i32> %broadcast.splat14, %153 - %160 = icmp sgt <8 x i32> %broadcast.splat16, %158 - %161 = extractelement <8 x i64> %152, i32 0 - %162 = shl i64 %161, 32 - %163 = ashr exact i64 %162, 32 - %164 = getelementptr inbounds float, float* %7, i64 %163 - %165 = bitcast float* %164 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %165, i32 4, <8 x i1> %159), !tbaa !12, !llvm.access.group !16 - %166 = getelementptr inbounds float, float* %164, i64 8 - %167 = bitcast float* %166 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %167, i32 4, <8 x i1> %160), !tbaa !12, !llvm.access.group !16 - %168 = or <8 x i64> %broadcast.splat, - %169 = trunc <8 x i64> %168 to <8 x i32> - %170 = trunc i64 %mul.i.i.i to i32 - %171 = or i32 %170, 8 - %172 = insertelement <8 x i32> undef, i32 %171, i64 0 - %173 = shufflevector <8 x i32> %172, <8 x i32> undef, <8 x i32> zeroinitializer - %174 = or <8 x i32> %173, - %175 = icmp sgt <8 x i32> %broadcast.splat14, %169 - %176 = icmp sgt <8 x i32> %broadcast.splat16, %174 - %177 = extractelement <8 x i64> %168, i32 0 - %178 = shl i64 %177, 32 - %179 = ashr exact i64 %178, 32 - %180 = getelementptr inbounds float, float* %7, i64 %179 - %181 = bitcast float* %180 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %181, i32 4, <8 x i1> %175), !tbaa !12, !llvm.access.group !16 - %182 = getelementptr inbounds float, float* %180, i64 8 - %183 = bitcast float* %182 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %183, i32 4, <8 x i1> %176), !tbaa !12, !llvm.access.group !16 - %184 = or <8 x i64> %broadcast.splat, - %185 = trunc <8 x i64> %184 to <8 x i32> - %186 = trunc i64 %mul.i.i.i to i32 - %187 = or i32 %186, 8 - %188 = insertelement <8 x i32> undef, i32 %187, i64 0 - %189 = shufflevector <8 x i32> %188, <8 x i32> undef, <8 x i32> zeroinitializer - %190 = or <8 x i32> %189, - %191 = icmp sgt <8 x i32> %broadcast.splat14, %185 - %192 = icmp sgt <8 x i32> %broadcast.splat16, %190 - %193 = extractelement <8 x i64> %184, i32 0 - %194 = shl i64 %193, 32 - %195 = ashr exact i64 %194, 32 - %196 = getelementptr inbounds float, float* %7, i64 %195 - %197 = bitcast float* %196 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %197, i32 4, <8 x i1> %191), !tbaa !12, !llvm.access.group !16 - %198 = getelementptr inbounds float, float* %196, i64 8 - %199 = bitcast float* %198 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %199, i32 4, <8 x i1> %192), !tbaa !12, !llvm.access.group !16 - %200 = or <8 x i64> %broadcast.splat, - %201 = trunc <8 x i64> %200 to <8 x i32> - %202 = trunc i64 %mul.i.i.i to i32 - %203 = or i32 %202, 8 - %204 = insertelement <8 x i32> undef, i32 %203, i64 0 - %205 = shufflevector <8 x i32> %204, <8 x i32> undef, <8 x i32> zeroinitializer - %206 = or <8 x i32> %205, - %207 = icmp sgt <8 x i32> %broadcast.splat14, %201 - %208 = icmp sgt <8 x i32> %broadcast.splat16, %206 - %209 = extractelement <8 x i64> %200, i32 0 - %210 = shl i64 %209, 32 - %211 = ashr exact i64 %210, 32 - %212 = getelementptr inbounds float, float* %7, i64 %211 - %213 = bitcast float* %212 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %213, i32 4, <8 x i1> %207), !tbaa !12, !llvm.access.group !16 - %214 = getelementptr inbounds float, float* %212, i64 8 - %215 = bitcast float* %214 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %215, i32 4, <8 x i1> %208), !tbaa !12, !llvm.access.group !16 - %216 = or <8 x i64> %broadcast.splat, - %217 = trunc <8 x i64> %216 to <8 x i32> - %218 = trunc i64 %mul.i.i.i to i32 - %219 = or i32 %218, 8 - %220 = insertelement <8 x i32> undef, i32 %219, i64 0 - %221 = shufflevector <8 x i32> %220, <8 x i32> undef, <8 x i32> zeroinitializer - %222 = or <8 x i32> %221, - %223 = icmp sgt <8 x i32> %broadcast.splat14, %217 - %224 = icmp sgt <8 x i32> %broadcast.splat16, %222 - %225 = extractelement <8 x i64> %216, i32 0 - %226 = shl i64 %225, 32 - %227 = ashr exact i64 %226, 32 - %228 = getelementptr inbounds float, float* %7, i64 %227 - %229 = bitcast float* %228 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %229, i32 4, <8 x i1> %223), !tbaa !12, !llvm.access.group !16 - %230 = getelementptr inbounds float, float* %228, i64 8 - %231 = bitcast float* %230 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %231, i32 4, <8 x i1> %224), !tbaa !12, !llvm.access.group !16 - %232 = or <8 x i64> %broadcast.splat, - %233 = trunc <8 x i64> %232 to <8 x i32> - %234 = trunc i64 %mul.i.i.i to i32 - %235 = or i32 %234, 8 - %236 = insertelement <8 x i32> undef, i32 %235, i64 0 - %237 = shufflevector <8 x i32> %236, <8 x i32> undef, <8 x i32> zeroinitializer - %238 = or <8 x i32> %237, - %239 = icmp sgt <8 x i32> %broadcast.splat14, %233 - %240 = icmp sgt <8 x i32> %broadcast.splat16, %238 - %241 = extractelement <8 x i64> %232, i32 0 - %242 = shl i64 %241, 32 - %243 = ashr exact i64 %242, 32 - %244 = getelementptr inbounds float, float* %7, i64 %243 - %245 = bitcast float* %244 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %245, i32 4, <8 x i1> %239), !tbaa !12, !llvm.access.group !16 - %246 = getelementptr inbounds float, float* %244, i64 8 - %247 = bitcast float* %246 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %247, i32 4, <8 x i1> %240), !tbaa !12, !llvm.access.group !16 - %248 = or <8 x i64> %broadcast.splat, - %249 = trunc <8 x i64> %248 to <8 x i32> - %250 = trunc i64 %mul.i.i.i to i32 - %251 = or i32 %250, 8 - %252 = insertelement <8 x i32> undef, i32 %251, i64 0 - %253 = shufflevector <8 x i32> %252, <8 x i32> undef, <8 x i32> zeroinitializer - %254 = or <8 x i32> %253, - %255 = icmp sgt <8 x i32> %broadcast.splat14, %249 - %256 = icmp sgt <8 x i32> %broadcast.splat16, %254 - %257 = extractelement <8 x i64> %248, i32 0 - %258 = shl i64 %257, 32 - %259 = ashr exact i64 %258, 32 - %260 = getelementptr inbounds float, float* %7, i64 %259 - %261 = bitcast float* %260 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %261, i32 4, <8 x i1> %255), !tbaa !12, !llvm.access.group !16 - %262 = getelementptr inbounds float, float* %260, i64 8 - %263 = bitcast float* %262 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %263, i32 4, <8 x i1> %256), !tbaa !12, !llvm.access.group !16 - %264 = or <8 x i64> %broadcast.splat, - %265 = trunc <8 x i64> %264 to <8 x i32> - %266 = trunc i64 %mul.i.i.i to i32 - %267 = or i32 %266, 8 - %268 = insertelement <8 x i32> undef, i32 %267, i64 0 - %269 = shufflevector <8 x i32> %268, <8 x i32> undef, <8 x i32> zeroinitializer - %270 = or <8 x i32> %269, - %271 = icmp sgt <8 x i32> %broadcast.splat14, %265 - %272 = icmp sgt <8 x i32> %broadcast.splat16, %270 - %273 = extractelement <8 x i64> %264, i32 0 - %274 = shl i64 %273, 32 - %275 = ashr exact i64 %274, 32 - %276 = getelementptr inbounds float, float* %7, i64 %275 - %277 = bitcast float* %276 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat18, <8 x float>* %277, i32 4, <8 x i1> %271), !tbaa !12, !llvm.access.group !16 - %278 = getelementptr inbounds float, float* %276, i64 8 - %279 = bitcast float* %278 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %broadcast.splat20, <8 x float>* %279, i32 4, <8 x i1> %272), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_mean_kernel.exit - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %283, %if.end.r_exit.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %18, %conv.i.i.us - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %if.then.i.i.us - %indvars.iv.next.i.i5.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %if.then.i.i.us ] - %add8.i.i2.us = phi float [ %add8.i.i.us, %for.body.i.i.us ], [ 0.000000e+00, %if.then.i.i.us ] - %280 = mul nsw i64 %indvars.iv.next.i.i5.us, %23 - %281 = add nsw i64 %280, %idxprom.i.i.us - %arrayidx5.i.i.us = getelementptr inbounds float, float* %10, i64 %281 - %282 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %add8.i.i.us = fadd float %add8.i.i2.us, %282 - store float %add8.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i5.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %for.end.loopexit.i.i.us, label %for.body.i.i.us, !llvm.loop !18 - -for.end.loopexit.i.i.us: ; preds = %for.body.i.i.us - %add8.i.i.us.lcssa = phi float [ %add8.i.i.us, %for.body.i.i.us ] - %div.i.i.us = fdiv float %add8.i.i.us.lcssa, %14, !fpmath !20 - store float %div.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %for.end.loopexit.i.i.us, %pregion_for_entry.entry.i.i.us - %283 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %283, 256 - br i1 %exitcond.not, label %_pocl_kernel_mean_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -_pocl_kernel_mean_kernel.exit.loopexit: ; preds = %if.end.r_exit.i.i.us - br label %_pocl_kernel_mean_kernel.exit - -_pocl_kernel_mean_kernel.exit: ; preds = %_pocl_kernel_mean_kernel.exit.loopexit, %pregion_for_entry.entry.i.i.preheader - ret void -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #2 - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind } -attributes #2 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !""} -!10 = !{!"mean", !"data", !"float_n", !"m", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = !{float 2.500000e+00} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/covariance_reduce.ll b/pocl_irs/covariance_reduce.ll deleted file mode 100644 index 38b9a7d..0000000 --- a/pocl_irs/covariance_reduce.ll +++ /dev/null @@ -1,4159 +0,0 @@ -; ModuleID = './NN/PIABNLMPBCCDKKLCJLPFFDMLPCEOMEAIPNGLL/reduce_kernel/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_reduce_kernel(float* nocapture readonly %0, float* nocapture %1, i32 %2, i32 %3, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %4, i64 %5, i64 %6, i64 %7) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { -pregion_for_entry.pregion_for_init.i: - %mul.i.i = shl i64 %5, 5 - %mul3.i.i = shl i64 %6, 3 - %conv2.i = trunc i64 %mul3.i.i to i32 - %cmp.i = icmp slt i32 %conv2.i, %3 - %mul.i = mul nsw i32 %conv2.i, %2 - br i1 %cmp.i, label %vector.scevcheck, label %pregion_for_end.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i - %8 = trunc i64 %6 to i32 - %9 = mul i32 %8, %2 - %10 = shl i32 %9, 3 - %11 = trunc i64 %5 to i32 - %12 = shl i32 %11, 5 - %13 = add i32 %10, %12 - %14 = icmp sgt i32 %13, 2147483616 - br i1 %14, label %pregion_for_entry.entry.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %15 = trunc i64 %5 to i32 - %16 = shl i32 %15, 5 - %17 = sext i32 %16 to i64 - %scevgep = getelementptr float, float* %0, i64 %17 - %18 = add nsw i64 %17, 32 - %scevgep4 = getelementptr float, float* %0, i64 %18 - %19 = trunc i64 %6 to i32 - %20 = mul i32 %19, %2 - %21 = shl i32 %20, 3 - %22 = add i32 %21, %16 - %23 = sext i32 %22 to i64 - %scevgep6 = getelementptr float, float* %1, i64 %23 - %24 = add nsw i64 %23, 32 - %scevgep8 = getelementptr float, float* %1, i64 %24 - %bound0 = icmp ult float* %scevgep, %scevgep8 - %bound1 = icmp ult float* %scevgep6, %scevgep4 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer - %25 = or <8 x i64> %broadcast.splat, - %26 = trunc <8 x i64> %25 to <8 x i32> - %27 = icmp sgt <8 x i32> %broadcast.splat11, %26 - %28 = extractelement <8 x i64> %25, i32 0 - %29 = shl i64 %28, 32 - %30 = ashr exact i64 %29, 32 - %31 = getelementptr inbounds float, float* %0, i64 %30 - %32 = bitcast float* %31 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %32, i32 4, <8 x i1> %27, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %33 = extractelement <8 x i32> %26, i32 0 - %34 = add nsw i32 %mul.i, %33 - %35 = sext i32 %34 to i64 - %36 = getelementptr inbounds float, float* %1, i64 %35 - %37 = bitcast float* %36 to <8 x float>* - %wide.masked.load12 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %37, i32 4, <8 x i1> %27, <8 x float> undef), !tbaa !12, !alias.scope !19 - %38 = fsub <8 x float> %wide.masked.load12, %wide.masked.load - %39 = bitcast float* %36 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %38, <8 x float>* %39, i32 4, <8 x i1> %27), !tbaa !12, !alias.scope !19, !llvm.access.group !21 - %40 = or <8 x i64> %broadcast.splat, - %41 = trunc <8 x i64> %40 to <8 x i32> - %42 = icmp sgt <8 x i32> %broadcast.splat11, %41 - %43 = extractelement <8 x i64> %40, i32 0 - %44 = shl i64 %43, 32 - %45 = ashr exact i64 %44, 32 - %46 = getelementptr inbounds float, float* %0, i64 %45 - %47 = bitcast float* %46 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %47, i32 4, <8 x i1> %42, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %48 = extractelement <8 x i32> %41, i32 0 - %49 = add nsw i32 %mul.i, %48 - %50 = sext i32 %49 to i64 - %51 = getelementptr inbounds float, float* %1, i64 %50 - %52 = bitcast float* %51 to <8 x float>* - %wide.masked.load12.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %52, i32 4, <8 x i1> %42, <8 x float> undef), !tbaa !12, !alias.scope !19 - %53 = fsub <8 x float> %wide.masked.load12.1, %wide.masked.load.1 - %54 = bitcast float* %51 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %53, <8 x float>* %54, i32 4, <8 x i1> %42), !tbaa !12, !alias.scope !19, !llvm.access.group !21 - %55 = or <8 x i64> %broadcast.splat, - %56 = trunc <8 x i64> %55 to <8 x i32> - %57 = icmp sgt <8 x i32> %broadcast.splat11, %56 - %58 = extractelement <8 x i64> %55, i32 0 - %59 = shl i64 %58, 32 - %60 = ashr exact i64 %59, 32 - %61 = getelementptr inbounds float, float* %0, i64 %60 - %62 = bitcast float* %61 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %62, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %63 = extractelement <8 x i32> %56, i32 0 - %64 = add nsw i32 %mul.i, %63 - %65 = sext i32 %64 to i64 - %66 = getelementptr inbounds float, float* %1, i64 %65 - %67 = bitcast float* %66 to <8 x float>* - %wide.masked.load12.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %67, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12, !alias.scope !19 - %68 = fsub <8 x float> %wide.masked.load12.2, %wide.masked.load.2 - %69 = bitcast float* %66 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %68, <8 x float>* %69, i32 4, <8 x i1> %57), !tbaa !12, !alias.scope !19, !llvm.access.group !21 - %70 = or <8 x i64> %broadcast.splat, - %71 = trunc <8 x i64> %70 to <8 x i32> - %72 = icmp sgt <8 x i32> %broadcast.splat11, %71 - %73 = extractelement <8 x i64> %70, i32 0 - %74 = shl i64 %73, 32 - %75 = ashr exact i64 %74, 32 - %76 = getelementptr inbounds float, float* %0, i64 %75 - %77 = bitcast float* %76 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %77, i32 4, <8 x i1> %72, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %78 = extractelement <8 x i32> %71, i32 0 - %79 = add nsw i32 %mul.i, %78 - %80 = sext i32 %79 to i64 - %81 = getelementptr inbounds float, float* %1, i64 %80 - %82 = bitcast float* %81 to <8 x float>* - %wide.masked.load12.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %82, i32 4, <8 x i1> %72, <8 x float> undef), !tbaa !12, !alias.scope !19 - %83 = fsub <8 x float> %wide.masked.load12.3, %wide.masked.load.3 - %84 = bitcast float* %81 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %83, <8 x float>* %84, i32 4, <8 x i1> %72), !tbaa !12, !alias.scope !19, !llvm.access.group !21 - br label %pregion_for_end.i - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us.1279, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.us.preheader ], [ %650, %if.end.r_exit.i.us.1279 ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %2 - br i1 %cmp4.i.us, label %if.then.i.us, label %if.end.r_exit.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %sext.i.us = shl i64 %add1.i.i.us, 32 - %idxprom.i.us = ashr exact i64 %sext.i.us, 32 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us - %85 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %add.i.us = add nsw i32 %mul.i, %conv.i.us - %idxprom6.i.us = sext i32 %add.i.us to i64 - %arrayidx7.i.us = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us - %86 = load float, float* %arrayidx7.i.us, align 4, !tbaa !12 - %sub.i.us = fsub float %86, %85 - store float %sub.i.us, float* %arrayidx7.i.us, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us - %87 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1267 = add nuw nsw i64 %87, %mul.i.i - %conv.i.us.1268 = trunc i64 %add1.i.i.us.1267 to i32 - %cmp4.i.us.1269 = icmp slt i32 %conv.i.us.1268, %2 - br i1 %cmp4.i.us.1269, label %if.then.i.us.1278, label %if.end.r_exit.i.us.1279 - -pregion_for_end.i.loopexit: ; preds = %if.end.r_exit.i.us.1279 - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i - %88 = trunc i64 %mul3.i.i to i32 - %conv2.i.1 = or i32 %88, 1 - %cmp.i.1 = icmp slt i32 %conv2.i.1, %3 - %mul.i.1 = mul nsw i32 %conv2.i.1, %2 - br i1 %cmp.i.1, label %vector.scevcheck20, label %pregion_for_end.i.1 - -vector.scevcheck20: ; preds = %pregion_for_end.i - %89 = mul i32 %conv2.i.1, %2 - %90 = trunc i64 %5 to i32 - %91 = shl i32 %90, 5 - %92 = add i32 %89, %91 - %93 = icmp sgt i32 %92, 2147483616 - br i1 %93, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.memcheck34 - -pregion_for_entry.entry.i.us.1.preheader: ; preds = %vector.memcheck34, %vector.scevcheck20 - br label %pregion_for_entry.entry.i.us.1 - -vector.memcheck34: ; preds = %vector.scevcheck20 - %94 = trunc i64 %5 to i32 - %95 = shl i32 %94, 5 - %96 = sext i32 %95 to i64 - %scevgep22 = getelementptr float, float* %0, i64 %96 - %97 = add nsw i64 %96, 32 - %scevgep24 = getelementptr float, float* %0, i64 %97 - %98 = mul i32 %conv2.i.1, %2 - %99 = add i32 %98, %95 - %100 = sext i32 %99 to i64 - %scevgep26 = getelementptr float, float* %1, i64 %100 - %101 = add nsw i64 %100, 32 - %scevgep28 = getelementptr float, float* %1, i64 %101 - %bound030 = icmp ult float* %scevgep22, %scevgep28 - %bound131 = icmp ult float* %scevgep26, %scevgep24 - %found.conflict32 = and i1 %bound030, %bound131 - br i1 %found.conflict32, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.ph35 - -vector.ph35: ; preds = %vector.memcheck34 - %broadcast.splatinsert42 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat43 = shufflevector <8 x i64> %broadcast.splatinsert42, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert44 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat45 = shufflevector <8 x i32> %broadcast.splatinsert44, <8 x i32> undef, <8 x i32> zeroinitializer - %102 = or <8 x i64> %broadcast.splat43, - %103 = trunc <8 x i64> %102 to <8 x i32> - %104 = icmp sgt <8 x i32> %broadcast.splat45, %103 - %105 = extractelement <8 x i64> %102, i32 0 - %106 = shl i64 %105, 32 - %107 = ashr exact i64 %106, 32 - %108 = getelementptr inbounds float, float* %0, i64 %107 - %109 = bitcast float* %108 to <8 x float>* - %wide.masked.load46 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %109, i32 4, <8 x i1> %104, <8 x float> undef), !tbaa !12, !alias.scope !24, !noalias !27 - %110 = extractelement <8 x i32> %103, i32 0 - %111 = add nsw i32 %mul.i.1, %110 - %112 = sext i32 %111 to i64 - %113 = getelementptr inbounds float, float* %1, i64 %112 - %114 = bitcast float* %113 to <8 x float>* - %wide.masked.load47 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %114, i32 4, <8 x i1> %104, <8 x float> undef), !tbaa !12, !alias.scope !27 - %115 = fsub <8 x float> %wide.masked.load47, %wide.masked.load46 - %116 = bitcast float* %113 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %115, <8 x float>* %116, i32 4, <8 x i1> %104), !tbaa !12, !alias.scope !27, !llvm.access.group !21 - %117 = or <8 x i64> %broadcast.splat43, - %118 = trunc <8 x i64> %117 to <8 x i32> - %119 = icmp sgt <8 x i32> %broadcast.splat45, %118 - %120 = extractelement <8 x i64> %117, i32 0 - %121 = shl i64 %120, 32 - %122 = ashr exact i64 %121, 32 - %123 = getelementptr inbounds float, float* %0, i64 %122 - %124 = bitcast float* %123 to <8 x float>* - %wide.masked.load46.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %124, i32 4, <8 x i1> %119, <8 x float> undef), !tbaa !12, !alias.scope !24, !noalias !27 - %125 = extractelement <8 x i32> %118, i32 0 - %126 = add nsw i32 %mul.i.1, %125 - %127 = sext i32 %126 to i64 - %128 = getelementptr inbounds float, float* %1, i64 %127 - %129 = bitcast float* %128 to <8 x float>* - %wide.masked.load47.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %129, i32 4, <8 x i1> %119, <8 x float> undef), !tbaa !12, !alias.scope !27 - %130 = fsub <8 x float> %wide.masked.load47.1, %wide.masked.load46.1 - %131 = bitcast float* %128 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %130, <8 x float>* %131, i32 4, <8 x i1> %119), !tbaa !12, !alias.scope !27, !llvm.access.group !21 - %132 = or <8 x i64> %broadcast.splat43, - %133 = trunc <8 x i64> %132 to <8 x i32> - %134 = icmp sgt <8 x i32> %broadcast.splat45, %133 - %135 = extractelement <8 x i64> %132, i32 0 - %136 = shl i64 %135, 32 - %137 = ashr exact i64 %136, 32 - %138 = getelementptr inbounds float, float* %0, i64 %137 - %139 = bitcast float* %138 to <8 x float>* - %wide.masked.load46.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %139, i32 4, <8 x i1> %134, <8 x float> undef), !tbaa !12, !alias.scope !24, !noalias !27 - %140 = extractelement <8 x i32> %133, i32 0 - %141 = add nsw i32 %mul.i.1, %140 - %142 = sext i32 %141 to i64 - %143 = getelementptr inbounds float, float* %1, i64 %142 - %144 = bitcast float* %143 to <8 x float>* - %wide.masked.load47.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %144, i32 4, <8 x i1> %134, <8 x float> undef), !tbaa !12, !alias.scope !27 - %145 = fsub <8 x float> %wide.masked.load47.2, %wide.masked.load46.2 - %146 = bitcast float* %143 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %145, <8 x float>* %146, i32 4, <8 x i1> %134), !tbaa !12, !alias.scope !27, !llvm.access.group !21 - %147 = or <8 x i64> %broadcast.splat43, - %148 = trunc <8 x i64> %147 to <8 x i32> - %149 = icmp sgt <8 x i32> %broadcast.splat45, %148 - %150 = extractelement <8 x i64> %147, i32 0 - %151 = shl i64 %150, 32 - %152 = ashr exact i64 %151, 32 - %153 = getelementptr inbounds float, float* %0, i64 %152 - %154 = bitcast float* %153 to <8 x float>* - %wide.masked.load46.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %154, i32 4, <8 x i1> %149, <8 x float> undef), !tbaa !12, !alias.scope !24, !noalias !27 - %155 = extractelement <8 x i32> %148, i32 0 - %156 = add nsw i32 %mul.i.1, %155 - %157 = sext i32 %156 to i64 - %158 = getelementptr inbounds float, float* %1, i64 %157 - %159 = bitcast float* %158 to <8 x float>* - %wide.masked.load47.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %159, i32 4, <8 x i1> %149, <8 x float> undef), !tbaa !12, !alias.scope !27 - %160 = fsub <8 x float> %wide.masked.load47.3, %wide.masked.load46.3 - %161 = bitcast float* %158 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %160, <8 x float>* %161, i32 4, <8 x i1> %149), !tbaa !12, !alias.scope !27, !llvm.access.group !21 - br label %pregion_for_end.i.1 - -pregion_for_entry.entry.i.us.1: ; preds = %if.end.r_exit.i.us.1.1, %pregion_for_entry.entry.i.us.1.preheader - %_local_id_x.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.us.1.preheader ], [ %647, %if.end.r_exit.i.us.1.1 ] - %add1.i.i.us.1 = add nuw nsw i64 %_local_id_x.0.us.1, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp4.i.us.1 = icmp slt i32 %conv.i.us.1, %2 - br i1 %cmp4.i.us.1, label %if.then.i.us.1, label %if.end.r_exit.i.us.1 - -if.then.i.us.1: ; preds = %pregion_for_entry.entry.i.us.1 - %sext.i.us.1 = shl i64 %add1.i.i.us.1, 32 - %idxprom.i.us.1 = ashr exact i64 %sext.i.us.1, 32 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1 - %162 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %add.i.us.1 = add nsw i32 %mul.i.1, %conv.i.us.1 - %idxprom6.i.us.1 = sext i32 %add.i.us.1 to i64 - %arrayidx7.i.us.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.1 - %163 = load float, float* %arrayidx7.i.us.1, align 4, !tbaa !12 - %sub.i.us.1 = fsub float %163, %162 - store float %sub.i.us.1, float* %arrayidx7.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.1 - -if.end.r_exit.i.us.1: ; preds = %if.then.i.us.1, %pregion_for_entry.entry.i.us.1 - %164 = or i64 %_local_id_x.0.us.1, 1 - %add1.i.i.us.1.1 = add nuw nsw i64 %164, %mul.i.i - %conv.i.us.1.1 = trunc i64 %add1.i.i.us.1.1 to i32 - %cmp4.i.us.1.1 = icmp slt i32 %conv.i.us.1.1, %2 - br i1 %cmp4.i.us.1.1, label %if.then.i.us.1.1, label %if.end.r_exit.i.us.1.1 - -pregion_for_end.i.1.loopexit: ; preds = %if.end.r_exit.i.us.1.1 - br label %pregion_for_end.i.1 - -pregion_for_end.i.1: ; preds = %pregion_for_end.i.1.loopexit, %vector.ph35, %pregion_for_end.i - %165 = trunc i64 %mul3.i.i to i32 - %conv2.i.2 = or i32 %165, 2 - %cmp.i.2 = icmp slt i32 %conv2.i.2, %3 - %mul.i.2 = mul nsw i32 %conv2.i.2, %2 - br i1 %cmp.i.2, label %vector.scevcheck55, label %pregion_for_end.i.2 - -vector.scevcheck55: ; preds = %pregion_for_end.i.1 - %166 = mul i32 %conv2.i.2, %2 - %167 = trunc i64 %5 to i32 - %168 = shl i32 %167, 5 - %169 = add i32 %166, %168 - %170 = icmp sgt i32 %169, 2147483616 - br i1 %170, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.memcheck69 - -pregion_for_entry.entry.i.us.2.preheader: ; preds = %vector.memcheck69, %vector.scevcheck55 - br label %pregion_for_entry.entry.i.us.2 - -vector.memcheck69: ; preds = %vector.scevcheck55 - %171 = trunc i64 %5 to i32 - %172 = shl i32 %171, 5 - %173 = sext i32 %172 to i64 - %scevgep57 = getelementptr float, float* %0, i64 %173 - %174 = add nsw i64 %173, 32 - %scevgep59 = getelementptr float, float* %0, i64 %174 - %175 = mul i32 %conv2.i.2, %2 - %176 = add i32 %175, %172 - %177 = sext i32 %176 to i64 - %scevgep61 = getelementptr float, float* %1, i64 %177 - %178 = add nsw i64 %177, 32 - %scevgep63 = getelementptr float, float* %1, i64 %178 - %bound065 = icmp ult float* %scevgep57, %scevgep63 - %bound166 = icmp ult float* %scevgep61, %scevgep59 - %found.conflict67 = and i1 %bound065, %bound166 - br i1 %found.conflict67, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.ph70 - -vector.ph70: ; preds = %vector.memcheck69 - %broadcast.splatinsert77 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat78 = shufflevector <8 x i64> %broadcast.splatinsert77, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert79 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat80 = shufflevector <8 x i32> %broadcast.splatinsert79, <8 x i32> undef, <8 x i32> zeroinitializer - %179 = or <8 x i64> %broadcast.splat78, - %180 = trunc <8 x i64> %179 to <8 x i32> - %181 = icmp sgt <8 x i32> %broadcast.splat80, %180 - %182 = extractelement <8 x i64> %179, i32 0 - %183 = shl i64 %182, 32 - %184 = ashr exact i64 %183, 32 - %185 = getelementptr inbounds float, float* %0, i64 %184 - %186 = bitcast float* %185 to <8 x float>* - %wide.masked.load81 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %186, i32 4, <8 x i1> %181, <8 x float> undef), !tbaa !12, !alias.scope !29, !noalias !32 - %187 = extractelement <8 x i32> %180, i32 0 - %188 = add nsw i32 %mul.i.2, %187 - %189 = sext i32 %188 to i64 - %190 = getelementptr inbounds float, float* %1, i64 %189 - %191 = bitcast float* %190 to <8 x float>* - %wide.masked.load82 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %191, i32 4, <8 x i1> %181, <8 x float> undef), !tbaa !12, !alias.scope !32 - %192 = fsub <8 x float> %wide.masked.load82, %wide.masked.load81 - %193 = bitcast float* %190 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %192, <8 x float>* %193, i32 4, <8 x i1> %181), !tbaa !12, !alias.scope !32, !llvm.access.group !21 - %194 = or <8 x i64> %broadcast.splat78, - %195 = trunc <8 x i64> %194 to <8 x i32> - %196 = icmp sgt <8 x i32> %broadcast.splat80, %195 - %197 = extractelement <8 x i64> %194, i32 0 - %198 = shl i64 %197, 32 - %199 = ashr exact i64 %198, 32 - %200 = getelementptr inbounds float, float* %0, i64 %199 - %201 = bitcast float* %200 to <8 x float>* - %wide.masked.load81.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %201, i32 4, <8 x i1> %196, <8 x float> undef), !tbaa !12, !alias.scope !29, !noalias !32 - %202 = extractelement <8 x i32> %195, i32 0 - %203 = add nsw i32 %mul.i.2, %202 - %204 = sext i32 %203 to i64 - %205 = getelementptr inbounds float, float* %1, i64 %204 - %206 = bitcast float* %205 to <8 x float>* - %wide.masked.load82.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %206, i32 4, <8 x i1> %196, <8 x float> undef), !tbaa !12, !alias.scope !32 - %207 = fsub <8 x float> %wide.masked.load82.1, %wide.masked.load81.1 - %208 = bitcast float* %205 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %207, <8 x float>* %208, i32 4, <8 x i1> %196), !tbaa !12, !alias.scope !32, !llvm.access.group !21 - %209 = or <8 x i64> %broadcast.splat78, - %210 = trunc <8 x i64> %209 to <8 x i32> - %211 = icmp sgt <8 x i32> %broadcast.splat80, %210 - %212 = extractelement <8 x i64> %209, i32 0 - %213 = shl i64 %212, 32 - %214 = ashr exact i64 %213, 32 - %215 = getelementptr inbounds float, float* %0, i64 %214 - %216 = bitcast float* %215 to <8 x float>* - %wide.masked.load81.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %216, i32 4, <8 x i1> %211, <8 x float> undef), !tbaa !12, !alias.scope !29, !noalias !32 - %217 = extractelement <8 x i32> %210, i32 0 - %218 = add nsw i32 %mul.i.2, %217 - %219 = sext i32 %218 to i64 - %220 = getelementptr inbounds float, float* %1, i64 %219 - %221 = bitcast float* %220 to <8 x float>* - %wide.masked.load82.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %221, i32 4, <8 x i1> %211, <8 x float> undef), !tbaa !12, !alias.scope !32 - %222 = fsub <8 x float> %wide.masked.load82.2, %wide.masked.load81.2 - %223 = bitcast float* %220 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %222, <8 x float>* %223, i32 4, <8 x i1> %211), !tbaa !12, !alias.scope !32, !llvm.access.group !21 - %224 = or <8 x i64> %broadcast.splat78, - %225 = trunc <8 x i64> %224 to <8 x i32> - %226 = icmp sgt <8 x i32> %broadcast.splat80, %225 - %227 = extractelement <8 x i64> %224, i32 0 - %228 = shl i64 %227, 32 - %229 = ashr exact i64 %228, 32 - %230 = getelementptr inbounds float, float* %0, i64 %229 - %231 = bitcast float* %230 to <8 x float>* - %wide.masked.load81.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %231, i32 4, <8 x i1> %226, <8 x float> undef), !tbaa !12, !alias.scope !29, !noalias !32 - %232 = extractelement <8 x i32> %225, i32 0 - %233 = add nsw i32 %mul.i.2, %232 - %234 = sext i32 %233 to i64 - %235 = getelementptr inbounds float, float* %1, i64 %234 - %236 = bitcast float* %235 to <8 x float>* - %wide.masked.load82.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %236, i32 4, <8 x i1> %226, <8 x float> undef), !tbaa !12, !alias.scope !32 - %237 = fsub <8 x float> %wide.masked.load82.3, %wide.masked.load81.3 - %238 = bitcast float* %235 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %237, <8 x float>* %238, i32 4, <8 x i1> %226), !tbaa !12, !alias.scope !32, !llvm.access.group !21 - br label %pregion_for_end.i.2 - -pregion_for_entry.entry.i.us.2: ; preds = %if.end.r_exit.i.us.2.1, %pregion_for_entry.entry.i.us.2.preheader - %_local_id_x.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.us.2.preheader ], [ %644, %if.end.r_exit.i.us.2.1 ] - %add1.i.i.us.2 = add nuw nsw i64 %_local_id_x.0.us.2, %mul.i.i - %conv.i.us.2 = trunc i64 %add1.i.i.us.2 to i32 - %cmp4.i.us.2 = icmp slt i32 %conv.i.us.2, %2 - br i1 %cmp4.i.us.2, label %if.then.i.us.2, label %if.end.r_exit.i.us.2 - -if.then.i.us.2: ; preds = %pregion_for_entry.entry.i.us.2 - %sext.i.us.2 = shl i64 %add1.i.i.us.2, 32 - %idxprom.i.us.2 = ashr exact i64 %sext.i.us.2, 32 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.2 - %239 = load float, float* %arrayidx.i.us.2, align 4, !tbaa !12 - %add.i.us.2 = add nsw i32 %mul.i.2, %conv.i.us.2 - %idxprom6.i.us.2 = sext i32 %add.i.us.2 to i64 - %arrayidx7.i.us.2 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.2 - %240 = load float, float* %arrayidx7.i.us.2, align 4, !tbaa !12 - %sub.i.us.2 = fsub float %240, %239 - store float %sub.i.us.2, float* %arrayidx7.i.us.2, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.2 - -if.end.r_exit.i.us.2: ; preds = %if.then.i.us.2, %pregion_for_entry.entry.i.us.2 - %241 = or i64 %_local_id_x.0.us.2, 1 - %add1.i.i.us.2.1 = add nuw nsw i64 %241, %mul.i.i - %conv.i.us.2.1 = trunc i64 %add1.i.i.us.2.1 to i32 - %cmp4.i.us.2.1 = icmp slt i32 %conv.i.us.2.1, %2 - br i1 %cmp4.i.us.2.1, label %if.then.i.us.2.1, label %if.end.r_exit.i.us.2.1 - -pregion_for_end.i.2.loopexit: ; preds = %if.end.r_exit.i.us.2.1 - br label %pregion_for_end.i.2 - -pregion_for_end.i.2: ; preds = %pregion_for_end.i.2.loopexit, %vector.ph70, %pregion_for_end.i.1 - %242 = trunc i64 %mul3.i.i to i32 - %conv2.i.3 = or i32 %242, 3 - %cmp.i.3 = icmp slt i32 %conv2.i.3, %3 - %mul.i.3 = mul nsw i32 %conv2.i.3, %2 - br i1 %cmp.i.3, label %vector.scevcheck90, label %pregion_for_end.i.3 - -vector.scevcheck90: ; preds = %pregion_for_end.i.2 - %243 = mul i32 %conv2.i.3, %2 - %244 = trunc i64 %5 to i32 - %245 = shl i32 %244, 5 - %246 = add i32 %243, %245 - %247 = icmp sgt i32 %246, 2147483616 - br i1 %247, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.memcheck104 - -pregion_for_entry.entry.i.us.3.preheader: ; preds = %vector.memcheck104, %vector.scevcheck90 - br label %pregion_for_entry.entry.i.us.3 - -vector.memcheck104: ; preds = %vector.scevcheck90 - %248 = trunc i64 %5 to i32 - %249 = shl i32 %248, 5 - %250 = sext i32 %249 to i64 - %scevgep92 = getelementptr float, float* %0, i64 %250 - %251 = add nsw i64 %250, 32 - %scevgep94 = getelementptr float, float* %0, i64 %251 - %252 = mul i32 %conv2.i.3, %2 - %253 = add i32 %252, %249 - %254 = sext i32 %253 to i64 - %scevgep96 = getelementptr float, float* %1, i64 %254 - %255 = add nsw i64 %254, 32 - %scevgep98 = getelementptr float, float* %1, i64 %255 - %bound0100 = icmp ult float* %scevgep92, %scevgep98 - %bound1101 = icmp ult float* %scevgep96, %scevgep94 - %found.conflict102 = and i1 %bound0100, %bound1101 - br i1 %found.conflict102, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.ph105 - -vector.ph105: ; preds = %vector.memcheck104 - %broadcast.splatinsert112 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat113 = shufflevector <8 x i64> %broadcast.splatinsert112, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert114 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat115 = shufflevector <8 x i32> %broadcast.splatinsert114, <8 x i32> undef, <8 x i32> zeroinitializer - %256 = or <8 x i64> %broadcast.splat113, - %257 = trunc <8 x i64> %256 to <8 x i32> - %258 = icmp sgt <8 x i32> %broadcast.splat115, %257 - %259 = extractelement <8 x i64> %256, i32 0 - %260 = shl i64 %259, 32 - %261 = ashr exact i64 %260, 32 - %262 = getelementptr inbounds float, float* %0, i64 %261 - %263 = bitcast float* %262 to <8 x float>* - %wide.masked.load116 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %263, i32 4, <8 x i1> %258, <8 x float> undef), !tbaa !12, !alias.scope !34, !noalias !37 - %264 = extractelement <8 x i32> %257, i32 0 - %265 = add nsw i32 %mul.i.3, %264 - %266 = sext i32 %265 to i64 - %267 = getelementptr inbounds float, float* %1, i64 %266 - %268 = bitcast float* %267 to <8 x float>* - %wide.masked.load117 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %268, i32 4, <8 x i1> %258, <8 x float> undef), !tbaa !12, !alias.scope !37 - %269 = fsub <8 x float> %wide.masked.load117, %wide.masked.load116 - %270 = bitcast float* %267 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %269, <8 x float>* %270, i32 4, <8 x i1> %258), !tbaa !12, !alias.scope !37, !llvm.access.group !21 - %271 = or <8 x i64> %broadcast.splat113, - %272 = trunc <8 x i64> %271 to <8 x i32> - %273 = icmp sgt <8 x i32> %broadcast.splat115, %272 - %274 = extractelement <8 x i64> %271, i32 0 - %275 = shl i64 %274, 32 - %276 = ashr exact i64 %275, 32 - %277 = getelementptr inbounds float, float* %0, i64 %276 - %278 = bitcast float* %277 to <8 x float>* - %wide.masked.load116.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %278, i32 4, <8 x i1> %273, <8 x float> undef), !tbaa !12, !alias.scope !34, !noalias !37 - %279 = extractelement <8 x i32> %272, i32 0 - %280 = add nsw i32 %mul.i.3, %279 - %281 = sext i32 %280 to i64 - %282 = getelementptr inbounds float, float* %1, i64 %281 - %283 = bitcast float* %282 to <8 x float>* - %wide.masked.load117.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %283, i32 4, <8 x i1> %273, <8 x float> undef), !tbaa !12, !alias.scope !37 - %284 = fsub <8 x float> %wide.masked.load117.1, %wide.masked.load116.1 - %285 = bitcast float* %282 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %284, <8 x float>* %285, i32 4, <8 x i1> %273), !tbaa !12, !alias.scope !37, !llvm.access.group !21 - %286 = or <8 x i64> %broadcast.splat113, - %287 = trunc <8 x i64> %286 to <8 x i32> - %288 = icmp sgt <8 x i32> %broadcast.splat115, %287 - %289 = extractelement <8 x i64> %286, i32 0 - %290 = shl i64 %289, 32 - %291 = ashr exact i64 %290, 32 - %292 = getelementptr inbounds float, float* %0, i64 %291 - %293 = bitcast float* %292 to <8 x float>* - %wide.masked.load116.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %293, i32 4, <8 x i1> %288, <8 x float> undef), !tbaa !12, !alias.scope !34, !noalias !37 - %294 = extractelement <8 x i32> %287, i32 0 - %295 = add nsw i32 %mul.i.3, %294 - %296 = sext i32 %295 to i64 - %297 = getelementptr inbounds float, float* %1, i64 %296 - %298 = bitcast float* %297 to <8 x float>* - %wide.masked.load117.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %298, i32 4, <8 x i1> %288, <8 x float> undef), !tbaa !12, !alias.scope !37 - %299 = fsub <8 x float> %wide.masked.load117.2, %wide.masked.load116.2 - %300 = bitcast float* %297 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %299, <8 x float>* %300, i32 4, <8 x i1> %288), !tbaa !12, !alias.scope !37, !llvm.access.group !21 - %301 = or <8 x i64> %broadcast.splat113, - %302 = trunc <8 x i64> %301 to <8 x i32> - %303 = icmp sgt <8 x i32> %broadcast.splat115, %302 - %304 = extractelement <8 x i64> %301, i32 0 - %305 = shl i64 %304, 32 - %306 = ashr exact i64 %305, 32 - %307 = getelementptr inbounds float, float* %0, i64 %306 - %308 = bitcast float* %307 to <8 x float>* - %wide.masked.load116.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %308, i32 4, <8 x i1> %303, <8 x float> undef), !tbaa !12, !alias.scope !34, !noalias !37 - %309 = extractelement <8 x i32> %302, i32 0 - %310 = add nsw i32 %mul.i.3, %309 - %311 = sext i32 %310 to i64 - %312 = getelementptr inbounds float, float* %1, i64 %311 - %313 = bitcast float* %312 to <8 x float>* - %wide.masked.load117.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %313, i32 4, <8 x i1> %303, <8 x float> undef), !tbaa !12, !alias.scope !37 - %314 = fsub <8 x float> %wide.masked.load117.3, %wide.masked.load116.3 - %315 = bitcast float* %312 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %314, <8 x float>* %315, i32 4, <8 x i1> %303), !tbaa !12, !alias.scope !37, !llvm.access.group !21 - br label %pregion_for_end.i.3 - -pregion_for_entry.entry.i.us.3: ; preds = %if.end.r_exit.i.us.3.1, %pregion_for_entry.entry.i.us.3.preheader - %_local_id_x.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.us.3.preheader ], [ %641, %if.end.r_exit.i.us.3.1 ] - %add1.i.i.us.3 = add nuw nsw i64 %_local_id_x.0.us.3, %mul.i.i - %conv.i.us.3 = trunc i64 %add1.i.i.us.3 to i32 - %cmp4.i.us.3 = icmp slt i32 %conv.i.us.3, %2 - br i1 %cmp4.i.us.3, label %if.then.i.us.3, label %if.end.r_exit.i.us.3 - -if.then.i.us.3: ; preds = %pregion_for_entry.entry.i.us.3 - %sext.i.us.3 = shl i64 %add1.i.i.us.3, 32 - %idxprom.i.us.3 = ashr exact i64 %sext.i.us.3, 32 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.3 - %316 = load float, float* %arrayidx.i.us.3, align 4, !tbaa !12 - %add.i.us.3 = add nsw i32 %mul.i.3, %conv.i.us.3 - %idxprom6.i.us.3 = sext i32 %add.i.us.3 to i64 - %arrayidx7.i.us.3 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.3 - %317 = load float, float* %arrayidx7.i.us.3, align 4, !tbaa !12 - %sub.i.us.3 = fsub float %317, %316 - store float %sub.i.us.3, float* %arrayidx7.i.us.3, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.3 - -if.end.r_exit.i.us.3: ; preds = %if.then.i.us.3, %pregion_for_entry.entry.i.us.3 - %318 = or i64 %_local_id_x.0.us.3, 1 - %add1.i.i.us.3.1 = add nuw nsw i64 %318, %mul.i.i - %conv.i.us.3.1 = trunc i64 %add1.i.i.us.3.1 to i32 - %cmp4.i.us.3.1 = icmp slt i32 %conv.i.us.3.1, %2 - br i1 %cmp4.i.us.3.1, label %if.then.i.us.3.1, label %if.end.r_exit.i.us.3.1 - -pregion_for_end.i.3.loopexit: ; preds = %if.end.r_exit.i.us.3.1 - br label %pregion_for_end.i.3 - -pregion_for_end.i.3: ; preds = %pregion_for_end.i.3.loopexit, %vector.ph105, %pregion_for_end.i.2 - %319 = trunc i64 %mul3.i.i to i32 - %conv2.i.4 = or i32 %319, 4 - %cmp.i.4 = icmp slt i32 %conv2.i.4, %3 - %mul.i.4 = mul nsw i32 %conv2.i.4, %2 - br i1 %cmp.i.4, label %vector.scevcheck125, label %pregion_for_end.i.4 - -vector.scevcheck125: ; preds = %pregion_for_end.i.3 - %320 = mul i32 %conv2.i.4, %2 - %321 = trunc i64 %5 to i32 - %322 = shl i32 %321, 5 - %323 = add i32 %320, %322 - %324 = icmp sgt i32 %323, 2147483616 - br i1 %324, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.memcheck139 - -pregion_for_entry.entry.i.us.4.preheader: ; preds = %vector.memcheck139, %vector.scevcheck125 - br label %pregion_for_entry.entry.i.us.4 - -vector.memcheck139: ; preds = %vector.scevcheck125 - %325 = trunc i64 %5 to i32 - %326 = shl i32 %325, 5 - %327 = sext i32 %326 to i64 - %scevgep127 = getelementptr float, float* %0, i64 %327 - %328 = add nsw i64 %327, 32 - %scevgep129 = getelementptr float, float* %0, i64 %328 - %329 = mul i32 %conv2.i.4, %2 - %330 = add i32 %329, %326 - %331 = sext i32 %330 to i64 - %scevgep131 = getelementptr float, float* %1, i64 %331 - %332 = add nsw i64 %331, 32 - %scevgep133 = getelementptr float, float* %1, i64 %332 - %bound0135 = icmp ult float* %scevgep127, %scevgep133 - %bound1136 = icmp ult float* %scevgep131, %scevgep129 - %found.conflict137 = and i1 %bound0135, %bound1136 - br i1 %found.conflict137, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.ph140 - -vector.ph140: ; preds = %vector.memcheck139 - %broadcast.splatinsert147 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat148 = shufflevector <8 x i64> %broadcast.splatinsert147, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert149 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat150 = shufflevector <8 x i32> %broadcast.splatinsert149, <8 x i32> undef, <8 x i32> zeroinitializer - %333 = or <8 x i64> %broadcast.splat148, - %334 = trunc <8 x i64> %333 to <8 x i32> - %335 = icmp sgt <8 x i32> %broadcast.splat150, %334 - %336 = extractelement <8 x i64> %333, i32 0 - %337 = shl i64 %336, 32 - %338 = ashr exact i64 %337, 32 - %339 = getelementptr inbounds float, float* %0, i64 %338 - %340 = bitcast float* %339 to <8 x float>* - %wide.masked.load151 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %340, i32 4, <8 x i1> %335, <8 x float> undef), !tbaa !12, !alias.scope !39, !noalias !42 - %341 = extractelement <8 x i32> %334, i32 0 - %342 = add nsw i32 %mul.i.4, %341 - %343 = sext i32 %342 to i64 - %344 = getelementptr inbounds float, float* %1, i64 %343 - %345 = bitcast float* %344 to <8 x float>* - %wide.masked.load152 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %345, i32 4, <8 x i1> %335, <8 x float> undef), !tbaa !12, !alias.scope !42 - %346 = fsub <8 x float> %wide.masked.load152, %wide.masked.load151 - %347 = bitcast float* %344 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %346, <8 x float>* %347, i32 4, <8 x i1> %335), !tbaa !12, !alias.scope !42, !llvm.access.group !21 - %348 = or <8 x i64> %broadcast.splat148, - %349 = trunc <8 x i64> %348 to <8 x i32> - %350 = icmp sgt <8 x i32> %broadcast.splat150, %349 - %351 = extractelement <8 x i64> %348, i32 0 - %352 = shl i64 %351, 32 - %353 = ashr exact i64 %352, 32 - %354 = getelementptr inbounds float, float* %0, i64 %353 - %355 = bitcast float* %354 to <8 x float>* - %wide.masked.load151.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %355, i32 4, <8 x i1> %350, <8 x float> undef), !tbaa !12, !alias.scope !39, !noalias !42 - %356 = extractelement <8 x i32> %349, i32 0 - %357 = add nsw i32 %mul.i.4, %356 - %358 = sext i32 %357 to i64 - %359 = getelementptr inbounds float, float* %1, i64 %358 - %360 = bitcast float* %359 to <8 x float>* - %wide.masked.load152.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %360, i32 4, <8 x i1> %350, <8 x float> undef), !tbaa !12, !alias.scope !42 - %361 = fsub <8 x float> %wide.masked.load152.1, %wide.masked.load151.1 - %362 = bitcast float* %359 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %361, <8 x float>* %362, i32 4, <8 x i1> %350), !tbaa !12, !alias.scope !42, !llvm.access.group !21 - %363 = or <8 x i64> %broadcast.splat148, - %364 = trunc <8 x i64> %363 to <8 x i32> - %365 = icmp sgt <8 x i32> %broadcast.splat150, %364 - %366 = extractelement <8 x i64> %363, i32 0 - %367 = shl i64 %366, 32 - %368 = ashr exact i64 %367, 32 - %369 = getelementptr inbounds float, float* %0, i64 %368 - %370 = bitcast float* %369 to <8 x float>* - %wide.masked.load151.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %370, i32 4, <8 x i1> %365, <8 x float> undef), !tbaa !12, !alias.scope !39, !noalias !42 - %371 = extractelement <8 x i32> %364, i32 0 - %372 = add nsw i32 %mul.i.4, %371 - %373 = sext i32 %372 to i64 - %374 = getelementptr inbounds float, float* %1, i64 %373 - %375 = bitcast float* %374 to <8 x float>* - %wide.masked.load152.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %375, i32 4, <8 x i1> %365, <8 x float> undef), !tbaa !12, !alias.scope !42 - %376 = fsub <8 x float> %wide.masked.load152.2, %wide.masked.load151.2 - %377 = bitcast float* %374 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %376, <8 x float>* %377, i32 4, <8 x i1> %365), !tbaa !12, !alias.scope !42, !llvm.access.group !21 - %378 = or <8 x i64> %broadcast.splat148, - %379 = trunc <8 x i64> %378 to <8 x i32> - %380 = icmp sgt <8 x i32> %broadcast.splat150, %379 - %381 = extractelement <8 x i64> %378, i32 0 - %382 = shl i64 %381, 32 - %383 = ashr exact i64 %382, 32 - %384 = getelementptr inbounds float, float* %0, i64 %383 - %385 = bitcast float* %384 to <8 x float>* - %wide.masked.load151.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %385, i32 4, <8 x i1> %380, <8 x float> undef), !tbaa !12, !alias.scope !39, !noalias !42 - %386 = extractelement <8 x i32> %379, i32 0 - %387 = add nsw i32 %mul.i.4, %386 - %388 = sext i32 %387 to i64 - %389 = getelementptr inbounds float, float* %1, i64 %388 - %390 = bitcast float* %389 to <8 x float>* - %wide.masked.load152.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %390, i32 4, <8 x i1> %380, <8 x float> undef), !tbaa !12, !alias.scope !42 - %391 = fsub <8 x float> %wide.masked.load152.3, %wide.masked.load151.3 - %392 = bitcast float* %389 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %391, <8 x float>* %392, i32 4, <8 x i1> %380), !tbaa !12, !alias.scope !42, !llvm.access.group !21 - br label %pregion_for_end.i.4 - -pregion_for_entry.entry.i.us.4: ; preds = %if.end.r_exit.i.us.4.1, %pregion_for_entry.entry.i.us.4.preheader - %_local_id_x.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.us.4.preheader ], [ %638, %if.end.r_exit.i.us.4.1 ] - %add1.i.i.us.4 = add nuw nsw i64 %_local_id_x.0.us.4, %mul.i.i - %conv.i.us.4 = trunc i64 %add1.i.i.us.4 to i32 - %cmp4.i.us.4 = icmp slt i32 %conv.i.us.4, %2 - br i1 %cmp4.i.us.4, label %if.then.i.us.4, label %if.end.r_exit.i.us.4 - -if.then.i.us.4: ; preds = %pregion_for_entry.entry.i.us.4 - %sext.i.us.4 = shl i64 %add1.i.i.us.4, 32 - %idxprom.i.us.4 = ashr exact i64 %sext.i.us.4, 32 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.4 - %393 = load float, float* %arrayidx.i.us.4, align 4, !tbaa !12 - %add.i.us.4 = add nsw i32 %mul.i.4, %conv.i.us.4 - %idxprom6.i.us.4 = sext i32 %add.i.us.4 to i64 - %arrayidx7.i.us.4 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.4 - %394 = load float, float* %arrayidx7.i.us.4, align 4, !tbaa !12 - %sub.i.us.4 = fsub float %394, %393 - store float %sub.i.us.4, float* %arrayidx7.i.us.4, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.4 - -if.end.r_exit.i.us.4: ; preds = %if.then.i.us.4, %pregion_for_entry.entry.i.us.4 - %395 = or i64 %_local_id_x.0.us.4, 1 - %add1.i.i.us.4.1 = add nuw nsw i64 %395, %mul.i.i - %conv.i.us.4.1 = trunc i64 %add1.i.i.us.4.1 to i32 - %cmp4.i.us.4.1 = icmp slt i32 %conv.i.us.4.1, %2 - br i1 %cmp4.i.us.4.1, label %if.then.i.us.4.1, label %if.end.r_exit.i.us.4.1 - -pregion_for_end.i.4.loopexit: ; preds = %if.end.r_exit.i.us.4.1 - br label %pregion_for_end.i.4 - -pregion_for_end.i.4: ; preds = %pregion_for_end.i.4.loopexit, %vector.ph140, %pregion_for_end.i.3 - %396 = trunc i64 %mul3.i.i to i32 - %conv2.i.5 = or i32 %396, 5 - %cmp.i.5 = icmp slt i32 %conv2.i.5, %3 - %mul.i.5 = mul nsw i32 %conv2.i.5, %2 - br i1 %cmp.i.5, label %vector.scevcheck160, label %pregion_for_end.i.5 - -vector.scevcheck160: ; preds = %pregion_for_end.i.4 - %397 = mul i32 %conv2.i.5, %2 - %398 = trunc i64 %5 to i32 - %399 = shl i32 %398, 5 - %400 = add i32 %397, %399 - %401 = icmp sgt i32 %400, 2147483616 - br i1 %401, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.memcheck174 - -pregion_for_entry.entry.i.us.5.preheader: ; preds = %vector.memcheck174, %vector.scevcheck160 - br label %pregion_for_entry.entry.i.us.5 - -vector.memcheck174: ; preds = %vector.scevcheck160 - %402 = trunc i64 %5 to i32 - %403 = shl i32 %402, 5 - %404 = sext i32 %403 to i64 - %scevgep162 = getelementptr float, float* %0, i64 %404 - %405 = add nsw i64 %404, 32 - %scevgep164 = getelementptr float, float* %0, i64 %405 - %406 = mul i32 %conv2.i.5, %2 - %407 = add i32 %406, %403 - %408 = sext i32 %407 to i64 - %scevgep166 = getelementptr float, float* %1, i64 %408 - %409 = add nsw i64 %408, 32 - %scevgep168 = getelementptr float, float* %1, i64 %409 - %bound0170 = icmp ult float* %scevgep162, %scevgep168 - %bound1171 = icmp ult float* %scevgep166, %scevgep164 - %found.conflict172 = and i1 %bound0170, %bound1171 - br i1 %found.conflict172, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.ph175 - -vector.ph175: ; preds = %vector.memcheck174 - %broadcast.splatinsert182 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat183 = shufflevector <8 x i64> %broadcast.splatinsert182, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert184 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat185 = shufflevector <8 x i32> %broadcast.splatinsert184, <8 x i32> undef, <8 x i32> zeroinitializer - %410 = or <8 x i64> %broadcast.splat183, - %411 = trunc <8 x i64> %410 to <8 x i32> - %412 = icmp sgt <8 x i32> %broadcast.splat185, %411 - %413 = extractelement <8 x i64> %410, i32 0 - %414 = shl i64 %413, 32 - %415 = ashr exact i64 %414, 32 - %416 = getelementptr inbounds float, float* %0, i64 %415 - %417 = bitcast float* %416 to <8 x float>* - %wide.masked.load186 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %417, i32 4, <8 x i1> %412, <8 x float> undef), !tbaa !12, !alias.scope !44, !noalias !47 - %418 = extractelement <8 x i32> %411, i32 0 - %419 = add nsw i32 %mul.i.5, %418 - %420 = sext i32 %419 to i64 - %421 = getelementptr inbounds float, float* %1, i64 %420 - %422 = bitcast float* %421 to <8 x float>* - %wide.masked.load187 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %422, i32 4, <8 x i1> %412, <8 x float> undef), !tbaa !12, !alias.scope !47 - %423 = fsub <8 x float> %wide.masked.load187, %wide.masked.load186 - %424 = bitcast float* %421 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %423, <8 x float>* %424, i32 4, <8 x i1> %412), !tbaa !12, !alias.scope !47, !llvm.access.group !21 - %425 = or <8 x i64> %broadcast.splat183, - %426 = trunc <8 x i64> %425 to <8 x i32> - %427 = icmp sgt <8 x i32> %broadcast.splat185, %426 - %428 = extractelement <8 x i64> %425, i32 0 - %429 = shl i64 %428, 32 - %430 = ashr exact i64 %429, 32 - %431 = getelementptr inbounds float, float* %0, i64 %430 - %432 = bitcast float* %431 to <8 x float>* - %wide.masked.load186.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %432, i32 4, <8 x i1> %427, <8 x float> undef), !tbaa !12, !alias.scope !44, !noalias !47 - %433 = extractelement <8 x i32> %426, i32 0 - %434 = add nsw i32 %mul.i.5, %433 - %435 = sext i32 %434 to i64 - %436 = getelementptr inbounds float, float* %1, i64 %435 - %437 = bitcast float* %436 to <8 x float>* - %wide.masked.load187.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %437, i32 4, <8 x i1> %427, <8 x float> undef), !tbaa !12, !alias.scope !47 - %438 = fsub <8 x float> %wide.masked.load187.1, %wide.masked.load186.1 - %439 = bitcast float* %436 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %438, <8 x float>* %439, i32 4, <8 x i1> %427), !tbaa !12, !alias.scope !47, !llvm.access.group !21 - %440 = or <8 x i64> %broadcast.splat183, - %441 = trunc <8 x i64> %440 to <8 x i32> - %442 = icmp sgt <8 x i32> %broadcast.splat185, %441 - %443 = extractelement <8 x i64> %440, i32 0 - %444 = shl i64 %443, 32 - %445 = ashr exact i64 %444, 32 - %446 = getelementptr inbounds float, float* %0, i64 %445 - %447 = bitcast float* %446 to <8 x float>* - %wide.masked.load186.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %447, i32 4, <8 x i1> %442, <8 x float> undef), !tbaa !12, !alias.scope !44, !noalias !47 - %448 = extractelement <8 x i32> %441, i32 0 - %449 = add nsw i32 %mul.i.5, %448 - %450 = sext i32 %449 to i64 - %451 = getelementptr inbounds float, float* %1, i64 %450 - %452 = bitcast float* %451 to <8 x float>* - %wide.masked.load187.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %452, i32 4, <8 x i1> %442, <8 x float> undef), !tbaa !12, !alias.scope !47 - %453 = fsub <8 x float> %wide.masked.load187.2, %wide.masked.load186.2 - %454 = bitcast float* %451 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %453, <8 x float>* %454, i32 4, <8 x i1> %442), !tbaa !12, !alias.scope !47, !llvm.access.group !21 - %455 = or <8 x i64> %broadcast.splat183, - %456 = trunc <8 x i64> %455 to <8 x i32> - %457 = icmp sgt <8 x i32> %broadcast.splat185, %456 - %458 = extractelement <8 x i64> %455, i32 0 - %459 = shl i64 %458, 32 - %460 = ashr exact i64 %459, 32 - %461 = getelementptr inbounds float, float* %0, i64 %460 - %462 = bitcast float* %461 to <8 x float>* - %wide.masked.load186.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %462, i32 4, <8 x i1> %457, <8 x float> undef), !tbaa !12, !alias.scope !44, !noalias !47 - %463 = extractelement <8 x i32> %456, i32 0 - %464 = add nsw i32 %mul.i.5, %463 - %465 = sext i32 %464 to i64 - %466 = getelementptr inbounds float, float* %1, i64 %465 - %467 = bitcast float* %466 to <8 x float>* - %wide.masked.load187.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %467, i32 4, <8 x i1> %457, <8 x float> undef), !tbaa !12, !alias.scope !47 - %468 = fsub <8 x float> %wide.masked.load187.3, %wide.masked.load186.3 - %469 = bitcast float* %466 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %468, <8 x float>* %469, i32 4, <8 x i1> %457), !tbaa !12, !alias.scope !47, !llvm.access.group !21 - br label %pregion_for_end.i.5 - -pregion_for_entry.entry.i.us.5: ; preds = %if.end.r_exit.i.us.5.1, %pregion_for_entry.entry.i.us.5.preheader - %_local_id_x.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.us.5.preheader ], [ %635, %if.end.r_exit.i.us.5.1 ] - %add1.i.i.us.5 = add nuw nsw i64 %_local_id_x.0.us.5, %mul.i.i - %conv.i.us.5 = trunc i64 %add1.i.i.us.5 to i32 - %cmp4.i.us.5 = icmp slt i32 %conv.i.us.5, %2 - br i1 %cmp4.i.us.5, label %if.then.i.us.5, label %if.end.r_exit.i.us.5 - -if.then.i.us.5: ; preds = %pregion_for_entry.entry.i.us.5 - %sext.i.us.5 = shl i64 %add1.i.i.us.5, 32 - %idxprom.i.us.5 = ashr exact i64 %sext.i.us.5, 32 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.5 - %470 = load float, float* %arrayidx.i.us.5, align 4, !tbaa !12 - %add.i.us.5 = add nsw i32 %mul.i.5, %conv.i.us.5 - %idxprom6.i.us.5 = sext i32 %add.i.us.5 to i64 - %arrayidx7.i.us.5 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.5 - %471 = load float, float* %arrayidx7.i.us.5, align 4, !tbaa !12 - %sub.i.us.5 = fsub float %471, %470 - store float %sub.i.us.5, float* %arrayidx7.i.us.5, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.5 - -if.end.r_exit.i.us.5: ; preds = %if.then.i.us.5, %pregion_for_entry.entry.i.us.5 - %472 = or i64 %_local_id_x.0.us.5, 1 - %add1.i.i.us.5.1 = add nuw nsw i64 %472, %mul.i.i - %conv.i.us.5.1 = trunc i64 %add1.i.i.us.5.1 to i32 - %cmp4.i.us.5.1 = icmp slt i32 %conv.i.us.5.1, %2 - br i1 %cmp4.i.us.5.1, label %if.then.i.us.5.1, label %if.end.r_exit.i.us.5.1 - -pregion_for_end.i.5.loopexit: ; preds = %if.end.r_exit.i.us.5.1 - br label %pregion_for_end.i.5 - -pregion_for_end.i.5: ; preds = %pregion_for_end.i.5.loopexit, %vector.ph175, %pregion_for_end.i.4 - %473 = trunc i64 %mul3.i.i to i32 - %conv2.i.6 = or i32 %473, 6 - %cmp.i.6 = icmp slt i32 %conv2.i.6, %3 - %mul.i.6 = mul nsw i32 %conv2.i.6, %2 - br i1 %cmp.i.6, label %vector.scevcheck195, label %pregion_for_end.i.6 - -vector.scevcheck195: ; preds = %pregion_for_end.i.5 - %474 = mul i32 %conv2.i.6, %2 - %475 = trunc i64 %5 to i32 - %476 = shl i32 %475, 5 - %477 = add i32 %474, %476 - %478 = icmp sgt i32 %477, 2147483616 - br i1 %478, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.memcheck209 - -pregion_for_entry.entry.i.us.6.preheader: ; preds = %vector.memcheck209, %vector.scevcheck195 - br label %pregion_for_entry.entry.i.us.6 - -vector.memcheck209: ; preds = %vector.scevcheck195 - %479 = trunc i64 %5 to i32 - %480 = shl i32 %479, 5 - %481 = sext i32 %480 to i64 - %scevgep197 = getelementptr float, float* %0, i64 %481 - %482 = add nsw i64 %481, 32 - %scevgep199 = getelementptr float, float* %0, i64 %482 - %483 = mul i32 %conv2.i.6, %2 - %484 = add i32 %483, %480 - %485 = sext i32 %484 to i64 - %scevgep201 = getelementptr float, float* %1, i64 %485 - %486 = add nsw i64 %485, 32 - %scevgep203 = getelementptr float, float* %1, i64 %486 - %bound0205 = icmp ult float* %scevgep197, %scevgep203 - %bound1206 = icmp ult float* %scevgep201, %scevgep199 - %found.conflict207 = and i1 %bound0205, %bound1206 - br i1 %found.conflict207, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.ph210 - -vector.ph210: ; preds = %vector.memcheck209 - %broadcast.splatinsert217 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat218 = shufflevector <8 x i64> %broadcast.splatinsert217, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert219 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat220 = shufflevector <8 x i32> %broadcast.splatinsert219, <8 x i32> undef, <8 x i32> zeroinitializer - %487 = or <8 x i64> %broadcast.splat218, - %488 = trunc <8 x i64> %487 to <8 x i32> - %489 = icmp sgt <8 x i32> %broadcast.splat220, %488 - %490 = extractelement <8 x i64> %487, i32 0 - %491 = shl i64 %490, 32 - %492 = ashr exact i64 %491, 32 - %493 = getelementptr inbounds float, float* %0, i64 %492 - %494 = bitcast float* %493 to <8 x float>* - %wide.masked.load221 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %494, i32 4, <8 x i1> %489, <8 x float> undef), !tbaa !12, !alias.scope !49, !noalias !52 - %495 = extractelement <8 x i32> %488, i32 0 - %496 = add nsw i32 %mul.i.6, %495 - %497 = sext i32 %496 to i64 - %498 = getelementptr inbounds float, float* %1, i64 %497 - %499 = bitcast float* %498 to <8 x float>* - %wide.masked.load222 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %499, i32 4, <8 x i1> %489, <8 x float> undef), !tbaa !12, !alias.scope !52 - %500 = fsub <8 x float> %wide.masked.load222, %wide.masked.load221 - %501 = bitcast float* %498 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %500, <8 x float>* %501, i32 4, <8 x i1> %489), !tbaa !12, !alias.scope !52, !llvm.access.group !21 - %502 = or <8 x i64> %broadcast.splat218, - %503 = trunc <8 x i64> %502 to <8 x i32> - %504 = icmp sgt <8 x i32> %broadcast.splat220, %503 - %505 = extractelement <8 x i64> %502, i32 0 - %506 = shl i64 %505, 32 - %507 = ashr exact i64 %506, 32 - %508 = getelementptr inbounds float, float* %0, i64 %507 - %509 = bitcast float* %508 to <8 x float>* - %wide.masked.load221.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %509, i32 4, <8 x i1> %504, <8 x float> undef), !tbaa !12, !alias.scope !49, !noalias !52 - %510 = extractelement <8 x i32> %503, i32 0 - %511 = add nsw i32 %mul.i.6, %510 - %512 = sext i32 %511 to i64 - %513 = getelementptr inbounds float, float* %1, i64 %512 - %514 = bitcast float* %513 to <8 x float>* - %wide.masked.load222.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %514, i32 4, <8 x i1> %504, <8 x float> undef), !tbaa !12, !alias.scope !52 - %515 = fsub <8 x float> %wide.masked.load222.1, %wide.masked.load221.1 - %516 = bitcast float* %513 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %515, <8 x float>* %516, i32 4, <8 x i1> %504), !tbaa !12, !alias.scope !52, !llvm.access.group !21 - %517 = or <8 x i64> %broadcast.splat218, - %518 = trunc <8 x i64> %517 to <8 x i32> - %519 = icmp sgt <8 x i32> %broadcast.splat220, %518 - %520 = extractelement <8 x i64> %517, i32 0 - %521 = shl i64 %520, 32 - %522 = ashr exact i64 %521, 32 - %523 = getelementptr inbounds float, float* %0, i64 %522 - %524 = bitcast float* %523 to <8 x float>* - %wide.masked.load221.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %524, i32 4, <8 x i1> %519, <8 x float> undef), !tbaa !12, !alias.scope !49, !noalias !52 - %525 = extractelement <8 x i32> %518, i32 0 - %526 = add nsw i32 %mul.i.6, %525 - %527 = sext i32 %526 to i64 - %528 = getelementptr inbounds float, float* %1, i64 %527 - %529 = bitcast float* %528 to <8 x float>* - %wide.masked.load222.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %529, i32 4, <8 x i1> %519, <8 x float> undef), !tbaa !12, !alias.scope !52 - %530 = fsub <8 x float> %wide.masked.load222.2, %wide.masked.load221.2 - %531 = bitcast float* %528 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %530, <8 x float>* %531, i32 4, <8 x i1> %519), !tbaa !12, !alias.scope !52, !llvm.access.group !21 - %532 = or <8 x i64> %broadcast.splat218, - %533 = trunc <8 x i64> %532 to <8 x i32> - %534 = icmp sgt <8 x i32> %broadcast.splat220, %533 - %535 = extractelement <8 x i64> %532, i32 0 - %536 = shl i64 %535, 32 - %537 = ashr exact i64 %536, 32 - %538 = getelementptr inbounds float, float* %0, i64 %537 - %539 = bitcast float* %538 to <8 x float>* - %wide.masked.load221.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %539, i32 4, <8 x i1> %534, <8 x float> undef), !tbaa !12, !alias.scope !49, !noalias !52 - %540 = extractelement <8 x i32> %533, i32 0 - %541 = add nsw i32 %mul.i.6, %540 - %542 = sext i32 %541 to i64 - %543 = getelementptr inbounds float, float* %1, i64 %542 - %544 = bitcast float* %543 to <8 x float>* - %wide.masked.load222.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %544, i32 4, <8 x i1> %534, <8 x float> undef), !tbaa !12, !alias.scope !52 - %545 = fsub <8 x float> %wide.masked.load222.3, %wide.masked.load221.3 - %546 = bitcast float* %543 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %545, <8 x float>* %546, i32 4, <8 x i1> %534), !tbaa !12, !alias.scope !52, !llvm.access.group !21 - br label %pregion_for_end.i.6 - -pregion_for_entry.entry.i.us.6: ; preds = %if.end.r_exit.i.us.6.1, %pregion_for_entry.entry.i.us.6.preheader - %_local_id_x.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.us.6.preheader ], [ %632, %if.end.r_exit.i.us.6.1 ] - %add1.i.i.us.6 = add nuw nsw i64 %_local_id_x.0.us.6, %mul.i.i - %conv.i.us.6 = trunc i64 %add1.i.i.us.6 to i32 - %cmp4.i.us.6 = icmp slt i32 %conv.i.us.6, %2 - br i1 %cmp4.i.us.6, label %if.then.i.us.6, label %if.end.r_exit.i.us.6 - -if.then.i.us.6: ; preds = %pregion_for_entry.entry.i.us.6 - %sext.i.us.6 = shl i64 %add1.i.i.us.6, 32 - %idxprom.i.us.6 = ashr exact i64 %sext.i.us.6, 32 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.6 - %547 = load float, float* %arrayidx.i.us.6, align 4, !tbaa !12 - %add.i.us.6 = add nsw i32 %mul.i.6, %conv.i.us.6 - %idxprom6.i.us.6 = sext i32 %add.i.us.6 to i64 - %arrayidx7.i.us.6 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.6 - %548 = load float, float* %arrayidx7.i.us.6, align 4, !tbaa !12 - %sub.i.us.6 = fsub float %548, %547 - store float %sub.i.us.6, float* %arrayidx7.i.us.6, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.6 - -if.end.r_exit.i.us.6: ; preds = %if.then.i.us.6, %pregion_for_entry.entry.i.us.6 - %549 = or i64 %_local_id_x.0.us.6, 1 - %add1.i.i.us.6.1 = add nuw nsw i64 %549, %mul.i.i - %conv.i.us.6.1 = trunc i64 %add1.i.i.us.6.1 to i32 - %cmp4.i.us.6.1 = icmp slt i32 %conv.i.us.6.1, %2 - br i1 %cmp4.i.us.6.1, label %if.then.i.us.6.1, label %if.end.r_exit.i.us.6.1 - -pregion_for_end.i.6.loopexit: ; preds = %if.end.r_exit.i.us.6.1 - br label %pregion_for_end.i.6 - -pregion_for_end.i.6: ; preds = %pregion_for_end.i.6.loopexit, %vector.ph210, %pregion_for_end.i.5 - %550 = trunc i64 %mul3.i.i to i32 - %conv2.i.7 = or i32 %550, 7 - %cmp.i.7 = icmp slt i32 %conv2.i.7, %3 - %mul.i.7 = mul nsw i32 %conv2.i.7, %2 - br i1 %cmp.i.7, label %vector.scevcheck230, label %pregion_for_end.i.7 - -vector.scevcheck230: ; preds = %pregion_for_end.i.6 - %551 = mul i32 %conv2.i.7, %2 - %552 = trunc i64 %5 to i32 - %553 = shl i32 %552, 5 - %554 = add i32 %551, %553 - %555 = icmp sgt i32 %554, 2147483616 - br i1 %555, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.memcheck244 - -pregion_for_entry.entry.i.us.7.preheader: ; preds = %vector.memcheck244, %vector.scevcheck230 - br label %pregion_for_entry.entry.i.us.7 - -vector.memcheck244: ; preds = %vector.scevcheck230 - %556 = trunc i64 %5 to i32 - %557 = shl i32 %556, 5 - %558 = sext i32 %557 to i64 - %scevgep232 = getelementptr float, float* %0, i64 %558 - %559 = add nsw i64 %558, 32 - %scevgep234 = getelementptr float, float* %0, i64 %559 - %560 = mul i32 %conv2.i.7, %2 - %561 = add i32 %560, %557 - %562 = sext i32 %561 to i64 - %scevgep236 = getelementptr float, float* %1, i64 %562 - %563 = add nsw i64 %562, 32 - %scevgep238 = getelementptr float, float* %1, i64 %563 - %bound0240 = icmp ult float* %scevgep232, %scevgep238 - %bound1241 = icmp ult float* %scevgep236, %scevgep234 - %found.conflict242 = and i1 %bound0240, %bound1241 - br i1 %found.conflict242, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.ph245 - -vector.ph245: ; preds = %vector.memcheck244 - %broadcast.splatinsert252 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat253 = shufflevector <8 x i64> %broadcast.splatinsert252, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert254 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat255 = shufflevector <8 x i32> %broadcast.splatinsert254, <8 x i32> undef, <8 x i32> zeroinitializer - %564 = or <8 x i64> %broadcast.splat253, - %565 = trunc <8 x i64> %564 to <8 x i32> - %566 = icmp sgt <8 x i32> %broadcast.splat255, %565 - %567 = extractelement <8 x i64> %564, i32 0 - %568 = shl i64 %567, 32 - %569 = ashr exact i64 %568, 32 - %570 = getelementptr inbounds float, float* %0, i64 %569 - %571 = bitcast float* %570 to <8 x float>* - %wide.masked.load256 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %571, i32 4, <8 x i1> %566, <8 x float> undef), !tbaa !12, !alias.scope !54, !noalias !57 - %572 = extractelement <8 x i32> %565, i32 0 - %573 = add nsw i32 %mul.i.7, %572 - %574 = sext i32 %573 to i64 - %575 = getelementptr inbounds float, float* %1, i64 %574 - %576 = bitcast float* %575 to <8 x float>* - %wide.masked.load257 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %576, i32 4, <8 x i1> %566, <8 x float> undef), !tbaa !12, !alias.scope !57 - %577 = fsub <8 x float> %wide.masked.load257, %wide.masked.load256 - %578 = bitcast float* %575 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %577, <8 x float>* %578, i32 4, <8 x i1> %566), !tbaa !12, !alias.scope !57, !llvm.access.group !21 - %579 = or <8 x i64> %broadcast.splat253, - %580 = trunc <8 x i64> %579 to <8 x i32> - %581 = icmp sgt <8 x i32> %broadcast.splat255, %580 - %582 = extractelement <8 x i64> %579, i32 0 - %583 = shl i64 %582, 32 - %584 = ashr exact i64 %583, 32 - %585 = getelementptr inbounds float, float* %0, i64 %584 - %586 = bitcast float* %585 to <8 x float>* - %wide.masked.load256.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %586, i32 4, <8 x i1> %581, <8 x float> undef), !tbaa !12, !alias.scope !54, !noalias !57 - %587 = extractelement <8 x i32> %580, i32 0 - %588 = add nsw i32 %mul.i.7, %587 - %589 = sext i32 %588 to i64 - %590 = getelementptr inbounds float, float* %1, i64 %589 - %591 = bitcast float* %590 to <8 x float>* - %wide.masked.load257.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %591, i32 4, <8 x i1> %581, <8 x float> undef), !tbaa !12, !alias.scope !57 - %592 = fsub <8 x float> %wide.masked.load257.1, %wide.masked.load256.1 - %593 = bitcast float* %590 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %592, <8 x float>* %593, i32 4, <8 x i1> %581), !tbaa !12, !alias.scope !57, !llvm.access.group !21 - %594 = or <8 x i64> %broadcast.splat253, - %595 = trunc <8 x i64> %594 to <8 x i32> - %596 = icmp sgt <8 x i32> %broadcast.splat255, %595 - %597 = extractelement <8 x i64> %594, i32 0 - %598 = shl i64 %597, 32 - %599 = ashr exact i64 %598, 32 - %600 = getelementptr inbounds float, float* %0, i64 %599 - %601 = bitcast float* %600 to <8 x float>* - %wide.masked.load256.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %601, i32 4, <8 x i1> %596, <8 x float> undef), !tbaa !12, !alias.scope !54, !noalias !57 - %602 = extractelement <8 x i32> %595, i32 0 - %603 = add nsw i32 %mul.i.7, %602 - %604 = sext i32 %603 to i64 - %605 = getelementptr inbounds float, float* %1, i64 %604 - %606 = bitcast float* %605 to <8 x float>* - %wide.masked.load257.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %606, i32 4, <8 x i1> %596, <8 x float> undef), !tbaa !12, !alias.scope !57 - %607 = fsub <8 x float> %wide.masked.load257.2, %wide.masked.load256.2 - %608 = bitcast float* %605 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %607, <8 x float>* %608, i32 4, <8 x i1> %596), !tbaa !12, !alias.scope !57, !llvm.access.group !21 - %609 = or <8 x i64> %broadcast.splat253, - %610 = trunc <8 x i64> %609 to <8 x i32> - %611 = icmp sgt <8 x i32> %broadcast.splat255, %610 - %612 = extractelement <8 x i64> %609, i32 0 - %613 = shl i64 %612, 32 - %614 = ashr exact i64 %613, 32 - %615 = getelementptr inbounds float, float* %0, i64 %614 - %616 = bitcast float* %615 to <8 x float>* - %wide.masked.load256.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %616, i32 4, <8 x i1> %611, <8 x float> undef), !tbaa !12, !alias.scope !54, !noalias !57 - %617 = extractelement <8 x i32> %610, i32 0 - %618 = add nsw i32 %mul.i.7, %617 - %619 = sext i32 %618 to i64 - %620 = getelementptr inbounds float, float* %1, i64 %619 - %621 = bitcast float* %620 to <8 x float>* - %wide.masked.load257.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %621, i32 4, <8 x i1> %611, <8 x float> undef), !tbaa !12, !alias.scope !57 - %622 = fsub <8 x float> %wide.masked.load257.3, %wide.masked.load256.3 - %623 = bitcast float* %620 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %622, <8 x float>* %623, i32 4, <8 x i1> %611), !tbaa !12, !alias.scope !57, !llvm.access.group !21 - br label %pregion_for_end.i.7 - -pregion_for_entry.entry.i.us.7: ; preds = %if.end.r_exit.i.us.7.1, %pregion_for_entry.entry.i.us.7.preheader - %_local_id_x.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.us.7.preheader ], [ %629, %if.end.r_exit.i.us.7.1 ] - %add1.i.i.us.7 = add nuw nsw i64 %_local_id_x.0.us.7, %mul.i.i - %conv.i.us.7 = trunc i64 %add1.i.i.us.7 to i32 - %cmp4.i.us.7 = icmp slt i32 %conv.i.us.7, %2 - br i1 %cmp4.i.us.7, label %if.then.i.us.7, label %if.end.r_exit.i.us.7 - -if.then.i.us.7: ; preds = %pregion_for_entry.entry.i.us.7 - %sext.i.us.7 = shl i64 %add1.i.i.us.7, 32 - %idxprom.i.us.7 = ashr exact i64 %sext.i.us.7, 32 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.7 - %624 = load float, float* %arrayidx.i.us.7, align 4, !tbaa !12 - %add.i.us.7 = add nsw i32 %mul.i.7, %conv.i.us.7 - %idxprom6.i.us.7 = sext i32 %add.i.us.7 to i64 - %arrayidx7.i.us.7 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.7 - %625 = load float, float* %arrayidx7.i.us.7, align 4, !tbaa !12 - %sub.i.us.7 = fsub float %625, %624 - store float %sub.i.us.7, float* %arrayidx7.i.us.7, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.7 - -if.end.r_exit.i.us.7: ; preds = %if.then.i.us.7, %pregion_for_entry.entry.i.us.7 - %626 = or i64 %_local_id_x.0.us.7, 1 - %add1.i.i.us.7.1 = add nuw nsw i64 %626, %mul.i.i - %conv.i.us.7.1 = trunc i64 %add1.i.i.us.7.1 to i32 - %cmp4.i.us.7.1 = icmp slt i32 %conv.i.us.7.1, %2 - br i1 %cmp4.i.us.7.1, label %if.then.i.us.7.1, label %if.end.r_exit.i.us.7.1 - -pregion_for_end.i.7.loopexit: ; preds = %if.end.r_exit.i.us.7.1 - br label %pregion_for_end.i.7 - -pregion_for_end.i.7: ; preds = %pregion_for_end.i.7.loopexit, %vector.ph245, %pregion_for_end.i.6 - ret void - -if.then.i.us.7.1: ; preds = %if.end.r_exit.i.us.7 - %sext.i.us.7.1 = shl i64 %add1.i.i.us.7.1, 32 - %idxprom.i.us.7.1 = ashr exact i64 %sext.i.us.7.1, 32 - %arrayidx.i.us.7.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.7.1 - %627 = load float, float* %arrayidx.i.us.7.1, align 4, !tbaa !12 - %add.i.us.7.1 = add nsw i32 %mul.i.7, %conv.i.us.7.1 - %idxprom6.i.us.7.1 = sext i32 %add.i.us.7.1 to i64 - %arrayidx7.i.us.7.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.7.1 - %628 = load float, float* %arrayidx7.i.us.7.1, align 4, !tbaa !12 - %sub.i.us.7.1 = fsub float %628, %627 - store float %sub.i.us.7.1, float* %arrayidx7.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.7.1 - -if.end.r_exit.i.us.7.1: ; preds = %if.then.i.us.7.1, %if.end.r_exit.i.us.7 - %629 = add nuw nsw i64 %_local_id_x.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %629, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.7.loopexit, label %pregion_for_entry.entry.i.us.7, !llvm.loop !59 - -if.then.i.us.6.1: ; preds = %if.end.r_exit.i.us.6 - %sext.i.us.6.1 = shl i64 %add1.i.i.us.6.1, 32 - %idxprom.i.us.6.1 = ashr exact i64 %sext.i.us.6.1, 32 - %arrayidx.i.us.6.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.6.1 - %630 = load float, float* %arrayidx.i.us.6.1, align 4, !tbaa !12 - %add.i.us.6.1 = add nsw i32 %mul.i.6, %conv.i.us.6.1 - %idxprom6.i.us.6.1 = sext i32 %add.i.us.6.1 to i64 - %arrayidx7.i.us.6.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.6.1 - %631 = load float, float* %arrayidx7.i.us.6.1, align 4, !tbaa !12 - %sub.i.us.6.1 = fsub float %631, %630 - store float %sub.i.us.6.1, float* %arrayidx7.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.6.1 - -if.end.r_exit.i.us.6.1: ; preds = %if.then.i.us.6.1, %if.end.r_exit.i.us.6 - %632 = add nuw nsw i64 %_local_id_x.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %632, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.6.loopexit, label %pregion_for_entry.entry.i.us.6, !llvm.loop !62 - -if.then.i.us.5.1: ; preds = %if.end.r_exit.i.us.5 - %sext.i.us.5.1 = shl i64 %add1.i.i.us.5.1, 32 - %idxprom.i.us.5.1 = ashr exact i64 %sext.i.us.5.1, 32 - %arrayidx.i.us.5.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.5.1 - %633 = load float, float* %arrayidx.i.us.5.1, align 4, !tbaa !12 - %add.i.us.5.1 = add nsw i32 %mul.i.5, %conv.i.us.5.1 - %idxprom6.i.us.5.1 = sext i32 %add.i.us.5.1 to i64 - %arrayidx7.i.us.5.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.5.1 - %634 = load float, float* %arrayidx7.i.us.5.1, align 4, !tbaa !12 - %sub.i.us.5.1 = fsub float %634, %633 - store float %sub.i.us.5.1, float* %arrayidx7.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.5.1 - -if.end.r_exit.i.us.5.1: ; preds = %if.then.i.us.5.1, %if.end.r_exit.i.us.5 - %635 = add nuw nsw i64 %_local_id_x.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %635, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.5.loopexit, label %pregion_for_entry.entry.i.us.5, !llvm.loop !63 - -if.then.i.us.4.1: ; preds = %if.end.r_exit.i.us.4 - %sext.i.us.4.1 = shl i64 %add1.i.i.us.4.1, 32 - %idxprom.i.us.4.1 = ashr exact i64 %sext.i.us.4.1, 32 - %arrayidx.i.us.4.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.4.1 - %636 = load float, float* %arrayidx.i.us.4.1, align 4, !tbaa !12 - %add.i.us.4.1 = add nsw i32 %mul.i.4, %conv.i.us.4.1 - %idxprom6.i.us.4.1 = sext i32 %add.i.us.4.1 to i64 - %arrayidx7.i.us.4.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.4.1 - %637 = load float, float* %arrayidx7.i.us.4.1, align 4, !tbaa !12 - %sub.i.us.4.1 = fsub float %637, %636 - store float %sub.i.us.4.1, float* %arrayidx7.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.4.1 - -if.end.r_exit.i.us.4.1: ; preds = %if.then.i.us.4.1, %if.end.r_exit.i.us.4 - %638 = add nuw nsw i64 %_local_id_x.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %638, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.4.loopexit, label %pregion_for_entry.entry.i.us.4, !llvm.loop !64 - -if.then.i.us.3.1: ; preds = %if.end.r_exit.i.us.3 - %sext.i.us.3.1 = shl i64 %add1.i.i.us.3.1, 32 - %idxprom.i.us.3.1 = ashr exact i64 %sext.i.us.3.1, 32 - %arrayidx.i.us.3.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.3.1 - %639 = load float, float* %arrayidx.i.us.3.1, align 4, !tbaa !12 - %add.i.us.3.1 = add nsw i32 %mul.i.3, %conv.i.us.3.1 - %idxprom6.i.us.3.1 = sext i32 %add.i.us.3.1 to i64 - %arrayidx7.i.us.3.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.3.1 - %640 = load float, float* %arrayidx7.i.us.3.1, align 4, !tbaa !12 - %sub.i.us.3.1 = fsub float %640, %639 - store float %sub.i.us.3.1, float* %arrayidx7.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.3.1 - -if.end.r_exit.i.us.3.1: ; preds = %if.then.i.us.3.1, %if.end.r_exit.i.us.3 - %641 = add nuw nsw i64 %_local_id_x.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %641, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.3.loopexit, label %pregion_for_entry.entry.i.us.3, !llvm.loop !65 - -if.then.i.us.2.1: ; preds = %if.end.r_exit.i.us.2 - %sext.i.us.2.1 = shl i64 %add1.i.i.us.2.1, 32 - %idxprom.i.us.2.1 = ashr exact i64 %sext.i.us.2.1, 32 - %arrayidx.i.us.2.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.2.1 - %642 = load float, float* %arrayidx.i.us.2.1, align 4, !tbaa !12 - %add.i.us.2.1 = add nsw i32 %mul.i.2, %conv.i.us.2.1 - %idxprom6.i.us.2.1 = sext i32 %add.i.us.2.1 to i64 - %arrayidx7.i.us.2.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.2.1 - %643 = load float, float* %arrayidx7.i.us.2.1, align 4, !tbaa !12 - %sub.i.us.2.1 = fsub float %643, %642 - store float %sub.i.us.2.1, float* %arrayidx7.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.2.1 - -if.end.r_exit.i.us.2.1: ; preds = %if.then.i.us.2.1, %if.end.r_exit.i.us.2 - %644 = add nuw nsw i64 %_local_id_x.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %644, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.2.loopexit, label %pregion_for_entry.entry.i.us.2, !llvm.loop !66 - -if.then.i.us.1.1: ; preds = %if.end.r_exit.i.us.1 - %sext.i.us.1.1 = shl i64 %add1.i.i.us.1.1, 32 - %idxprom.i.us.1.1 = ashr exact i64 %sext.i.us.1.1, 32 - %arrayidx.i.us.1.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1.1 - %645 = load float, float* %arrayidx.i.us.1.1, align 4, !tbaa !12 - %add.i.us.1.1 = add nsw i32 %mul.i.1, %conv.i.us.1.1 - %idxprom6.i.us.1.1 = sext i32 %add.i.us.1.1 to i64 - %arrayidx7.i.us.1.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.1.1 - %646 = load float, float* %arrayidx7.i.us.1.1, align 4, !tbaa !12 - %sub.i.us.1.1 = fsub float %646, %645 - store float %sub.i.us.1.1, float* %arrayidx7.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.1.1 - -if.end.r_exit.i.us.1.1: ; preds = %if.then.i.us.1.1, %if.end.r_exit.i.us.1 - %647 = add nuw nsw i64 %_local_id_x.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %647, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.1.loopexit, label %pregion_for_entry.entry.i.us.1, !llvm.loop !67 - -if.then.i.us.1278: ; preds = %if.end.r_exit.i.us - %sext.i.us.1271 = shl i64 %add1.i.i.us.1267, 32 - %idxprom.i.us.1272 = ashr exact i64 %sext.i.us.1271, 32 - %arrayidx.i.us.1273 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1272 - %648 = load float, float* %arrayidx.i.us.1273, align 4, !tbaa !12 - %add.i.us.1274 = add nsw i32 %mul.i, %conv.i.us.1268 - %idxprom6.i.us.1275 = sext i32 %add.i.us.1274 to i64 - %arrayidx7.i.us.1276 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.1275 - %649 = load float, float* %arrayidx7.i.us.1276, align 4, !tbaa !12 - %sub.i.us.1277 = fsub float %649, %648 - store float %sub.i.us.1277, float* %arrayidx7.i.us.1276, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.1279 - -if.end.r_exit.i.us.1279: ; preds = %if.then.i.us.1278, %if.end.r_exit.i.us - %650 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %650, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !68 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_reduce_kernel_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -pregion_for_entry.pregion_for_init.i.i: - %5 = bitcast i8** %0 to float*** - %6 = load float**, float*** %5, align 8 - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float*** - %10 = load float**, float*** %9, align 8 - %11 = load float*, float** %10, align 8 - %12 = getelementptr i8*, i8** %0, i64 2 - %13 = bitcast i8** %12 to i32** - %14 = load i32*, i32** %13, align 8 - %15 = load i32, i32* %14, align 4 - %16 = getelementptr i8*, i8** %0, i64 3 - %17 = bitcast i8** %16 to i32** - %18 = load i32*, i32** %17, align 8 - %19 = load i32, i32* %18, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %conv2.i.i = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %19, %conv2.i.i - %mul.i.i = mul nsw i32 %15, %conv2.i.i - br i1 %cmp.i.i, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %20 = trunc i64 %3 to i32 - %21 = mul i32 %15, %20 - %22 = shl i32 %21, 3 - %23 = trunc i64 %2 to i32 - %24 = shl i32 %23, 5 - %25 = add i32 %22, %24 - %26 = icmp sgt i32 %25, 2147483616 - br i1 %26, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %27 = trunc i64 %2 to i32 - %28 = shl i32 %27, 5 - %29 = sext i32 %28 to i64 - %scevgep = getelementptr float, float* %7, i64 %29 - %30 = add nsw i64 %29, 32 - %scevgep4 = getelementptr float, float* %7, i64 %30 - %31 = trunc i64 %3 to i32 - %32 = mul i32 %15, %31 - %33 = shl i32 %32, 3 - %34 = add i32 %33, %28 - %35 = sext i32 %34 to i64 - %scevgep6 = getelementptr float, float* %11, i64 %35 - %36 = add nsw i64 %35, 32 - %scevgep8 = getelementptr float, float* %11, i64 %36 - %bound0 = icmp ult float* %scevgep, %scevgep8 - %bound1 = icmp ult float* %scevgep6, %scevgep4 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer - %37 = or <8 x i64> %broadcast.splat, - %38 = trunc <8 x i64> %37 to <8 x i32> - %39 = icmp sgt <8 x i32> %broadcast.splat11, %38 - %40 = extractelement <8 x i64> %37, i32 0 - %41 = shl i64 %40, 32 - %42 = ashr exact i64 %41, 32 - %43 = getelementptr inbounds float, float* %7, i64 %42 - %44 = bitcast float* %43 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %44, i32 4, <8 x i1> %39, <8 x float> undef), !tbaa !12, !alias.scope !69, !noalias !72 - %45 = extractelement <8 x i32> %38, i32 0 - %46 = add nsw i32 %mul.i.i, %45 - %47 = sext i32 %46 to i64 - %48 = getelementptr inbounds float, float* %11, i64 %47 - %49 = bitcast float* %48 to <8 x float>* - %wide.masked.load12 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %49, i32 4, <8 x i1> %39, <8 x float> undef), !tbaa !12, !alias.scope !72 - %50 = fsub <8 x float> %wide.masked.load12, %wide.masked.load - %51 = bitcast float* %48 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %50, <8 x float>* %51, i32 4, <8 x i1> %39), !tbaa !12, !alias.scope !72, !llvm.access.group !21 - %52 = or <8 x i64> %broadcast.splat, - %53 = trunc <8 x i64> %52 to <8 x i32> - %54 = icmp sgt <8 x i32> %broadcast.splat11, %53 - %55 = extractelement <8 x i64> %52, i32 0 - %56 = shl i64 %55, 32 - %57 = ashr exact i64 %56, 32 - %58 = getelementptr inbounds float, float* %7, i64 %57 - %59 = bitcast float* %58 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %59, i32 4, <8 x i1> %54, <8 x float> undef), !tbaa !12, !alias.scope !69, !noalias !72 - %60 = extractelement <8 x i32> %53, i32 0 - %61 = add nsw i32 %mul.i.i, %60 - %62 = sext i32 %61 to i64 - %63 = getelementptr inbounds float, float* %11, i64 %62 - %64 = bitcast float* %63 to <8 x float>* - %wide.masked.load12.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %64, i32 4, <8 x i1> %54, <8 x float> undef), !tbaa !12, !alias.scope !72 - %65 = fsub <8 x float> %wide.masked.load12.1, %wide.masked.load.1 - %66 = bitcast float* %63 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %65, <8 x float>* %66, i32 4, <8 x i1> %54), !tbaa !12, !alias.scope !72, !llvm.access.group !21 - %67 = or <8 x i64> %broadcast.splat, - %68 = trunc <8 x i64> %67 to <8 x i32> - %69 = icmp sgt <8 x i32> %broadcast.splat11, %68 - %70 = extractelement <8 x i64> %67, i32 0 - %71 = shl i64 %70, 32 - %72 = ashr exact i64 %71, 32 - %73 = getelementptr inbounds float, float* %7, i64 %72 - %74 = bitcast float* %73 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %74, i32 4, <8 x i1> %69, <8 x float> undef), !tbaa !12, !alias.scope !69, !noalias !72 - %75 = extractelement <8 x i32> %68, i32 0 - %76 = add nsw i32 %mul.i.i, %75 - %77 = sext i32 %76 to i64 - %78 = getelementptr inbounds float, float* %11, i64 %77 - %79 = bitcast float* %78 to <8 x float>* - %wide.masked.load12.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %79, i32 4, <8 x i1> %69, <8 x float> undef), !tbaa !12, !alias.scope !72 - %80 = fsub <8 x float> %wide.masked.load12.2, %wide.masked.load.2 - %81 = bitcast float* %78 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %80, <8 x float>* %81, i32 4, <8 x i1> %69), !tbaa !12, !alias.scope !72, !llvm.access.group !21 - %82 = or <8 x i64> %broadcast.splat, - %83 = trunc <8 x i64> %82 to <8 x i32> - %84 = icmp sgt <8 x i32> %broadcast.splat11, %83 - %85 = extractelement <8 x i64> %82, i32 0 - %86 = shl i64 %85, 32 - %87 = ashr exact i64 %86, 32 - %88 = getelementptr inbounds float, float* %7, i64 %87 - %89 = bitcast float* %88 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %89, i32 4, <8 x i1> %84, <8 x float> undef), !tbaa !12, !alias.scope !69, !noalias !72 - %90 = extractelement <8 x i32> %83, i32 0 - %91 = add nsw i32 %mul.i.i, %90 - %92 = sext i32 %91 to i64 - %93 = getelementptr inbounds float, float* %11, i64 %92 - %94 = bitcast float* %93 to <8 x float>* - %wide.masked.load12.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %94, i32 4, <8 x i1> %84, <8 x float> undef), !tbaa !12, !alias.scope !72 - %95 = fsub <8 x float> %wide.masked.load12.3, %wide.masked.load.3 - %96 = bitcast float* %93 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %95, <8 x float>* %96, i32 4, <8 x i1> %84), !tbaa !12, !alias.scope !72, !llvm.access.group !21 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1279, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.preheader ], [ %662, %if.end.r_exit.i.i.us.1279 ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %15, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us - %97 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom6.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx7.i.i.us = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us - %98 = load float, float* %arrayidx7.i.i.us, align 4, !tbaa !12 - %sub.i.i.us = fsub float %98, %97 - store float %sub.i.i.us, float* %arrayidx7.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %99 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1267 = add nuw nsw i64 %99, %mul.i.i.i - %conv.i.i.us.1268 = trunc i64 %add1.i.i.i.us.1267 to i32 - %cmp4.i.i.us.1269 = icmp sgt i32 %15, %conv.i.i.us.1268 - br i1 %cmp4.i.i.us.1269, label %if.then.i.i.us.1278, label %if.end.r_exit.i.i.us.1279 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.r_exit.i.i.us.1279 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i - %100 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.1 = or i32 %100, 1 - %cmp.i.i.1 = icmp sgt i32 %19, %conv2.i.i.1 - %mul.i.i.1 = mul nsw i32 %15, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck20, label %pregion_for_end.i.i.1 - -vector.scevcheck20: ; preds = %pregion_for_end.i.i - %101 = mul i32 %15, %conv2.i.i.1 - %102 = trunc i64 %2 to i32 - %103 = shl i32 %102, 5 - %104 = add i32 %101, %103 - %105 = icmp sgt i32 %104, 2147483616 - br i1 %105, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.memcheck34 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.memcheck34, %vector.scevcheck20 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.memcheck34: ; preds = %vector.scevcheck20 - %106 = trunc i64 %2 to i32 - %107 = shl i32 %106, 5 - %108 = sext i32 %107 to i64 - %scevgep22 = getelementptr float, float* %7, i64 %108 - %109 = add nsw i64 %108, 32 - %scevgep24 = getelementptr float, float* %7, i64 %109 - %110 = mul i32 %15, %conv2.i.i.1 - %111 = add i32 %110, %107 - %112 = sext i32 %111 to i64 - %scevgep26 = getelementptr float, float* %11, i64 %112 - %113 = add nsw i64 %112, 32 - %scevgep28 = getelementptr float, float* %11, i64 %113 - %bound030 = icmp ult float* %scevgep22, %scevgep28 - %bound131 = icmp ult float* %scevgep26, %scevgep24 - %found.conflict32 = and i1 %bound030, %bound131 - br i1 %found.conflict32, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph35 - -vector.ph35: ; preds = %vector.memcheck34 - %broadcast.splatinsert42 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat43 = shufflevector <8 x i64> %broadcast.splatinsert42, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert44 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat45 = shufflevector <8 x i32> %broadcast.splatinsert44, <8 x i32> undef, <8 x i32> zeroinitializer - %114 = or <8 x i64> %broadcast.splat43, - %115 = trunc <8 x i64> %114 to <8 x i32> - %116 = icmp sgt <8 x i32> %broadcast.splat45, %115 - %117 = extractelement <8 x i64> %114, i32 0 - %118 = shl i64 %117, 32 - %119 = ashr exact i64 %118, 32 - %120 = getelementptr inbounds float, float* %7, i64 %119 - %121 = bitcast float* %120 to <8 x float>* - %wide.masked.load46 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %121, i32 4, <8 x i1> %116, <8 x float> undef), !tbaa !12, !alias.scope !74, !noalias !77 - %122 = extractelement <8 x i32> %115, i32 0 - %123 = add nsw i32 %mul.i.i.1, %122 - %124 = sext i32 %123 to i64 - %125 = getelementptr inbounds float, float* %11, i64 %124 - %126 = bitcast float* %125 to <8 x float>* - %wide.masked.load47 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %126, i32 4, <8 x i1> %116, <8 x float> undef), !tbaa !12, !alias.scope !77 - %127 = fsub <8 x float> %wide.masked.load47, %wide.masked.load46 - %128 = bitcast float* %125 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %127, <8 x float>* %128, i32 4, <8 x i1> %116), !tbaa !12, !alias.scope !77, !llvm.access.group !21 - %129 = or <8 x i64> %broadcast.splat43, - %130 = trunc <8 x i64> %129 to <8 x i32> - %131 = icmp sgt <8 x i32> %broadcast.splat45, %130 - %132 = extractelement <8 x i64> %129, i32 0 - %133 = shl i64 %132, 32 - %134 = ashr exact i64 %133, 32 - %135 = getelementptr inbounds float, float* %7, i64 %134 - %136 = bitcast float* %135 to <8 x float>* - %wide.masked.load46.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %136, i32 4, <8 x i1> %131, <8 x float> undef), !tbaa !12, !alias.scope !74, !noalias !77 - %137 = extractelement <8 x i32> %130, i32 0 - %138 = add nsw i32 %mul.i.i.1, %137 - %139 = sext i32 %138 to i64 - %140 = getelementptr inbounds float, float* %11, i64 %139 - %141 = bitcast float* %140 to <8 x float>* - %wide.masked.load47.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %141, i32 4, <8 x i1> %131, <8 x float> undef), !tbaa !12, !alias.scope !77 - %142 = fsub <8 x float> %wide.masked.load47.1, %wide.masked.load46.1 - %143 = bitcast float* %140 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %142, <8 x float>* %143, i32 4, <8 x i1> %131), !tbaa !12, !alias.scope !77, !llvm.access.group !21 - %144 = or <8 x i64> %broadcast.splat43, - %145 = trunc <8 x i64> %144 to <8 x i32> - %146 = icmp sgt <8 x i32> %broadcast.splat45, %145 - %147 = extractelement <8 x i64> %144, i32 0 - %148 = shl i64 %147, 32 - %149 = ashr exact i64 %148, 32 - %150 = getelementptr inbounds float, float* %7, i64 %149 - %151 = bitcast float* %150 to <8 x float>* - %wide.masked.load46.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %151, i32 4, <8 x i1> %146, <8 x float> undef), !tbaa !12, !alias.scope !74, !noalias !77 - %152 = extractelement <8 x i32> %145, i32 0 - %153 = add nsw i32 %mul.i.i.1, %152 - %154 = sext i32 %153 to i64 - %155 = getelementptr inbounds float, float* %11, i64 %154 - %156 = bitcast float* %155 to <8 x float>* - %wide.masked.load47.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %156, i32 4, <8 x i1> %146, <8 x float> undef), !tbaa !12, !alias.scope !77 - %157 = fsub <8 x float> %wide.masked.load47.2, %wide.masked.load46.2 - %158 = bitcast float* %155 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %157, <8 x float>* %158, i32 4, <8 x i1> %146), !tbaa !12, !alias.scope !77, !llvm.access.group !21 - %159 = or <8 x i64> %broadcast.splat43, - %160 = trunc <8 x i64> %159 to <8 x i32> - %161 = icmp sgt <8 x i32> %broadcast.splat45, %160 - %162 = extractelement <8 x i64> %159, i32 0 - %163 = shl i64 %162, 32 - %164 = ashr exact i64 %163, 32 - %165 = getelementptr inbounds float, float* %7, i64 %164 - %166 = bitcast float* %165 to <8 x float>* - %wide.masked.load46.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %166, i32 4, <8 x i1> %161, <8 x float> undef), !tbaa !12, !alias.scope !74, !noalias !77 - %167 = extractelement <8 x i32> %160, i32 0 - %168 = add nsw i32 %mul.i.i.1, %167 - %169 = sext i32 %168 to i64 - %170 = getelementptr inbounds float, float* %11, i64 %169 - %171 = bitcast float* %170 to <8 x float>* - %wide.masked.load47.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %171, i32 4, <8 x i1> %161, <8 x float> undef), !tbaa !12, !alias.scope !77 - %172 = fsub <8 x float> %wide.masked.load47.3, %wide.masked.load46.3 - %173 = bitcast float* %170 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %172, <8 x float>* %173, i32 4, <8 x i1> %161), !tbaa !12, !alias.scope !77, !llvm.access.group !21 - br label %pregion_for_end.i.i.1 - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.1, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ], [ %659, %if.end.r_exit.i.i.us.1.1 ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %15, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1 - %174 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %add.i.i.us.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom6.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx7.i.i.us.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.1 - %175 = load float, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12 - %sub.i.i.us.1 = fsub float %175, %174 - store float %sub.i.i.us.1, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %176 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %176, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %15, %conv.i.i.us.1.1 - br i1 %cmp4.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.r_exit.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.r_exit.i.i.us.1.1 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph35, %pregion_for_end.i.i - %177 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.2 = or i32 %177, 2 - %cmp.i.i.2 = icmp sgt i32 %19, %conv2.i.i.2 - %mul.i.i.2 = mul nsw i32 %15, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck55, label %pregion_for_end.i.i.2 - -vector.scevcheck55: ; preds = %pregion_for_end.i.i.1 - %178 = mul i32 %15, %conv2.i.i.2 - %179 = trunc i64 %2 to i32 - %180 = shl i32 %179, 5 - %181 = add i32 %178, %180 - %182 = icmp sgt i32 %181, 2147483616 - br i1 %182, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.memcheck69 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.memcheck69, %vector.scevcheck55 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.memcheck69: ; preds = %vector.scevcheck55 - %183 = trunc i64 %2 to i32 - %184 = shl i32 %183, 5 - %185 = sext i32 %184 to i64 - %scevgep57 = getelementptr float, float* %7, i64 %185 - %186 = add nsw i64 %185, 32 - %scevgep59 = getelementptr float, float* %7, i64 %186 - %187 = mul i32 %15, %conv2.i.i.2 - %188 = add i32 %187, %184 - %189 = sext i32 %188 to i64 - %scevgep61 = getelementptr float, float* %11, i64 %189 - %190 = add nsw i64 %189, 32 - %scevgep63 = getelementptr float, float* %11, i64 %190 - %bound065 = icmp ult float* %scevgep57, %scevgep63 - %bound166 = icmp ult float* %scevgep61, %scevgep59 - %found.conflict67 = and i1 %bound065, %bound166 - br i1 %found.conflict67, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph70 - -vector.ph70: ; preds = %vector.memcheck69 - %broadcast.splatinsert77 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat78 = shufflevector <8 x i64> %broadcast.splatinsert77, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert79 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat80 = shufflevector <8 x i32> %broadcast.splatinsert79, <8 x i32> undef, <8 x i32> zeroinitializer - %191 = or <8 x i64> %broadcast.splat78, - %192 = trunc <8 x i64> %191 to <8 x i32> - %193 = icmp sgt <8 x i32> %broadcast.splat80, %192 - %194 = extractelement <8 x i64> %191, i32 0 - %195 = shl i64 %194, 32 - %196 = ashr exact i64 %195, 32 - %197 = getelementptr inbounds float, float* %7, i64 %196 - %198 = bitcast float* %197 to <8 x float>* - %wide.masked.load81 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %198, i32 4, <8 x i1> %193, <8 x float> undef), !tbaa !12, !alias.scope !79, !noalias !82 - %199 = extractelement <8 x i32> %192, i32 0 - %200 = add nsw i32 %mul.i.i.2, %199 - %201 = sext i32 %200 to i64 - %202 = getelementptr inbounds float, float* %11, i64 %201 - %203 = bitcast float* %202 to <8 x float>* - %wide.masked.load82 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %203, i32 4, <8 x i1> %193, <8 x float> undef), !tbaa !12, !alias.scope !82 - %204 = fsub <8 x float> %wide.masked.load82, %wide.masked.load81 - %205 = bitcast float* %202 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %204, <8 x float>* %205, i32 4, <8 x i1> %193), !tbaa !12, !alias.scope !82, !llvm.access.group !21 - %206 = or <8 x i64> %broadcast.splat78, - %207 = trunc <8 x i64> %206 to <8 x i32> - %208 = icmp sgt <8 x i32> %broadcast.splat80, %207 - %209 = extractelement <8 x i64> %206, i32 0 - %210 = shl i64 %209, 32 - %211 = ashr exact i64 %210, 32 - %212 = getelementptr inbounds float, float* %7, i64 %211 - %213 = bitcast float* %212 to <8 x float>* - %wide.masked.load81.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %213, i32 4, <8 x i1> %208, <8 x float> undef), !tbaa !12, !alias.scope !79, !noalias !82 - %214 = extractelement <8 x i32> %207, i32 0 - %215 = add nsw i32 %mul.i.i.2, %214 - %216 = sext i32 %215 to i64 - %217 = getelementptr inbounds float, float* %11, i64 %216 - %218 = bitcast float* %217 to <8 x float>* - %wide.masked.load82.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %218, i32 4, <8 x i1> %208, <8 x float> undef), !tbaa !12, !alias.scope !82 - %219 = fsub <8 x float> %wide.masked.load82.1, %wide.masked.load81.1 - %220 = bitcast float* %217 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %219, <8 x float>* %220, i32 4, <8 x i1> %208), !tbaa !12, !alias.scope !82, !llvm.access.group !21 - %221 = or <8 x i64> %broadcast.splat78, - %222 = trunc <8 x i64> %221 to <8 x i32> - %223 = icmp sgt <8 x i32> %broadcast.splat80, %222 - %224 = extractelement <8 x i64> %221, i32 0 - %225 = shl i64 %224, 32 - %226 = ashr exact i64 %225, 32 - %227 = getelementptr inbounds float, float* %7, i64 %226 - %228 = bitcast float* %227 to <8 x float>* - %wide.masked.load81.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %228, i32 4, <8 x i1> %223, <8 x float> undef), !tbaa !12, !alias.scope !79, !noalias !82 - %229 = extractelement <8 x i32> %222, i32 0 - %230 = add nsw i32 %mul.i.i.2, %229 - %231 = sext i32 %230 to i64 - %232 = getelementptr inbounds float, float* %11, i64 %231 - %233 = bitcast float* %232 to <8 x float>* - %wide.masked.load82.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %233, i32 4, <8 x i1> %223, <8 x float> undef), !tbaa !12, !alias.scope !82 - %234 = fsub <8 x float> %wide.masked.load82.2, %wide.masked.load81.2 - %235 = bitcast float* %232 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %234, <8 x float>* %235, i32 4, <8 x i1> %223), !tbaa !12, !alias.scope !82, !llvm.access.group !21 - %236 = or <8 x i64> %broadcast.splat78, - %237 = trunc <8 x i64> %236 to <8 x i32> - %238 = icmp sgt <8 x i32> %broadcast.splat80, %237 - %239 = extractelement <8 x i64> %236, i32 0 - %240 = shl i64 %239, 32 - %241 = ashr exact i64 %240, 32 - %242 = getelementptr inbounds float, float* %7, i64 %241 - %243 = bitcast float* %242 to <8 x float>* - %wide.masked.load81.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %243, i32 4, <8 x i1> %238, <8 x float> undef), !tbaa !12, !alias.scope !79, !noalias !82 - %244 = extractelement <8 x i32> %237, i32 0 - %245 = add nsw i32 %mul.i.i.2, %244 - %246 = sext i32 %245 to i64 - %247 = getelementptr inbounds float, float* %11, i64 %246 - %248 = bitcast float* %247 to <8 x float>* - %wide.masked.load82.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %248, i32 4, <8 x i1> %238, <8 x float> undef), !tbaa !12, !alias.scope !82 - %249 = fsub <8 x float> %wide.masked.load82.3, %wide.masked.load81.3 - %250 = bitcast float* %247 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %249, <8 x float>* %250, i32 4, <8 x i1> %238), !tbaa !12, !alias.scope !82, !llvm.access.group !21 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.r_exit.i.i.us.2.1, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ], [ %656, %if.end.r_exit.i.i.us.2.1 ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %15, %conv.i.i.us.2 - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.r_exit.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %sext.i.i.us.2 = shl i64 %add1.i.i.i.us.2, 32 - %idxprom.i.i.us.2 = ashr exact i64 %sext.i.i.us.2, 32 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.2 - %251 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %add.i.i.us.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom6.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx7.i.i.us.2 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.2 - %252 = load float, float* %arrayidx7.i.i.us.2, align 4, !tbaa !12 - %sub.i.i.us.2 = fsub float %252, %251 - store float %sub.i.i.us.2, float* %arrayidx7.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.2 - -if.end.r_exit.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %253 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %253, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %15, %conv.i.i.us.2.1 - br i1 %cmp4.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.r_exit.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.r_exit.i.i.us.2.1 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph70, %pregion_for_end.i.i.1 - %254 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.3 = or i32 %254, 3 - %cmp.i.i.3 = icmp sgt i32 %19, %conv2.i.i.3 - %mul.i.i.3 = mul nsw i32 %15, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck90, label %pregion_for_end.i.i.3 - -vector.scevcheck90: ; preds = %pregion_for_end.i.i.2 - %255 = mul i32 %15, %conv2.i.i.3 - %256 = trunc i64 %2 to i32 - %257 = shl i32 %256, 5 - %258 = add i32 %255, %257 - %259 = icmp sgt i32 %258, 2147483616 - br i1 %259, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.memcheck104 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.memcheck104, %vector.scevcheck90 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.memcheck104: ; preds = %vector.scevcheck90 - %260 = trunc i64 %2 to i32 - %261 = shl i32 %260, 5 - %262 = sext i32 %261 to i64 - %scevgep92 = getelementptr float, float* %7, i64 %262 - %263 = add nsw i64 %262, 32 - %scevgep94 = getelementptr float, float* %7, i64 %263 - %264 = mul i32 %15, %conv2.i.i.3 - %265 = add i32 %264, %261 - %266 = sext i32 %265 to i64 - %scevgep96 = getelementptr float, float* %11, i64 %266 - %267 = add nsw i64 %266, 32 - %scevgep98 = getelementptr float, float* %11, i64 %267 - %bound0100 = icmp ult float* %scevgep92, %scevgep98 - %bound1101 = icmp ult float* %scevgep96, %scevgep94 - %found.conflict102 = and i1 %bound0100, %bound1101 - br i1 %found.conflict102, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph105 - -vector.ph105: ; preds = %vector.memcheck104 - %broadcast.splatinsert112 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat113 = shufflevector <8 x i64> %broadcast.splatinsert112, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert114 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat115 = shufflevector <8 x i32> %broadcast.splatinsert114, <8 x i32> undef, <8 x i32> zeroinitializer - %268 = or <8 x i64> %broadcast.splat113, - %269 = trunc <8 x i64> %268 to <8 x i32> - %270 = icmp sgt <8 x i32> %broadcast.splat115, %269 - %271 = extractelement <8 x i64> %268, i32 0 - %272 = shl i64 %271, 32 - %273 = ashr exact i64 %272, 32 - %274 = getelementptr inbounds float, float* %7, i64 %273 - %275 = bitcast float* %274 to <8 x float>* - %wide.masked.load116 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %275, i32 4, <8 x i1> %270, <8 x float> undef), !tbaa !12, !alias.scope !84, !noalias !87 - %276 = extractelement <8 x i32> %269, i32 0 - %277 = add nsw i32 %mul.i.i.3, %276 - %278 = sext i32 %277 to i64 - %279 = getelementptr inbounds float, float* %11, i64 %278 - %280 = bitcast float* %279 to <8 x float>* - %wide.masked.load117 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %280, i32 4, <8 x i1> %270, <8 x float> undef), !tbaa !12, !alias.scope !87 - %281 = fsub <8 x float> %wide.masked.load117, %wide.masked.load116 - %282 = bitcast float* %279 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %281, <8 x float>* %282, i32 4, <8 x i1> %270), !tbaa !12, !alias.scope !87, !llvm.access.group !21 - %283 = or <8 x i64> %broadcast.splat113, - %284 = trunc <8 x i64> %283 to <8 x i32> - %285 = icmp sgt <8 x i32> %broadcast.splat115, %284 - %286 = extractelement <8 x i64> %283, i32 0 - %287 = shl i64 %286, 32 - %288 = ashr exact i64 %287, 32 - %289 = getelementptr inbounds float, float* %7, i64 %288 - %290 = bitcast float* %289 to <8 x float>* - %wide.masked.load116.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %290, i32 4, <8 x i1> %285, <8 x float> undef), !tbaa !12, !alias.scope !84, !noalias !87 - %291 = extractelement <8 x i32> %284, i32 0 - %292 = add nsw i32 %mul.i.i.3, %291 - %293 = sext i32 %292 to i64 - %294 = getelementptr inbounds float, float* %11, i64 %293 - %295 = bitcast float* %294 to <8 x float>* - %wide.masked.load117.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %295, i32 4, <8 x i1> %285, <8 x float> undef), !tbaa !12, !alias.scope !87 - %296 = fsub <8 x float> %wide.masked.load117.1, %wide.masked.load116.1 - %297 = bitcast float* %294 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %296, <8 x float>* %297, i32 4, <8 x i1> %285), !tbaa !12, !alias.scope !87, !llvm.access.group !21 - %298 = or <8 x i64> %broadcast.splat113, - %299 = trunc <8 x i64> %298 to <8 x i32> - %300 = icmp sgt <8 x i32> %broadcast.splat115, %299 - %301 = extractelement <8 x i64> %298, i32 0 - %302 = shl i64 %301, 32 - %303 = ashr exact i64 %302, 32 - %304 = getelementptr inbounds float, float* %7, i64 %303 - %305 = bitcast float* %304 to <8 x float>* - %wide.masked.load116.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %305, i32 4, <8 x i1> %300, <8 x float> undef), !tbaa !12, !alias.scope !84, !noalias !87 - %306 = extractelement <8 x i32> %299, i32 0 - %307 = add nsw i32 %mul.i.i.3, %306 - %308 = sext i32 %307 to i64 - %309 = getelementptr inbounds float, float* %11, i64 %308 - %310 = bitcast float* %309 to <8 x float>* - %wide.masked.load117.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %310, i32 4, <8 x i1> %300, <8 x float> undef), !tbaa !12, !alias.scope !87 - %311 = fsub <8 x float> %wide.masked.load117.2, %wide.masked.load116.2 - %312 = bitcast float* %309 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %311, <8 x float>* %312, i32 4, <8 x i1> %300), !tbaa !12, !alias.scope !87, !llvm.access.group !21 - %313 = or <8 x i64> %broadcast.splat113, - %314 = trunc <8 x i64> %313 to <8 x i32> - %315 = icmp sgt <8 x i32> %broadcast.splat115, %314 - %316 = extractelement <8 x i64> %313, i32 0 - %317 = shl i64 %316, 32 - %318 = ashr exact i64 %317, 32 - %319 = getelementptr inbounds float, float* %7, i64 %318 - %320 = bitcast float* %319 to <8 x float>* - %wide.masked.load116.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %320, i32 4, <8 x i1> %315, <8 x float> undef), !tbaa !12, !alias.scope !84, !noalias !87 - %321 = extractelement <8 x i32> %314, i32 0 - %322 = add nsw i32 %mul.i.i.3, %321 - %323 = sext i32 %322 to i64 - %324 = getelementptr inbounds float, float* %11, i64 %323 - %325 = bitcast float* %324 to <8 x float>* - %wide.masked.load117.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %325, i32 4, <8 x i1> %315, <8 x float> undef), !tbaa !12, !alias.scope !87 - %326 = fsub <8 x float> %wide.masked.load117.3, %wide.masked.load116.3 - %327 = bitcast float* %324 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %326, <8 x float>* %327, i32 4, <8 x i1> %315), !tbaa !12, !alias.scope !87, !llvm.access.group !21 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.r_exit.i.i.us.3.1, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ], [ %653, %if.end.r_exit.i.i.us.3.1 ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %15, %conv.i.i.us.3 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.r_exit.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %sext.i.i.us.3 = shl i64 %add1.i.i.i.us.3, 32 - %idxprom.i.i.us.3 = ashr exact i64 %sext.i.i.us.3, 32 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.3 - %328 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %add.i.i.us.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom6.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx7.i.i.us.3 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.3 - %329 = load float, float* %arrayidx7.i.i.us.3, align 4, !tbaa !12 - %sub.i.i.us.3 = fsub float %329, %328 - store float %sub.i.i.us.3, float* %arrayidx7.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.3 - -if.end.r_exit.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %330 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %330, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %15, %conv.i.i.us.3.1 - br i1 %cmp4.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.r_exit.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.r_exit.i.i.us.3.1 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph105, %pregion_for_end.i.i.2 - %331 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.4 = or i32 %331, 4 - %cmp.i.i.4 = icmp sgt i32 %19, %conv2.i.i.4 - %mul.i.i.4 = mul nsw i32 %15, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck125, label %pregion_for_end.i.i.4 - -vector.scevcheck125: ; preds = %pregion_for_end.i.i.3 - %332 = mul i32 %15, %conv2.i.i.4 - %333 = trunc i64 %2 to i32 - %334 = shl i32 %333, 5 - %335 = add i32 %332, %334 - %336 = icmp sgt i32 %335, 2147483616 - br i1 %336, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.memcheck139 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.memcheck139, %vector.scevcheck125 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.memcheck139: ; preds = %vector.scevcheck125 - %337 = trunc i64 %2 to i32 - %338 = shl i32 %337, 5 - %339 = sext i32 %338 to i64 - %scevgep127 = getelementptr float, float* %7, i64 %339 - %340 = add nsw i64 %339, 32 - %scevgep129 = getelementptr float, float* %7, i64 %340 - %341 = mul i32 %15, %conv2.i.i.4 - %342 = add i32 %341, %338 - %343 = sext i32 %342 to i64 - %scevgep131 = getelementptr float, float* %11, i64 %343 - %344 = add nsw i64 %343, 32 - %scevgep133 = getelementptr float, float* %11, i64 %344 - %bound0135 = icmp ult float* %scevgep127, %scevgep133 - %bound1136 = icmp ult float* %scevgep131, %scevgep129 - %found.conflict137 = and i1 %bound0135, %bound1136 - br i1 %found.conflict137, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph140 - -vector.ph140: ; preds = %vector.memcheck139 - %broadcast.splatinsert147 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat148 = shufflevector <8 x i64> %broadcast.splatinsert147, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert149 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat150 = shufflevector <8 x i32> %broadcast.splatinsert149, <8 x i32> undef, <8 x i32> zeroinitializer - %345 = or <8 x i64> %broadcast.splat148, - %346 = trunc <8 x i64> %345 to <8 x i32> - %347 = icmp sgt <8 x i32> %broadcast.splat150, %346 - %348 = extractelement <8 x i64> %345, i32 0 - %349 = shl i64 %348, 32 - %350 = ashr exact i64 %349, 32 - %351 = getelementptr inbounds float, float* %7, i64 %350 - %352 = bitcast float* %351 to <8 x float>* - %wide.masked.load151 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %352, i32 4, <8 x i1> %347, <8 x float> undef), !tbaa !12, !alias.scope !89, !noalias !92 - %353 = extractelement <8 x i32> %346, i32 0 - %354 = add nsw i32 %mul.i.i.4, %353 - %355 = sext i32 %354 to i64 - %356 = getelementptr inbounds float, float* %11, i64 %355 - %357 = bitcast float* %356 to <8 x float>* - %wide.masked.load152 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %357, i32 4, <8 x i1> %347, <8 x float> undef), !tbaa !12, !alias.scope !92 - %358 = fsub <8 x float> %wide.masked.load152, %wide.masked.load151 - %359 = bitcast float* %356 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %358, <8 x float>* %359, i32 4, <8 x i1> %347), !tbaa !12, !alias.scope !92, !llvm.access.group !21 - %360 = or <8 x i64> %broadcast.splat148, - %361 = trunc <8 x i64> %360 to <8 x i32> - %362 = icmp sgt <8 x i32> %broadcast.splat150, %361 - %363 = extractelement <8 x i64> %360, i32 0 - %364 = shl i64 %363, 32 - %365 = ashr exact i64 %364, 32 - %366 = getelementptr inbounds float, float* %7, i64 %365 - %367 = bitcast float* %366 to <8 x float>* - %wide.masked.load151.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %367, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !89, !noalias !92 - %368 = extractelement <8 x i32> %361, i32 0 - %369 = add nsw i32 %mul.i.i.4, %368 - %370 = sext i32 %369 to i64 - %371 = getelementptr inbounds float, float* %11, i64 %370 - %372 = bitcast float* %371 to <8 x float>* - %wide.masked.load152.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %372, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !92 - %373 = fsub <8 x float> %wide.masked.load152.1, %wide.masked.load151.1 - %374 = bitcast float* %371 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %373, <8 x float>* %374, i32 4, <8 x i1> %362), !tbaa !12, !alias.scope !92, !llvm.access.group !21 - %375 = or <8 x i64> %broadcast.splat148, - %376 = trunc <8 x i64> %375 to <8 x i32> - %377 = icmp sgt <8 x i32> %broadcast.splat150, %376 - %378 = extractelement <8 x i64> %375, i32 0 - %379 = shl i64 %378, 32 - %380 = ashr exact i64 %379, 32 - %381 = getelementptr inbounds float, float* %7, i64 %380 - %382 = bitcast float* %381 to <8 x float>* - %wide.masked.load151.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %382, i32 4, <8 x i1> %377, <8 x float> undef), !tbaa !12, !alias.scope !89, !noalias !92 - %383 = extractelement <8 x i32> %376, i32 0 - %384 = add nsw i32 %mul.i.i.4, %383 - %385 = sext i32 %384 to i64 - %386 = getelementptr inbounds float, float* %11, i64 %385 - %387 = bitcast float* %386 to <8 x float>* - %wide.masked.load152.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %387, i32 4, <8 x i1> %377, <8 x float> undef), !tbaa !12, !alias.scope !92 - %388 = fsub <8 x float> %wide.masked.load152.2, %wide.masked.load151.2 - %389 = bitcast float* %386 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %388, <8 x float>* %389, i32 4, <8 x i1> %377), !tbaa !12, !alias.scope !92, !llvm.access.group !21 - %390 = or <8 x i64> %broadcast.splat148, - %391 = trunc <8 x i64> %390 to <8 x i32> - %392 = icmp sgt <8 x i32> %broadcast.splat150, %391 - %393 = extractelement <8 x i64> %390, i32 0 - %394 = shl i64 %393, 32 - %395 = ashr exact i64 %394, 32 - %396 = getelementptr inbounds float, float* %7, i64 %395 - %397 = bitcast float* %396 to <8 x float>* - %wide.masked.load151.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %397, i32 4, <8 x i1> %392, <8 x float> undef), !tbaa !12, !alias.scope !89, !noalias !92 - %398 = extractelement <8 x i32> %391, i32 0 - %399 = add nsw i32 %mul.i.i.4, %398 - %400 = sext i32 %399 to i64 - %401 = getelementptr inbounds float, float* %11, i64 %400 - %402 = bitcast float* %401 to <8 x float>* - %wide.masked.load152.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %402, i32 4, <8 x i1> %392, <8 x float> undef), !tbaa !12, !alias.scope !92 - %403 = fsub <8 x float> %wide.masked.load152.3, %wide.masked.load151.3 - %404 = bitcast float* %401 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %403, <8 x float>* %404, i32 4, <8 x i1> %392), !tbaa !12, !alias.scope !92, !llvm.access.group !21 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.r_exit.i.i.us.4.1, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ], [ %650, %if.end.r_exit.i.i.us.4.1 ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %15, %conv.i.i.us.4 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.r_exit.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %sext.i.i.us.4 = shl i64 %add1.i.i.i.us.4, 32 - %idxprom.i.i.us.4 = ashr exact i64 %sext.i.i.us.4, 32 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.4 - %405 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %add.i.i.us.4 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom6.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx7.i.i.us.4 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.4 - %406 = load float, float* %arrayidx7.i.i.us.4, align 4, !tbaa !12 - %sub.i.i.us.4 = fsub float %406, %405 - store float %sub.i.i.us.4, float* %arrayidx7.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.4 - -if.end.r_exit.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %407 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %407, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %15, %conv.i.i.us.4.1 - br i1 %cmp4.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.r_exit.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.r_exit.i.i.us.4.1 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph140, %pregion_for_end.i.i.3 - %408 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.5 = or i32 %408, 5 - %cmp.i.i.5 = icmp sgt i32 %19, %conv2.i.i.5 - %mul.i.i.5 = mul nsw i32 %15, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck160, label %pregion_for_end.i.i.5 - -vector.scevcheck160: ; preds = %pregion_for_end.i.i.4 - %409 = mul i32 %15, %conv2.i.i.5 - %410 = trunc i64 %2 to i32 - %411 = shl i32 %410, 5 - %412 = add i32 %409, %411 - %413 = icmp sgt i32 %412, 2147483616 - br i1 %413, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.memcheck174 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.memcheck174, %vector.scevcheck160 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.memcheck174: ; preds = %vector.scevcheck160 - %414 = trunc i64 %2 to i32 - %415 = shl i32 %414, 5 - %416 = sext i32 %415 to i64 - %scevgep162 = getelementptr float, float* %7, i64 %416 - %417 = add nsw i64 %416, 32 - %scevgep164 = getelementptr float, float* %7, i64 %417 - %418 = mul i32 %15, %conv2.i.i.5 - %419 = add i32 %418, %415 - %420 = sext i32 %419 to i64 - %scevgep166 = getelementptr float, float* %11, i64 %420 - %421 = add nsw i64 %420, 32 - %scevgep168 = getelementptr float, float* %11, i64 %421 - %bound0170 = icmp ult float* %scevgep162, %scevgep168 - %bound1171 = icmp ult float* %scevgep166, %scevgep164 - %found.conflict172 = and i1 %bound0170, %bound1171 - br i1 %found.conflict172, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph175 - -vector.ph175: ; preds = %vector.memcheck174 - %broadcast.splatinsert182 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat183 = shufflevector <8 x i64> %broadcast.splatinsert182, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert184 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat185 = shufflevector <8 x i32> %broadcast.splatinsert184, <8 x i32> undef, <8 x i32> zeroinitializer - %422 = or <8 x i64> %broadcast.splat183, - %423 = trunc <8 x i64> %422 to <8 x i32> - %424 = icmp sgt <8 x i32> %broadcast.splat185, %423 - %425 = extractelement <8 x i64> %422, i32 0 - %426 = shl i64 %425, 32 - %427 = ashr exact i64 %426, 32 - %428 = getelementptr inbounds float, float* %7, i64 %427 - %429 = bitcast float* %428 to <8 x float>* - %wide.masked.load186 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %429, i32 4, <8 x i1> %424, <8 x float> undef), !tbaa !12, !alias.scope !94, !noalias !97 - %430 = extractelement <8 x i32> %423, i32 0 - %431 = add nsw i32 %mul.i.i.5, %430 - %432 = sext i32 %431 to i64 - %433 = getelementptr inbounds float, float* %11, i64 %432 - %434 = bitcast float* %433 to <8 x float>* - %wide.masked.load187 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %434, i32 4, <8 x i1> %424, <8 x float> undef), !tbaa !12, !alias.scope !97 - %435 = fsub <8 x float> %wide.masked.load187, %wide.masked.load186 - %436 = bitcast float* %433 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %435, <8 x float>* %436, i32 4, <8 x i1> %424), !tbaa !12, !alias.scope !97, !llvm.access.group !21 - %437 = or <8 x i64> %broadcast.splat183, - %438 = trunc <8 x i64> %437 to <8 x i32> - %439 = icmp sgt <8 x i32> %broadcast.splat185, %438 - %440 = extractelement <8 x i64> %437, i32 0 - %441 = shl i64 %440, 32 - %442 = ashr exact i64 %441, 32 - %443 = getelementptr inbounds float, float* %7, i64 %442 - %444 = bitcast float* %443 to <8 x float>* - %wide.masked.load186.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %444, i32 4, <8 x i1> %439, <8 x float> undef), !tbaa !12, !alias.scope !94, !noalias !97 - %445 = extractelement <8 x i32> %438, i32 0 - %446 = add nsw i32 %mul.i.i.5, %445 - %447 = sext i32 %446 to i64 - %448 = getelementptr inbounds float, float* %11, i64 %447 - %449 = bitcast float* %448 to <8 x float>* - %wide.masked.load187.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %449, i32 4, <8 x i1> %439, <8 x float> undef), !tbaa !12, !alias.scope !97 - %450 = fsub <8 x float> %wide.masked.load187.1, %wide.masked.load186.1 - %451 = bitcast float* %448 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %450, <8 x float>* %451, i32 4, <8 x i1> %439), !tbaa !12, !alias.scope !97, !llvm.access.group !21 - %452 = or <8 x i64> %broadcast.splat183, - %453 = trunc <8 x i64> %452 to <8 x i32> - %454 = icmp sgt <8 x i32> %broadcast.splat185, %453 - %455 = extractelement <8 x i64> %452, i32 0 - %456 = shl i64 %455, 32 - %457 = ashr exact i64 %456, 32 - %458 = getelementptr inbounds float, float* %7, i64 %457 - %459 = bitcast float* %458 to <8 x float>* - %wide.masked.load186.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %459, i32 4, <8 x i1> %454, <8 x float> undef), !tbaa !12, !alias.scope !94, !noalias !97 - %460 = extractelement <8 x i32> %453, i32 0 - %461 = add nsw i32 %mul.i.i.5, %460 - %462 = sext i32 %461 to i64 - %463 = getelementptr inbounds float, float* %11, i64 %462 - %464 = bitcast float* %463 to <8 x float>* - %wide.masked.load187.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %464, i32 4, <8 x i1> %454, <8 x float> undef), !tbaa !12, !alias.scope !97 - %465 = fsub <8 x float> %wide.masked.load187.2, %wide.masked.load186.2 - %466 = bitcast float* %463 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %465, <8 x float>* %466, i32 4, <8 x i1> %454), !tbaa !12, !alias.scope !97, !llvm.access.group !21 - %467 = or <8 x i64> %broadcast.splat183, - %468 = trunc <8 x i64> %467 to <8 x i32> - %469 = icmp sgt <8 x i32> %broadcast.splat185, %468 - %470 = extractelement <8 x i64> %467, i32 0 - %471 = shl i64 %470, 32 - %472 = ashr exact i64 %471, 32 - %473 = getelementptr inbounds float, float* %7, i64 %472 - %474 = bitcast float* %473 to <8 x float>* - %wide.masked.load186.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %474, i32 4, <8 x i1> %469, <8 x float> undef), !tbaa !12, !alias.scope !94, !noalias !97 - %475 = extractelement <8 x i32> %468, i32 0 - %476 = add nsw i32 %mul.i.i.5, %475 - %477 = sext i32 %476 to i64 - %478 = getelementptr inbounds float, float* %11, i64 %477 - %479 = bitcast float* %478 to <8 x float>* - %wide.masked.load187.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %479, i32 4, <8 x i1> %469, <8 x float> undef), !tbaa !12, !alias.scope !97 - %480 = fsub <8 x float> %wide.masked.load187.3, %wide.masked.load186.3 - %481 = bitcast float* %478 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %480, <8 x float>* %481, i32 4, <8 x i1> %469), !tbaa !12, !alias.scope !97, !llvm.access.group !21 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.r_exit.i.i.us.5.1, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ], [ %647, %if.end.r_exit.i.i.us.5.1 ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %15, %conv.i.i.us.5 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.r_exit.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %sext.i.i.us.5 = shl i64 %add1.i.i.i.us.5, 32 - %idxprom.i.i.us.5 = ashr exact i64 %sext.i.i.us.5, 32 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.5 - %482 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %add.i.i.us.5 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom6.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx7.i.i.us.5 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.5 - %483 = load float, float* %arrayidx7.i.i.us.5, align 4, !tbaa !12 - %sub.i.i.us.5 = fsub float %483, %482 - store float %sub.i.i.us.5, float* %arrayidx7.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.5 - -if.end.r_exit.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %484 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %484, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %15, %conv.i.i.us.5.1 - br i1 %cmp4.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.r_exit.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.r_exit.i.i.us.5.1 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph175, %pregion_for_end.i.i.4 - %485 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.6 = or i32 %485, 6 - %cmp.i.i.6 = icmp sgt i32 %19, %conv2.i.i.6 - %mul.i.i.6 = mul nsw i32 %15, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck195, label %pregion_for_end.i.i.6 - -vector.scevcheck195: ; preds = %pregion_for_end.i.i.5 - %486 = mul i32 %15, %conv2.i.i.6 - %487 = trunc i64 %2 to i32 - %488 = shl i32 %487, 5 - %489 = add i32 %486, %488 - %490 = icmp sgt i32 %489, 2147483616 - br i1 %490, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.memcheck209 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.memcheck209, %vector.scevcheck195 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.memcheck209: ; preds = %vector.scevcheck195 - %491 = trunc i64 %2 to i32 - %492 = shl i32 %491, 5 - %493 = sext i32 %492 to i64 - %scevgep197 = getelementptr float, float* %7, i64 %493 - %494 = add nsw i64 %493, 32 - %scevgep199 = getelementptr float, float* %7, i64 %494 - %495 = mul i32 %15, %conv2.i.i.6 - %496 = add i32 %495, %492 - %497 = sext i32 %496 to i64 - %scevgep201 = getelementptr float, float* %11, i64 %497 - %498 = add nsw i64 %497, 32 - %scevgep203 = getelementptr float, float* %11, i64 %498 - %bound0205 = icmp ult float* %scevgep197, %scevgep203 - %bound1206 = icmp ult float* %scevgep201, %scevgep199 - %found.conflict207 = and i1 %bound0205, %bound1206 - br i1 %found.conflict207, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph210 - -vector.ph210: ; preds = %vector.memcheck209 - %broadcast.splatinsert217 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat218 = shufflevector <8 x i64> %broadcast.splatinsert217, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert219 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat220 = shufflevector <8 x i32> %broadcast.splatinsert219, <8 x i32> undef, <8 x i32> zeroinitializer - %499 = or <8 x i64> %broadcast.splat218, - %500 = trunc <8 x i64> %499 to <8 x i32> - %501 = icmp sgt <8 x i32> %broadcast.splat220, %500 - %502 = extractelement <8 x i64> %499, i32 0 - %503 = shl i64 %502, 32 - %504 = ashr exact i64 %503, 32 - %505 = getelementptr inbounds float, float* %7, i64 %504 - %506 = bitcast float* %505 to <8 x float>* - %wide.masked.load221 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %506, i32 4, <8 x i1> %501, <8 x float> undef), !tbaa !12, !alias.scope !99, !noalias !102 - %507 = extractelement <8 x i32> %500, i32 0 - %508 = add nsw i32 %mul.i.i.6, %507 - %509 = sext i32 %508 to i64 - %510 = getelementptr inbounds float, float* %11, i64 %509 - %511 = bitcast float* %510 to <8 x float>* - %wide.masked.load222 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %511, i32 4, <8 x i1> %501, <8 x float> undef), !tbaa !12, !alias.scope !102 - %512 = fsub <8 x float> %wide.masked.load222, %wide.masked.load221 - %513 = bitcast float* %510 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %512, <8 x float>* %513, i32 4, <8 x i1> %501), !tbaa !12, !alias.scope !102, !llvm.access.group !21 - %514 = or <8 x i64> %broadcast.splat218, - %515 = trunc <8 x i64> %514 to <8 x i32> - %516 = icmp sgt <8 x i32> %broadcast.splat220, %515 - %517 = extractelement <8 x i64> %514, i32 0 - %518 = shl i64 %517, 32 - %519 = ashr exact i64 %518, 32 - %520 = getelementptr inbounds float, float* %7, i64 %519 - %521 = bitcast float* %520 to <8 x float>* - %wide.masked.load221.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %521, i32 4, <8 x i1> %516, <8 x float> undef), !tbaa !12, !alias.scope !99, !noalias !102 - %522 = extractelement <8 x i32> %515, i32 0 - %523 = add nsw i32 %mul.i.i.6, %522 - %524 = sext i32 %523 to i64 - %525 = getelementptr inbounds float, float* %11, i64 %524 - %526 = bitcast float* %525 to <8 x float>* - %wide.masked.load222.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %526, i32 4, <8 x i1> %516, <8 x float> undef), !tbaa !12, !alias.scope !102 - %527 = fsub <8 x float> %wide.masked.load222.1, %wide.masked.load221.1 - %528 = bitcast float* %525 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %527, <8 x float>* %528, i32 4, <8 x i1> %516), !tbaa !12, !alias.scope !102, !llvm.access.group !21 - %529 = or <8 x i64> %broadcast.splat218, - %530 = trunc <8 x i64> %529 to <8 x i32> - %531 = icmp sgt <8 x i32> %broadcast.splat220, %530 - %532 = extractelement <8 x i64> %529, i32 0 - %533 = shl i64 %532, 32 - %534 = ashr exact i64 %533, 32 - %535 = getelementptr inbounds float, float* %7, i64 %534 - %536 = bitcast float* %535 to <8 x float>* - %wide.masked.load221.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %536, i32 4, <8 x i1> %531, <8 x float> undef), !tbaa !12, !alias.scope !99, !noalias !102 - %537 = extractelement <8 x i32> %530, i32 0 - %538 = add nsw i32 %mul.i.i.6, %537 - %539 = sext i32 %538 to i64 - %540 = getelementptr inbounds float, float* %11, i64 %539 - %541 = bitcast float* %540 to <8 x float>* - %wide.masked.load222.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %541, i32 4, <8 x i1> %531, <8 x float> undef), !tbaa !12, !alias.scope !102 - %542 = fsub <8 x float> %wide.masked.load222.2, %wide.masked.load221.2 - %543 = bitcast float* %540 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %542, <8 x float>* %543, i32 4, <8 x i1> %531), !tbaa !12, !alias.scope !102, !llvm.access.group !21 - %544 = or <8 x i64> %broadcast.splat218, - %545 = trunc <8 x i64> %544 to <8 x i32> - %546 = icmp sgt <8 x i32> %broadcast.splat220, %545 - %547 = extractelement <8 x i64> %544, i32 0 - %548 = shl i64 %547, 32 - %549 = ashr exact i64 %548, 32 - %550 = getelementptr inbounds float, float* %7, i64 %549 - %551 = bitcast float* %550 to <8 x float>* - %wide.masked.load221.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %551, i32 4, <8 x i1> %546, <8 x float> undef), !tbaa !12, !alias.scope !99, !noalias !102 - %552 = extractelement <8 x i32> %545, i32 0 - %553 = add nsw i32 %mul.i.i.6, %552 - %554 = sext i32 %553 to i64 - %555 = getelementptr inbounds float, float* %11, i64 %554 - %556 = bitcast float* %555 to <8 x float>* - %wide.masked.load222.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %556, i32 4, <8 x i1> %546, <8 x float> undef), !tbaa !12, !alias.scope !102 - %557 = fsub <8 x float> %wide.masked.load222.3, %wide.masked.load221.3 - %558 = bitcast float* %555 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %557, <8 x float>* %558, i32 4, <8 x i1> %546), !tbaa !12, !alias.scope !102, !llvm.access.group !21 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.r_exit.i.i.us.6.1, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ], [ %644, %if.end.r_exit.i.i.us.6.1 ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %15, %conv.i.i.us.6 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.r_exit.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %sext.i.i.us.6 = shl i64 %add1.i.i.i.us.6, 32 - %idxprom.i.i.us.6 = ashr exact i64 %sext.i.i.us.6, 32 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.6 - %559 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %add.i.i.us.6 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom6.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx7.i.i.us.6 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.6 - %560 = load float, float* %arrayidx7.i.i.us.6, align 4, !tbaa !12 - %sub.i.i.us.6 = fsub float %560, %559 - store float %sub.i.i.us.6, float* %arrayidx7.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.6 - -if.end.r_exit.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %561 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %561, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %15, %conv.i.i.us.6.1 - br i1 %cmp4.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.r_exit.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.r_exit.i.i.us.6.1 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph210, %pregion_for_end.i.i.5 - %562 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.7 = or i32 %562, 7 - %cmp.i.i.7 = icmp sgt i32 %19, %conv2.i.i.7 - %mul.i.i.7 = mul nsw i32 %15, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck230, label %pregion_for_end.i.i.7 - -vector.scevcheck230: ; preds = %pregion_for_end.i.i.6 - %563 = mul i32 %15, %conv2.i.i.7 - %564 = trunc i64 %2 to i32 - %565 = shl i32 %564, 5 - %566 = add i32 %563, %565 - %567 = icmp sgt i32 %566, 2147483616 - br i1 %567, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.memcheck244 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.memcheck244, %vector.scevcheck230 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.memcheck244: ; preds = %vector.scevcheck230 - %568 = trunc i64 %2 to i32 - %569 = shl i32 %568, 5 - %570 = sext i32 %569 to i64 - %scevgep232 = getelementptr float, float* %7, i64 %570 - %571 = add nsw i64 %570, 32 - %scevgep234 = getelementptr float, float* %7, i64 %571 - %572 = mul i32 %15, %conv2.i.i.7 - %573 = add i32 %572, %569 - %574 = sext i32 %573 to i64 - %scevgep236 = getelementptr float, float* %11, i64 %574 - %575 = add nsw i64 %574, 32 - %scevgep238 = getelementptr float, float* %11, i64 %575 - %bound0240 = icmp ult float* %scevgep232, %scevgep238 - %bound1241 = icmp ult float* %scevgep236, %scevgep234 - %found.conflict242 = and i1 %bound0240, %bound1241 - br i1 %found.conflict242, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph245 - -vector.ph245: ; preds = %vector.memcheck244 - %broadcast.splatinsert252 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat253 = shufflevector <8 x i64> %broadcast.splatinsert252, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert254 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat255 = shufflevector <8 x i32> %broadcast.splatinsert254, <8 x i32> undef, <8 x i32> zeroinitializer - %576 = or <8 x i64> %broadcast.splat253, - %577 = trunc <8 x i64> %576 to <8 x i32> - %578 = icmp sgt <8 x i32> %broadcast.splat255, %577 - %579 = extractelement <8 x i64> %576, i32 0 - %580 = shl i64 %579, 32 - %581 = ashr exact i64 %580, 32 - %582 = getelementptr inbounds float, float* %7, i64 %581 - %583 = bitcast float* %582 to <8 x float>* - %wide.masked.load256 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %583, i32 4, <8 x i1> %578, <8 x float> undef), !tbaa !12, !alias.scope !104, !noalias !107 - %584 = extractelement <8 x i32> %577, i32 0 - %585 = add nsw i32 %mul.i.i.7, %584 - %586 = sext i32 %585 to i64 - %587 = getelementptr inbounds float, float* %11, i64 %586 - %588 = bitcast float* %587 to <8 x float>* - %wide.masked.load257 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %588, i32 4, <8 x i1> %578, <8 x float> undef), !tbaa !12, !alias.scope !107 - %589 = fsub <8 x float> %wide.masked.load257, %wide.masked.load256 - %590 = bitcast float* %587 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %589, <8 x float>* %590, i32 4, <8 x i1> %578), !tbaa !12, !alias.scope !107, !llvm.access.group !21 - %591 = or <8 x i64> %broadcast.splat253, - %592 = trunc <8 x i64> %591 to <8 x i32> - %593 = icmp sgt <8 x i32> %broadcast.splat255, %592 - %594 = extractelement <8 x i64> %591, i32 0 - %595 = shl i64 %594, 32 - %596 = ashr exact i64 %595, 32 - %597 = getelementptr inbounds float, float* %7, i64 %596 - %598 = bitcast float* %597 to <8 x float>* - %wide.masked.load256.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %598, i32 4, <8 x i1> %593, <8 x float> undef), !tbaa !12, !alias.scope !104, !noalias !107 - %599 = extractelement <8 x i32> %592, i32 0 - %600 = add nsw i32 %mul.i.i.7, %599 - %601 = sext i32 %600 to i64 - %602 = getelementptr inbounds float, float* %11, i64 %601 - %603 = bitcast float* %602 to <8 x float>* - %wide.masked.load257.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %603, i32 4, <8 x i1> %593, <8 x float> undef), !tbaa !12, !alias.scope !107 - %604 = fsub <8 x float> %wide.masked.load257.1, %wide.masked.load256.1 - %605 = bitcast float* %602 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %604, <8 x float>* %605, i32 4, <8 x i1> %593), !tbaa !12, !alias.scope !107, !llvm.access.group !21 - %606 = or <8 x i64> %broadcast.splat253, - %607 = trunc <8 x i64> %606 to <8 x i32> - %608 = icmp sgt <8 x i32> %broadcast.splat255, %607 - %609 = extractelement <8 x i64> %606, i32 0 - %610 = shl i64 %609, 32 - %611 = ashr exact i64 %610, 32 - %612 = getelementptr inbounds float, float* %7, i64 %611 - %613 = bitcast float* %612 to <8 x float>* - %wide.masked.load256.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %613, i32 4, <8 x i1> %608, <8 x float> undef), !tbaa !12, !alias.scope !104, !noalias !107 - %614 = extractelement <8 x i32> %607, i32 0 - %615 = add nsw i32 %mul.i.i.7, %614 - %616 = sext i32 %615 to i64 - %617 = getelementptr inbounds float, float* %11, i64 %616 - %618 = bitcast float* %617 to <8 x float>* - %wide.masked.load257.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %618, i32 4, <8 x i1> %608, <8 x float> undef), !tbaa !12, !alias.scope !107 - %619 = fsub <8 x float> %wide.masked.load257.2, %wide.masked.load256.2 - %620 = bitcast float* %617 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %619, <8 x float>* %620, i32 4, <8 x i1> %608), !tbaa !12, !alias.scope !107, !llvm.access.group !21 - %621 = or <8 x i64> %broadcast.splat253, - %622 = trunc <8 x i64> %621 to <8 x i32> - %623 = icmp sgt <8 x i32> %broadcast.splat255, %622 - %624 = extractelement <8 x i64> %621, i32 0 - %625 = shl i64 %624, 32 - %626 = ashr exact i64 %625, 32 - %627 = getelementptr inbounds float, float* %7, i64 %626 - %628 = bitcast float* %627 to <8 x float>* - %wide.masked.load256.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %628, i32 4, <8 x i1> %623, <8 x float> undef), !tbaa !12, !alias.scope !104, !noalias !107 - %629 = extractelement <8 x i32> %622, i32 0 - %630 = add nsw i32 %mul.i.i.7, %629 - %631 = sext i32 %630 to i64 - %632 = getelementptr inbounds float, float* %11, i64 %631 - %633 = bitcast float* %632 to <8 x float>* - %wide.masked.load257.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %633, i32 4, <8 x i1> %623, <8 x float> undef), !tbaa !12, !alias.scope !107 - %634 = fsub <8 x float> %wide.masked.load257.3, %wide.masked.load256.3 - %635 = bitcast float* %632 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %634, <8 x float>* %635, i32 4, <8 x i1> %623), !tbaa !12, !alias.scope !107, !llvm.access.group !21 - br label %pregion_for_end.i.i.7 - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.r_exit.i.i.us.7.1, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ], [ %641, %if.end.r_exit.i.i.us.7.1 ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %15, %conv.i.i.us.7 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.r_exit.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %sext.i.i.us.7 = shl i64 %add1.i.i.i.us.7, 32 - %idxprom.i.i.us.7 = ashr exact i64 %sext.i.i.us.7, 32 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.7 - %636 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %add.i.i.us.7 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom6.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx7.i.i.us.7 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.7 - %637 = load float, float* %arrayidx7.i.i.us.7, align 4, !tbaa !12 - %sub.i.i.us.7 = fsub float %637, %636 - store float %sub.i.i.us.7, float* %arrayidx7.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.7 - -if.end.r_exit.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %638 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %638, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %15, %conv.i.i.us.7.1 - br i1 %cmp4.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.r_exit.i.i.us.7.1 - -pregion_for_end.i.i.7.loopexit: ; preds = %if.end.r_exit.i.i.us.7.1 - br label %pregion_for_end.i.i.7 - -pregion_for_end.i.i.7: ; preds = %pregion_for_end.i.i.7.loopexit, %vector.ph245, %pregion_for_end.i.i.6 - ret void - -if.then.i.i.us.7.1: ; preds = %if.end.r_exit.i.i.us.7 - %sext.i.i.us.7.1 = shl i64 %add1.i.i.i.us.7.1, 32 - %idxprom.i.i.us.7.1 = ashr exact i64 %sext.i.i.us.7.1, 32 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.7.1 - %639 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %add.i.i.us.7.1 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom6.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx7.i.i.us.7.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.7.1 - %640 = load float, float* %arrayidx7.i.i.us.7.1, align 4, !tbaa !12 - %sub.i.i.us.7.1 = fsub float %640, %639 - store float %sub.i.i.us.7.1, float* %arrayidx7.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.7.1 - -if.end.r_exit.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.r_exit.i.i.us.7 - %641 = add nuw nsw i64 %_local_id_x.i.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %641, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.i.7.loopexit, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !109 - -if.then.i.i.us.6.1: ; preds = %if.end.r_exit.i.i.us.6 - %sext.i.i.us.6.1 = shl i64 %add1.i.i.i.us.6.1, 32 - %idxprom.i.i.us.6.1 = ashr exact i64 %sext.i.i.us.6.1, 32 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.6.1 - %642 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %add.i.i.us.6.1 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom6.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx7.i.i.us.6.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.6.1 - %643 = load float, float* %arrayidx7.i.i.us.6.1, align 4, !tbaa !12 - %sub.i.i.us.6.1 = fsub float %643, %642 - store float %sub.i.i.us.6.1, float* %arrayidx7.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.6.1 - -if.end.r_exit.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.r_exit.i.i.us.6 - %644 = add nuw nsw i64 %_local_id_x.i.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %644, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !110 - -if.then.i.i.us.5.1: ; preds = %if.end.r_exit.i.i.us.5 - %sext.i.i.us.5.1 = shl i64 %add1.i.i.i.us.5.1, 32 - %idxprom.i.i.us.5.1 = ashr exact i64 %sext.i.i.us.5.1, 32 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.5.1 - %645 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %add.i.i.us.5.1 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom6.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx7.i.i.us.5.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.5.1 - %646 = load float, float* %arrayidx7.i.i.us.5.1, align 4, !tbaa !12 - %sub.i.i.us.5.1 = fsub float %646, %645 - store float %sub.i.i.us.5.1, float* %arrayidx7.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.5.1 - -if.end.r_exit.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.r_exit.i.i.us.5 - %647 = add nuw nsw i64 %_local_id_x.i.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %647, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !111 - -if.then.i.i.us.4.1: ; preds = %if.end.r_exit.i.i.us.4 - %sext.i.i.us.4.1 = shl i64 %add1.i.i.i.us.4.1, 32 - %idxprom.i.i.us.4.1 = ashr exact i64 %sext.i.i.us.4.1, 32 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.4.1 - %648 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %add.i.i.us.4.1 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom6.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx7.i.i.us.4.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.4.1 - %649 = load float, float* %arrayidx7.i.i.us.4.1, align 4, !tbaa !12 - %sub.i.i.us.4.1 = fsub float %649, %648 - store float %sub.i.i.us.4.1, float* %arrayidx7.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.4.1 - -if.end.r_exit.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.r_exit.i.i.us.4 - %650 = add nuw nsw i64 %_local_id_x.i.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %650, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !112 - -if.then.i.i.us.3.1: ; preds = %if.end.r_exit.i.i.us.3 - %sext.i.i.us.3.1 = shl i64 %add1.i.i.i.us.3.1, 32 - %idxprom.i.i.us.3.1 = ashr exact i64 %sext.i.i.us.3.1, 32 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.3.1 - %651 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %add.i.i.us.3.1 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom6.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx7.i.i.us.3.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.3.1 - %652 = load float, float* %arrayidx7.i.i.us.3.1, align 4, !tbaa !12 - %sub.i.i.us.3.1 = fsub float %652, %651 - store float %sub.i.i.us.3.1, float* %arrayidx7.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.3.1 - -if.end.r_exit.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.r_exit.i.i.us.3 - %653 = add nuw nsw i64 %_local_id_x.i.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %653, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !113 - -if.then.i.i.us.2.1: ; preds = %if.end.r_exit.i.i.us.2 - %sext.i.i.us.2.1 = shl i64 %add1.i.i.i.us.2.1, 32 - %idxprom.i.i.us.2.1 = ashr exact i64 %sext.i.i.us.2.1, 32 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.2.1 - %654 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %add.i.i.us.2.1 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom6.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx7.i.i.us.2.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.2.1 - %655 = load float, float* %arrayidx7.i.i.us.2.1, align 4, !tbaa !12 - %sub.i.i.us.2.1 = fsub float %655, %654 - store float %sub.i.i.us.2.1, float* %arrayidx7.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.2.1 - -if.end.r_exit.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.r_exit.i.i.us.2 - %656 = add nuw nsw i64 %_local_id_x.i.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %656, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !114 - -if.then.i.i.us.1.1: ; preds = %if.end.r_exit.i.i.us.1 - %sext.i.i.us.1.1 = shl i64 %add1.i.i.i.us.1.1, 32 - %idxprom.i.i.us.1.1 = ashr exact i64 %sext.i.i.us.1.1, 32 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1.1 - %657 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %add.i.i.us.1.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom6.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx7.i.i.us.1.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.1.1 - %658 = load float, float* %arrayidx7.i.i.us.1.1, align 4, !tbaa !12 - %sub.i.i.us.1.1 = fsub float %658, %657 - store float %sub.i.i.us.1.1, float* %arrayidx7.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1.1 - -if.end.r_exit.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.r_exit.i.i.us.1 - %659 = add nuw nsw i64 %_local_id_x.i.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %659, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !115 - -if.then.i.i.us.1278: ; preds = %if.end.r_exit.i.i.us - %sext.i.i.us.1271 = shl i64 %add1.i.i.i.us.1267, 32 - %idxprom.i.i.us.1272 = ashr exact i64 %sext.i.i.us.1271, 32 - %arrayidx.i.i.us.1273 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1272 - %660 = load float, float* %arrayidx.i.i.us.1273, align 4, !tbaa !12 - %add.i.i.us.1274 = add nsw i32 %mul.i.i, %conv.i.i.us.1268 - %idxprom6.i.i.us.1275 = sext i32 %add.i.i.us.1274 to i64 - %arrayidx7.i.i.us.1276 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.1275 - %661 = load float, float* %arrayidx7.i.i.us.1276, align 4, !tbaa !12 - %sub.i.i.us.1277 = fsub float %661, %660 - store float %sub.i.i.us.1277, float* %arrayidx7.i.i.us.1276, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1279 - -if.end.r_exit.i.i.us.1279: ; preds = %if.then.i.i.us.1278, %if.end.r_exit.i.i.us - %662 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %662, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !116 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_reduce_kernel_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -pregion_for_entry.pregion_for_init.i.i: - %5 = bitcast i8** %0 to float** - %6 = load float*, float** %5, align 8 - %7 = getelementptr i8*, i8** %0, i64 1 - %8 = bitcast i8** %7 to float** - %9 = load float*, float** %8, align 8 - %10 = getelementptr i8*, i8** %0, i64 2 - %11 = bitcast i8** %10 to i32** - %12 = load i32*, i32** %11, align 8 - %13 = load i32, i32* %12, align 4 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %conv2.i.i = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %17, %conv2.i.i - %mul.i.i = mul nsw i32 %13, %conv2.i.i - br i1 %cmp.i.i, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %18 = trunc i64 %3 to i32 - %19 = mul i32 %13, %18 - %20 = shl i32 %19, 3 - %21 = trunc i64 %2 to i32 - %22 = shl i32 %21, 5 - %23 = add i32 %20, %22 - %24 = icmp sgt i32 %23, 2147483616 - br i1 %24, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %25 = trunc i64 %2 to i32 - %26 = shl i32 %25, 5 - %27 = sext i32 %26 to i64 - %scevgep = getelementptr float, float* %6, i64 %27 - %28 = add nsw i64 %27, 32 - %scevgep4 = getelementptr float, float* %6, i64 %28 - %29 = trunc i64 %3 to i32 - %30 = mul i32 %13, %29 - %31 = shl i32 %30, 3 - %32 = add i32 %31, %26 - %33 = sext i32 %32 to i64 - %scevgep6 = getelementptr float, float* %9, i64 %33 - %34 = add nsw i64 %33, 32 - %scevgep8 = getelementptr float, float* %9, i64 %34 - %bound0 = icmp ult float* %scevgep, %scevgep8 - %bound1 = icmp ult float* %scevgep6, %scevgep4 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %13, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer - %35 = or <8 x i64> %broadcast.splat, - %36 = trunc <8 x i64> %35 to <8 x i32> - %37 = icmp sgt <8 x i32> %broadcast.splat11, %36 - %38 = extractelement <8 x i64> %35, i32 0 - %39 = shl i64 %38, 32 - %40 = ashr exact i64 %39, 32 - %41 = getelementptr inbounds float, float* %6, i64 %40 - %42 = bitcast float* %41 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %42, i32 4, <8 x i1> %37, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %43 = extractelement <8 x i32> %36, i32 0 - %44 = add nsw i32 %mul.i.i, %43 - %45 = sext i32 %44 to i64 - %46 = getelementptr inbounds float, float* %9, i64 %45 - %47 = bitcast float* %46 to <8 x float>* - %wide.masked.load12 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %47, i32 4, <8 x i1> %37, <8 x float> undef), !tbaa !12, !alias.scope !120 - %48 = fsub <8 x float> %wide.masked.load12, %wide.masked.load - %49 = bitcast float* %46 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %48, <8 x float>* %49, i32 4, <8 x i1> %37), !tbaa !12, !alias.scope !120, !llvm.access.group !21 - %50 = or <8 x i64> %broadcast.splat, - %51 = trunc <8 x i64> %50 to <8 x i32> - %52 = icmp sgt <8 x i32> %broadcast.splat11, %51 - %53 = extractelement <8 x i64> %50, i32 0 - %54 = shl i64 %53, 32 - %55 = ashr exact i64 %54, 32 - %56 = getelementptr inbounds float, float* %6, i64 %55 - %57 = bitcast float* %56 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %57, i32 4, <8 x i1> %52, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %58 = extractelement <8 x i32> %51, i32 0 - %59 = add nsw i32 %mul.i.i, %58 - %60 = sext i32 %59 to i64 - %61 = getelementptr inbounds float, float* %9, i64 %60 - %62 = bitcast float* %61 to <8 x float>* - %wide.masked.load12.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %62, i32 4, <8 x i1> %52, <8 x float> undef), !tbaa !12, !alias.scope !120 - %63 = fsub <8 x float> %wide.masked.load12.1, %wide.masked.load.1 - %64 = bitcast float* %61 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %63, <8 x float>* %64, i32 4, <8 x i1> %52), !tbaa !12, !alias.scope !120, !llvm.access.group !21 - %65 = or <8 x i64> %broadcast.splat, - %66 = trunc <8 x i64> %65 to <8 x i32> - %67 = icmp sgt <8 x i32> %broadcast.splat11, %66 - %68 = extractelement <8 x i64> %65, i32 0 - %69 = shl i64 %68, 32 - %70 = ashr exact i64 %69, 32 - %71 = getelementptr inbounds float, float* %6, i64 %70 - %72 = bitcast float* %71 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %72, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %73 = extractelement <8 x i32> %66, i32 0 - %74 = add nsw i32 %mul.i.i, %73 - %75 = sext i32 %74 to i64 - %76 = getelementptr inbounds float, float* %9, i64 %75 - %77 = bitcast float* %76 to <8 x float>* - %wide.masked.load12.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %77, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12, !alias.scope !120 - %78 = fsub <8 x float> %wide.masked.load12.2, %wide.masked.load.2 - %79 = bitcast float* %76 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %78, <8 x float>* %79, i32 4, <8 x i1> %67), !tbaa !12, !alias.scope !120, !llvm.access.group !21 - %80 = or <8 x i64> %broadcast.splat, - %81 = trunc <8 x i64> %80 to <8 x i32> - %82 = icmp sgt <8 x i32> %broadcast.splat11, %81 - %83 = extractelement <8 x i64> %80, i32 0 - %84 = shl i64 %83, 32 - %85 = ashr exact i64 %84, 32 - %86 = getelementptr inbounds float, float* %6, i64 %85 - %87 = bitcast float* %86 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %87, i32 4, <8 x i1> %82, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %88 = extractelement <8 x i32> %81, i32 0 - %89 = add nsw i32 %mul.i.i, %88 - %90 = sext i32 %89 to i64 - %91 = getelementptr inbounds float, float* %9, i64 %90 - %92 = bitcast float* %91 to <8 x float>* - %wide.masked.load12.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %92, i32 4, <8 x i1> %82, <8 x float> undef), !tbaa !12, !alias.scope !120 - %93 = fsub <8 x float> %wide.masked.load12.3, %wide.masked.load.3 - %94 = bitcast float* %91 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %93, <8 x float>* %94, i32 4, <8 x i1> %82), !tbaa !12, !alias.scope !120, !llvm.access.group !21 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1279, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.preheader ], [ %660, %if.end.r_exit.i.i.us.1279 ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %13, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx.i.i.us = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us - %95 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom6.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx7.i.i.us = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us - %96 = load float, float* %arrayidx7.i.i.us, align 4, !tbaa !12 - %sub.i.i.us = fsub float %96, %95 - store float %sub.i.i.us, float* %arrayidx7.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %97 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1267 = add nuw nsw i64 %97, %mul.i.i.i - %conv.i.i.us.1268 = trunc i64 %add1.i.i.i.us.1267 to i32 - %cmp4.i.i.us.1269 = icmp sgt i32 %13, %conv.i.i.us.1268 - br i1 %cmp4.i.i.us.1269, label %if.then.i.i.us.1278, label %if.end.r_exit.i.i.us.1279 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.r_exit.i.i.us.1279 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i - %98 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.1 = or i32 %98, 1 - %cmp.i.i.1 = icmp sgt i32 %17, %conv2.i.i.1 - %mul.i.i.1 = mul nsw i32 %13, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck20, label %pregion_for_end.i.i.1 - -vector.scevcheck20: ; preds = %pregion_for_end.i.i - %99 = mul i32 %13, %conv2.i.i.1 - %100 = trunc i64 %2 to i32 - %101 = shl i32 %100, 5 - %102 = add i32 %99, %101 - %103 = icmp sgt i32 %102, 2147483616 - br i1 %103, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.memcheck34 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.memcheck34, %vector.scevcheck20 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.memcheck34: ; preds = %vector.scevcheck20 - %104 = trunc i64 %2 to i32 - %105 = shl i32 %104, 5 - %106 = sext i32 %105 to i64 - %scevgep22 = getelementptr float, float* %6, i64 %106 - %107 = add nsw i64 %106, 32 - %scevgep24 = getelementptr float, float* %6, i64 %107 - %108 = mul i32 %13, %conv2.i.i.1 - %109 = add i32 %108, %105 - %110 = sext i32 %109 to i64 - %scevgep26 = getelementptr float, float* %9, i64 %110 - %111 = add nsw i64 %110, 32 - %scevgep28 = getelementptr float, float* %9, i64 %111 - %bound030 = icmp ult float* %scevgep22, %scevgep28 - %bound131 = icmp ult float* %scevgep26, %scevgep24 - %found.conflict32 = and i1 %bound030, %bound131 - br i1 %found.conflict32, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph35 - -vector.ph35: ; preds = %vector.memcheck34 - %broadcast.splatinsert42 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat43 = shufflevector <8 x i64> %broadcast.splatinsert42, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert44 = insertelement <8 x i32> undef, i32 %13, i32 0 - %broadcast.splat45 = shufflevector <8 x i32> %broadcast.splatinsert44, <8 x i32> undef, <8 x i32> zeroinitializer - %112 = or <8 x i64> %broadcast.splat43, - %113 = trunc <8 x i64> %112 to <8 x i32> - %114 = icmp sgt <8 x i32> %broadcast.splat45, %113 - %115 = extractelement <8 x i64> %112, i32 0 - %116 = shl i64 %115, 32 - %117 = ashr exact i64 %116, 32 - %118 = getelementptr inbounds float, float* %6, i64 %117 - %119 = bitcast float* %118 to <8 x float>* - %wide.masked.load46 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %119, i32 4, <8 x i1> %114, <8 x float> undef), !tbaa !12, !alias.scope !122, !noalias !125 - %120 = extractelement <8 x i32> %113, i32 0 - %121 = add nsw i32 %mul.i.i.1, %120 - %122 = sext i32 %121 to i64 - %123 = getelementptr inbounds float, float* %9, i64 %122 - %124 = bitcast float* %123 to <8 x float>* - %wide.masked.load47 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %124, i32 4, <8 x i1> %114, <8 x float> undef), !tbaa !12, !alias.scope !125 - %125 = fsub <8 x float> %wide.masked.load47, %wide.masked.load46 - %126 = bitcast float* %123 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %125, <8 x float>* %126, i32 4, <8 x i1> %114), !tbaa !12, !alias.scope !125, !llvm.access.group !21 - %127 = or <8 x i64> %broadcast.splat43, - %128 = trunc <8 x i64> %127 to <8 x i32> - %129 = icmp sgt <8 x i32> %broadcast.splat45, %128 - %130 = extractelement <8 x i64> %127, i32 0 - %131 = shl i64 %130, 32 - %132 = ashr exact i64 %131, 32 - %133 = getelementptr inbounds float, float* %6, i64 %132 - %134 = bitcast float* %133 to <8 x float>* - %wide.masked.load46.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %134, i32 4, <8 x i1> %129, <8 x float> undef), !tbaa !12, !alias.scope !122, !noalias !125 - %135 = extractelement <8 x i32> %128, i32 0 - %136 = add nsw i32 %mul.i.i.1, %135 - %137 = sext i32 %136 to i64 - %138 = getelementptr inbounds float, float* %9, i64 %137 - %139 = bitcast float* %138 to <8 x float>* - %wide.masked.load47.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %139, i32 4, <8 x i1> %129, <8 x float> undef), !tbaa !12, !alias.scope !125 - %140 = fsub <8 x float> %wide.masked.load47.1, %wide.masked.load46.1 - %141 = bitcast float* %138 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %140, <8 x float>* %141, i32 4, <8 x i1> %129), !tbaa !12, !alias.scope !125, !llvm.access.group !21 - %142 = or <8 x i64> %broadcast.splat43, - %143 = trunc <8 x i64> %142 to <8 x i32> - %144 = icmp sgt <8 x i32> %broadcast.splat45, %143 - %145 = extractelement <8 x i64> %142, i32 0 - %146 = shl i64 %145, 32 - %147 = ashr exact i64 %146, 32 - %148 = getelementptr inbounds float, float* %6, i64 %147 - %149 = bitcast float* %148 to <8 x float>* - %wide.masked.load46.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %149, i32 4, <8 x i1> %144, <8 x float> undef), !tbaa !12, !alias.scope !122, !noalias !125 - %150 = extractelement <8 x i32> %143, i32 0 - %151 = add nsw i32 %mul.i.i.1, %150 - %152 = sext i32 %151 to i64 - %153 = getelementptr inbounds float, float* %9, i64 %152 - %154 = bitcast float* %153 to <8 x float>* - %wide.masked.load47.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %154, i32 4, <8 x i1> %144, <8 x float> undef), !tbaa !12, !alias.scope !125 - %155 = fsub <8 x float> %wide.masked.load47.2, %wide.masked.load46.2 - %156 = bitcast float* %153 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %155, <8 x float>* %156, i32 4, <8 x i1> %144), !tbaa !12, !alias.scope !125, !llvm.access.group !21 - %157 = or <8 x i64> %broadcast.splat43, - %158 = trunc <8 x i64> %157 to <8 x i32> - %159 = icmp sgt <8 x i32> %broadcast.splat45, %158 - %160 = extractelement <8 x i64> %157, i32 0 - %161 = shl i64 %160, 32 - %162 = ashr exact i64 %161, 32 - %163 = getelementptr inbounds float, float* %6, i64 %162 - %164 = bitcast float* %163 to <8 x float>* - %wide.masked.load46.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %164, i32 4, <8 x i1> %159, <8 x float> undef), !tbaa !12, !alias.scope !122, !noalias !125 - %165 = extractelement <8 x i32> %158, i32 0 - %166 = add nsw i32 %mul.i.i.1, %165 - %167 = sext i32 %166 to i64 - %168 = getelementptr inbounds float, float* %9, i64 %167 - %169 = bitcast float* %168 to <8 x float>* - %wide.masked.load47.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %169, i32 4, <8 x i1> %159, <8 x float> undef), !tbaa !12, !alias.scope !125 - %170 = fsub <8 x float> %wide.masked.load47.3, %wide.masked.load46.3 - %171 = bitcast float* %168 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %170, <8 x float>* %171, i32 4, <8 x i1> %159), !tbaa !12, !alias.scope !125, !llvm.access.group !21 - br label %pregion_for_end.i.i.1 - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.1, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ], [ %657, %if.end.r_exit.i.i.us.1.1 ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %13, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.1 - %172 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %add.i.i.us.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom6.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx7.i.i.us.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.1 - %173 = load float, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12 - %sub.i.i.us.1 = fsub float %173, %172 - store float %sub.i.i.us.1, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %174 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %174, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %13, %conv.i.i.us.1.1 - br i1 %cmp4.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.r_exit.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.r_exit.i.i.us.1.1 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph35, %pregion_for_end.i.i - %175 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.2 = or i32 %175, 2 - %cmp.i.i.2 = icmp sgt i32 %17, %conv2.i.i.2 - %mul.i.i.2 = mul nsw i32 %13, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck55, label %pregion_for_end.i.i.2 - -vector.scevcheck55: ; preds = %pregion_for_end.i.i.1 - %176 = mul i32 %13, %conv2.i.i.2 - %177 = trunc i64 %2 to i32 - %178 = shl i32 %177, 5 - %179 = add i32 %176, %178 - %180 = icmp sgt i32 %179, 2147483616 - br i1 %180, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.memcheck69 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.memcheck69, %vector.scevcheck55 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.memcheck69: ; preds = %vector.scevcheck55 - %181 = trunc i64 %2 to i32 - %182 = shl i32 %181, 5 - %183 = sext i32 %182 to i64 - %scevgep57 = getelementptr float, float* %6, i64 %183 - %184 = add nsw i64 %183, 32 - %scevgep59 = getelementptr float, float* %6, i64 %184 - %185 = mul i32 %13, %conv2.i.i.2 - %186 = add i32 %185, %182 - %187 = sext i32 %186 to i64 - %scevgep61 = getelementptr float, float* %9, i64 %187 - %188 = add nsw i64 %187, 32 - %scevgep63 = getelementptr float, float* %9, i64 %188 - %bound065 = icmp ult float* %scevgep57, %scevgep63 - %bound166 = icmp ult float* %scevgep61, %scevgep59 - %found.conflict67 = and i1 %bound065, %bound166 - br i1 %found.conflict67, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph70 - -vector.ph70: ; preds = %vector.memcheck69 - %broadcast.splatinsert77 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat78 = shufflevector <8 x i64> %broadcast.splatinsert77, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert79 = insertelement <8 x i32> undef, i32 %13, i32 0 - %broadcast.splat80 = shufflevector <8 x i32> %broadcast.splatinsert79, <8 x i32> undef, <8 x i32> zeroinitializer - %189 = or <8 x i64> %broadcast.splat78, - %190 = trunc <8 x i64> %189 to <8 x i32> - %191 = icmp sgt <8 x i32> %broadcast.splat80, %190 - %192 = extractelement <8 x i64> %189, i32 0 - %193 = shl i64 %192, 32 - %194 = ashr exact i64 %193, 32 - %195 = getelementptr inbounds float, float* %6, i64 %194 - %196 = bitcast float* %195 to <8 x float>* - %wide.masked.load81 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %196, i32 4, <8 x i1> %191, <8 x float> undef), !tbaa !12, !alias.scope !127, !noalias !130 - %197 = extractelement <8 x i32> %190, i32 0 - %198 = add nsw i32 %mul.i.i.2, %197 - %199 = sext i32 %198 to i64 - %200 = getelementptr inbounds float, float* %9, i64 %199 - %201 = bitcast float* %200 to <8 x float>* - %wide.masked.load82 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %201, i32 4, <8 x i1> %191, <8 x float> undef), !tbaa !12, !alias.scope !130 - %202 = fsub <8 x float> %wide.masked.load82, %wide.masked.load81 - %203 = bitcast float* %200 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %202, <8 x float>* %203, i32 4, <8 x i1> %191), !tbaa !12, !alias.scope !130, !llvm.access.group !21 - %204 = or <8 x i64> %broadcast.splat78, - %205 = trunc <8 x i64> %204 to <8 x i32> - %206 = icmp sgt <8 x i32> %broadcast.splat80, %205 - %207 = extractelement <8 x i64> %204, i32 0 - %208 = shl i64 %207, 32 - %209 = ashr exact i64 %208, 32 - %210 = getelementptr inbounds float, float* %6, i64 %209 - %211 = bitcast float* %210 to <8 x float>* - %wide.masked.load81.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %211, i32 4, <8 x i1> %206, <8 x float> undef), !tbaa !12, !alias.scope !127, !noalias !130 - %212 = extractelement <8 x i32> %205, i32 0 - %213 = add nsw i32 %mul.i.i.2, %212 - %214 = sext i32 %213 to i64 - %215 = getelementptr inbounds float, float* %9, i64 %214 - %216 = bitcast float* %215 to <8 x float>* - %wide.masked.load82.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %216, i32 4, <8 x i1> %206, <8 x float> undef), !tbaa !12, !alias.scope !130 - %217 = fsub <8 x float> %wide.masked.load82.1, %wide.masked.load81.1 - %218 = bitcast float* %215 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %217, <8 x float>* %218, i32 4, <8 x i1> %206), !tbaa !12, !alias.scope !130, !llvm.access.group !21 - %219 = or <8 x i64> %broadcast.splat78, - %220 = trunc <8 x i64> %219 to <8 x i32> - %221 = icmp sgt <8 x i32> %broadcast.splat80, %220 - %222 = extractelement <8 x i64> %219, i32 0 - %223 = shl i64 %222, 32 - %224 = ashr exact i64 %223, 32 - %225 = getelementptr inbounds float, float* %6, i64 %224 - %226 = bitcast float* %225 to <8 x float>* - %wide.masked.load81.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %226, i32 4, <8 x i1> %221, <8 x float> undef), !tbaa !12, !alias.scope !127, !noalias !130 - %227 = extractelement <8 x i32> %220, i32 0 - %228 = add nsw i32 %mul.i.i.2, %227 - %229 = sext i32 %228 to i64 - %230 = getelementptr inbounds float, float* %9, i64 %229 - %231 = bitcast float* %230 to <8 x float>* - %wide.masked.load82.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %231, i32 4, <8 x i1> %221, <8 x float> undef), !tbaa !12, !alias.scope !130 - %232 = fsub <8 x float> %wide.masked.load82.2, %wide.masked.load81.2 - %233 = bitcast float* %230 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %232, <8 x float>* %233, i32 4, <8 x i1> %221), !tbaa !12, !alias.scope !130, !llvm.access.group !21 - %234 = or <8 x i64> %broadcast.splat78, - %235 = trunc <8 x i64> %234 to <8 x i32> - %236 = icmp sgt <8 x i32> %broadcast.splat80, %235 - %237 = extractelement <8 x i64> %234, i32 0 - %238 = shl i64 %237, 32 - %239 = ashr exact i64 %238, 32 - %240 = getelementptr inbounds float, float* %6, i64 %239 - %241 = bitcast float* %240 to <8 x float>* - %wide.masked.load81.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %241, i32 4, <8 x i1> %236, <8 x float> undef), !tbaa !12, !alias.scope !127, !noalias !130 - %242 = extractelement <8 x i32> %235, i32 0 - %243 = add nsw i32 %mul.i.i.2, %242 - %244 = sext i32 %243 to i64 - %245 = getelementptr inbounds float, float* %9, i64 %244 - %246 = bitcast float* %245 to <8 x float>* - %wide.masked.load82.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %246, i32 4, <8 x i1> %236, <8 x float> undef), !tbaa !12, !alias.scope !130 - %247 = fsub <8 x float> %wide.masked.load82.3, %wide.masked.load81.3 - %248 = bitcast float* %245 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %247, <8 x float>* %248, i32 4, <8 x i1> %236), !tbaa !12, !alias.scope !130, !llvm.access.group !21 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.r_exit.i.i.us.2.1, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ], [ %654, %if.end.r_exit.i.i.us.2.1 ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %13, %conv.i.i.us.2 - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.r_exit.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %sext.i.i.us.2 = shl i64 %add1.i.i.i.us.2, 32 - %idxprom.i.i.us.2 = ashr exact i64 %sext.i.i.us.2, 32 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.2 - %249 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %add.i.i.us.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom6.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx7.i.i.us.2 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.2 - %250 = load float, float* %arrayidx7.i.i.us.2, align 4, !tbaa !12 - %sub.i.i.us.2 = fsub float %250, %249 - store float %sub.i.i.us.2, float* %arrayidx7.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.2 - -if.end.r_exit.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %251 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %251, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %13, %conv.i.i.us.2.1 - br i1 %cmp4.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.r_exit.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.r_exit.i.i.us.2.1 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph70, %pregion_for_end.i.i.1 - %252 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.3 = or i32 %252, 3 - %cmp.i.i.3 = icmp sgt i32 %17, %conv2.i.i.3 - %mul.i.i.3 = mul nsw i32 %13, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck90, label %pregion_for_end.i.i.3 - -vector.scevcheck90: ; preds = %pregion_for_end.i.i.2 - %253 = mul i32 %13, %conv2.i.i.3 - %254 = trunc i64 %2 to i32 - %255 = shl i32 %254, 5 - %256 = add i32 %253, %255 - %257 = icmp sgt i32 %256, 2147483616 - br i1 %257, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.memcheck104 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.memcheck104, %vector.scevcheck90 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.memcheck104: ; preds = %vector.scevcheck90 - %258 = trunc i64 %2 to i32 - %259 = shl i32 %258, 5 - %260 = sext i32 %259 to i64 - %scevgep92 = getelementptr float, float* %6, i64 %260 - %261 = add nsw i64 %260, 32 - %scevgep94 = getelementptr float, float* %6, i64 %261 - %262 = mul i32 %13, %conv2.i.i.3 - %263 = add i32 %262, %259 - %264 = sext i32 %263 to i64 - %scevgep96 = getelementptr float, float* %9, i64 %264 - %265 = add nsw i64 %264, 32 - %scevgep98 = getelementptr float, float* %9, i64 %265 - %bound0100 = icmp ult float* %scevgep92, %scevgep98 - %bound1101 = icmp ult float* %scevgep96, %scevgep94 - %found.conflict102 = and i1 %bound0100, %bound1101 - br i1 %found.conflict102, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph105 - -vector.ph105: ; preds = %vector.memcheck104 - %broadcast.splatinsert112 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat113 = shufflevector <8 x i64> %broadcast.splatinsert112, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert114 = insertelement <8 x i32> undef, i32 %13, i32 0 - %broadcast.splat115 = shufflevector <8 x i32> %broadcast.splatinsert114, <8 x i32> undef, <8 x i32> zeroinitializer - %266 = or <8 x i64> %broadcast.splat113, - %267 = trunc <8 x i64> %266 to <8 x i32> - %268 = icmp sgt <8 x i32> %broadcast.splat115, %267 - %269 = extractelement <8 x i64> %266, i32 0 - %270 = shl i64 %269, 32 - %271 = ashr exact i64 %270, 32 - %272 = getelementptr inbounds float, float* %6, i64 %271 - %273 = bitcast float* %272 to <8 x float>* - %wide.masked.load116 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %273, i32 4, <8 x i1> %268, <8 x float> undef), !tbaa !12, !alias.scope !132, !noalias !135 - %274 = extractelement <8 x i32> %267, i32 0 - %275 = add nsw i32 %mul.i.i.3, %274 - %276 = sext i32 %275 to i64 - %277 = getelementptr inbounds float, float* %9, i64 %276 - %278 = bitcast float* %277 to <8 x float>* - %wide.masked.load117 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %278, i32 4, <8 x i1> %268, <8 x float> undef), !tbaa !12, !alias.scope !135 - %279 = fsub <8 x float> %wide.masked.load117, %wide.masked.load116 - %280 = bitcast float* %277 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %279, <8 x float>* %280, i32 4, <8 x i1> %268), !tbaa !12, !alias.scope !135, !llvm.access.group !21 - %281 = or <8 x i64> %broadcast.splat113, - %282 = trunc <8 x i64> %281 to <8 x i32> - %283 = icmp sgt <8 x i32> %broadcast.splat115, %282 - %284 = extractelement <8 x i64> %281, i32 0 - %285 = shl i64 %284, 32 - %286 = ashr exact i64 %285, 32 - %287 = getelementptr inbounds float, float* %6, i64 %286 - %288 = bitcast float* %287 to <8 x float>* - %wide.masked.load116.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %288, i32 4, <8 x i1> %283, <8 x float> undef), !tbaa !12, !alias.scope !132, !noalias !135 - %289 = extractelement <8 x i32> %282, i32 0 - %290 = add nsw i32 %mul.i.i.3, %289 - %291 = sext i32 %290 to i64 - %292 = getelementptr inbounds float, float* %9, i64 %291 - %293 = bitcast float* %292 to <8 x float>* - %wide.masked.load117.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %293, i32 4, <8 x i1> %283, <8 x float> undef), !tbaa !12, !alias.scope !135 - %294 = fsub <8 x float> %wide.masked.load117.1, %wide.masked.load116.1 - %295 = bitcast float* %292 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %294, <8 x float>* %295, i32 4, <8 x i1> %283), !tbaa !12, !alias.scope !135, !llvm.access.group !21 - %296 = or <8 x i64> %broadcast.splat113, - %297 = trunc <8 x i64> %296 to <8 x i32> - %298 = icmp sgt <8 x i32> %broadcast.splat115, %297 - %299 = extractelement <8 x i64> %296, i32 0 - %300 = shl i64 %299, 32 - %301 = ashr exact i64 %300, 32 - %302 = getelementptr inbounds float, float* %6, i64 %301 - %303 = bitcast float* %302 to <8 x float>* - %wide.masked.load116.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %303, i32 4, <8 x i1> %298, <8 x float> undef), !tbaa !12, !alias.scope !132, !noalias !135 - %304 = extractelement <8 x i32> %297, i32 0 - %305 = add nsw i32 %mul.i.i.3, %304 - %306 = sext i32 %305 to i64 - %307 = getelementptr inbounds float, float* %9, i64 %306 - %308 = bitcast float* %307 to <8 x float>* - %wide.masked.load117.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %308, i32 4, <8 x i1> %298, <8 x float> undef), !tbaa !12, !alias.scope !135 - %309 = fsub <8 x float> %wide.masked.load117.2, %wide.masked.load116.2 - %310 = bitcast float* %307 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %309, <8 x float>* %310, i32 4, <8 x i1> %298), !tbaa !12, !alias.scope !135, !llvm.access.group !21 - %311 = or <8 x i64> %broadcast.splat113, - %312 = trunc <8 x i64> %311 to <8 x i32> - %313 = icmp sgt <8 x i32> %broadcast.splat115, %312 - %314 = extractelement <8 x i64> %311, i32 0 - %315 = shl i64 %314, 32 - %316 = ashr exact i64 %315, 32 - %317 = getelementptr inbounds float, float* %6, i64 %316 - %318 = bitcast float* %317 to <8 x float>* - %wide.masked.load116.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %318, i32 4, <8 x i1> %313, <8 x float> undef), !tbaa !12, !alias.scope !132, !noalias !135 - %319 = extractelement <8 x i32> %312, i32 0 - %320 = add nsw i32 %mul.i.i.3, %319 - %321 = sext i32 %320 to i64 - %322 = getelementptr inbounds float, float* %9, i64 %321 - %323 = bitcast float* %322 to <8 x float>* - %wide.masked.load117.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %323, i32 4, <8 x i1> %313, <8 x float> undef), !tbaa !12, !alias.scope !135 - %324 = fsub <8 x float> %wide.masked.load117.3, %wide.masked.load116.3 - %325 = bitcast float* %322 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %324, <8 x float>* %325, i32 4, <8 x i1> %313), !tbaa !12, !alias.scope !135, !llvm.access.group !21 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.r_exit.i.i.us.3.1, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ], [ %651, %if.end.r_exit.i.i.us.3.1 ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %13, %conv.i.i.us.3 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.r_exit.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %sext.i.i.us.3 = shl i64 %add1.i.i.i.us.3, 32 - %idxprom.i.i.us.3 = ashr exact i64 %sext.i.i.us.3, 32 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.3 - %326 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %add.i.i.us.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom6.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx7.i.i.us.3 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.3 - %327 = load float, float* %arrayidx7.i.i.us.3, align 4, !tbaa !12 - %sub.i.i.us.3 = fsub float %327, %326 - store float %sub.i.i.us.3, float* %arrayidx7.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.3 - -if.end.r_exit.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %328 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %328, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %13, %conv.i.i.us.3.1 - br i1 %cmp4.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.r_exit.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.r_exit.i.i.us.3.1 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph105, %pregion_for_end.i.i.2 - %329 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.4 = or i32 %329, 4 - %cmp.i.i.4 = icmp sgt i32 %17, %conv2.i.i.4 - %mul.i.i.4 = mul nsw i32 %13, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck125, label %pregion_for_end.i.i.4 - -vector.scevcheck125: ; preds = %pregion_for_end.i.i.3 - %330 = mul i32 %13, %conv2.i.i.4 - %331 = trunc i64 %2 to i32 - %332 = shl i32 %331, 5 - %333 = add i32 %330, %332 - %334 = icmp sgt i32 %333, 2147483616 - br i1 %334, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.memcheck139 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.memcheck139, %vector.scevcheck125 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.memcheck139: ; preds = %vector.scevcheck125 - %335 = trunc i64 %2 to i32 - %336 = shl i32 %335, 5 - %337 = sext i32 %336 to i64 - %scevgep127 = getelementptr float, float* %6, i64 %337 - %338 = add nsw i64 %337, 32 - %scevgep129 = getelementptr float, float* %6, i64 %338 - %339 = mul i32 %13, %conv2.i.i.4 - %340 = add i32 %339, %336 - %341 = sext i32 %340 to i64 - %scevgep131 = getelementptr float, float* %9, i64 %341 - %342 = add nsw i64 %341, 32 - %scevgep133 = getelementptr float, float* %9, i64 %342 - %bound0135 = icmp ult float* %scevgep127, %scevgep133 - %bound1136 = icmp ult float* %scevgep131, %scevgep129 - %found.conflict137 = and i1 %bound0135, %bound1136 - br i1 %found.conflict137, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph140 - -vector.ph140: ; preds = %vector.memcheck139 - %broadcast.splatinsert147 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat148 = shufflevector <8 x i64> %broadcast.splatinsert147, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert149 = insertelement <8 x i32> undef, i32 %13, i32 0 - %broadcast.splat150 = shufflevector <8 x i32> %broadcast.splatinsert149, <8 x i32> undef, <8 x i32> zeroinitializer - %343 = or <8 x i64> %broadcast.splat148, - %344 = trunc <8 x i64> %343 to <8 x i32> - %345 = icmp sgt <8 x i32> %broadcast.splat150, %344 - %346 = extractelement <8 x i64> %343, i32 0 - %347 = shl i64 %346, 32 - %348 = ashr exact i64 %347, 32 - %349 = getelementptr inbounds float, float* %6, i64 %348 - %350 = bitcast float* %349 to <8 x float>* - %wide.masked.load151 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %350, i32 4, <8 x i1> %345, <8 x float> undef), !tbaa !12, !alias.scope !137, !noalias !140 - %351 = extractelement <8 x i32> %344, i32 0 - %352 = add nsw i32 %mul.i.i.4, %351 - %353 = sext i32 %352 to i64 - %354 = getelementptr inbounds float, float* %9, i64 %353 - %355 = bitcast float* %354 to <8 x float>* - %wide.masked.load152 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %355, i32 4, <8 x i1> %345, <8 x float> undef), !tbaa !12, !alias.scope !140 - %356 = fsub <8 x float> %wide.masked.load152, %wide.masked.load151 - %357 = bitcast float* %354 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %356, <8 x float>* %357, i32 4, <8 x i1> %345), !tbaa !12, !alias.scope !140, !llvm.access.group !21 - %358 = or <8 x i64> %broadcast.splat148, - %359 = trunc <8 x i64> %358 to <8 x i32> - %360 = icmp sgt <8 x i32> %broadcast.splat150, %359 - %361 = extractelement <8 x i64> %358, i32 0 - %362 = shl i64 %361, 32 - %363 = ashr exact i64 %362, 32 - %364 = getelementptr inbounds float, float* %6, i64 %363 - %365 = bitcast float* %364 to <8 x float>* - %wide.masked.load151.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %365, i32 4, <8 x i1> %360, <8 x float> undef), !tbaa !12, !alias.scope !137, !noalias !140 - %366 = extractelement <8 x i32> %359, i32 0 - %367 = add nsw i32 %mul.i.i.4, %366 - %368 = sext i32 %367 to i64 - %369 = getelementptr inbounds float, float* %9, i64 %368 - %370 = bitcast float* %369 to <8 x float>* - %wide.masked.load152.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %370, i32 4, <8 x i1> %360, <8 x float> undef), !tbaa !12, !alias.scope !140 - %371 = fsub <8 x float> %wide.masked.load152.1, %wide.masked.load151.1 - %372 = bitcast float* %369 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %371, <8 x float>* %372, i32 4, <8 x i1> %360), !tbaa !12, !alias.scope !140, !llvm.access.group !21 - %373 = or <8 x i64> %broadcast.splat148, - %374 = trunc <8 x i64> %373 to <8 x i32> - %375 = icmp sgt <8 x i32> %broadcast.splat150, %374 - %376 = extractelement <8 x i64> %373, i32 0 - %377 = shl i64 %376, 32 - %378 = ashr exact i64 %377, 32 - %379 = getelementptr inbounds float, float* %6, i64 %378 - %380 = bitcast float* %379 to <8 x float>* - %wide.masked.load151.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %380, i32 4, <8 x i1> %375, <8 x float> undef), !tbaa !12, !alias.scope !137, !noalias !140 - %381 = extractelement <8 x i32> %374, i32 0 - %382 = add nsw i32 %mul.i.i.4, %381 - %383 = sext i32 %382 to i64 - %384 = getelementptr inbounds float, float* %9, i64 %383 - %385 = bitcast float* %384 to <8 x float>* - %wide.masked.load152.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %385, i32 4, <8 x i1> %375, <8 x float> undef), !tbaa !12, !alias.scope !140 - %386 = fsub <8 x float> %wide.masked.load152.2, %wide.masked.load151.2 - %387 = bitcast float* %384 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %386, <8 x float>* %387, i32 4, <8 x i1> %375), !tbaa !12, !alias.scope !140, !llvm.access.group !21 - %388 = or <8 x i64> %broadcast.splat148, - %389 = trunc <8 x i64> %388 to <8 x i32> - %390 = icmp sgt <8 x i32> %broadcast.splat150, %389 - %391 = extractelement <8 x i64> %388, i32 0 - %392 = shl i64 %391, 32 - %393 = ashr exact i64 %392, 32 - %394 = getelementptr inbounds float, float* %6, i64 %393 - %395 = bitcast float* %394 to <8 x float>* - %wide.masked.load151.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %395, i32 4, <8 x i1> %390, <8 x float> undef), !tbaa !12, !alias.scope !137, !noalias !140 - %396 = extractelement <8 x i32> %389, i32 0 - %397 = add nsw i32 %mul.i.i.4, %396 - %398 = sext i32 %397 to i64 - %399 = getelementptr inbounds float, float* %9, i64 %398 - %400 = bitcast float* %399 to <8 x float>* - %wide.masked.load152.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %400, i32 4, <8 x i1> %390, <8 x float> undef), !tbaa !12, !alias.scope !140 - %401 = fsub <8 x float> %wide.masked.load152.3, %wide.masked.load151.3 - %402 = bitcast float* %399 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %401, <8 x float>* %402, i32 4, <8 x i1> %390), !tbaa !12, !alias.scope !140, !llvm.access.group !21 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.r_exit.i.i.us.4.1, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ], [ %648, %if.end.r_exit.i.i.us.4.1 ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %13, %conv.i.i.us.4 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.r_exit.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %sext.i.i.us.4 = shl i64 %add1.i.i.i.us.4, 32 - %idxprom.i.i.us.4 = ashr exact i64 %sext.i.i.us.4, 32 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.4 - %403 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %add.i.i.us.4 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom6.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx7.i.i.us.4 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.4 - %404 = load float, float* %arrayidx7.i.i.us.4, align 4, !tbaa !12 - %sub.i.i.us.4 = fsub float %404, %403 - store float %sub.i.i.us.4, float* %arrayidx7.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.4 - -if.end.r_exit.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %405 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %405, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %13, %conv.i.i.us.4.1 - br i1 %cmp4.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.r_exit.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.r_exit.i.i.us.4.1 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph140, %pregion_for_end.i.i.3 - %406 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.5 = or i32 %406, 5 - %cmp.i.i.5 = icmp sgt i32 %17, %conv2.i.i.5 - %mul.i.i.5 = mul nsw i32 %13, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck160, label %pregion_for_end.i.i.5 - -vector.scevcheck160: ; preds = %pregion_for_end.i.i.4 - %407 = mul i32 %13, %conv2.i.i.5 - %408 = trunc i64 %2 to i32 - %409 = shl i32 %408, 5 - %410 = add i32 %407, %409 - %411 = icmp sgt i32 %410, 2147483616 - br i1 %411, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.memcheck174 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.memcheck174, %vector.scevcheck160 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.memcheck174: ; preds = %vector.scevcheck160 - %412 = trunc i64 %2 to i32 - %413 = shl i32 %412, 5 - %414 = sext i32 %413 to i64 - %scevgep162 = getelementptr float, float* %6, i64 %414 - %415 = add nsw i64 %414, 32 - %scevgep164 = getelementptr float, float* %6, i64 %415 - %416 = mul i32 %13, %conv2.i.i.5 - %417 = add i32 %416, %413 - %418 = sext i32 %417 to i64 - %scevgep166 = getelementptr float, float* %9, i64 %418 - %419 = add nsw i64 %418, 32 - %scevgep168 = getelementptr float, float* %9, i64 %419 - %bound0170 = icmp ult float* %scevgep162, %scevgep168 - %bound1171 = icmp ult float* %scevgep166, %scevgep164 - %found.conflict172 = and i1 %bound0170, %bound1171 - br i1 %found.conflict172, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph175 - -vector.ph175: ; preds = %vector.memcheck174 - %broadcast.splatinsert182 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat183 = shufflevector <8 x i64> %broadcast.splatinsert182, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert184 = insertelement <8 x i32> undef, i32 %13, i32 0 - %broadcast.splat185 = shufflevector <8 x i32> %broadcast.splatinsert184, <8 x i32> undef, <8 x i32> zeroinitializer - %420 = or <8 x i64> %broadcast.splat183, - %421 = trunc <8 x i64> %420 to <8 x i32> - %422 = icmp sgt <8 x i32> %broadcast.splat185, %421 - %423 = extractelement <8 x i64> %420, i32 0 - %424 = shl i64 %423, 32 - %425 = ashr exact i64 %424, 32 - %426 = getelementptr inbounds float, float* %6, i64 %425 - %427 = bitcast float* %426 to <8 x float>* - %wide.masked.load186 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %427, i32 4, <8 x i1> %422, <8 x float> undef), !tbaa !12, !alias.scope !142, !noalias !145 - %428 = extractelement <8 x i32> %421, i32 0 - %429 = add nsw i32 %mul.i.i.5, %428 - %430 = sext i32 %429 to i64 - %431 = getelementptr inbounds float, float* %9, i64 %430 - %432 = bitcast float* %431 to <8 x float>* - %wide.masked.load187 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %432, i32 4, <8 x i1> %422, <8 x float> undef), !tbaa !12, !alias.scope !145 - %433 = fsub <8 x float> %wide.masked.load187, %wide.masked.load186 - %434 = bitcast float* %431 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %433, <8 x float>* %434, i32 4, <8 x i1> %422), !tbaa !12, !alias.scope !145, !llvm.access.group !21 - %435 = or <8 x i64> %broadcast.splat183, - %436 = trunc <8 x i64> %435 to <8 x i32> - %437 = icmp sgt <8 x i32> %broadcast.splat185, %436 - %438 = extractelement <8 x i64> %435, i32 0 - %439 = shl i64 %438, 32 - %440 = ashr exact i64 %439, 32 - %441 = getelementptr inbounds float, float* %6, i64 %440 - %442 = bitcast float* %441 to <8 x float>* - %wide.masked.load186.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %442, i32 4, <8 x i1> %437, <8 x float> undef), !tbaa !12, !alias.scope !142, !noalias !145 - %443 = extractelement <8 x i32> %436, i32 0 - %444 = add nsw i32 %mul.i.i.5, %443 - %445 = sext i32 %444 to i64 - %446 = getelementptr inbounds float, float* %9, i64 %445 - %447 = bitcast float* %446 to <8 x float>* - %wide.masked.load187.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %447, i32 4, <8 x i1> %437, <8 x float> undef), !tbaa !12, !alias.scope !145 - %448 = fsub <8 x float> %wide.masked.load187.1, %wide.masked.load186.1 - %449 = bitcast float* %446 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %448, <8 x float>* %449, i32 4, <8 x i1> %437), !tbaa !12, !alias.scope !145, !llvm.access.group !21 - %450 = or <8 x i64> %broadcast.splat183, - %451 = trunc <8 x i64> %450 to <8 x i32> - %452 = icmp sgt <8 x i32> %broadcast.splat185, %451 - %453 = extractelement <8 x i64> %450, i32 0 - %454 = shl i64 %453, 32 - %455 = ashr exact i64 %454, 32 - %456 = getelementptr inbounds float, float* %6, i64 %455 - %457 = bitcast float* %456 to <8 x float>* - %wide.masked.load186.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %457, i32 4, <8 x i1> %452, <8 x float> undef), !tbaa !12, !alias.scope !142, !noalias !145 - %458 = extractelement <8 x i32> %451, i32 0 - %459 = add nsw i32 %mul.i.i.5, %458 - %460 = sext i32 %459 to i64 - %461 = getelementptr inbounds float, float* %9, i64 %460 - %462 = bitcast float* %461 to <8 x float>* - %wide.masked.load187.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %462, i32 4, <8 x i1> %452, <8 x float> undef), !tbaa !12, !alias.scope !145 - %463 = fsub <8 x float> %wide.masked.load187.2, %wide.masked.load186.2 - %464 = bitcast float* %461 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %463, <8 x float>* %464, i32 4, <8 x i1> %452), !tbaa !12, !alias.scope !145, !llvm.access.group !21 - %465 = or <8 x i64> %broadcast.splat183, - %466 = trunc <8 x i64> %465 to <8 x i32> - %467 = icmp sgt <8 x i32> %broadcast.splat185, %466 - %468 = extractelement <8 x i64> %465, i32 0 - %469 = shl i64 %468, 32 - %470 = ashr exact i64 %469, 32 - %471 = getelementptr inbounds float, float* %6, i64 %470 - %472 = bitcast float* %471 to <8 x float>* - %wide.masked.load186.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %472, i32 4, <8 x i1> %467, <8 x float> undef), !tbaa !12, !alias.scope !142, !noalias !145 - %473 = extractelement <8 x i32> %466, i32 0 - %474 = add nsw i32 %mul.i.i.5, %473 - %475 = sext i32 %474 to i64 - %476 = getelementptr inbounds float, float* %9, i64 %475 - %477 = bitcast float* %476 to <8 x float>* - %wide.masked.load187.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %477, i32 4, <8 x i1> %467, <8 x float> undef), !tbaa !12, !alias.scope !145 - %478 = fsub <8 x float> %wide.masked.load187.3, %wide.masked.load186.3 - %479 = bitcast float* %476 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %478, <8 x float>* %479, i32 4, <8 x i1> %467), !tbaa !12, !alias.scope !145, !llvm.access.group !21 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.r_exit.i.i.us.5.1, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ], [ %645, %if.end.r_exit.i.i.us.5.1 ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %13, %conv.i.i.us.5 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.r_exit.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %sext.i.i.us.5 = shl i64 %add1.i.i.i.us.5, 32 - %idxprom.i.i.us.5 = ashr exact i64 %sext.i.i.us.5, 32 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.5 - %480 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %add.i.i.us.5 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom6.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx7.i.i.us.5 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.5 - %481 = load float, float* %arrayidx7.i.i.us.5, align 4, !tbaa !12 - %sub.i.i.us.5 = fsub float %481, %480 - store float %sub.i.i.us.5, float* %arrayidx7.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.5 - -if.end.r_exit.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %482 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %482, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %13, %conv.i.i.us.5.1 - br i1 %cmp4.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.r_exit.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.r_exit.i.i.us.5.1 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph175, %pregion_for_end.i.i.4 - %483 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.6 = or i32 %483, 6 - %cmp.i.i.6 = icmp sgt i32 %17, %conv2.i.i.6 - %mul.i.i.6 = mul nsw i32 %13, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck195, label %pregion_for_end.i.i.6 - -vector.scevcheck195: ; preds = %pregion_for_end.i.i.5 - %484 = mul i32 %13, %conv2.i.i.6 - %485 = trunc i64 %2 to i32 - %486 = shl i32 %485, 5 - %487 = add i32 %484, %486 - %488 = icmp sgt i32 %487, 2147483616 - br i1 %488, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.memcheck209 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.memcheck209, %vector.scevcheck195 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.memcheck209: ; preds = %vector.scevcheck195 - %489 = trunc i64 %2 to i32 - %490 = shl i32 %489, 5 - %491 = sext i32 %490 to i64 - %scevgep197 = getelementptr float, float* %6, i64 %491 - %492 = add nsw i64 %491, 32 - %scevgep199 = getelementptr float, float* %6, i64 %492 - %493 = mul i32 %13, %conv2.i.i.6 - %494 = add i32 %493, %490 - %495 = sext i32 %494 to i64 - %scevgep201 = getelementptr float, float* %9, i64 %495 - %496 = add nsw i64 %495, 32 - %scevgep203 = getelementptr float, float* %9, i64 %496 - %bound0205 = icmp ult float* %scevgep197, %scevgep203 - %bound1206 = icmp ult float* %scevgep201, %scevgep199 - %found.conflict207 = and i1 %bound0205, %bound1206 - br i1 %found.conflict207, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph210 - -vector.ph210: ; preds = %vector.memcheck209 - %broadcast.splatinsert217 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat218 = shufflevector <8 x i64> %broadcast.splatinsert217, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert219 = insertelement <8 x i32> undef, i32 %13, i32 0 - %broadcast.splat220 = shufflevector <8 x i32> %broadcast.splatinsert219, <8 x i32> undef, <8 x i32> zeroinitializer - %497 = or <8 x i64> %broadcast.splat218, - %498 = trunc <8 x i64> %497 to <8 x i32> - %499 = icmp sgt <8 x i32> %broadcast.splat220, %498 - %500 = extractelement <8 x i64> %497, i32 0 - %501 = shl i64 %500, 32 - %502 = ashr exact i64 %501, 32 - %503 = getelementptr inbounds float, float* %6, i64 %502 - %504 = bitcast float* %503 to <8 x float>* - %wide.masked.load221 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %504, i32 4, <8 x i1> %499, <8 x float> undef), !tbaa !12, !alias.scope !147, !noalias !150 - %505 = extractelement <8 x i32> %498, i32 0 - %506 = add nsw i32 %mul.i.i.6, %505 - %507 = sext i32 %506 to i64 - %508 = getelementptr inbounds float, float* %9, i64 %507 - %509 = bitcast float* %508 to <8 x float>* - %wide.masked.load222 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %509, i32 4, <8 x i1> %499, <8 x float> undef), !tbaa !12, !alias.scope !150 - %510 = fsub <8 x float> %wide.masked.load222, %wide.masked.load221 - %511 = bitcast float* %508 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %510, <8 x float>* %511, i32 4, <8 x i1> %499), !tbaa !12, !alias.scope !150, !llvm.access.group !21 - %512 = or <8 x i64> %broadcast.splat218, - %513 = trunc <8 x i64> %512 to <8 x i32> - %514 = icmp sgt <8 x i32> %broadcast.splat220, %513 - %515 = extractelement <8 x i64> %512, i32 0 - %516 = shl i64 %515, 32 - %517 = ashr exact i64 %516, 32 - %518 = getelementptr inbounds float, float* %6, i64 %517 - %519 = bitcast float* %518 to <8 x float>* - %wide.masked.load221.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %519, i32 4, <8 x i1> %514, <8 x float> undef), !tbaa !12, !alias.scope !147, !noalias !150 - %520 = extractelement <8 x i32> %513, i32 0 - %521 = add nsw i32 %mul.i.i.6, %520 - %522 = sext i32 %521 to i64 - %523 = getelementptr inbounds float, float* %9, i64 %522 - %524 = bitcast float* %523 to <8 x float>* - %wide.masked.load222.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %524, i32 4, <8 x i1> %514, <8 x float> undef), !tbaa !12, !alias.scope !150 - %525 = fsub <8 x float> %wide.masked.load222.1, %wide.masked.load221.1 - %526 = bitcast float* %523 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %525, <8 x float>* %526, i32 4, <8 x i1> %514), !tbaa !12, !alias.scope !150, !llvm.access.group !21 - %527 = or <8 x i64> %broadcast.splat218, - %528 = trunc <8 x i64> %527 to <8 x i32> - %529 = icmp sgt <8 x i32> %broadcast.splat220, %528 - %530 = extractelement <8 x i64> %527, i32 0 - %531 = shl i64 %530, 32 - %532 = ashr exact i64 %531, 32 - %533 = getelementptr inbounds float, float* %6, i64 %532 - %534 = bitcast float* %533 to <8 x float>* - %wide.masked.load221.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %534, i32 4, <8 x i1> %529, <8 x float> undef), !tbaa !12, !alias.scope !147, !noalias !150 - %535 = extractelement <8 x i32> %528, i32 0 - %536 = add nsw i32 %mul.i.i.6, %535 - %537 = sext i32 %536 to i64 - %538 = getelementptr inbounds float, float* %9, i64 %537 - %539 = bitcast float* %538 to <8 x float>* - %wide.masked.load222.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %539, i32 4, <8 x i1> %529, <8 x float> undef), !tbaa !12, !alias.scope !150 - %540 = fsub <8 x float> %wide.masked.load222.2, %wide.masked.load221.2 - %541 = bitcast float* %538 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %540, <8 x float>* %541, i32 4, <8 x i1> %529), !tbaa !12, !alias.scope !150, !llvm.access.group !21 - %542 = or <8 x i64> %broadcast.splat218, - %543 = trunc <8 x i64> %542 to <8 x i32> - %544 = icmp sgt <8 x i32> %broadcast.splat220, %543 - %545 = extractelement <8 x i64> %542, i32 0 - %546 = shl i64 %545, 32 - %547 = ashr exact i64 %546, 32 - %548 = getelementptr inbounds float, float* %6, i64 %547 - %549 = bitcast float* %548 to <8 x float>* - %wide.masked.load221.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %549, i32 4, <8 x i1> %544, <8 x float> undef), !tbaa !12, !alias.scope !147, !noalias !150 - %550 = extractelement <8 x i32> %543, i32 0 - %551 = add nsw i32 %mul.i.i.6, %550 - %552 = sext i32 %551 to i64 - %553 = getelementptr inbounds float, float* %9, i64 %552 - %554 = bitcast float* %553 to <8 x float>* - %wide.masked.load222.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %554, i32 4, <8 x i1> %544, <8 x float> undef), !tbaa !12, !alias.scope !150 - %555 = fsub <8 x float> %wide.masked.load222.3, %wide.masked.load221.3 - %556 = bitcast float* %553 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %555, <8 x float>* %556, i32 4, <8 x i1> %544), !tbaa !12, !alias.scope !150, !llvm.access.group !21 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.r_exit.i.i.us.6.1, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ], [ %642, %if.end.r_exit.i.i.us.6.1 ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %13, %conv.i.i.us.6 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.r_exit.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %sext.i.i.us.6 = shl i64 %add1.i.i.i.us.6, 32 - %idxprom.i.i.us.6 = ashr exact i64 %sext.i.i.us.6, 32 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.6 - %557 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %add.i.i.us.6 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom6.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx7.i.i.us.6 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.6 - %558 = load float, float* %arrayidx7.i.i.us.6, align 4, !tbaa !12 - %sub.i.i.us.6 = fsub float %558, %557 - store float %sub.i.i.us.6, float* %arrayidx7.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.6 - -if.end.r_exit.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %559 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %559, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %13, %conv.i.i.us.6.1 - br i1 %cmp4.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.r_exit.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.r_exit.i.i.us.6.1 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph210, %pregion_for_end.i.i.5 - %560 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.7 = or i32 %560, 7 - %cmp.i.i.7 = icmp sgt i32 %17, %conv2.i.i.7 - %mul.i.i.7 = mul nsw i32 %13, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck230, label %pregion_for_end.i.i.7 - -vector.scevcheck230: ; preds = %pregion_for_end.i.i.6 - %561 = mul i32 %13, %conv2.i.i.7 - %562 = trunc i64 %2 to i32 - %563 = shl i32 %562, 5 - %564 = add i32 %561, %563 - %565 = icmp sgt i32 %564, 2147483616 - br i1 %565, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.memcheck244 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.memcheck244, %vector.scevcheck230 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.memcheck244: ; preds = %vector.scevcheck230 - %566 = trunc i64 %2 to i32 - %567 = shl i32 %566, 5 - %568 = sext i32 %567 to i64 - %scevgep232 = getelementptr float, float* %6, i64 %568 - %569 = add nsw i64 %568, 32 - %scevgep234 = getelementptr float, float* %6, i64 %569 - %570 = mul i32 %13, %conv2.i.i.7 - %571 = add i32 %570, %567 - %572 = sext i32 %571 to i64 - %scevgep236 = getelementptr float, float* %9, i64 %572 - %573 = add nsw i64 %572, 32 - %scevgep238 = getelementptr float, float* %9, i64 %573 - %bound0240 = icmp ult float* %scevgep232, %scevgep238 - %bound1241 = icmp ult float* %scevgep236, %scevgep234 - %found.conflict242 = and i1 %bound0240, %bound1241 - br i1 %found.conflict242, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph245 - -vector.ph245: ; preds = %vector.memcheck244 - %broadcast.splatinsert252 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat253 = shufflevector <8 x i64> %broadcast.splatinsert252, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert254 = insertelement <8 x i32> undef, i32 %13, i32 0 - %broadcast.splat255 = shufflevector <8 x i32> %broadcast.splatinsert254, <8 x i32> undef, <8 x i32> zeroinitializer - %574 = or <8 x i64> %broadcast.splat253, - %575 = trunc <8 x i64> %574 to <8 x i32> - %576 = icmp sgt <8 x i32> %broadcast.splat255, %575 - %577 = extractelement <8 x i64> %574, i32 0 - %578 = shl i64 %577, 32 - %579 = ashr exact i64 %578, 32 - %580 = getelementptr inbounds float, float* %6, i64 %579 - %581 = bitcast float* %580 to <8 x float>* - %wide.masked.load256 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %581, i32 4, <8 x i1> %576, <8 x float> undef), !tbaa !12, !alias.scope !152, !noalias !155 - %582 = extractelement <8 x i32> %575, i32 0 - %583 = add nsw i32 %mul.i.i.7, %582 - %584 = sext i32 %583 to i64 - %585 = getelementptr inbounds float, float* %9, i64 %584 - %586 = bitcast float* %585 to <8 x float>* - %wide.masked.load257 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %586, i32 4, <8 x i1> %576, <8 x float> undef), !tbaa !12, !alias.scope !155 - %587 = fsub <8 x float> %wide.masked.load257, %wide.masked.load256 - %588 = bitcast float* %585 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %587, <8 x float>* %588, i32 4, <8 x i1> %576), !tbaa !12, !alias.scope !155, !llvm.access.group !21 - %589 = or <8 x i64> %broadcast.splat253, - %590 = trunc <8 x i64> %589 to <8 x i32> - %591 = icmp sgt <8 x i32> %broadcast.splat255, %590 - %592 = extractelement <8 x i64> %589, i32 0 - %593 = shl i64 %592, 32 - %594 = ashr exact i64 %593, 32 - %595 = getelementptr inbounds float, float* %6, i64 %594 - %596 = bitcast float* %595 to <8 x float>* - %wide.masked.load256.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %596, i32 4, <8 x i1> %591, <8 x float> undef), !tbaa !12, !alias.scope !152, !noalias !155 - %597 = extractelement <8 x i32> %590, i32 0 - %598 = add nsw i32 %mul.i.i.7, %597 - %599 = sext i32 %598 to i64 - %600 = getelementptr inbounds float, float* %9, i64 %599 - %601 = bitcast float* %600 to <8 x float>* - %wide.masked.load257.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %601, i32 4, <8 x i1> %591, <8 x float> undef), !tbaa !12, !alias.scope !155 - %602 = fsub <8 x float> %wide.masked.load257.1, %wide.masked.load256.1 - %603 = bitcast float* %600 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %602, <8 x float>* %603, i32 4, <8 x i1> %591), !tbaa !12, !alias.scope !155, !llvm.access.group !21 - %604 = or <8 x i64> %broadcast.splat253, - %605 = trunc <8 x i64> %604 to <8 x i32> - %606 = icmp sgt <8 x i32> %broadcast.splat255, %605 - %607 = extractelement <8 x i64> %604, i32 0 - %608 = shl i64 %607, 32 - %609 = ashr exact i64 %608, 32 - %610 = getelementptr inbounds float, float* %6, i64 %609 - %611 = bitcast float* %610 to <8 x float>* - %wide.masked.load256.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %611, i32 4, <8 x i1> %606, <8 x float> undef), !tbaa !12, !alias.scope !152, !noalias !155 - %612 = extractelement <8 x i32> %605, i32 0 - %613 = add nsw i32 %mul.i.i.7, %612 - %614 = sext i32 %613 to i64 - %615 = getelementptr inbounds float, float* %9, i64 %614 - %616 = bitcast float* %615 to <8 x float>* - %wide.masked.load257.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %616, i32 4, <8 x i1> %606, <8 x float> undef), !tbaa !12, !alias.scope !155 - %617 = fsub <8 x float> %wide.masked.load257.2, %wide.masked.load256.2 - %618 = bitcast float* %615 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %617, <8 x float>* %618, i32 4, <8 x i1> %606), !tbaa !12, !alias.scope !155, !llvm.access.group !21 - %619 = or <8 x i64> %broadcast.splat253, - %620 = trunc <8 x i64> %619 to <8 x i32> - %621 = icmp sgt <8 x i32> %broadcast.splat255, %620 - %622 = extractelement <8 x i64> %619, i32 0 - %623 = shl i64 %622, 32 - %624 = ashr exact i64 %623, 32 - %625 = getelementptr inbounds float, float* %6, i64 %624 - %626 = bitcast float* %625 to <8 x float>* - %wide.masked.load256.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %626, i32 4, <8 x i1> %621, <8 x float> undef), !tbaa !12, !alias.scope !152, !noalias !155 - %627 = extractelement <8 x i32> %620, i32 0 - %628 = add nsw i32 %mul.i.i.7, %627 - %629 = sext i32 %628 to i64 - %630 = getelementptr inbounds float, float* %9, i64 %629 - %631 = bitcast float* %630 to <8 x float>* - %wide.masked.load257.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %631, i32 4, <8 x i1> %621, <8 x float> undef), !tbaa !12, !alias.scope !155 - %632 = fsub <8 x float> %wide.masked.load257.3, %wide.masked.load256.3 - %633 = bitcast float* %630 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %632, <8 x float>* %633, i32 4, <8 x i1> %621), !tbaa !12, !alias.scope !155, !llvm.access.group !21 - br label %pregion_for_end.i.i.7 - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.r_exit.i.i.us.7.1, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ], [ %639, %if.end.r_exit.i.i.us.7.1 ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %13, %conv.i.i.us.7 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.r_exit.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %sext.i.i.us.7 = shl i64 %add1.i.i.i.us.7, 32 - %idxprom.i.i.us.7 = ashr exact i64 %sext.i.i.us.7, 32 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.7 - %634 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %add.i.i.us.7 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom6.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx7.i.i.us.7 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.7 - %635 = load float, float* %arrayidx7.i.i.us.7, align 4, !tbaa !12 - %sub.i.i.us.7 = fsub float %635, %634 - store float %sub.i.i.us.7, float* %arrayidx7.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.7 - -if.end.r_exit.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %636 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %636, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %13, %conv.i.i.us.7.1 - br i1 %cmp4.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.r_exit.i.i.us.7.1 - -pregion_for_end.i.i.7.loopexit: ; preds = %if.end.r_exit.i.i.us.7.1 - br label %pregion_for_end.i.i.7 - -pregion_for_end.i.i.7: ; preds = %pregion_for_end.i.i.7.loopexit, %vector.ph245, %pregion_for_end.i.i.6 - ret void - -if.then.i.i.us.7.1: ; preds = %if.end.r_exit.i.i.us.7 - %sext.i.i.us.7.1 = shl i64 %add1.i.i.i.us.7.1, 32 - %idxprom.i.i.us.7.1 = ashr exact i64 %sext.i.i.us.7.1, 32 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.7.1 - %637 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %add.i.i.us.7.1 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom6.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx7.i.i.us.7.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.7.1 - %638 = load float, float* %arrayidx7.i.i.us.7.1, align 4, !tbaa !12 - %sub.i.i.us.7.1 = fsub float %638, %637 - store float %sub.i.i.us.7.1, float* %arrayidx7.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.7.1 - -if.end.r_exit.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.r_exit.i.i.us.7 - %639 = add nuw nsw i64 %_local_id_x.i.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %639, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.i.7.loopexit, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !157 - -if.then.i.i.us.6.1: ; preds = %if.end.r_exit.i.i.us.6 - %sext.i.i.us.6.1 = shl i64 %add1.i.i.i.us.6.1, 32 - %idxprom.i.i.us.6.1 = ashr exact i64 %sext.i.i.us.6.1, 32 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.6.1 - %640 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %add.i.i.us.6.1 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom6.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx7.i.i.us.6.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.6.1 - %641 = load float, float* %arrayidx7.i.i.us.6.1, align 4, !tbaa !12 - %sub.i.i.us.6.1 = fsub float %641, %640 - store float %sub.i.i.us.6.1, float* %arrayidx7.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.6.1 - -if.end.r_exit.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.r_exit.i.i.us.6 - %642 = add nuw nsw i64 %_local_id_x.i.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %642, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !158 - -if.then.i.i.us.5.1: ; preds = %if.end.r_exit.i.i.us.5 - %sext.i.i.us.5.1 = shl i64 %add1.i.i.i.us.5.1, 32 - %idxprom.i.i.us.5.1 = ashr exact i64 %sext.i.i.us.5.1, 32 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.5.1 - %643 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %add.i.i.us.5.1 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom6.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx7.i.i.us.5.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.5.1 - %644 = load float, float* %arrayidx7.i.i.us.5.1, align 4, !tbaa !12 - %sub.i.i.us.5.1 = fsub float %644, %643 - store float %sub.i.i.us.5.1, float* %arrayidx7.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.5.1 - -if.end.r_exit.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.r_exit.i.i.us.5 - %645 = add nuw nsw i64 %_local_id_x.i.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %645, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !159 - -if.then.i.i.us.4.1: ; preds = %if.end.r_exit.i.i.us.4 - %sext.i.i.us.4.1 = shl i64 %add1.i.i.i.us.4.1, 32 - %idxprom.i.i.us.4.1 = ashr exact i64 %sext.i.i.us.4.1, 32 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.4.1 - %646 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %add.i.i.us.4.1 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom6.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx7.i.i.us.4.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.4.1 - %647 = load float, float* %arrayidx7.i.i.us.4.1, align 4, !tbaa !12 - %sub.i.i.us.4.1 = fsub float %647, %646 - store float %sub.i.i.us.4.1, float* %arrayidx7.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.4.1 - -if.end.r_exit.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.r_exit.i.i.us.4 - %648 = add nuw nsw i64 %_local_id_x.i.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %648, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !160 - -if.then.i.i.us.3.1: ; preds = %if.end.r_exit.i.i.us.3 - %sext.i.i.us.3.1 = shl i64 %add1.i.i.i.us.3.1, 32 - %idxprom.i.i.us.3.1 = ashr exact i64 %sext.i.i.us.3.1, 32 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.3.1 - %649 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %add.i.i.us.3.1 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom6.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx7.i.i.us.3.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.3.1 - %650 = load float, float* %arrayidx7.i.i.us.3.1, align 4, !tbaa !12 - %sub.i.i.us.3.1 = fsub float %650, %649 - store float %sub.i.i.us.3.1, float* %arrayidx7.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.3.1 - -if.end.r_exit.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.r_exit.i.i.us.3 - %651 = add nuw nsw i64 %_local_id_x.i.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %651, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !161 - -if.then.i.i.us.2.1: ; preds = %if.end.r_exit.i.i.us.2 - %sext.i.i.us.2.1 = shl i64 %add1.i.i.i.us.2.1, 32 - %idxprom.i.i.us.2.1 = ashr exact i64 %sext.i.i.us.2.1, 32 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.2.1 - %652 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %add.i.i.us.2.1 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom6.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx7.i.i.us.2.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.2.1 - %653 = load float, float* %arrayidx7.i.i.us.2.1, align 4, !tbaa !12 - %sub.i.i.us.2.1 = fsub float %653, %652 - store float %sub.i.i.us.2.1, float* %arrayidx7.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.2.1 - -if.end.r_exit.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.r_exit.i.i.us.2 - %654 = add nuw nsw i64 %_local_id_x.i.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %654, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !162 - -if.then.i.i.us.1.1: ; preds = %if.end.r_exit.i.i.us.1 - %sext.i.i.us.1.1 = shl i64 %add1.i.i.i.us.1.1, 32 - %idxprom.i.i.us.1.1 = ashr exact i64 %sext.i.i.us.1.1, 32 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.1.1 - %655 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %add.i.i.us.1.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom6.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx7.i.i.us.1.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.1.1 - %656 = load float, float* %arrayidx7.i.i.us.1.1, align 4, !tbaa !12 - %sub.i.i.us.1.1 = fsub float %656, %655 - store float %sub.i.i.us.1.1, float* %arrayidx7.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1.1 - -if.end.r_exit.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.r_exit.i.i.us.1 - %657 = add nuw nsw i64 %_local_id_x.i.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %657, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !163 - -if.then.i.i.us.1278: ; preds = %if.end.r_exit.i.i.us - %sext.i.i.us.1271 = shl i64 %add1.i.i.i.us.1267, 32 - %idxprom.i.i.us.1272 = ashr exact i64 %sext.i.i.us.1271, 32 - %arrayidx.i.i.us.1273 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.1272 - %658 = load float, float* %arrayidx.i.i.us.1273, align 4, !tbaa !12 - %add.i.i.us.1274 = add nsw i32 %mul.i.i, %conv.i.i.us.1268 - %idxprom6.i.i.us.1275 = sext i32 %add.i.i.us.1274 to i64 - %arrayidx7.i.i.us.1276 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.1275 - %659 = load float, float* %arrayidx7.i.i.us.1276, align 4, !tbaa !12 - %sub.i.i.us.1277 = fsub float %659, %658 - store float %sub.i.i.us.1277, float* %arrayidx7.i.i.us.1276, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1279 - -if.end.r_exit.i.i.us.1279: ; preds = %if.then.i.i.us.1278, %if.end.r_exit.i.i.us - %660 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %660, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !164 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #2 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #3 - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nofree norecurse nounwind } -attributes #2 = { argmemonly nounwind readonly willreturn } -attributes #3 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"float*", !"int", !"int"} -!9 = !{!"", !"", !"", !""} -!10 = !{!"mean", !"data", !"m", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20} -!20 = distinct !{!20, !18} -!21 = !{!22, !23} -!22 = distinct !{} -!23 = distinct !{} -!24 = !{!25} -!25 = distinct !{!25, !26} -!26 = distinct !{!26, !"LVerDomain"} -!27 = !{!28} -!28 = distinct !{!28, !26} -!29 = !{!30} -!30 = distinct !{!30, !31} -!31 = distinct !{!31, !"LVerDomain"} -!32 = !{!33} -!33 = distinct !{!33, !31} -!34 = !{!35} -!35 = distinct !{!35, !36} -!36 = distinct !{!36, !"LVerDomain"} -!37 = !{!38} -!38 = distinct !{!38, !36} -!39 = !{!40} -!40 = distinct !{!40, !41} -!41 = distinct !{!41, !"LVerDomain"} -!42 = !{!43} -!43 = distinct !{!43, !41} -!44 = !{!45} -!45 = distinct !{!45, !46} -!46 = distinct !{!46, !"LVerDomain"} -!47 = !{!48} -!48 = distinct !{!48, !46} -!49 = !{!50} -!50 = distinct !{!50, !51} -!51 = distinct !{!51, !"LVerDomain"} -!52 = !{!53} -!53 = distinct !{!53, !51} -!54 = !{!55} -!55 = distinct !{!55, !56} -!56 = distinct !{!56, !"LVerDomain"} -!57 = !{!58} -!58 = distinct !{!58, !56} -!59 = distinct !{!59, !60, !61} -!60 = !{!"llvm.loop.parallel_accesses", !22} -!61 = !{!"llvm.loop.isvectorized", i32 1} -!62 = distinct !{!62, !60, !61} -!63 = distinct !{!63, !60, !61} -!64 = distinct !{!64, !60, !61} -!65 = distinct !{!65, !60, !61} -!66 = distinct !{!66, !60, !61} -!67 = distinct !{!67, !60, !61} -!68 = distinct !{!68, !60, !61} -!69 = !{!70} -!70 = distinct !{!70, !71} -!71 = distinct !{!71, !"LVerDomain"} -!72 = !{!73} -!73 = distinct !{!73, !71} -!74 = !{!75} -!75 = distinct !{!75, !76} -!76 = distinct !{!76, !"LVerDomain"} -!77 = !{!78} -!78 = distinct !{!78, !76} -!79 = !{!80} -!80 = distinct !{!80, !81} -!81 = distinct !{!81, !"LVerDomain"} -!82 = !{!83} -!83 = distinct !{!83, !81} -!84 = !{!85} -!85 = distinct !{!85, !86} -!86 = distinct !{!86, !"LVerDomain"} -!87 = !{!88} -!88 = distinct !{!88, !86} -!89 = !{!90} -!90 = distinct !{!90, !91} -!91 = distinct !{!91, !"LVerDomain"} -!92 = !{!93} -!93 = distinct !{!93, !91} -!94 = !{!95} -!95 = distinct !{!95, !96} -!96 = distinct !{!96, !"LVerDomain"} -!97 = !{!98} -!98 = distinct !{!98, !96} -!99 = !{!100} -!100 = distinct !{!100, !101} -!101 = distinct !{!101, !"LVerDomain"} -!102 = !{!103} -!103 = distinct !{!103, !101} -!104 = !{!105} -!105 = distinct !{!105, !106} -!106 = distinct !{!106, !"LVerDomain"} -!107 = !{!108} -!108 = distinct !{!108, !106} -!109 = distinct !{!109, !60, !61} -!110 = distinct !{!110, !60, !61} -!111 = distinct !{!111, !60, !61} -!112 = distinct !{!112, !60, !61} -!113 = distinct !{!113, !60, !61} -!114 = distinct !{!114, !60, !61} -!115 = distinct !{!115, !60, !61} -!116 = distinct !{!116, !60, !61} -!117 = !{!118} -!118 = distinct !{!118, !119} -!119 = distinct !{!119, !"LVerDomain"} -!120 = !{!121} -!121 = distinct !{!121, !119} -!122 = !{!123} -!123 = distinct !{!123, !124} -!124 = distinct !{!124, !"LVerDomain"} -!125 = !{!126} -!126 = distinct !{!126, !124} -!127 = !{!128} -!128 = distinct !{!128, !129} -!129 = distinct !{!129, !"LVerDomain"} -!130 = !{!131} -!131 = distinct !{!131, !129} -!132 = !{!133} -!133 = distinct !{!133, !134} -!134 = distinct !{!134, !"LVerDomain"} -!135 = !{!136} -!136 = distinct !{!136, !134} -!137 = !{!138} -!138 = distinct !{!138, !139} -!139 = distinct !{!139, !"LVerDomain"} -!140 = !{!141} -!141 = distinct !{!141, !139} -!142 = !{!143} -!143 = distinct !{!143, !144} -!144 = distinct !{!144, !"LVerDomain"} -!145 = !{!146} -!146 = distinct !{!146, !144} -!147 = !{!148} -!148 = distinct !{!148, !149} -!149 = distinct !{!149, !"LVerDomain"} -!150 = !{!151} -!151 = distinct !{!151, !149} -!152 = !{!153} -!153 = distinct !{!153, !154} -!154 = distinct !{!154, !"LVerDomain"} -!155 = !{!156} -!156 = distinct !{!156, !154} -!157 = distinct !{!157, !60, !61} -!158 = distinct !{!158, !60, !61} -!159 = distinct !{!159, !60, !61} -!160 = distinct !{!160, !60, !61} -!161 = distinct !{!161, !60, !61} -!162 = distinct !{!162, !60, !61} -!163 = distinct !{!163, !60, !61} -!164 = distinct !{!164, !60, !61} diff --git a/pocl_irs/doitgen_kernel1.ll b/pocl_irs/doitgen_kernel1.ll deleted file mode 100644 index 0678ac1..0000000 --- a/pocl_irs/doitgen_kernel1.ll +++ /dev/null @@ -1,3588 +0,0 @@ -; ModuleID = './EC/AOFIJPJJDGBDGAAGPPJIBOAFNDFEMPBMCHKBI/doitgen_kernel1/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_doitgen_kernel1(i32 %0, i32 %1, i32 %2, float* nocapture readonly %3, float* nocapture readonly %4, float* nocapture %5, i32 %6, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %7, i64 %8, i64 %9, i64 %10) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %8, 5 - %mul3.i.i = shl i64 %9, 3 - %mul6.i = mul i32 %6, %1 - %cmp970.i = icmp sgt i32 %2, 0 - %12 = zext i32 %2 to i64 - br i1 %cmp970.i, label %pregion_for_entry.pregion_for_init.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.preheader - -pregion_for_entry.pregion_for_init.i.preheader: ; preds = %11 - %conv.i.us = trunc i64 %mul.i.i to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %2 - %13 = trunc i64 %mul.i.i to i32 - %conv.i.us.1 = or i32 %13, 1 - %cmp.i.us.1 = icmp slt i32 %conv.i.us.1, %2 - %14 = trunc i64 %mul.i.i to i32 - %conv.i.us.2 = or i32 %14, 2 - %cmp.i.us.2 = icmp slt i32 %conv.i.us.2, %2 - %15 = trunc i64 %mul.i.i to i32 - %conv.i.us.3 = or i32 %15, 3 - %cmp.i.us.3 = icmp slt i32 %conv.i.us.3, %2 - %16 = trunc i64 %mul.i.i to i32 - %conv.i.us.4 = or i32 %16, 4 - %cmp.i.us.4 = icmp slt i32 %conv.i.us.4, %2 - %17 = trunc i64 %mul.i.i to i32 - %conv.i.us.5 = or i32 %17, 5 - %cmp.i.us.5 = icmp slt i32 %conv.i.us.5, %2 - %18 = trunc i64 %mul.i.i to i32 - %conv.i.us.6 = or i32 %18, 6 - %cmp.i.us.6 = icmp slt i32 %conv.i.us.6, %2 - %19 = trunc i64 %mul.i.i to i32 - %conv.i.us.7 = or i32 %19, 7 - %cmp.i.us.7 = icmp slt i32 %conv.i.us.7, %2 - %20 = trunc i64 %mul.i.i to i32 - %conv.i.us.8 = or i32 %20, 8 - %cmp.i.us.8 = icmp slt i32 %conv.i.us.8, %2 - %21 = trunc i64 %mul.i.i to i32 - %conv.i.us.9 = or i32 %21, 9 - %cmp.i.us.9 = icmp slt i32 %conv.i.us.9, %2 - %22 = trunc i64 %mul.i.i to i32 - %conv.i.us.10 = or i32 %22, 10 - %cmp.i.us.10 = icmp slt i32 %conv.i.us.10, %2 - %23 = trunc i64 %mul.i.i to i32 - %conv.i.us.11 = or i32 %23, 11 - %cmp.i.us.11 = icmp slt i32 %conv.i.us.11, %2 - %24 = trunc i64 %mul.i.i to i32 - %conv.i.us.12 = or i32 %24, 12 - %cmp.i.us.12 = icmp slt i32 %conv.i.us.12, %2 - %25 = trunc i64 %mul.i.i to i32 - %conv.i.us.13 = or i32 %25, 13 - %cmp.i.us.13 = icmp slt i32 %conv.i.us.13, %2 - %26 = trunc i64 %mul.i.i to i32 - %conv.i.us.14 = or i32 %26, 14 - %cmp.i.us.14 = icmp slt i32 %conv.i.us.14, %2 - %27 = trunc i64 %mul.i.i to i32 - %conv.i.us.15 = or i32 %27, 15 - %cmp.i.us.15 = icmp slt i32 %conv.i.us.15, %2 - %28 = trunc i64 %mul.i.i to i32 - %conv.i.us.16 = or i32 %28, 16 - %cmp.i.us.16 = icmp slt i32 %conv.i.us.16, %2 - %29 = trunc i64 %mul.i.i to i32 - %conv.i.us.17 = or i32 %29, 17 - %cmp.i.us.17 = icmp slt i32 %conv.i.us.17, %2 - %30 = trunc i64 %mul.i.i to i32 - %conv.i.us.18 = or i32 %30, 18 - %cmp.i.us.18 = icmp slt i32 %conv.i.us.18, %2 - %31 = trunc i64 %mul.i.i to i32 - %conv.i.us.19 = or i32 %31, 19 - %cmp.i.us.19 = icmp slt i32 %conv.i.us.19, %2 - %32 = trunc i64 %mul.i.i to i32 - %conv.i.us.20 = or i32 %32, 20 - %cmp.i.us.20 = icmp slt i32 %conv.i.us.20, %2 - %33 = trunc i64 %mul.i.i to i32 - %conv.i.us.21 = or i32 %33, 21 - %cmp.i.us.21 = icmp slt i32 %conv.i.us.21, %2 - %34 = trunc i64 %mul.i.i to i32 - %conv.i.us.22 = or i32 %34, 22 - %cmp.i.us.22 = icmp slt i32 %conv.i.us.22, %2 - %35 = trunc i64 %mul.i.i to i32 - %conv.i.us.23 = or i32 %35, 23 - %cmp.i.us.23 = icmp slt i32 %conv.i.us.23, %2 - %36 = trunc i64 %mul.i.i to i32 - %conv.i.us.24 = or i32 %36, 24 - %cmp.i.us.24 = icmp slt i32 %conv.i.us.24, %2 - %37 = trunc i64 %mul.i.i to i32 - %conv.i.us.25 = or i32 %37, 25 - %cmp.i.us.25 = icmp slt i32 %conv.i.us.25, %2 - %38 = trunc i64 %mul.i.i to i32 - %conv.i.us.26 = or i32 %38, 26 - %cmp.i.us.26 = icmp slt i32 %conv.i.us.26, %2 - %39 = trunc i64 %mul.i.i to i32 - %conv.i.us.27 = or i32 %39, 27 - %cmp.i.us.27 = icmp slt i32 %conv.i.us.27, %2 - %40 = trunc i64 %mul.i.i to i32 - %conv.i.us.28 = or i32 %40, 28 - %cmp.i.us.28 = icmp slt i32 %conv.i.us.28, %2 - %41 = trunc i64 %mul.i.i to i32 - %conv.i.us.29 = or i32 %41, 29 - %cmp.i.us.29 = icmp slt i32 %conv.i.us.29, %2 - %42 = trunc i64 %mul.i.i to i32 - %conv.i.us.30 = or i32 %42, 30 - %cmp.i.us.30 = icmp slt i32 %conv.i.us.30, %2 - %43 = trunc i64 %mul.i.i to i32 - %conv.i.us.31 = or i32 %43, 31 - %cmp.i.us.31 = icmp slt i32 %conv.i.us.31, %2 - br label %pregion_for_entry.pregion_for_init.i - -pregion_for_entry.pregion_for_init.i.us.preheader: ; preds = %11 - %conv2.i.us = trunc i64 %mul3.i.i to i32 - %cmp4.i.us = icmp slt i32 %conv2.i.us, %1 - %reass.add.i.us = add i32 %mul6.i, %conv2.i.us - %reass.mul.i.us = mul i32 %reass.add.i.us, %2 - %44 = sext i32 %reass.mul.i.us to i64 - br i1 %cmp4.i.us, label %pregion_for_entry.entry.i.us.us.preheader, label %pregion_for_end.i.us - -pregion_for_entry.entry.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.us.preheader - br label %pregion_for_entry.entry.i.us.us - -pregion_for_end.i.us.loopexit: ; preds = %if.end.i.us.us.153 - br label %pregion_for_end.i.us - -pregion_for_end.i.us: ; preds = %pregion_for_end.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.us.preheader - %45 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.1 = or i32 %45, 1 - %cmp4.i.us.1 = icmp slt i32 %conv2.i.us.1, %1 - %reass.add.i.us.1 = add i32 %mul6.i, %conv2.i.us.1 - %reass.mul.i.us.1 = mul i32 %reass.add.i.us.1, %2 - %46 = sext i32 %reass.mul.i.us.1 to i64 - br i1 %cmp4.i.us.1, label %pregion_for_entry.entry.i.us.us.1.preheader, label %pregion_for_end.i.us.1 - -pregion_for_entry.entry.i.us.us.1.preheader: ; preds = %pregion_for_end.i.us - br label %pregion_for_entry.entry.i.us.us.1 - -pregion_for_entry.entry.i.us.us: ; preds = %if.end.i.us.us.153, %pregion_for_entry.entry.i.us.us.preheader - %_local_id_x.0.us.us = phi i64 [ %203, %if.end.i.us.us.153 ], [ 0, %pregion_for_entry.entry.i.us.us.preheader ] - %add1.i.i.us.us = add nuw nsw i64 %_local_id_x.0.us.us, %mul.i.i - %conv.i.us.us = trunc i64 %add1.i.i.us.us to i32 - %cmp.i.us.us = icmp slt i32 %conv.i.us.us, %2 - br i1 %cmp.i.us.us, label %if.then.i.us.us, label %if.end.i.us.us - -if.then.i.us.us: ; preds = %pregion_for_entry.entry.i.us.us - %add8.i.us.us = add nsw i32 %reass.mul.i.us, %conv.i.us.us - %idxprom.i.us.us = sext i32 %add8.i.us.us to i64 - %arrayidx.i.us.us = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us - store float 0.000000e+00, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us = shl i64 %add1.i.i.us.us, 32 - %47 = ashr exact i64 %sext.i.us.us, 32 - br label %for.body.i.us.us - -if.end.i.us.us.loopexit: ; preds = %for.body.i.us.us - br label %if.end.i.us.us - -if.end.i.us.us: ; preds = %if.end.i.us.us.loopexit, %pregion_for_entry.entry.i.us.us - %48 = or i64 %_local_id_x.0.us.us, 1 - %add1.i.i.us.us.138 = add nuw nsw i64 %48, %mul.i.i - %conv.i.us.us.139 = trunc i64 %add1.i.i.us.us.138 to i32 - %cmp.i.us.us.140 = icmp slt i32 %conv.i.us.us.139, %2 - br i1 %cmp.i.us.us.140, label %if.then.i.us.us.146, label %if.end.i.us.us.153 - -for.body.i.us.us: ; preds = %for.body.i.us.us, %if.then.i.us.us - %indvars.iv.next.i3.us.us = phi i64 [ %indvars.iv.next.i.us.us, %for.body.i.us.us ], [ 0, %if.then.i.us.us ] - %49 = phi float [ %55, %for.body.i.us.us ], [ 0.000000e+00, %if.then.i.us.us ] - %50 = add nsw i64 %indvars.iv.next.i3.us.us, %44 - %arrayidx24.i.us.us = getelementptr inbounds float, float* %3, i64 %50 - %51 = load float, float* %arrayidx24.i.us.us, align 4, !tbaa !12 - %52 = mul nuw nsw i64 %indvars.iv.next.i3.us.us, %12 - %53 = add nsw i64 %52, %47 - %arrayidx28.i.us.us = getelementptr inbounds float, float* %4, i64 %53 - %54 = load float, float* %arrayidx28.i.us.us, align 4, !tbaa !12 - %55 = tail call float @llvm.fmuladd.f32(float %51, float %54, float %49) #2 - store float %55, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us = add nuw nsw i64 %indvars.iv.next.i3.us.us, 1 - %exitcond.not.i.us.us = icmp eq i64 %indvars.iv.next.i.us.us, %12 - br i1 %exitcond.not.i.us.us, label %if.end.i.us.us.loopexit, label %for.body.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i: ; preds = %pregion_for_end.i, %pregion_for_entry.pregion_for_init.i.preheader - %_local_id_y.0 = phi i64 [ %56, %pregion_for_end.i ], [ 0, %pregion_for_entry.pregion_for_init.i.preheader ] - %add6.i.i = add nuw nsw i64 %_local_id_y.0, %mul3.i.i - %conv2.i = trunc i64 %add6.i.i to i32 - %cmp4.i = icmp slt i32 %conv2.i, %1 - %reass.add.i = add i32 %mul6.i, %conv2.i - %reass.mul.i = mul i32 %reass.add.i, %2 - br i1 %cmp4.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_end.i - -pregion_for_entry.entry.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i - br i1 %cmp.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us.preheader - %add8.i.us = add nsw i32 %reass.mul.i, %conv.i.us - %idxprom.i.us = sext i32 %add8.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %5, i64 %idxprom.i.us - store float 0.000000e+00, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us - -if.end.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us.preheader - br i1 %cmp.i.us.1, label %if.then.i.us.1, label %if.end.i.us.1 - -pregion_for_end.i: ; preds = %if.then.i.us.31, %if.end.i.us.30, %pregion_for_entry.pregion_for_init.i - %56 = add nuw nsw i64 %_local_id_y.0, 1 - %exitcond33.not = icmp eq i64 %56, 8 - br i1 %exitcond33.not, label %doitgen_kernel1.exit.loopexit54, label %pregion_for_entry.pregion_for_init.i, !llvm.loop !21 - -doitgen_kernel1.exit.loopexit: ; preds = %if.end.i.us.us.7.1 - br label %doitgen_kernel1.exit - -doitgen_kernel1.exit.loopexit54: ; preds = %pregion_for_end.i - br label %doitgen_kernel1.exit - -doitgen_kernel1.exit: ; preds = %pregion_for_end.i.us.6, %doitgen_kernel1.exit.loopexit54, %doitgen_kernel1.exit.loopexit - ret void - -pregion_for_entry.entry.i.us.us.1: ; preds = %if.end.i.us.us.1.1, %pregion_for_entry.entry.i.us.us.1.preheader - %_local_id_x.0.us.us.1 = phi i64 [ %194, %if.end.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.us.us.1.preheader ] - %add1.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.0.us.us.1, %mul.i.i - %conv.i.us.us.1 = trunc i64 %add1.i.i.us.us.1 to i32 - %cmp.i.us.us.1 = icmp slt i32 %conv.i.us.us.1, %2 - br i1 %cmp.i.us.us.1, label %if.then.i.us.us.1, label %if.end.i.us.us.1 - -if.then.i.us.us.1: ; preds = %pregion_for_entry.entry.i.us.us.1 - %add8.i.us.us.1 = add nsw i32 %reass.mul.i.us.1, %conv.i.us.us.1 - %idxprom.i.us.us.1 = sext i32 %add8.i.us.us.1 to i64 - %arrayidx.i.us.us.1 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.1 = shl i64 %add1.i.i.us.us.1, 32 - %57 = ashr exact i64 %sext.i.us.us.1, 32 - br label %for.body.i.us.us.1 - -for.body.i.us.us.1: ; preds = %for.body.i.us.us.1, %if.then.i.us.us.1 - %indvars.iv.next.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.us.us.1, %for.body.i.us.us.1 ], [ 0, %if.then.i.us.us.1 ] - %58 = phi float [ %64, %for.body.i.us.us.1 ], [ 0.000000e+00, %if.then.i.us.us.1 ] - %59 = add nsw i64 %indvars.iv.next.i3.us.us.1, %46 - %arrayidx24.i.us.us.1 = getelementptr inbounds float, float* %3, i64 %59 - %60 = load float, float* %arrayidx24.i.us.us.1, align 4, !tbaa !12 - %61 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.1, %12 - %62 = add nsw i64 %61, %57 - %arrayidx28.i.us.us.1 = getelementptr inbounds float, float* %4, i64 %62 - %63 = load float, float* %arrayidx28.i.us.us.1, align 4, !tbaa !12 - %64 = tail call float @llvm.fmuladd.f32(float %60, float %63, float %58) #2 - store float %64, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1, 1 - %exitcond.not.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.us.us.1, %12 - br i1 %exitcond.not.i.us.us.1, label %if.end.i.us.us.1.loopexit, label %for.body.i.us.us.1, !llvm.loop !19 - -if.end.i.us.us.1.loopexit: ; preds = %for.body.i.us.us.1 - br label %if.end.i.us.us.1 - -if.end.i.us.us.1: ; preds = %if.end.i.us.us.1.loopexit, %pregion_for_entry.entry.i.us.us.1 - %65 = or i64 %_local_id_x.0.us.us.1, 1 - %add1.i.i.us.us.1.1 = add nuw nsw i64 %65, %mul.i.i - %conv.i.us.us.1.1 = trunc i64 %add1.i.i.us.us.1.1 to i32 - %cmp.i.us.us.1.1 = icmp slt i32 %conv.i.us.us.1.1, %2 - br i1 %cmp.i.us.us.1.1, label %if.then.i.us.us.1.1, label %if.end.i.us.us.1.1 - -pregion_for_end.i.us.1.loopexit: ; preds = %if.end.i.us.us.1.1 - br label %pregion_for_end.i.us.1 - -pregion_for_end.i.us.1: ; preds = %pregion_for_end.i.us.1.loopexit, %pregion_for_end.i.us - %66 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.2 = or i32 %66, 2 - %cmp4.i.us.2 = icmp slt i32 %conv2.i.us.2, %1 - %reass.add.i.us.2 = add i32 %mul6.i, %conv2.i.us.2 - %reass.mul.i.us.2 = mul i32 %reass.add.i.us.2, %2 - %67 = sext i32 %reass.mul.i.us.2 to i64 - br i1 %cmp4.i.us.2, label %pregion_for_entry.entry.i.us.us.2.preheader, label %pregion_for_end.i.us.2 - -pregion_for_entry.entry.i.us.us.2.preheader: ; preds = %pregion_for_end.i.us.1 - br label %pregion_for_entry.entry.i.us.us.2 - -pregion_for_entry.entry.i.us.us.2: ; preds = %if.end.i.us.us.2.1, %pregion_for_entry.entry.i.us.us.2.preheader - %_local_id_x.0.us.us.2 = phi i64 [ %185, %if.end.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.us.us.2.preheader ] - %add1.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.0.us.us.2, %mul.i.i - %conv.i.us.us.2 = trunc i64 %add1.i.i.us.us.2 to i32 - %cmp.i.us.us.2 = icmp slt i32 %conv.i.us.us.2, %2 - br i1 %cmp.i.us.us.2, label %if.then.i.us.us.2, label %if.end.i.us.us.2 - -if.then.i.us.us.2: ; preds = %pregion_for_entry.entry.i.us.us.2 - %add8.i.us.us.2 = add nsw i32 %reass.mul.i.us.2, %conv.i.us.us.2 - %idxprom.i.us.us.2 = sext i32 %add8.i.us.us.2 to i64 - %arrayidx.i.us.us.2 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.2 = shl i64 %add1.i.i.us.us.2, 32 - %68 = ashr exact i64 %sext.i.us.us.2, 32 - br label %for.body.i.us.us.2 - -for.body.i.us.us.2: ; preds = %for.body.i.us.us.2, %if.then.i.us.us.2 - %indvars.iv.next.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.us.us.2, %for.body.i.us.us.2 ], [ 0, %if.then.i.us.us.2 ] - %69 = phi float [ %75, %for.body.i.us.us.2 ], [ 0.000000e+00, %if.then.i.us.us.2 ] - %70 = add nsw i64 %indvars.iv.next.i3.us.us.2, %67 - %arrayidx24.i.us.us.2 = getelementptr inbounds float, float* %3, i64 %70 - %71 = load float, float* %arrayidx24.i.us.us.2, align 4, !tbaa !12 - %72 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.2, %12 - %73 = add nsw i64 %72, %68 - %arrayidx28.i.us.us.2 = getelementptr inbounds float, float* %4, i64 %73 - %74 = load float, float* %arrayidx28.i.us.us.2, align 4, !tbaa !12 - %75 = tail call float @llvm.fmuladd.f32(float %71, float %74, float %69) #2 - store float %75, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2, 1 - %exitcond.not.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.us.us.2, %12 - br i1 %exitcond.not.i.us.us.2, label %if.end.i.us.us.2.loopexit, label %for.body.i.us.us.2, !llvm.loop !19 - -if.end.i.us.us.2.loopexit: ; preds = %for.body.i.us.us.2 - br label %if.end.i.us.us.2 - -if.end.i.us.us.2: ; preds = %if.end.i.us.us.2.loopexit, %pregion_for_entry.entry.i.us.us.2 - %76 = or i64 %_local_id_x.0.us.us.2, 1 - %add1.i.i.us.us.2.1 = add nuw nsw i64 %76, %mul.i.i - %conv.i.us.us.2.1 = trunc i64 %add1.i.i.us.us.2.1 to i32 - %cmp.i.us.us.2.1 = icmp slt i32 %conv.i.us.us.2.1, %2 - br i1 %cmp.i.us.us.2.1, label %if.then.i.us.us.2.1, label %if.end.i.us.us.2.1 - -pregion_for_end.i.us.2.loopexit: ; preds = %if.end.i.us.us.2.1 - br label %pregion_for_end.i.us.2 - -pregion_for_end.i.us.2: ; preds = %pregion_for_end.i.us.2.loopexit, %pregion_for_end.i.us.1 - %77 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.3 = or i32 %77, 3 - %cmp4.i.us.3 = icmp slt i32 %conv2.i.us.3, %1 - %reass.add.i.us.3 = add i32 %mul6.i, %conv2.i.us.3 - %reass.mul.i.us.3 = mul i32 %reass.add.i.us.3, %2 - %78 = sext i32 %reass.mul.i.us.3 to i64 - br i1 %cmp4.i.us.3, label %pregion_for_entry.entry.i.us.us.3.preheader, label %pregion_for_end.i.us.3 - -pregion_for_entry.entry.i.us.us.3.preheader: ; preds = %pregion_for_end.i.us.2 - br label %pregion_for_entry.entry.i.us.us.3 - -pregion_for_entry.entry.i.us.us.3: ; preds = %if.end.i.us.us.3.1, %pregion_for_entry.entry.i.us.us.3.preheader - %_local_id_x.0.us.us.3 = phi i64 [ %176, %if.end.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.us.us.3.preheader ] - %add1.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.0.us.us.3, %mul.i.i - %conv.i.us.us.3 = trunc i64 %add1.i.i.us.us.3 to i32 - %cmp.i.us.us.3 = icmp slt i32 %conv.i.us.us.3, %2 - br i1 %cmp.i.us.us.3, label %if.then.i.us.us.3, label %if.end.i.us.us.3 - -if.then.i.us.us.3: ; preds = %pregion_for_entry.entry.i.us.us.3 - %add8.i.us.us.3 = add nsw i32 %reass.mul.i.us.3, %conv.i.us.us.3 - %idxprom.i.us.us.3 = sext i32 %add8.i.us.us.3 to i64 - %arrayidx.i.us.us.3 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.3 = shl i64 %add1.i.i.us.us.3, 32 - %79 = ashr exact i64 %sext.i.us.us.3, 32 - br label %for.body.i.us.us.3 - -for.body.i.us.us.3: ; preds = %for.body.i.us.us.3, %if.then.i.us.us.3 - %indvars.iv.next.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.us.us.3, %for.body.i.us.us.3 ], [ 0, %if.then.i.us.us.3 ] - %80 = phi float [ %86, %for.body.i.us.us.3 ], [ 0.000000e+00, %if.then.i.us.us.3 ] - %81 = add nsw i64 %indvars.iv.next.i3.us.us.3, %78 - %arrayidx24.i.us.us.3 = getelementptr inbounds float, float* %3, i64 %81 - %82 = load float, float* %arrayidx24.i.us.us.3, align 4, !tbaa !12 - %83 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.3, %12 - %84 = add nsw i64 %83, %79 - %arrayidx28.i.us.us.3 = getelementptr inbounds float, float* %4, i64 %84 - %85 = load float, float* %arrayidx28.i.us.us.3, align 4, !tbaa !12 - %86 = tail call float @llvm.fmuladd.f32(float %82, float %85, float %80) #2 - store float %86, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3, 1 - %exitcond.not.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.us.us.3, %12 - br i1 %exitcond.not.i.us.us.3, label %if.end.i.us.us.3.loopexit, label %for.body.i.us.us.3, !llvm.loop !19 - -if.end.i.us.us.3.loopexit: ; preds = %for.body.i.us.us.3 - br label %if.end.i.us.us.3 - -if.end.i.us.us.3: ; preds = %if.end.i.us.us.3.loopexit, %pregion_for_entry.entry.i.us.us.3 - %87 = or i64 %_local_id_x.0.us.us.3, 1 - %add1.i.i.us.us.3.1 = add nuw nsw i64 %87, %mul.i.i - %conv.i.us.us.3.1 = trunc i64 %add1.i.i.us.us.3.1 to i32 - %cmp.i.us.us.3.1 = icmp slt i32 %conv.i.us.us.3.1, %2 - br i1 %cmp.i.us.us.3.1, label %if.then.i.us.us.3.1, label %if.end.i.us.us.3.1 - -pregion_for_end.i.us.3.loopexit: ; preds = %if.end.i.us.us.3.1 - br label %pregion_for_end.i.us.3 - -pregion_for_end.i.us.3: ; preds = %pregion_for_end.i.us.3.loopexit, %pregion_for_end.i.us.2 - %88 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.4 = or i32 %88, 4 - %cmp4.i.us.4 = icmp slt i32 %conv2.i.us.4, %1 - %reass.add.i.us.4 = add i32 %mul6.i, %conv2.i.us.4 - %reass.mul.i.us.4 = mul i32 %reass.add.i.us.4, %2 - %89 = sext i32 %reass.mul.i.us.4 to i64 - br i1 %cmp4.i.us.4, label %pregion_for_entry.entry.i.us.us.4.preheader, label %pregion_for_end.i.us.4 - -pregion_for_entry.entry.i.us.us.4.preheader: ; preds = %pregion_for_end.i.us.3 - br label %pregion_for_entry.entry.i.us.us.4 - -pregion_for_entry.entry.i.us.us.4: ; preds = %if.end.i.us.us.4.1, %pregion_for_entry.entry.i.us.us.4.preheader - %_local_id_x.0.us.us.4 = phi i64 [ %167, %if.end.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.us.us.4.preheader ] - %add1.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.0.us.us.4, %mul.i.i - %conv.i.us.us.4 = trunc i64 %add1.i.i.us.us.4 to i32 - %cmp.i.us.us.4 = icmp slt i32 %conv.i.us.us.4, %2 - br i1 %cmp.i.us.us.4, label %if.then.i.us.us.4, label %if.end.i.us.us.4 - -if.then.i.us.us.4: ; preds = %pregion_for_entry.entry.i.us.us.4 - %add8.i.us.us.4 = add nsw i32 %reass.mul.i.us.4, %conv.i.us.us.4 - %idxprom.i.us.us.4 = sext i32 %add8.i.us.us.4 to i64 - %arrayidx.i.us.us.4 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.4 = shl i64 %add1.i.i.us.us.4, 32 - %90 = ashr exact i64 %sext.i.us.us.4, 32 - br label %for.body.i.us.us.4 - -for.body.i.us.us.4: ; preds = %for.body.i.us.us.4, %if.then.i.us.us.4 - %indvars.iv.next.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.us.us.4, %for.body.i.us.us.4 ], [ 0, %if.then.i.us.us.4 ] - %91 = phi float [ %97, %for.body.i.us.us.4 ], [ 0.000000e+00, %if.then.i.us.us.4 ] - %92 = add nsw i64 %indvars.iv.next.i3.us.us.4, %89 - %arrayidx24.i.us.us.4 = getelementptr inbounds float, float* %3, i64 %92 - %93 = load float, float* %arrayidx24.i.us.us.4, align 4, !tbaa !12 - %94 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.4, %12 - %95 = add nsw i64 %94, %90 - %arrayidx28.i.us.us.4 = getelementptr inbounds float, float* %4, i64 %95 - %96 = load float, float* %arrayidx28.i.us.us.4, align 4, !tbaa !12 - %97 = tail call float @llvm.fmuladd.f32(float %93, float %96, float %91) #2 - store float %97, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4, 1 - %exitcond.not.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.us.us.4, %12 - br i1 %exitcond.not.i.us.us.4, label %if.end.i.us.us.4.loopexit, label %for.body.i.us.us.4, !llvm.loop !19 - -if.end.i.us.us.4.loopexit: ; preds = %for.body.i.us.us.4 - br label %if.end.i.us.us.4 - -if.end.i.us.us.4: ; preds = %if.end.i.us.us.4.loopexit, %pregion_for_entry.entry.i.us.us.4 - %98 = or i64 %_local_id_x.0.us.us.4, 1 - %add1.i.i.us.us.4.1 = add nuw nsw i64 %98, %mul.i.i - %conv.i.us.us.4.1 = trunc i64 %add1.i.i.us.us.4.1 to i32 - %cmp.i.us.us.4.1 = icmp slt i32 %conv.i.us.us.4.1, %2 - br i1 %cmp.i.us.us.4.1, label %if.then.i.us.us.4.1, label %if.end.i.us.us.4.1 - -pregion_for_end.i.us.4.loopexit: ; preds = %if.end.i.us.us.4.1 - br label %pregion_for_end.i.us.4 - -pregion_for_end.i.us.4: ; preds = %pregion_for_end.i.us.4.loopexit, %pregion_for_end.i.us.3 - %99 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.5 = or i32 %99, 5 - %cmp4.i.us.5 = icmp slt i32 %conv2.i.us.5, %1 - %reass.add.i.us.5 = add i32 %mul6.i, %conv2.i.us.5 - %reass.mul.i.us.5 = mul i32 %reass.add.i.us.5, %2 - %100 = sext i32 %reass.mul.i.us.5 to i64 - br i1 %cmp4.i.us.5, label %pregion_for_entry.entry.i.us.us.5.preheader, label %pregion_for_end.i.us.5 - -pregion_for_entry.entry.i.us.us.5.preheader: ; preds = %pregion_for_end.i.us.4 - br label %pregion_for_entry.entry.i.us.us.5 - -pregion_for_entry.entry.i.us.us.5: ; preds = %if.end.i.us.us.5.1, %pregion_for_entry.entry.i.us.us.5.preheader - %_local_id_x.0.us.us.5 = phi i64 [ %158, %if.end.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.us.us.5.preheader ] - %add1.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.0.us.us.5, %mul.i.i - %conv.i.us.us.5 = trunc i64 %add1.i.i.us.us.5 to i32 - %cmp.i.us.us.5 = icmp slt i32 %conv.i.us.us.5, %2 - br i1 %cmp.i.us.us.5, label %if.then.i.us.us.5, label %if.end.i.us.us.5 - -if.then.i.us.us.5: ; preds = %pregion_for_entry.entry.i.us.us.5 - %add8.i.us.us.5 = add nsw i32 %reass.mul.i.us.5, %conv.i.us.us.5 - %idxprom.i.us.us.5 = sext i32 %add8.i.us.us.5 to i64 - %arrayidx.i.us.us.5 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.5 = shl i64 %add1.i.i.us.us.5, 32 - %101 = ashr exact i64 %sext.i.us.us.5, 32 - br label %for.body.i.us.us.5 - -for.body.i.us.us.5: ; preds = %for.body.i.us.us.5, %if.then.i.us.us.5 - %indvars.iv.next.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.us.us.5, %for.body.i.us.us.5 ], [ 0, %if.then.i.us.us.5 ] - %102 = phi float [ %108, %for.body.i.us.us.5 ], [ 0.000000e+00, %if.then.i.us.us.5 ] - %103 = add nsw i64 %indvars.iv.next.i3.us.us.5, %100 - %arrayidx24.i.us.us.5 = getelementptr inbounds float, float* %3, i64 %103 - %104 = load float, float* %arrayidx24.i.us.us.5, align 4, !tbaa !12 - %105 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.5, %12 - %106 = add nsw i64 %105, %101 - %arrayidx28.i.us.us.5 = getelementptr inbounds float, float* %4, i64 %106 - %107 = load float, float* %arrayidx28.i.us.us.5, align 4, !tbaa !12 - %108 = tail call float @llvm.fmuladd.f32(float %104, float %107, float %102) #2 - store float %108, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5, 1 - %exitcond.not.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.us.us.5, %12 - br i1 %exitcond.not.i.us.us.5, label %if.end.i.us.us.5.loopexit, label %for.body.i.us.us.5, !llvm.loop !19 - -if.end.i.us.us.5.loopexit: ; preds = %for.body.i.us.us.5 - br label %if.end.i.us.us.5 - -if.end.i.us.us.5: ; preds = %if.end.i.us.us.5.loopexit, %pregion_for_entry.entry.i.us.us.5 - %109 = or i64 %_local_id_x.0.us.us.5, 1 - %add1.i.i.us.us.5.1 = add nuw nsw i64 %109, %mul.i.i - %conv.i.us.us.5.1 = trunc i64 %add1.i.i.us.us.5.1 to i32 - %cmp.i.us.us.5.1 = icmp slt i32 %conv.i.us.us.5.1, %2 - br i1 %cmp.i.us.us.5.1, label %if.then.i.us.us.5.1, label %if.end.i.us.us.5.1 - -pregion_for_end.i.us.5.loopexit: ; preds = %if.end.i.us.us.5.1 - br label %pregion_for_end.i.us.5 - -pregion_for_end.i.us.5: ; preds = %pregion_for_end.i.us.5.loopexit, %pregion_for_end.i.us.4 - %110 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.6 = or i32 %110, 6 - %cmp4.i.us.6 = icmp slt i32 %conv2.i.us.6, %1 - %reass.add.i.us.6 = add i32 %mul6.i, %conv2.i.us.6 - %reass.mul.i.us.6 = mul i32 %reass.add.i.us.6, %2 - %111 = sext i32 %reass.mul.i.us.6 to i64 - br i1 %cmp4.i.us.6, label %pregion_for_entry.entry.i.us.us.6.preheader, label %pregion_for_end.i.us.6 - -pregion_for_entry.entry.i.us.us.6.preheader: ; preds = %pregion_for_end.i.us.5 - br label %pregion_for_entry.entry.i.us.us.6 - -pregion_for_entry.entry.i.us.us.6: ; preds = %if.end.i.us.us.6.1, %pregion_for_entry.entry.i.us.us.6.preheader - %_local_id_x.0.us.us.6 = phi i64 [ %149, %if.end.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.us.us.6.preheader ] - %add1.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.0.us.us.6, %mul.i.i - %conv.i.us.us.6 = trunc i64 %add1.i.i.us.us.6 to i32 - %cmp.i.us.us.6 = icmp slt i32 %conv.i.us.us.6, %2 - br i1 %cmp.i.us.us.6, label %if.then.i.us.us.6, label %if.end.i.us.us.6 - -if.then.i.us.us.6: ; preds = %pregion_for_entry.entry.i.us.us.6 - %add8.i.us.us.6 = add nsw i32 %reass.mul.i.us.6, %conv.i.us.us.6 - %idxprom.i.us.us.6 = sext i32 %add8.i.us.us.6 to i64 - %arrayidx.i.us.us.6 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.6 = shl i64 %add1.i.i.us.us.6, 32 - %112 = ashr exact i64 %sext.i.us.us.6, 32 - br label %for.body.i.us.us.6 - -for.body.i.us.us.6: ; preds = %for.body.i.us.us.6, %if.then.i.us.us.6 - %indvars.iv.next.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.us.us.6, %for.body.i.us.us.6 ], [ 0, %if.then.i.us.us.6 ] - %113 = phi float [ %119, %for.body.i.us.us.6 ], [ 0.000000e+00, %if.then.i.us.us.6 ] - %114 = add nsw i64 %indvars.iv.next.i3.us.us.6, %111 - %arrayidx24.i.us.us.6 = getelementptr inbounds float, float* %3, i64 %114 - %115 = load float, float* %arrayidx24.i.us.us.6, align 4, !tbaa !12 - %116 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.6, %12 - %117 = add nsw i64 %116, %112 - %arrayidx28.i.us.us.6 = getelementptr inbounds float, float* %4, i64 %117 - %118 = load float, float* %arrayidx28.i.us.us.6, align 4, !tbaa !12 - %119 = tail call float @llvm.fmuladd.f32(float %115, float %118, float %113) #2 - store float %119, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6, 1 - %exitcond.not.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.us.us.6, %12 - br i1 %exitcond.not.i.us.us.6, label %if.end.i.us.us.6.loopexit, label %for.body.i.us.us.6, !llvm.loop !19 - -if.end.i.us.us.6.loopexit: ; preds = %for.body.i.us.us.6 - br label %if.end.i.us.us.6 - -if.end.i.us.us.6: ; preds = %if.end.i.us.us.6.loopexit, %pregion_for_entry.entry.i.us.us.6 - %120 = or i64 %_local_id_x.0.us.us.6, 1 - %add1.i.i.us.us.6.1 = add nuw nsw i64 %120, %mul.i.i - %conv.i.us.us.6.1 = trunc i64 %add1.i.i.us.us.6.1 to i32 - %cmp.i.us.us.6.1 = icmp slt i32 %conv.i.us.us.6.1, %2 - br i1 %cmp.i.us.us.6.1, label %if.then.i.us.us.6.1, label %if.end.i.us.us.6.1 - -pregion_for_end.i.us.6.loopexit: ; preds = %if.end.i.us.us.6.1 - br label %pregion_for_end.i.us.6 - -pregion_for_end.i.us.6: ; preds = %pregion_for_end.i.us.6.loopexit, %pregion_for_end.i.us.5 - %121 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.7 = or i32 %121, 7 - %cmp4.i.us.7 = icmp slt i32 %conv2.i.us.7, %1 - %reass.add.i.us.7 = add i32 %mul6.i, %conv2.i.us.7 - %reass.mul.i.us.7 = mul i32 %reass.add.i.us.7, %2 - %122 = sext i32 %reass.mul.i.us.7 to i64 - br i1 %cmp4.i.us.7, label %pregion_for_entry.entry.i.us.us.7.preheader, label %doitgen_kernel1.exit - -pregion_for_entry.entry.i.us.us.7.preheader: ; preds = %pregion_for_end.i.us.6 - br label %pregion_for_entry.entry.i.us.us.7 - -pregion_for_entry.entry.i.us.us.7: ; preds = %if.end.i.us.us.7.1, %pregion_for_entry.entry.i.us.us.7.preheader - %_local_id_x.0.us.us.7 = phi i64 [ %140, %if.end.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.us.us.7.preheader ] - %add1.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.0.us.us.7, %mul.i.i - %conv.i.us.us.7 = trunc i64 %add1.i.i.us.us.7 to i32 - %cmp.i.us.us.7 = icmp slt i32 %conv.i.us.us.7, %2 - br i1 %cmp.i.us.us.7, label %if.then.i.us.us.7, label %if.end.i.us.us.7 - -if.then.i.us.us.7: ; preds = %pregion_for_entry.entry.i.us.us.7 - %add8.i.us.us.7 = add nsw i32 %reass.mul.i.us.7, %conv.i.us.us.7 - %idxprom.i.us.us.7 = sext i32 %add8.i.us.us.7 to i64 - %arrayidx.i.us.us.7 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.7 = shl i64 %add1.i.i.us.us.7, 32 - %123 = ashr exact i64 %sext.i.us.us.7, 32 - br label %for.body.i.us.us.7 - -for.body.i.us.us.7: ; preds = %for.body.i.us.us.7, %if.then.i.us.us.7 - %indvars.iv.next.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.us.us.7, %for.body.i.us.us.7 ], [ 0, %if.then.i.us.us.7 ] - %124 = phi float [ %130, %for.body.i.us.us.7 ], [ 0.000000e+00, %if.then.i.us.us.7 ] - %125 = add nsw i64 %indvars.iv.next.i3.us.us.7, %122 - %arrayidx24.i.us.us.7 = getelementptr inbounds float, float* %3, i64 %125 - %126 = load float, float* %arrayidx24.i.us.us.7, align 4, !tbaa !12 - %127 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.7, %12 - %128 = add nsw i64 %127, %123 - %arrayidx28.i.us.us.7 = getelementptr inbounds float, float* %4, i64 %128 - %129 = load float, float* %arrayidx28.i.us.us.7, align 4, !tbaa !12 - %130 = tail call float @llvm.fmuladd.f32(float %126, float %129, float %124) #2 - store float %130, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7, 1 - %exitcond.not.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.us.us.7, %12 - br i1 %exitcond.not.i.us.us.7, label %if.end.i.us.us.7.loopexit, label %for.body.i.us.us.7, !llvm.loop !19 - -if.end.i.us.us.7.loopexit: ; preds = %for.body.i.us.us.7 - br label %if.end.i.us.us.7 - -if.end.i.us.us.7: ; preds = %if.end.i.us.us.7.loopexit, %pregion_for_entry.entry.i.us.us.7 - %131 = or i64 %_local_id_x.0.us.us.7, 1 - %add1.i.i.us.us.7.1 = add nuw nsw i64 %131, %mul.i.i - %conv.i.us.us.7.1 = trunc i64 %add1.i.i.us.us.7.1 to i32 - %cmp.i.us.us.7.1 = icmp slt i32 %conv.i.us.us.7.1, %2 - br i1 %cmp.i.us.us.7.1, label %if.then.i.us.us.7.1, label %if.end.i.us.us.7.1 - -if.then.i.us.1: ; preds = %if.end.i.us - %add8.i.us.1 = add nsw i32 %reass.mul.i, %conv.i.us.1 - %idxprom.i.us.1 = sext i32 %add8.i.us.1 to i64 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1 - -if.end.i.us.1: ; preds = %if.then.i.us.1, %if.end.i.us - br i1 %cmp.i.us.2, label %if.then.i.us.2, label %if.end.i.us.2 - -if.then.i.us.2: ; preds = %if.end.i.us.1 - %add8.i.us.2 = add nsw i32 %reass.mul.i, %conv.i.us.2 - %idxprom.i.us.2 = sext i32 %add8.i.us.2 to i64 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2 - -if.end.i.us.2: ; preds = %if.then.i.us.2, %if.end.i.us.1 - br i1 %cmp.i.us.3, label %if.then.i.us.3, label %if.end.i.us.3 - -if.then.i.us.3: ; preds = %if.end.i.us.2 - %add8.i.us.3 = add nsw i32 %reass.mul.i, %conv.i.us.3 - %idxprom.i.us.3 = sext i32 %add8.i.us.3 to i64 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3 - -if.end.i.us.3: ; preds = %if.then.i.us.3, %if.end.i.us.2 - br i1 %cmp.i.us.4, label %if.then.i.us.4, label %if.end.i.us.4 - -if.then.i.us.4: ; preds = %if.end.i.us.3 - %add8.i.us.4 = add nsw i32 %reass.mul.i, %conv.i.us.4 - %idxprom.i.us.4 = sext i32 %add8.i.us.4 to i64 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4 - -if.end.i.us.4: ; preds = %if.then.i.us.4, %if.end.i.us.3 - br i1 %cmp.i.us.5, label %if.then.i.us.5, label %if.end.i.us.5 - -if.then.i.us.5: ; preds = %if.end.i.us.4 - %add8.i.us.5 = add nsw i32 %reass.mul.i, %conv.i.us.5 - %idxprom.i.us.5 = sext i32 %add8.i.us.5 to i64 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5 - -if.end.i.us.5: ; preds = %if.then.i.us.5, %if.end.i.us.4 - br i1 %cmp.i.us.6, label %if.then.i.us.6, label %if.end.i.us.6 - -if.then.i.us.6: ; preds = %if.end.i.us.5 - %add8.i.us.6 = add nsw i32 %reass.mul.i, %conv.i.us.6 - %idxprom.i.us.6 = sext i32 %add8.i.us.6 to i64 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6 - -if.end.i.us.6: ; preds = %if.then.i.us.6, %if.end.i.us.5 - br i1 %cmp.i.us.7, label %if.then.i.us.7, label %if.end.i.us.7 - -if.then.i.us.7: ; preds = %if.end.i.us.6 - %add8.i.us.7 = add nsw i32 %reass.mul.i, %conv.i.us.7 - %idxprom.i.us.7 = sext i32 %add8.i.us.7 to i64 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7 - -if.end.i.us.7: ; preds = %if.then.i.us.7, %if.end.i.us.6 - br i1 %cmp.i.us.8, label %if.then.i.us.8, label %if.end.i.us.8 - -if.then.i.us.8: ; preds = %if.end.i.us.7 - %add8.i.us.8 = add nsw i32 %reass.mul.i, %conv.i.us.8 - %idxprom.i.us.8 = sext i32 %add8.i.us.8 to i64 - %arrayidx.i.us.8 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.8 - -if.end.i.us.8: ; preds = %if.then.i.us.8, %if.end.i.us.7 - br i1 %cmp.i.us.9, label %if.then.i.us.9, label %if.end.i.us.9 - -if.then.i.us.9: ; preds = %if.end.i.us.8 - %add8.i.us.9 = add nsw i32 %reass.mul.i, %conv.i.us.9 - %idxprom.i.us.9 = sext i32 %add8.i.us.9 to i64 - %arrayidx.i.us.9 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.9 - -if.end.i.us.9: ; preds = %if.then.i.us.9, %if.end.i.us.8 - br i1 %cmp.i.us.10, label %if.then.i.us.10, label %if.end.i.us.10 - -if.then.i.us.10: ; preds = %if.end.i.us.9 - %add8.i.us.10 = add nsw i32 %reass.mul.i, %conv.i.us.10 - %idxprom.i.us.10 = sext i32 %add8.i.us.10 to i64 - %arrayidx.i.us.10 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.10 - -if.end.i.us.10: ; preds = %if.then.i.us.10, %if.end.i.us.9 - br i1 %cmp.i.us.11, label %if.then.i.us.11, label %if.end.i.us.11 - -if.then.i.us.11: ; preds = %if.end.i.us.10 - %add8.i.us.11 = add nsw i32 %reass.mul.i, %conv.i.us.11 - %idxprom.i.us.11 = sext i32 %add8.i.us.11 to i64 - %arrayidx.i.us.11 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.11 - -if.end.i.us.11: ; preds = %if.then.i.us.11, %if.end.i.us.10 - br i1 %cmp.i.us.12, label %if.then.i.us.12, label %if.end.i.us.12 - -if.then.i.us.12: ; preds = %if.end.i.us.11 - %add8.i.us.12 = add nsw i32 %reass.mul.i, %conv.i.us.12 - %idxprom.i.us.12 = sext i32 %add8.i.us.12 to i64 - %arrayidx.i.us.12 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.12 - -if.end.i.us.12: ; preds = %if.then.i.us.12, %if.end.i.us.11 - br i1 %cmp.i.us.13, label %if.then.i.us.13, label %if.end.i.us.13 - -if.then.i.us.13: ; preds = %if.end.i.us.12 - %add8.i.us.13 = add nsw i32 %reass.mul.i, %conv.i.us.13 - %idxprom.i.us.13 = sext i32 %add8.i.us.13 to i64 - %arrayidx.i.us.13 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.13 - -if.end.i.us.13: ; preds = %if.then.i.us.13, %if.end.i.us.12 - br i1 %cmp.i.us.14, label %if.then.i.us.14, label %if.end.i.us.14 - -if.then.i.us.14: ; preds = %if.end.i.us.13 - %add8.i.us.14 = add nsw i32 %reass.mul.i, %conv.i.us.14 - %idxprom.i.us.14 = sext i32 %add8.i.us.14 to i64 - %arrayidx.i.us.14 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.14 - -if.end.i.us.14: ; preds = %if.then.i.us.14, %if.end.i.us.13 - br i1 %cmp.i.us.15, label %if.then.i.us.15, label %if.end.i.us.15 - -if.then.i.us.15: ; preds = %if.end.i.us.14 - %add8.i.us.15 = add nsw i32 %reass.mul.i, %conv.i.us.15 - %idxprom.i.us.15 = sext i32 %add8.i.us.15 to i64 - %arrayidx.i.us.15 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.15 - -if.end.i.us.15: ; preds = %if.then.i.us.15, %if.end.i.us.14 - br i1 %cmp.i.us.16, label %if.then.i.us.16, label %if.end.i.us.16 - -if.then.i.us.16: ; preds = %if.end.i.us.15 - %add8.i.us.16 = add nsw i32 %reass.mul.i, %conv.i.us.16 - %idxprom.i.us.16 = sext i32 %add8.i.us.16 to i64 - %arrayidx.i.us.16 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.16 - -if.end.i.us.16: ; preds = %if.then.i.us.16, %if.end.i.us.15 - br i1 %cmp.i.us.17, label %if.then.i.us.17, label %if.end.i.us.17 - -if.then.i.us.17: ; preds = %if.end.i.us.16 - %add8.i.us.17 = add nsw i32 %reass.mul.i, %conv.i.us.17 - %idxprom.i.us.17 = sext i32 %add8.i.us.17 to i64 - %arrayidx.i.us.17 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.17 - -if.end.i.us.17: ; preds = %if.then.i.us.17, %if.end.i.us.16 - br i1 %cmp.i.us.18, label %if.then.i.us.18, label %if.end.i.us.18 - -if.then.i.us.18: ; preds = %if.end.i.us.17 - %add8.i.us.18 = add nsw i32 %reass.mul.i, %conv.i.us.18 - %idxprom.i.us.18 = sext i32 %add8.i.us.18 to i64 - %arrayidx.i.us.18 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.18 - -if.end.i.us.18: ; preds = %if.then.i.us.18, %if.end.i.us.17 - br i1 %cmp.i.us.19, label %if.then.i.us.19, label %if.end.i.us.19 - -if.then.i.us.19: ; preds = %if.end.i.us.18 - %add8.i.us.19 = add nsw i32 %reass.mul.i, %conv.i.us.19 - %idxprom.i.us.19 = sext i32 %add8.i.us.19 to i64 - %arrayidx.i.us.19 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.19 - -if.end.i.us.19: ; preds = %if.then.i.us.19, %if.end.i.us.18 - br i1 %cmp.i.us.20, label %if.then.i.us.20, label %if.end.i.us.20 - -if.then.i.us.20: ; preds = %if.end.i.us.19 - %add8.i.us.20 = add nsw i32 %reass.mul.i, %conv.i.us.20 - %idxprom.i.us.20 = sext i32 %add8.i.us.20 to i64 - %arrayidx.i.us.20 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.20 - -if.end.i.us.20: ; preds = %if.then.i.us.20, %if.end.i.us.19 - br i1 %cmp.i.us.21, label %if.then.i.us.21, label %if.end.i.us.21 - -if.then.i.us.21: ; preds = %if.end.i.us.20 - %add8.i.us.21 = add nsw i32 %reass.mul.i, %conv.i.us.21 - %idxprom.i.us.21 = sext i32 %add8.i.us.21 to i64 - %arrayidx.i.us.21 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.21 - -if.end.i.us.21: ; preds = %if.then.i.us.21, %if.end.i.us.20 - br i1 %cmp.i.us.22, label %if.then.i.us.22, label %if.end.i.us.22 - -if.then.i.us.22: ; preds = %if.end.i.us.21 - %add8.i.us.22 = add nsw i32 %reass.mul.i, %conv.i.us.22 - %idxprom.i.us.22 = sext i32 %add8.i.us.22 to i64 - %arrayidx.i.us.22 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.22 - -if.end.i.us.22: ; preds = %if.then.i.us.22, %if.end.i.us.21 - br i1 %cmp.i.us.23, label %if.then.i.us.23, label %if.end.i.us.23 - -if.then.i.us.23: ; preds = %if.end.i.us.22 - %add8.i.us.23 = add nsw i32 %reass.mul.i, %conv.i.us.23 - %idxprom.i.us.23 = sext i32 %add8.i.us.23 to i64 - %arrayidx.i.us.23 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.23 - -if.end.i.us.23: ; preds = %if.then.i.us.23, %if.end.i.us.22 - br i1 %cmp.i.us.24, label %if.then.i.us.24, label %if.end.i.us.24 - -if.then.i.us.24: ; preds = %if.end.i.us.23 - %add8.i.us.24 = add nsw i32 %reass.mul.i, %conv.i.us.24 - %idxprom.i.us.24 = sext i32 %add8.i.us.24 to i64 - %arrayidx.i.us.24 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.24 - -if.end.i.us.24: ; preds = %if.then.i.us.24, %if.end.i.us.23 - br i1 %cmp.i.us.25, label %if.then.i.us.25, label %if.end.i.us.25 - -if.then.i.us.25: ; preds = %if.end.i.us.24 - %add8.i.us.25 = add nsw i32 %reass.mul.i, %conv.i.us.25 - %idxprom.i.us.25 = sext i32 %add8.i.us.25 to i64 - %arrayidx.i.us.25 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.25 - -if.end.i.us.25: ; preds = %if.then.i.us.25, %if.end.i.us.24 - br i1 %cmp.i.us.26, label %if.then.i.us.26, label %if.end.i.us.26 - -if.then.i.us.26: ; preds = %if.end.i.us.25 - %add8.i.us.26 = add nsw i32 %reass.mul.i, %conv.i.us.26 - %idxprom.i.us.26 = sext i32 %add8.i.us.26 to i64 - %arrayidx.i.us.26 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.26 - -if.end.i.us.26: ; preds = %if.then.i.us.26, %if.end.i.us.25 - br i1 %cmp.i.us.27, label %if.then.i.us.27, label %if.end.i.us.27 - -if.then.i.us.27: ; preds = %if.end.i.us.26 - %add8.i.us.27 = add nsw i32 %reass.mul.i, %conv.i.us.27 - %idxprom.i.us.27 = sext i32 %add8.i.us.27 to i64 - %arrayidx.i.us.27 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.27 - -if.end.i.us.27: ; preds = %if.then.i.us.27, %if.end.i.us.26 - br i1 %cmp.i.us.28, label %if.then.i.us.28, label %if.end.i.us.28 - -if.then.i.us.28: ; preds = %if.end.i.us.27 - %add8.i.us.28 = add nsw i32 %reass.mul.i, %conv.i.us.28 - %idxprom.i.us.28 = sext i32 %add8.i.us.28 to i64 - %arrayidx.i.us.28 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.28 - -if.end.i.us.28: ; preds = %if.then.i.us.28, %if.end.i.us.27 - br i1 %cmp.i.us.29, label %if.then.i.us.29, label %if.end.i.us.29 - -if.then.i.us.29: ; preds = %if.end.i.us.28 - %add8.i.us.29 = add nsw i32 %reass.mul.i, %conv.i.us.29 - %idxprom.i.us.29 = sext i32 %add8.i.us.29 to i64 - %arrayidx.i.us.29 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.29 - -if.end.i.us.29: ; preds = %if.then.i.us.29, %if.end.i.us.28 - br i1 %cmp.i.us.30, label %if.then.i.us.30, label %if.end.i.us.30 - -if.then.i.us.30: ; preds = %if.end.i.us.29 - %add8.i.us.30 = add nsw i32 %reass.mul.i, %conv.i.us.30 - %idxprom.i.us.30 = sext i32 %add8.i.us.30 to i64 - %arrayidx.i.us.30 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.30 - -if.end.i.us.30: ; preds = %if.then.i.us.30, %if.end.i.us.29 - br i1 %cmp.i.us.31, label %if.then.i.us.31, label %pregion_for_end.i - -if.then.i.us.31: ; preds = %if.end.i.us.30 - %add8.i.us.31 = add nsw i32 %reass.mul.i, %conv.i.us.31 - %idxprom.i.us.31 = sext i32 %add8.i.us.31 to i64 - %arrayidx.i.us.31 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i - -if.then.i.us.us.7.1: ; preds = %if.end.i.us.us.7 - %add8.i.us.us.7.1 = add nsw i32 %reass.mul.i.us.7, %conv.i.us.us.7.1 - %idxprom.i.us.us.7.1 = sext i32 %add8.i.us.us.7.1 to i64 - %arrayidx.i.us.us.7.1 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.7.1 = shl i64 %add1.i.i.us.us.7.1, 32 - %132 = ashr exact i64 %sext.i.us.us.7.1, 32 - br label %for.body.i.us.us.7.1 - -for.body.i.us.us.7.1: ; preds = %for.body.i.us.us.7.1, %if.then.i.us.us.7.1 - %indvars.iv.next.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.us.us.7.1, %for.body.i.us.us.7.1 ], [ 0, %if.then.i.us.us.7.1 ] - %133 = phi float [ %139, %for.body.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.us.us.7.1 ] - %134 = add nsw i64 %indvars.iv.next.i3.us.us.7.1, %122 - %arrayidx24.i.us.us.7.1 = getelementptr inbounds float, float* %3, i64 %134 - %135 = load float, float* %arrayidx24.i.us.us.7.1, align 4, !tbaa !12 - %136 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.7.1, %12 - %137 = add nsw i64 %136, %132 - %arrayidx28.i.us.us.7.1 = getelementptr inbounds float, float* %4, i64 %137 - %138 = load float, float* %arrayidx28.i.us.us.7.1, align 4, !tbaa !12 - %139 = tail call float @llvm.fmuladd.f32(float %135, float %138, float %133) #2 - store float %139, float* %arrayidx.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7.1, 1 - %exitcond.not.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.us.us.7.1, %12 - br i1 %exitcond.not.i.us.us.7.1, label %if.end.i.us.us.7.1.loopexit, label %for.body.i.us.us.7.1, !llvm.loop !19 - -if.end.i.us.us.7.1.loopexit: ; preds = %for.body.i.us.us.7.1 - br label %if.end.i.us.us.7.1 - -if.end.i.us.us.7.1: ; preds = %if.end.i.us.us.7.1.loopexit, %if.end.i.us.us.7 - %140 = add nuw nsw i64 %_local_id_x.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %140, 32 - br i1 %exitcond.7.not.1, label %doitgen_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.us.us.7, !llvm.loop !23 - -if.then.i.us.us.6.1: ; preds = %if.end.i.us.us.6 - %add8.i.us.us.6.1 = add nsw i32 %reass.mul.i.us.6, %conv.i.us.us.6.1 - %idxprom.i.us.us.6.1 = sext i32 %add8.i.us.us.6.1 to i64 - %arrayidx.i.us.us.6.1 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.6.1 = shl i64 %add1.i.i.us.us.6.1, 32 - %141 = ashr exact i64 %sext.i.us.us.6.1, 32 - br label %for.body.i.us.us.6.1 - -for.body.i.us.us.6.1: ; preds = %for.body.i.us.us.6.1, %if.then.i.us.us.6.1 - %indvars.iv.next.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.us.us.6.1, %for.body.i.us.us.6.1 ], [ 0, %if.then.i.us.us.6.1 ] - %142 = phi float [ %148, %for.body.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.us.us.6.1 ] - %143 = add nsw i64 %indvars.iv.next.i3.us.us.6.1, %111 - %arrayidx24.i.us.us.6.1 = getelementptr inbounds float, float* %3, i64 %143 - %144 = load float, float* %arrayidx24.i.us.us.6.1, align 4, !tbaa !12 - %145 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.6.1, %12 - %146 = add nsw i64 %145, %141 - %arrayidx28.i.us.us.6.1 = getelementptr inbounds float, float* %4, i64 %146 - %147 = load float, float* %arrayidx28.i.us.us.6.1, align 4, !tbaa !12 - %148 = tail call float @llvm.fmuladd.f32(float %144, float %147, float %142) #2 - store float %148, float* %arrayidx.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6.1, 1 - %exitcond.not.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.us.us.6.1, %12 - br i1 %exitcond.not.i.us.us.6.1, label %if.end.i.us.us.6.1.loopexit, label %for.body.i.us.us.6.1, !llvm.loop !19 - -if.end.i.us.us.6.1.loopexit: ; preds = %for.body.i.us.us.6.1 - br label %if.end.i.us.us.6.1 - -if.end.i.us.us.6.1: ; preds = %if.end.i.us.us.6.1.loopexit, %if.end.i.us.us.6 - %149 = add nuw nsw i64 %_local_id_x.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %149, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.us.6.loopexit, label %pregion_for_entry.entry.i.us.us.6, !llvm.loop !23 - -if.then.i.us.us.5.1: ; preds = %if.end.i.us.us.5 - %add8.i.us.us.5.1 = add nsw i32 %reass.mul.i.us.5, %conv.i.us.us.5.1 - %idxprom.i.us.us.5.1 = sext i32 %add8.i.us.us.5.1 to i64 - %arrayidx.i.us.us.5.1 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.5.1 = shl i64 %add1.i.i.us.us.5.1, 32 - %150 = ashr exact i64 %sext.i.us.us.5.1, 32 - br label %for.body.i.us.us.5.1 - -for.body.i.us.us.5.1: ; preds = %for.body.i.us.us.5.1, %if.then.i.us.us.5.1 - %indvars.iv.next.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.us.us.5.1, %for.body.i.us.us.5.1 ], [ 0, %if.then.i.us.us.5.1 ] - %151 = phi float [ %157, %for.body.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.us.us.5.1 ] - %152 = add nsw i64 %indvars.iv.next.i3.us.us.5.1, %100 - %arrayidx24.i.us.us.5.1 = getelementptr inbounds float, float* %3, i64 %152 - %153 = load float, float* %arrayidx24.i.us.us.5.1, align 4, !tbaa !12 - %154 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.5.1, %12 - %155 = add nsw i64 %154, %150 - %arrayidx28.i.us.us.5.1 = getelementptr inbounds float, float* %4, i64 %155 - %156 = load float, float* %arrayidx28.i.us.us.5.1, align 4, !tbaa !12 - %157 = tail call float @llvm.fmuladd.f32(float %153, float %156, float %151) #2 - store float %157, float* %arrayidx.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5.1, 1 - %exitcond.not.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.us.us.5.1, %12 - br i1 %exitcond.not.i.us.us.5.1, label %if.end.i.us.us.5.1.loopexit, label %for.body.i.us.us.5.1, !llvm.loop !19 - -if.end.i.us.us.5.1.loopexit: ; preds = %for.body.i.us.us.5.1 - br label %if.end.i.us.us.5.1 - -if.end.i.us.us.5.1: ; preds = %if.end.i.us.us.5.1.loopexit, %if.end.i.us.us.5 - %158 = add nuw nsw i64 %_local_id_x.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %158, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.us.5.loopexit, label %pregion_for_entry.entry.i.us.us.5, !llvm.loop !23 - -if.then.i.us.us.4.1: ; preds = %if.end.i.us.us.4 - %add8.i.us.us.4.1 = add nsw i32 %reass.mul.i.us.4, %conv.i.us.us.4.1 - %idxprom.i.us.us.4.1 = sext i32 %add8.i.us.us.4.1 to i64 - %arrayidx.i.us.us.4.1 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.4.1 = shl i64 %add1.i.i.us.us.4.1, 32 - %159 = ashr exact i64 %sext.i.us.us.4.1, 32 - br label %for.body.i.us.us.4.1 - -for.body.i.us.us.4.1: ; preds = %for.body.i.us.us.4.1, %if.then.i.us.us.4.1 - %indvars.iv.next.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.us.us.4.1, %for.body.i.us.us.4.1 ], [ 0, %if.then.i.us.us.4.1 ] - %160 = phi float [ %166, %for.body.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.us.us.4.1 ] - %161 = add nsw i64 %indvars.iv.next.i3.us.us.4.1, %89 - %arrayidx24.i.us.us.4.1 = getelementptr inbounds float, float* %3, i64 %161 - %162 = load float, float* %arrayidx24.i.us.us.4.1, align 4, !tbaa !12 - %163 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.4.1, %12 - %164 = add nsw i64 %163, %159 - %arrayidx28.i.us.us.4.1 = getelementptr inbounds float, float* %4, i64 %164 - %165 = load float, float* %arrayidx28.i.us.us.4.1, align 4, !tbaa !12 - %166 = tail call float @llvm.fmuladd.f32(float %162, float %165, float %160) #2 - store float %166, float* %arrayidx.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4.1, 1 - %exitcond.not.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.us.us.4.1, %12 - br i1 %exitcond.not.i.us.us.4.1, label %if.end.i.us.us.4.1.loopexit, label %for.body.i.us.us.4.1, !llvm.loop !19 - -if.end.i.us.us.4.1.loopexit: ; preds = %for.body.i.us.us.4.1 - br label %if.end.i.us.us.4.1 - -if.end.i.us.us.4.1: ; preds = %if.end.i.us.us.4.1.loopexit, %if.end.i.us.us.4 - %167 = add nuw nsw i64 %_local_id_x.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %167, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.us.4.loopexit, label %pregion_for_entry.entry.i.us.us.4, !llvm.loop !23 - -if.then.i.us.us.3.1: ; preds = %if.end.i.us.us.3 - %add8.i.us.us.3.1 = add nsw i32 %reass.mul.i.us.3, %conv.i.us.us.3.1 - %idxprom.i.us.us.3.1 = sext i32 %add8.i.us.us.3.1 to i64 - %arrayidx.i.us.us.3.1 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.3.1 = shl i64 %add1.i.i.us.us.3.1, 32 - %168 = ashr exact i64 %sext.i.us.us.3.1, 32 - br label %for.body.i.us.us.3.1 - -for.body.i.us.us.3.1: ; preds = %for.body.i.us.us.3.1, %if.then.i.us.us.3.1 - %indvars.iv.next.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.us.us.3.1, %for.body.i.us.us.3.1 ], [ 0, %if.then.i.us.us.3.1 ] - %169 = phi float [ %175, %for.body.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.us.us.3.1 ] - %170 = add nsw i64 %indvars.iv.next.i3.us.us.3.1, %78 - %arrayidx24.i.us.us.3.1 = getelementptr inbounds float, float* %3, i64 %170 - %171 = load float, float* %arrayidx24.i.us.us.3.1, align 4, !tbaa !12 - %172 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.3.1, %12 - %173 = add nsw i64 %172, %168 - %arrayidx28.i.us.us.3.1 = getelementptr inbounds float, float* %4, i64 %173 - %174 = load float, float* %arrayidx28.i.us.us.3.1, align 4, !tbaa !12 - %175 = tail call float @llvm.fmuladd.f32(float %171, float %174, float %169) #2 - store float %175, float* %arrayidx.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3.1, 1 - %exitcond.not.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.us.us.3.1, %12 - br i1 %exitcond.not.i.us.us.3.1, label %if.end.i.us.us.3.1.loopexit, label %for.body.i.us.us.3.1, !llvm.loop !19 - -if.end.i.us.us.3.1.loopexit: ; preds = %for.body.i.us.us.3.1 - br label %if.end.i.us.us.3.1 - -if.end.i.us.us.3.1: ; preds = %if.end.i.us.us.3.1.loopexit, %if.end.i.us.us.3 - %176 = add nuw nsw i64 %_local_id_x.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %176, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.us.3.loopexit, label %pregion_for_entry.entry.i.us.us.3, !llvm.loop !23 - -if.then.i.us.us.2.1: ; preds = %if.end.i.us.us.2 - %add8.i.us.us.2.1 = add nsw i32 %reass.mul.i.us.2, %conv.i.us.us.2.1 - %idxprom.i.us.us.2.1 = sext i32 %add8.i.us.us.2.1 to i64 - %arrayidx.i.us.us.2.1 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.2.1 = shl i64 %add1.i.i.us.us.2.1, 32 - %177 = ashr exact i64 %sext.i.us.us.2.1, 32 - br label %for.body.i.us.us.2.1 - -for.body.i.us.us.2.1: ; preds = %for.body.i.us.us.2.1, %if.then.i.us.us.2.1 - %indvars.iv.next.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.us.us.2.1, %for.body.i.us.us.2.1 ], [ 0, %if.then.i.us.us.2.1 ] - %178 = phi float [ %184, %for.body.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.us.us.2.1 ] - %179 = add nsw i64 %indvars.iv.next.i3.us.us.2.1, %67 - %arrayidx24.i.us.us.2.1 = getelementptr inbounds float, float* %3, i64 %179 - %180 = load float, float* %arrayidx24.i.us.us.2.1, align 4, !tbaa !12 - %181 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.2.1, %12 - %182 = add nsw i64 %181, %177 - %arrayidx28.i.us.us.2.1 = getelementptr inbounds float, float* %4, i64 %182 - %183 = load float, float* %arrayidx28.i.us.us.2.1, align 4, !tbaa !12 - %184 = tail call float @llvm.fmuladd.f32(float %180, float %183, float %178) #2 - store float %184, float* %arrayidx.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2.1, 1 - %exitcond.not.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.us.us.2.1, %12 - br i1 %exitcond.not.i.us.us.2.1, label %if.end.i.us.us.2.1.loopexit, label %for.body.i.us.us.2.1, !llvm.loop !19 - -if.end.i.us.us.2.1.loopexit: ; preds = %for.body.i.us.us.2.1 - br label %if.end.i.us.us.2.1 - -if.end.i.us.us.2.1: ; preds = %if.end.i.us.us.2.1.loopexit, %if.end.i.us.us.2 - %185 = add nuw nsw i64 %_local_id_x.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %185, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.us.2.loopexit, label %pregion_for_entry.entry.i.us.us.2, !llvm.loop !23 - -if.then.i.us.us.1.1: ; preds = %if.end.i.us.us.1 - %add8.i.us.us.1.1 = add nsw i32 %reass.mul.i.us.1, %conv.i.us.us.1.1 - %idxprom.i.us.us.1.1 = sext i32 %add8.i.us.us.1.1 to i64 - %arrayidx.i.us.us.1.1 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.1.1 = shl i64 %add1.i.i.us.us.1.1, 32 - %186 = ashr exact i64 %sext.i.us.us.1.1, 32 - br label %for.body.i.us.us.1.1 - -for.body.i.us.us.1.1: ; preds = %for.body.i.us.us.1.1, %if.then.i.us.us.1.1 - %indvars.iv.next.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.us.us.1.1, %for.body.i.us.us.1.1 ], [ 0, %if.then.i.us.us.1.1 ] - %187 = phi float [ %193, %for.body.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.us.us.1.1 ] - %188 = add nsw i64 %indvars.iv.next.i3.us.us.1.1, %46 - %arrayidx24.i.us.us.1.1 = getelementptr inbounds float, float* %3, i64 %188 - %189 = load float, float* %arrayidx24.i.us.us.1.1, align 4, !tbaa !12 - %190 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.1.1, %12 - %191 = add nsw i64 %190, %186 - %arrayidx28.i.us.us.1.1 = getelementptr inbounds float, float* %4, i64 %191 - %192 = load float, float* %arrayidx28.i.us.us.1.1, align 4, !tbaa !12 - %193 = tail call float @llvm.fmuladd.f32(float %189, float %192, float %187) #2 - store float %193, float* %arrayidx.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1.1, 1 - %exitcond.not.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.us.us.1.1, %12 - br i1 %exitcond.not.i.us.us.1.1, label %if.end.i.us.us.1.1.loopexit, label %for.body.i.us.us.1.1, !llvm.loop !19 - -if.end.i.us.us.1.1.loopexit: ; preds = %for.body.i.us.us.1.1 - br label %if.end.i.us.us.1.1 - -if.end.i.us.us.1.1: ; preds = %if.end.i.us.us.1.1.loopexit, %if.end.i.us.us.1 - %194 = add nuw nsw i64 %_local_id_x.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %194, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.us.1.loopexit, label %pregion_for_entry.entry.i.us.us.1, !llvm.loop !23 - -if.then.i.us.us.146: ; preds = %if.end.i.us.us - %add8.i.us.us.142 = add nsw i32 %reass.mul.i.us, %conv.i.us.us.139 - %idxprom.i.us.us.143 = sext i32 %add8.i.us.us.142 to i64 - %arrayidx.i.us.us.144 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.us.143 - store float 0.000000e+00, float* %arrayidx.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.145 = shl i64 %add1.i.i.us.us.138, 32 - %195 = ashr exact i64 %sext.i.us.us.145, 32 - br label %for.body.i.us.us.152 - -for.body.i.us.us.152: ; preds = %for.body.i.us.us.152, %if.then.i.us.us.146 - %indvars.iv.next.i3.us.us.147 = phi i64 [ %indvars.iv.next.i.us.us.150, %for.body.i.us.us.152 ], [ 0, %if.then.i.us.us.146 ] - %196 = phi float [ %202, %for.body.i.us.us.152 ], [ 0.000000e+00, %if.then.i.us.us.146 ] - %197 = add nsw i64 %indvars.iv.next.i3.us.us.147, %44 - %arrayidx24.i.us.us.148 = getelementptr inbounds float, float* %3, i64 %197 - %198 = load float, float* %arrayidx24.i.us.us.148, align 4, !tbaa !12 - %199 = mul nuw nsw i64 %indvars.iv.next.i3.us.us.147, %12 - %200 = add nsw i64 %199, %195 - %arrayidx28.i.us.us.149 = getelementptr inbounds float, float* %4, i64 %200 - %201 = load float, float* %arrayidx28.i.us.us.149, align 4, !tbaa !12 - %202 = tail call float @llvm.fmuladd.f32(float %198, float %201, float %196) #2 - store float %202, float* %arrayidx.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.150 = add nuw nsw i64 %indvars.iv.next.i3.us.us.147, 1 - %exitcond.not.i.us.us.151 = icmp eq i64 %indvars.iv.next.i.us.us.150, %12 - br i1 %exitcond.not.i.us.us.151, label %if.end.i.us.us.153.loopexit, label %for.body.i.us.us.152, !llvm.loop !19 - -if.end.i.us.us.153.loopexit: ; preds = %for.body.i.us.us.152 - br label %if.end.i.us.us.153 - -if.end.i.us.us.153: ; preds = %if.end.i.us.us.153.loopexit, %if.end.i.us.us - %203 = add nuw nsw i64 %_local_id_x.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %203, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.us.loopexit, label %pregion_for_entry.entry.i.us.us, !llvm.loop !23 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_doitgen_kernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = getelementptr i8*, i8** %0, i64 1 - %7 = bitcast i8** %6 to i32** - %8 = load i32*, i32** %7, align 8 - %9 = load i32, i32* %8, align 4 - %10 = getelementptr i8*, i8** %0, i64 2 - %11 = bitcast i8** %10 to i32** - %12 = load i32*, i32** %11, align 8 - %13 = load i32, i32* %12, align 4 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to float*** - %16 = load float**, float*** %15, align 8 - %17 = load float*, float** %16, align 8 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to float*** - %20 = load float**, float*** %19, align 8 - %21 = load float*, float** %20, align 8 - %22 = getelementptr i8*, i8** %0, i64 5 - %23 = bitcast i8** %22 to float*** - %24 = load float**, float*** %23, align 8 - %25 = load float*, float** %24, align 8 - %26 = getelementptr i8*, i8** %0, i64 6 - %27 = bitcast i8** %26 to i32** - %28 = load i32*, i32** %27, align 8 - %29 = load i32, i32* %28, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %mul6.i.i = mul i32 %29, %9 - %cmp970.i.i = icmp sgt i32 %13, 0 - %30 = zext i32 %13 to i64 - br i1 %cmp970.i.i, label %pregion_for_entry.pregion_for_init.i.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - %conv.i.i.us = trunc i64 %mul.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %13, %conv.i.i.us - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.1 = or i32 %31, 1 - %cmp.i.i.us.1 = icmp sgt i32 %13, %conv.i.i.us.1 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.2 = or i32 %32, 2 - %cmp.i.i.us.2 = icmp sgt i32 %13, %conv.i.i.us.2 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.3 = or i32 %33, 3 - %cmp.i.i.us.3 = icmp sgt i32 %13, %conv.i.i.us.3 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.4 = or i32 %34, 4 - %cmp.i.i.us.4 = icmp sgt i32 %13, %conv.i.i.us.4 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.5 = or i32 %35, 5 - %cmp.i.i.us.5 = icmp sgt i32 %13, %conv.i.i.us.5 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.6 = or i32 %36, 6 - %cmp.i.i.us.6 = icmp sgt i32 %13, %conv.i.i.us.6 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.7 = or i32 %37, 7 - %cmp.i.i.us.7 = icmp sgt i32 %13, %conv.i.i.us.7 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.8 = or i32 %38, 8 - %cmp.i.i.us.8 = icmp sgt i32 %13, %conv.i.i.us.8 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.9 = or i32 %39, 9 - %cmp.i.i.us.9 = icmp sgt i32 %13, %conv.i.i.us.9 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.10 = or i32 %40, 10 - %cmp.i.i.us.10 = icmp sgt i32 %13, %conv.i.i.us.10 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.11 = or i32 %41, 11 - %cmp.i.i.us.11 = icmp sgt i32 %13, %conv.i.i.us.11 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.12 = or i32 %42, 12 - %cmp.i.i.us.12 = icmp sgt i32 %13, %conv.i.i.us.12 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.13 = or i32 %43, 13 - %cmp.i.i.us.13 = icmp sgt i32 %13, %conv.i.i.us.13 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.14 = or i32 %44, 14 - %cmp.i.i.us.14 = icmp sgt i32 %13, %conv.i.i.us.14 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.15 = or i32 %45, 15 - %cmp.i.i.us.15 = icmp sgt i32 %13, %conv.i.i.us.15 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.16 = or i32 %46, 16 - %cmp.i.i.us.16 = icmp sgt i32 %13, %conv.i.i.us.16 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.17 = or i32 %47, 17 - %cmp.i.i.us.17 = icmp sgt i32 %13, %conv.i.i.us.17 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.18 = or i32 %48, 18 - %cmp.i.i.us.18 = icmp sgt i32 %13, %conv.i.i.us.18 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.19 = or i32 %49, 19 - %cmp.i.i.us.19 = icmp sgt i32 %13, %conv.i.i.us.19 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.20 = or i32 %50, 20 - %cmp.i.i.us.20 = icmp sgt i32 %13, %conv.i.i.us.20 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.21 = or i32 %51, 21 - %cmp.i.i.us.21 = icmp sgt i32 %13, %conv.i.i.us.21 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.22 = or i32 %52, 22 - %cmp.i.i.us.22 = icmp sgt i32 %13, %conv.i.i.us.22 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.23 = or i32 %53, 23 - %cmp.i.i.us.23 = icmp sgt i32 %13, %conv.i.i.us.23 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.24 = or i32 %54, 24 - %cmp.i.i.us.24 = icmp sgt i32 %13, %conv.i.i.us.24 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.25 = or i32 %55, 25 - %cmp.i.i.us.25 = icmp sgt i32 %13, %conv.i.i.us.25 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.26 = or i32 %56, 26 - %cmp.i.i.us.26 = icmp sgt i32 %13, %conv.i.i.us.26 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.27 = or i32 %57, 27 - %cmp.i.i.us.27 = icmp sgt i32 %13, %conv.i.i.us.27 - %58 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.28 = or i32 %58, 28 - %cmp.i.i.us.28 = icmp sgt i32 %13, %conv.i.i.us.28 - %59 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.29 = or i32 %59, 29 - %cmp.i.i.us.29 = icmp sgt i32 %13, %conv.i.i.us.29 - %60 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.30 = or i32 %60, 30 - %cmp.i.i.us.30 = icmp sgt i32 %13, %conv.i.i.us.30 - %61 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.31 = or i32 %61, 31 - %cmp.i.i.us.31 = icmp sgt i32 %13, %conv.i.i.us.31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i.us.preheader: ; preds = %5 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp4.i.i.us = icmp sgt i32 %9, %conv2.i.i.us - %reass.add.i.i.us = add i32 %mul6.i.i, %conv2.i.i.us - %reass.mul.i.i.us = mul i32 %reass.add.i.i.us, %13 - %62 = sext i32 %reass.mul.i.i.us to i64 - br i1 %cmp4.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us.preheader - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us.153 - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us.preheader - %63 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %63, 1 - %cmp4.i.i.us.1 = icmp sgt i32 %9, %conv2.i.i.us.1 - %reass.add.i.i.us.1 = add i32 %mul6.i.i, %conv2.i.i.us.1 - %reass.mul.i.i.us.1 = mul i32 %reass.add.i.i.us.1, %13 - %64 = sext i32 %reass.mul.i.i.us.1 to i64 - br i1 %cmp4.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us.153, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %221, %if.end.i.i.us.us.153 ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp.i.i.us.us = icmp sgt i32 %13, %conv.i.i.us.us - br i1 %cmp.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add8.i.i.us.us = add nsw i32 %reass.mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add8.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us - store float 0.000000e+00, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %65 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %66 = or i64 %_local_id_x.i.0.us.us, 1 - %add1.i.i.i.us.us.138 = add nuw nsw i64 %66, %mul.i.i.i - %conv.i.i.us.us.139 = trunc i64 %add1.i.i.i.us.us.138 to i32 - %cmp.i.i.us.us.140 = icmp sgt i32 %13, %conv.i.i.us.us.139 - br i1 %cmp.i.i.us.us.140, label %if.then.i.i.us.us.146, label %if.end.i.i.us.us.153 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %67 = phi float [ %73, %for.body.i.i.us.us ], [ 0.000000e+00, %if.then.i.i.us.us ] - %68 = add nsw i64 %indvars.iv.next.i.i3.us.us, %62 - %arrayidx24.i.i.us.us = getelementptr inbounds float, float* %17, i64 %68 - %69 = load float, float* %arrayidx24.i.i.us.us, align 4, !tbaa !12 - %70 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us, %30 - %71 = add nsw i64 %70, %65 - %arrayidx28.i.i.us.us = getelementptr inbounds float, float* %21, i64 %71 - %72 = load float, float* %arrayidx28.i.i.us.us, align 4, !tbaa !12 - %73 = tail call float @llvm.fmuladd.f32(float %69, float %72, float %67) #2 - store float %73, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %30 - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %pregion_for_entry.pregion_for_init.i.i.preheader - %_local_id_y.i.0 = phi i64 [ %74, %pregion_for_end.i.i ], [ 0, %pregion_for_entry.pregion_for_init.i.i.preheader ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp4.i.i = icmp sgt i32 %9, %conv2.i.i - %reass.add.i.i = add i32 %mul6.i.i, %conv2.i.i - %reass.mul.i.i = mul i32 %reass.add.i.i, %13 - br i1 %cmp4.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us.preheader - %add8.i.i.us = add nsw i32 %reass.mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add8.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - br i1 %cmp.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -pregion_for_end.i.i: ; preds = %if.then.i.i.us.31, %if.end.i.i.us.30, %pregion_for_entry.pregion_for_init.i.i - %74 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond33.not = icmp eq i64 %74, 8 - br i1 %exitcond33.not, label %_pocl_kernel_doitgen_kernel1.exit.loopexit54, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_doitgen_kernel1.exit.loopexit: ; preds = %if.end.i.i.us.us.7.1 - br label %_pocl_kernel_doitgen_kernel1.exit - -_pocl_kernel_doitgen_kernel1.exit.loopexit54: ; preds = %pregion_for_end.i.i - br label %_pocl_kernel_doitgen_kernel1.exit - -_pocl_kernel_doitgen_kernel1.exit: ; preds = %pregion_for_end.i.i.us.6, %_pocl_kernel_doitgen_kernel1.exit.loopexit54, %_pocl_kernel_doitgen_kernel1.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %212, %if.end.i.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp.i.i.us.us.1 = icmp sgt i32 %13, %conv.i.i.us.us.1 - br i1 %cmp.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add8.i.i.us.us.1 = add nsw i32 %reass.mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add8.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %75 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %76 = phi float [ %82, %for.body.i.i.us.us.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1 ] - %77 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %64 - %arrayidx24.i.i.us.us.1 = getelementptr inbounds float, float* %17, i64 %77 - %78 = load float, float* %arrayidx24.i.i.us.us.1, align 4, !tbaa !12 - %79 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, %30 - %80 = add nsw i64 %79, %75 - %arrayidx28.i.i.us.us.1 = getelementptr inbounds float, float* %21, i64 %80 - %81 = load float, float* %arrayidx28.i.i.us.us.1, align 4, !tbaa !12 - %82 = tail call float @llvm.fmuladd.f32(float %78, float %81, float %76) #2 - store float %82, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %30 - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !19 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %83 = or i64 %_local_id_x.i.0.us.us.1, 1 - %add1.i.i.i.us.us.1.1 = add nuw nsw i64 %83, %mul.i.i.i - %conv.i.i.us.us.1.1 = trunc i64 %add1.i.i.i.us.us.1.1 to i32 - %cmp.i.i.us.us.1.1 = icmp sgt i32 %13, %conv.i.i.us.us.1.1 - br i1 %cmp.i.i.us.us.1.1, label %if.then.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %84 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %84, 2 - %cmp4.i.i.us.2 = icmp sgt i32 %9, %conv2.i.i.us.2 - %reass.add.i.i.us.2 = add i32 %mul6.i.i, %conv2.i.i.us.2 - %reass.mul.i.i.us.2 = mul i32 %reass.add.i.i.us.2, %13 - %85 = sext i32 %reass.mul.i.i.us.2 to i64 - br i1 %cmp4.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.1, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %203, %if.end.i.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp.i.i.us.us.2 = icmp sgt i32 %13, %conv.i.i.us.us.2 - br i1 %cmp.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add8.i.i.us.us.2 = add nsw i32 %reass.mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add8.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %86 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %87 = phi float [ %93, %for.body.i.i.us.us.2 ], [ 0.000000e+00, %if.then.i.i.us.us.2 ] - %88 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %85 - %arrayidx24.i.i.us.us.2 = getelementptr inbounds float, float* %17, i64 %88 - %89 = load float, float* %arrayidx24.i.i.us.us.2, align 4, !tbaa !12 - %90 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, %30 - %91 = add nsw i64 %90, %86 - %arrayidx28.i.i.us.us.2 = getelementptr inbounds float, float* %21, i64 %91 - %92 = load float, float* %arrayidx28.i.i.us.us.2, align 4, !tbaa !12 - %93 = tail call float @llvm.fmuladd.f32(float %89, float %92, float %87) #2 - store float %93, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %30 - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !19 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %94 = or i64 %_local_id_x.i.0.us.us.2, 1 - %add1.i.i.i.us.us.2.1 = add nuw nsw i64 %94, %mul.i.i.i - %conv.i.i.us.us.2.1 = trunc i64 %add1.i.i.i.us.us.2.1 to i32 - %cmp.i.i.us.us.2.1 = icmp sgt i32 %13, %conv.i.i.us.us.2.1 - br i1 %cmp.i.i.us.us.2.1, label %if.then.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2.1 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %95 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %95, 3 - %cmp4.i.i.us.3 = icmp sgt i32 %9, %conv2.i.i.us.3 - %reass.add.i.i.us.3 = add i32 %mul6.i.i, %conv2.i.i.us.3 - %reass.mul.i.i.us.3 = mul i32 %reass.add.i.i.us.3, %13 - %96 = sext i32 %reass.mul.i.i.us.3 to i64 - br i1 %cmp4.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.1, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %194, %if.end.i.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp.i.i.us.us.3 = icmp sgt i32 %13, %conv.i.i.us.us.3 - br i1 %cmp.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add8.i.i.us.us.3 = add nsw i32 %reass.mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add8.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %97 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %98 = phi float [ %104, %for.body.i.i.us.us.3 ], [ 0.000000e+00, %if.then.i.i.us.us.3 ] - %99 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %96 - %arrayidx24.i.i.us.us.3 = getelementptr inbounds float, float* %17, i64 %99 - %100 = load float, float* %arrayidx24.i.i.us.us.3, align 4, !tbaa !12 - %101 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, %30 - %102 = add nsw i64 %101, %97 - %arrayidx28.i.i.us.us.3 = getelementptr inbounds float, float* %21, i64 %102 - %103 = load float, float* %arrayidx28.i.i.us.us.3, align 4, !tbaa !12 - %104 = tail call float @llvm.fmuladd.f32(float %100, float %103, float %98) #2 - store float %104, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %30 - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !19 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %105 = or i64 %_local_id_x.i.0.us.us.3, 1 - %add1.i.i.i.us.us.3.1 = add nuw nsw i64 %105, %mul.i.i.i - %conv.i.i.us.us.3.1 = trunc i64 %add1.i.i.i.us.us.3.1 to i32 - %cmp.i.i.us.us.3.1 = icmp sgt i32 %13, %conv.i.i.us.us.3.1 - br i1 %cmp.i.i.us.us.3.1, label %if.then.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3.1 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %106 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %106, 4 - %cmp4.i.i.us.4 = icmp sgt i32 %9, %conv2.i.i.us.4 - %reass.add.i.i.us.4 = add i32 %mul6.i.i, %conv2.i.i.us.4 - %reass.mul.i.i.us.4 = mul i32 %reass.add.i.i.us.4, %13 - %107 = sext i32 %reass.mul.i.i.us.4 to i64 - br i1 %cmp4.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.1, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %185, %if.end.i.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp.i.i.us.us.4 = icmp sgt i32 %13, %conv.i.i.us.us.4 - br i1 %cmp.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add8.i.i.us.us.4 = add nsw i32 %reass.mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add8.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %108 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %109 = phi float [ %115, %for.body.i.i.us.us.4 ], [ 0.000000e+00, %if.then.i.i.us.us.4 ] - %110 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %107 - %arrayidx24.i.i.us.us.4 = getelementptr inbounds float, float* %17, i64 %110 - %111 = load float, float* %arrayidx24.i.i.us.us.4, align 4, !tbaa !12 - %112 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, %30 - %113 = add nsw i64 %112, %108 - %arrayidx28.i.i.us.us.4 = getelementptr inbounds float, float* %21, i64 %113 - %114 = load float, float* %arrayidx28.i.i.us.us.4, align 4, !tbaa !12 - %115 = tail call float @llvm.fmuladd.f32(float %111, float %114, float %109) #2 - store float %115, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %30 - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !19 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %116 = or i64 %_local_id_x.i.0.us.us.4, 1 - %add1.i.i.i.us.us.4.1 = add nuw nsw i64 %116, %mul.i.i.i - %conv.i.i.us.us.4.1 = trunc i64 %add1.i.i.i.us.us.4.1 to i32 - %cmp.i.i.us.us.4.1 = icmp sgt i32 %13, %conv.i.i.us.us.4.1 - br i1 %cmp.i.i.us.us.4.1, label %if.then.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4.1 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %117 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %117, 5 - %cmp4.i.i.us.5 = icmp sgt i32 %9, %conv2.i.i.us.5 - %reass.add.i.i.us.5 = add i32 %mul6.i.i, %conv2.i.i.us.5 - %reass.mul.i.i.us.5 = mul i32 %reass.add.i.i.us.5, %13 - %118 = sext i32 %reass.mul.i.i.us.5 to i64 - br i1 %cmp4.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.1, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %176, %if.end.i.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp.i.i.us.us.5 = icmp sgt i32 %13, %conv.i.i.us.us.5 - br i1 %cmp.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add8.i.i.us.us.5 = add nsw i32 %reass.mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add8.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %119 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %120 = phi float [ %126, %for.body.i.i.us.us.5 ], [ 0.000000e+00, %if.then.i.i.us.us.5 ] - %121 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %118 - %arrayidx24.i.i.us.us.5 = getelementptr inbounds float, float* %17, i64 %121 - %122 = load float, float* %arrayidx24.i.i.us.us.5, align 4, !tbaa !12 - %123 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, %30 - %124 = add nsw i64 %123, %119 - %arrayidx28.i.i.us.us.5 = getelementptr inbounds float, float* %21, i64 %124 - %125 = load float, float* %arrayidx28.i.i.us.us.5, align 4, !tbaa !12 - %126 = tail call float @llvm.fmuladd.f32(float %122, float %125, float %120) #2 - store float %126, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %30 - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !19 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %127 = or i64 %_local_id_x.i.0.us.us.5, 1 - %add1.i.i.i.us.us.5.1 = add nuw nsw i64 %127, %mul.i.i.i - %conv.i.i.us.us.5.1 = trunc i64 %add1.i.i.i.us.us.5.1 to i32 - %cmp.i.i.us.us.5.1 = icmp sgt i32 %13, %conv.i.i.us.us.5.1 - br i1 %cmp.i.i.us.us.5.1, label %if.then.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5.1 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %128 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %128, 6 - %cmp4.i.i.us.6 = icmp sgt i32 %9, %conv2.i.i.us.6 - %reass.add.i.i.us.6 = add i32 %mul6.i.i, %conv2.i.i.us.6 - %reass.mul.i.i.us.6 = mul i32 %reass.add.i.i.us.6, %13 - %129 = sext i32 %reass.mul.i.i.us.6 to i64 - br i1 %cmp4.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.1, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %167, %if.end.i.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp.i.i.us.us.6 = icmp sgt i32 %13, %conv.i.i.us.us.6 - br i1 %cmp.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add8.i.i.us.us.6 = add nsw i32 %reass.mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add8.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %130 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %131 = phi float [ %137, %for.body.i.i.us.us.6 ], [ 0.000000e+00, %if.then.i.i.us.us.6 ] - %132 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %129 - %arrayidx24.i.i.us.us.6 = getelementptr inbounds float, float* %17, i64 %132 - %133 = load float, float* %arrayidx24.i.i.us.us.6, align 4, !tbaa !12 - %134 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, %30 - %135 = add nsw i64 %134, %130 - %arrayidx28.i.i.us.us.6 = getelementptr inbounds float, float* %21, i64 %135 - %136 = load float, float* %arrayidx28.i.i.us.us.6, align 4, !tbaa !12 - %137 = tail call float @llvm.fmuladd.f32(float %133, float %136, float %131) #2 - store float %137, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %30 - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !19 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %138 = or i64 %_local_id_x.i.0.us.us.6, 1 - %add1.i.i.i.us.us.6.1 = add nuw nsw i64 %138, %mul.i.i.i - %conv.i.i.us.us.6.1 = trunc i64 %add1.i.i.i.us.us.6.1 to i32 - %cmp.i.i.us.us.6.1 = icmp sgt i32 %13, %conv.i.i.us.us.6.1 - br i1 %cmp.i.i.us.us.6.1, label %if.then.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6.1 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %139 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %139, 7 - %cmp4.i.i.us.7 = icmp sgt i32 %9, %conv2.i.i.us.7 - %reass.add.i.i.us.7 = add i32 %mul6.i.i, %conv2.i.i.us.7 - %reass.mul.i.i.us.7 = mul i32 %reass.add.i.i.us.7, %13 - %140 = sext i32 %reass.mul.i.i.us.7 to i64 - br i1 %cmp4.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_doitgen_kernel1.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.1, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %158, %if.end.i.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp.i.i.us.us.7 = icmp sgt i32 %13, %conv.i.i.us.us.7 - br i1 %cmp.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add8.i.i.us.us.7 = add nsw i32 %reass.mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add8.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %141 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %142 = phi float [ %148, %for.body.i.i.us.us.7 ], [ 0.000000e+00, %if.then.i.i.us.us.7 ] - %143 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %140 - %arrayidx24.i.i.us.us.7 = getelementptr inbounds float, float* %17, i64 %143 - %144 = load float, float* %arrayidx24.i.i.us.us.7, align 4, !tbaa !12 - %145 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, %30 - %146 = add nsw i64 %145, %141 - %arrayidx28.i.i.us.us.7 = getelementptr inbounds float, float* %21, i64 %146 - %147 = load float, float* %arrayidx28.i.i.us.us.7, align 4, !tbaa !12 - %148 = tail call float @llvm.fmuladd.f32(float %144, float %147, float %142) #2 - store float %148, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %30 - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !19 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %149 = or i64 %_local_id_x.i.0.us.us.7, 1 - %add1.i.i.i.us.us.7.1 = add nuw nsw i64 %149, %mul.i.i.i - %conv.i.i.us.us.7.1 = trunc i64 %add1.i.i.i.us.us.7.1 to i32 - %cmp.i.i.us.us.7.1 = icmp sgt i32 %13, %conv.i.i.us.us.7.1 - br i1 %cmp.i.i.us.us.7.1, label %if.then.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1 - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %add8.i.i.us.1 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add8.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %if.end.i.i.us - br i1 %cmp.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %if.end.i.i.us.1 - %add8.i.i.us.2 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add8.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %if.end.i.i.us.1 - br i1 %cmp.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %if.end.i.i.us.2 - %add8.i.i.us.3 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add8.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %if.end.i.i.us.2 - br i1 %cmp.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %if.end.i.i.us.3 - %add8.i.i.us.4 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add8.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %if.end.i.i.us.3 - br i1 %cmp.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %if.end.i.i.us.4 - %add8.i.i.us.5 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add8.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %if.end.i.i.us.4 - br i1 %cmp.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %if.end.i.i.us.5 - %add8.i.i.us.6 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add8.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %if.end.i.i.us.5 - br i1 %cmp.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %if.end.i.i.us.6 - %add8.i.i.us.7 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add8.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %if.end.i.i.us.6 - br i1 %cmp.i.i.us.8, label %if.then.i.i.us.8, label %if.end.i.i.us.8 - -if.then.i.i.us.8: ; preds = %if.end.i.i.us.7 - %add8.i.i.us.8 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.8 - %idxprom.i.i.us.8 = sext i32 %add8.i.i.us.8 to i64 - %arrayidx.i.i.us.8 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.8 - -if.end.i.i.us.8: ; preds = %if.then.i.i.us.8, %if.end.i.i.us.7 - br i1 %cmp.i.i.us.9, label %if.then.i.i.us.9, label %if.end.i.i.us.9 - -if.then.i.i.us.9: ; preds = %if.end.i.i.us.8 - %add8.i.i.us.9 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.9 - %idxprom.i.i.us.9 = sext i32 %add8.i.i.us.9 to i64 - %arrayidx.i.i.us.9 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.9 - -if.end.i.i.us.9: ; preds = %if.then.i.i.us.9, %if.end.i.i.us.8 - br i1 %cmp.i.i.us.10, label %if.then.i.i.us.10, label %if.end.i.i.us.10 - -if.then.i.i.us.10: ; preds = %if.end.i.i.us.9 - %add8.i.i.us.10 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.10 - %idxprom.i.i.us.10 = sext i32 %add8.i.i.us.10 to i64 - %arrayidx.i.i.us.10 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.10 - -if.end.i.i.us.10: ; preds = %if.then.i.i.us.10, %if.end.i.i.us.9 - br i1 %cmp.i.i.us.11, label %if.then.i.i.us.11, label %if.end.i.i.us.11 - -if.then.i.i.us.11: ; preds = %if.end.i.i.us.10 - %add8.i.i.us.11 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.11 - %idxprom.i.i.us.11 = sext i32 %add8.i.i.us.11 to i64 - %arrayidx.i.i.us.11 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.11 - -if.end.i.i.us.11: ; preds = %if.then.i.i.us.11, %if.end.i.i.us.10 - br i1 %cmp.i.i.us.12, label %if.then.i.i.us.12, label %if.end.i.i.us.12 - -if.then.i.i.us.12: ; preds = %if.end.i.i.us.11 - %add8.i.i.us.12 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.12 - %idxprom.i.i.us.12 = sext i32 %add8.i.i.us.12 to i64 - %arrayidx.i.i.us.12 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.12 - -if.end.i.i.us.12: ; preds = %if.then.i.i.us.12, %if.end.i.i.us.11 - br i1 %cmp.i.i.us.13, label %if.then.i.i.us.13, label %if.end.i.i.us.13 - -if.then.i.i.us.13: ; preds = %if.end.i.i.us.12 - %add8.i.i.us.13 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.13 - %idxprom.i.i.us.13 = sext i32 %add8.i.i.us.13 to i64 - %arrayidx.i.i.us.13 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.13 - -if.end.i.i.us.13: ; preds = %if.then.i.i.us.13, %if.end.i.i.us.12 - br i1 %cmp.i.i.us.14, label %if.then.i.i.us.14, label %if.end.i.i.us.14 - -if.then.i.i.us.14: ; preds = %if.end.i.i.us.13 - %add8.i.i.us.14 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.14 - %idxprom.i.i.us.14 = sext i32 %add8.i.i.us.14 to i64 - %arrayidx.i.i.us.14 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.14 - -if.end.i.i.us.14: ; preds = %if.then.i.i.us.14, %if.end.i.i.us.13 - br i1 %cmp.i.i.us.15, label %if.then.i.i.us.15, label %if.end.i.i.us.15 - -if.then.i.i.us.15: ; preds = %if.end.i.i.us.14 - %add8.i.i.us.15 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.15 - %idxprom.i.i.us.15 = sext i32 %add8.i.i.us.15 to i64 - %arrayidx.i.i.us.15 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.15 - -if.end.i.i.us.15: ; preds = %if.then.i.i.us.15, %if.end.i.i.us.14 - br i1 %cmp.i.i.us.16, label %if.then.i.i.us.16, label %if.end.i.i.us.16 - -if.then.i.i.us.16: ; preds = %if.end.i.i.us.15 - %add8.i.i.us.16 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.16 - %idxprom.i.i.us.16 = sext i32 %add8.i.i.us.16 to i64 - %arrayidx.i.i.us.16 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.16 - -if.end.i.i.us.16: ; preds = %if.then.i.i.us.16, %if.end.i.i.us.15 - br i1 %cmp.i.i.us.17, label %if.then.i.i.us.17, label %if.end.i.i.us.17 - -if.then.i.i.us.17: ; preds = %if.end.i.i.us.16 - %add8.i.i.us.17 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.17 - %idxprom.i.i.us.17 = sext i32 %add8.i.i.us.17 to i64 - %arrayidx.i.i.us.17 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.17 - -if.end.i.i.us.17: ; preds = %if.then.i.i.us.17, %if.end.i.i.us.16 - br i1 %cmp.i.i.us.18, label %if.then.i.i.us.18, label %if.end.i.i.us.18 - -if.then.i.i.us.18: ; preds = %if.end.i.i.us.17 - %add8.i.i.us.18 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.18 - %idxprom.i.i.us.18 = sext i32 %add8.i.i.us.18 to i64 - %arrayidx.i.i.us.18 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.18 - -if.end.i.i.us.18: ; preds = %if.then.i.i.us.18, %if.end.i.i.us.17 - br i1 %cmp.i.i.us.19, label %if.then.i.i.us.19, label %if.end.i.i.us.19 - -if.then.i.i.us.19: ; preds = %if.end.i.i.us.18 - %add8.i.i.us.19 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.19 - %idxprom.i.i.us.19 = sext i32 %add8.i.i.us.19 to i64 - %arrayidx.i.i.us.19 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.19 - -if.end.i.i.us.19: ; preds = %if.then.i.i.us.19, %if.end.i.i.us.18 - br i1 %cmp.i.i.us.20, label %if.then.i.i.us.20, label %if.end.i.i.us.20 - -if.then.i.i.us.20: ; preds = %if.end.i.i.us.19 - %add8.i.i.us.20 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.20 - %idxprom.i.i.us.20 = sext i32 %add8.i.i.us.20 to i64 - %arrayidx.i.i.us.20 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.20 - -if.end.i.i.us.20: ; preds = %if.then.i.i.us.20, %if.end.i.i.us.19 - br i1 %cmp.i.i.us.21, label %if.then.i.i.us.21, label %if.end.i.i.us.21 - -if.then.i.i.us.21: ; preds = %if.end.i.i.us.20 - %add8.i.i.us.21 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.21 - %idxprom.i.i.us.21 = sext i32 %add8.i.i.us.21 to i64 - %arrayidx.i.i.us.21 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.21 - -if.end.i.i.us.21: ; preds = %if.then.i.i.us.21, %if.end.i.i.us.20 - br i1 %cmp.i.i.us.22, label %if.then.i.i.us.22, label %if.end.i.i.us.22 - -if.then.i.i.us.22: ; preds = %if.end.i.i.us.21 - %add8.i.i.us.22 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.22 - %idxprom.i.i.us.22 = sext i32 %add8.i.i.us.22 to i64 - %arrayidx.i.i.us.22 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.22 - -if.end.i.i.us.22: ; preds = %if.then.i.i.us.22, %if.end.i.i.us.21 - br i1 %cmp.i.i.us.23, label %if.then.i.i.us.23, label %if.end.i.i.us.23 - -if.then.i.i.us.23: ; preds = %if.end.i.i.us.22 - %add8.i.i.us.23 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.23 - %idxprom.i.i.us.23 = sext i32 %add8.i.i.us.23 to i64 - %arrayidx.i.i.us.23 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.23 - -if.end.i.i.us.23: ; preds = %if.then.i.i.us.23, %if.end.i.i.us.22 - br i1 %cmp.i.i.us.24, label %if.then.i.i.us.24, label %if.end.i.i.us.24 - -if.then.i.i.us.24: ; preds = %if.end.i.i.us.23 - %add8.i.i.us.24 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.24 - %idxprom.i.i.us.24 = sext i32 %add8.i.i.us.24 to i64 - %arrayidx.i.i.us.24 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.24 - -if.end.i.i.us.24: ; preds = %if.then.i.i.us.24, %if.end.i.i.us.23 - br i1 %cmp.i.i.us.25, label %if.then.i.i.us.25, label %if.end.i.i.us.25 - -if.then.i.i.us.25: ; preds = %if.end.i.i.us.24 - %add8.i.i.us.25 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.25 - %idxprom.i.i.us.25 = sext i32 %add8.i.i.us.25 to i64 - %arrayidx.i.i.us.25 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.25 - -if.end.i.i.us.25: ; preds = %if.then.i.i.us.25, %if.end.i.i.us.24 - br i1 %cmp.i.i.us.26, label %if.then.i.i.us.26, label %if.end.i.i.us.26 - -if.then.i.i.us.26: ; preds = %if.end.i.i.us.25 - %add8.i.i.us.26 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.26 - %idxprom.i.i.us.26 = sext i32 %add8.i.i.us.26 to i64 - %arrayidx.i.i.us.26 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.26 - -if.end.i.i.us.26: ; preds = %if.then.i.i.us.26, %if.end.i.i.us.25 - br i1 %cmp.i.i.us.27, label %if.then.i.i.us.27, label %if.end.i.i.us.27 - -if.then.i.i.us.27: ; preds = %if.end.i.i.us.26 - %add8.i.i.us.27 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.27 - %idxprom.i.i.us.27 = sext i32 %add8.i.i.us.27 to i64 - %arrayidx.i.i.us.27 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.27 - -if.end.i.i.us.27: ; preds = %if.then.i.i.us.27, %if.end.i.i.us.26 - br i1 %cmp.i.i.us.28, label %if.then.i.i.us.28, label %if.end.i.i.us.28 - -if.then.i.i.us.28: ; preds = %if.end.i.i.us.27 - %add8.i.i.us.28 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.28 - %idxprom.i.i.us.28 = sext i32 %add8.i.i.us.28 to i64 - %arrayidx.i.i.us.28 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.28 - -if.end.i.i.us.28: ; preds = %if.then.i.i.us.28, %if.end.i.i.us.27 - br i1 %cmp.i.i.us.29, label %if.then.i.i.us.29, label %if.end.i.i.us.29 - -if.then.i.i.us.29: ; preds = %if.end.i.i.us.28 - %add8.i.i.us.29 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.29 - %idxprom.i.i.us.29 = sext i32 %add8.i.i.us.29 to i64 - %arrayidx.i.i.us.29 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.29 - -if.end.i.i.us.29: ; preds = %if.then.i.i.us.29, %if.end.i.i.us.28 - br i1 %cmp.i.i.us.30, label %if.then.i.i.us.30, label %if.end.i.i.us.30 - -if.then.i.i.us.30: ; preds = %if.end.i.i.us.29 - %add8.i.i.us.30 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.30 - %idxprom.i.i.us.30 = sext i32 %add8.i.i.us.30 to i64 - %arrayidx.i.i.us.30 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.30 - -if.end.i.i.us.30: ; preds = %if.then.i.i.us.30, %if.end.i.i.us.29 - br i1 %cmp.i.i.us.31, label %if.then.i.i.us.31, label %pregion_for_end.i.i - -if.then.i.i.us.31: ; preds = %if.end.i.i.us.30 - %add8.i.i.us.31 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.31 - %idxprom.i.i.us.31 = sext i32 %add8.i.i.us.31 to i64 - %arrayidx.i.i.us.31 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -if.then.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7 - %add8.i.i.us.us.7.1 = add nsw i32 %reass.mul.i.i.us.7, %conv.i.i.us.us.7.1 - %idxprom.i.i.us.us.7.1 = sext i32 %add8.i.i.us.us.7.1 to i64 - %arrayidx.i.i.us.us.7.1 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7.1 = shl i64 %add1.i.i.i.us.us.7.1, 32 - %150 = ashr exact i64 %sext.i.i.us.us.7.1, 32 - br label %for.body.i.i.us.us.7.1 - -for.body.i.i.us.us.7.1: ; preds = %for.body.i.i.us.us.7.1, %if.then.i.i.us.us.7.1 - %indvars.iv.next.i.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.i.us.us.7.1, %for.body.i.i.us.us.7.1 ], [ 0, %if.then.i.i.us.us.7.1 ] - %151 = phi float [ %157, %for.body.i.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.i.us.us.7.1 ] - %152 = add nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %140 - %arrayidx24.i.i.us.us.7.1 = getelementptr inbounds float, float* %17, i64 %152 - %153 = load float, float* %arrayidx24.i.i.us.us.7.1, align 4, !tbaa !12 - %154 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %30 - %155 = add nsw i64 %154, %150 - %arrayidx28.i.i.us.us.7.1 = getelementptr inbounds float, float* %21, i64 %155 - %156 = load float, float* %arrayidx28.i.i.us.us.7.1, align 4, !tbaa !12 - %157 = tail call float @llvm.fmuladd.f32(float %153, float %156, float %151) #2 - store float %157, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7.1, 1 - %exitcond.not.i.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.7.1, %30 - br i1 %exitcond.not.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1.loopexit, label %for.body.i.i.us.us.7.1, !llvm.loop !19 - -if.end.i.i.us.us.7.1.loopexit: ; preds = %for.body.i.i.us.us.7.1 - br label %if.end.i.i.us.us.7.1 - -if.end.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7.1.loopexit, %if.end.i.i.us.us.7 - %158 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %158, 32 - br i1 %exitcond.7.not.1, label %_pocl_kernel_doitgen_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !23 - -if.then.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6 - %add8.i.i.us.us.6.1 = add nsw i32 %reass.mul.i.i.us.6, %conv.i.i.us.us.6.1 - %idxprom.i.i.us.us.6.1 = sext i32 %add8.i.i.us.us.6.1 to i64 - %arrayidx.i.i.us.us.6.1 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6.1 = shl i64 %add1.i.i.i.us.us.6.1, 32 - %159 = ashr exact i64 %sext.i.i.us.us.6.1, 32 - br label %for.body.i.i.us.us.6.1 - -for.body.i.i.us.us.6.1: ; preds = %for.body.i.i.us.us.6.1, %if.then.i.i.us.us.6.1 - %indvars.iv.next.i.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.i.us.us.6.1, %for.body.i.i.us.us.6.1 ], [ 0, %if.then.i.i.us.us.6.1 ] - %160 = phi float [ %166, %for.body.i.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.i.us.us.6.1 ] - %161 = add nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %129 - %arrayidx24.i.i.us.us.6.1 = getelementptr inbounds float, float* %17, i64 %161 - %162 = load float, float* %arrayidx24.i.i.us.us.6.1, align 4, !tbaa !12 - %163 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %30 - %164 = add nsw i64 %163, %159 - %arrayidx28.i.i.us.us.6.1 = getelementptr inbounds float, float* %21, i64 %164 - %165 = load float, float* %arrayidx28.i.i.us.us.6.1, align 4, !tbaa !12 - %166 = tail call float @llvm.fmuladd.f32(float %162, float %165, float %160) #2 - store float %166, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6.1, 1 - %exitcond.not.i.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.6.1, %30 - br i1 %exitcond.not.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1.loopexit, label %for.body.i.i.us.us.6.1, !llvm.loop !19 - -if.end.i.i.us.us.6.1.loopexit: ; preds = %for.body.i.i.us.us.6.1 - br label %if.end.i.i.us.us.6.1 - -if.end.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6.1.loopexit, %if.end.i.i.us.us.6 - %167 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %167, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !23 - -if.then.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5 - %add8.i.i.us.us.5.1 = add nsw i32 %reass.mul.i.i.us.5, %conv.i.i.us.us.5.1 - %idxprom.i.i.us.us.5.1 = sext i32 %add8.i.i.us.us.5.1 to i64 - %arrayidx.i.i.us.us.5.1 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5.1 = shl i64 %add1.i.i.i.us.us.5.1, 32 - %168 = ashr exact i64 %sext.i.i.us.us.5.1, 32 - br label %for.body.i.i.us.us.5.1 - -for.body.i.i.us.us.5.1: ; preds = %for.body.i.i.us.us.5.1, %if.then.i.i.us.us.5.1 - %indvars.iv.next.i.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.i.us.us.5.1, %for.body.i.i.us.us.5.1 ], [ 0, %if.then.i.i.us.us.5.1 ] - %169 = phi float [ %175, %for.body.i.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.i.us.us.5.1 ] - %170 = add nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %118 - %arrayidx24.i.i.us.us.5.1 = getelementptr inbounds float, float* %17, i64 %170 - %171 = load float, float* %arrayidx24.i.i.us.us.5.1, align 4, !tbaa !12 - %172 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %30 - %173 = add nsw i64 %172, %168 - %arrayidx28.i.i.us.us.5.1 = getelementptr inbounds float, float* %21, i64 %173 - %174 = load float, float* %arrayidx28.i.i.us.us.5.1, align 4, !tbaa !12 - %175 = tail call float @llvm.fmuladd.f32(float %171, float %174, float %169) #2 - store float %175, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5.1, 1 - %exitcond.not.i.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.5.1, %30 - br i1 %exitcond.not.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1.loopexit, label %for.body.i.i.us.us.5.1, !llvm.loop !19 - -if.end.i.i.us.us.5.1.loopexit: ; preds = %for.body.i.i.us.us.5.1 - br label %if.end.i.i.us.us.5.1 - -if.end.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5.1.loopexit, %if.end.i.i.us.us.5 - %176 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %176, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !23 - -if.then.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4 - %add8.i.i.us.us.4.1 = add nsw i32 %reass.mul.i.i.us.4, %conv.i.i.us.us.4.1 - %idxprom.i.i.us.us.4.1 = sext i32 %add8.i.i.us.us.4.1 to i64 - %arrayidx.i.i.us.us.4.1 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4.1 = shl i64 %add1.i.i.i.us.us.4.1, 32 - %177 = ashr exact i64 %sext.i.i.us.us.4.1, 32 - br label %for.body.i.i.us.us.4.1 - -for.body.i.i.us.us.4.1: ; preds = %for.body.i.i.us.us.4.1, %if.then.i.i.us.us.4.1 - %indvars.iv.next.i.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.i.us.us.4.1, %for.body.i.i.us.us.4.1 ], [ 0, %if.then.i.i.us.us.4.1 ] - %178 = phi float [ %184, %for.body.i.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.i.us.us.4.1 ] - %179 = add nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %107 - %arrayidx24.i.i.us.us.4.1 = getelementptr inbounds float, float* %17, i64 %179 - %180 = load float, float* %arrayidx24.i.i.us.us.4.1, align 4, !tbaa !12 - %181 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %30 - %182 = add nsw i64 %181, %177 - %arrayidx28.i.i.us.us.4.1 = getelementptr inbounds float, float* %21, i64 %182 - %183 = load float, float* %arrayidx28.i.i.us.us.4.1, align 4, !tbaa !12 - %184 = tail call float @llvm.fmuladd.f32(float %180, float %183, float %178) #2 - store float %184, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4.1, 1 - %exitcond.not.i.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.4.1, %30 - br i1 %exitcond.not.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1.loopexit, label %for.body.i.i.us.us.4.1, !llvm.loop !19 - -if.end.i.i.us.us.4.1.loopexit: ; preds = %for.body.i.i.us.us.4.1 - br label %if.end.i.i.us.us.4.1 - -if.end.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4.1.loopexit, %if.end.i.i.us.us.4 - %185 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %185, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !23 - -if.then.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3 - %add8.i.i.us.us.3.1 = add nsw i32 %reass.mul.i.i.us.3, %conv.i.i.us.us.3.1 - %idxprom.i.i.us.us.3.1 = sext i32 %add8.i.i.us.us.3.1 to i64 - %arrayidx.i.i.us.us.3.1 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3.1 = shl i64 %add1.i.i.i.us.us.3.1, 32 - %186 = ashr exact i64 %sext.i.i.us.us.3.1, 32 - br label %for.body.i.i.us.us.3.1 - -for.body.i.i.us.us.3.1: ; preds = %for.body.i.i.us.us.3.1, %if.then.i.i.us.us.3.1 - %indvars.iv.next.i.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.i.us.us.3.1, %for.body.i.i.us.us.3.1 ], [ 0, %if.then.i.i.us.us.3.1 ] - %187 = phi float [ %193, %for.body.i.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.i.us.us.3.1 ] - %188 = add nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %96 - %arrayidx24.i.i.us.us.3.1 = getelementptr inbounds float, float* %17, i64 %188 - %189 = load float, float* %arrayidx24.i.i.us.us.3.1, align 4, !tbaa !12 - %190 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %30 - %191 = add nsw i64 %190, %186 - %arrayidx28.i.i.us.us.3.1 = getelementptr inbounds float, float* %21, i64 %191 - %192 = load float, float* %arrayidx28.i.i.us.us.3.1, align 4, !tbaa !12 - %193 = tail call float @llvm.fmuladd.f32(float %189, float %192, float %187) #2 - store float %193, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3.1, 1 - %exitcond.not.i.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.3.1, %30 - br i1 %exitcond.not.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1.loopexit, label %for.body.i.i.us.us.3.1, !llvm.loop !19 - -if.end.i.i.us.us.3.1.loopexit: ; preds = %for.body.i.i.us.us.3.1 - br label %if.end.i.i.us.us.3.1 - -if.end.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3.1.loopexit, %if.end.i.i.us.us.3 - %194 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %194, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !23 - -if.then.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2 - %add8.i.i.us.us.2.1 = add nsw i32 %reass.mul.i.i.us.2, %conv.i.i.us.us.2.1 - %idxprom.i.i.us.us.2.1 = sext i32 %add8.i.i.us.us.2.1 to i64 - %arrayidx.i.i.us.us.2.1 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2.1 = shl i64 %add1.i.i.i.us.us.2.1, 32 - %195 = ashr exact i64 %sext.i.i.us.us.2.1, 32 - br label %for.body.i.i.us.us.2.1 - -for.body.i.i.us.us.2.1: ; preds = %for.body.i.i.us.us.2.1, %if.then.i.i.us.us.2.1 - %indvars.iv.next.i.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.i.us.us.2.1, %for.body.i.i.us.us.2.1 ], [ 0, %if.then.i.i.us.us.2.1 ] - %196 = phi float [ %202, %for.body.i.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.i.us.us.2.1 ] - %197 = add nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %85 - %arrayidx24.i.i.us.us.2.1 = getelementptr inbounds float, float* %17, i64 %197 - %198 = load float, float* %arrayidx24.i.i.us.us.2.1, align 4, !tbaa !12 - %199 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %30 - %200 = add nsw i64 %199, %195 - %arrayidx28.i.i.us.us.2.1 = getelementptr inbounds float, float* %21, i64 %200 - %201 = load float, float* %arrayidx28.i.i.us.us.2.1, align 4, !tbaa !12 - %202 = tail call float @llvm.fmuladd.f32(float %198, float %201, float %196) #2 - store float %202, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2.1, 1 - %exitcond.not.i.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.2.1, %30 - br i1 %exitcond.not.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1.loopexit, label %for.body.i.i.us.us.2.1, !llvm.loop !19 - -if.end.i.i.us.us.2.1.loopexit: ; preds = %for.body.i.i.us.us.2.1 - br label %if.end.i.i.us.us.2.1 - -if.end.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2.1.loopexit, %if.end.i.i.us.us.2 - %203 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %203, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !23 - -if.then.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1 - %add8.i.i.us.us.1.1 = add nsw i32 %reass.mul.i.i.us.1, %conv.i.i.us.us.1.1 - %idxprom.i.i.us.us.1.1 = sext i32 %add8.i.i.us.us.1.1 to i64 - %arrayidx.i.i.us.us.1.1 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1.1 = shl i64 %add1.i.i.i.us.us.1.1, 32 - %204 = ashr exact i64 %sext.i.i.us.us.1.1, 32 - br label %for.body.i.i.us.us.1.1 - -for.body.i.i.us.us.1.1: ; preds = %for.body.i.i.us.us.1.1, %if.then.i.i.us.us.1.1 - %indvars.iv.next.i.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1.1, %for.body.i.i.us.us.1.1 ], [ 0, %if.then.i.i.us.us.1.1 ] - %205 = phi float [ %211, %for.body.i.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1.1 ] - %206 = add nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %64 - %arrayidx24.i.i.us.us.1.1 = getelementptr inbounds float, float* %17, i64 %206 - %207 = load float, float* %arrayidx24.i.i.us.us.1.1, align 4, !tbaa !12 - %208 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %30 - %209 = add nsw i64 %208, %204 - %arrayidx28.i.i.us.us.1.1 = getelementptr inbounds float, float* %21, i64 %209 - %210 = load float, float* %arrayidx28.i.i.us.us.1.1, align 4, !tbaa !12 - %211 = tail call float @llvm.fmuladd.f32(float %207, float %210, float %205) #2 - store float %211, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1.1, 1 - %exitcond.not.i.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1.1, %30 - br i1 %exitcond.not.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1.loopexit, label %for.body.i.i.us.us.1.1, !llvm.loop !19 - -if.end.i.i.us.us.1.1.loopexit: ; preds = %for.body.i.i.us.us.1.1 - br label %if.end.i.i.us.us.1.1 - -if.end.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1.1.loopexit, %if.end.i.i.us.us.1 - %212 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %212, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !23 - -if.then.i.i.us.us.146: ; preds = %if.end.i.i.us.us - %add8.i.i.us.us.142 = add nsw i32 %reass.mul.i.i.us, %conv.i.i.us.us.139 - %idxprom.i.i.us.us.143 = sext i32 %add8.i.i.us.us.142 to i64 - %arrayidx.i.i.us.us.144 = getelementptr inbounds float, float* %25, i64 %idxprom.i.i.us.us.143 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.145 = shl i64 %add1.i.i.i.us.us.138, 32 - %213 = ashr exact i64 %sext.i.i.us.us.145, 32 - br label %for.body.i.i.us.us.152 - -for.body.i.i.us.us.152: ; preds = %for.body.i.i.us.us.152, %if.then.i.i.us.us.146 - %indvars.iv.next.i.i3.us.us.147 = phi i64 [ %indvars.iv.next.i.i.us.us.150, %for.body.i.i.us.us.152 ], [ 0, %if.then.i.i.us.us.146 ] - %214 = phi float [ %220, %for.body.i.i.us.us.152 ], [ 0.000000e+00, %if.then.i.i.us.us.146 ] - %215 = add nsw i64 %indvars.iv.next.i.i3.us.us.147, %62 - %arrayidx24.i.i.us.us.148 = getelementptr inbounds float, float* %17, i64 %215 - %216 = load float, float* %arrayidx24.i.i.us.us.148, align 4, !tbaa !12 - %217 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.147, %30 - %218 = add nsw i64 %217, %213 - %arrayidx28.i.i.us.us.149 = getelementptr inbounds float, float* %21, i64 %218 - %219 = load float, float* %arrayidx28.i.i.us.us.149, align 4, !tbaa !12 - %220 = tail call float @llvm.fmuladd.f32(float %216, float %219, float %214) #2 - store float %220, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.150 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.147, 1 - %exitcond.not.i.i.us.us.151 = icmp eq i64 %indvars.iv.next.i.i.us.us.150, %30 - br i1 %exitcond.not.i.i.us.us.151, label %if.end.i.i.us.us.153.loopexit, label %for.body.i.i.us.us.152, !llvm.loop !19 - -if.end.i.i.us.us.153.loopexit: ; preds = %for.body.i.i.us.us.152 - br label %if.end.i.i.us.us.153 - -if.end.i.i.us.us.153: ; preds = %if.end.i.i.us.us.153.loopexit, %if.end.i.i.us.us - %221 = add nuw nsw i64 %_local_id_x.i.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %221, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !23 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_doitgen_kernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = getelementptr i8*, i8** %0, i64 1 - %7 = bitcast i8** %6 to i32** - %8 = load i32*, i32** %7, align 8 - %9 = load i32, i32* %8, align 4 - %10 = getelementptr i8*, i8** %0, i64 2 - %11 = bitcast i8** %10 to i32** - %12 = load i32*, i32** %11, align 8 - %13 = load i32, i32* %12, align 4 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to float** - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 4 - %18 = bitcast i8** %17 to float** - %19 = load float*, float** %18, align 8 - %20 = getelementptr i8*, i8** %0, i64 5 - %21 = bitcast i8** %20 to float** - %22 = load float*, float** %21, align 8 - %23 = getelementptr i8*, i8** %0, i64 6 - %24 = bitcast i8** %23 to i32** - %25 = load i32*, i32** %24, align 8 - %26 = load i32, i32* %25, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %mul6.i.i = mul i32 %26, %9 - %cmp970.i.i = icmp sgt i32 %13, 0 - %27 = zext i32 %13 to i64 - br i1 %cmp970.i.i, label %pregion_for_entry.pregion_for_init.i.i.us.preheader, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - %conv.i.i.us = trunc i64 %mul.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %13, %conv.i.i.us - %28 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.1 = or i32 %28, 1 - %cmp.i.i.us.1 = icmp sgt i32 %13, %conv.i.i.us.1 - %29 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.2 = or i32 %29, 2 - %cmp.i.i.us.2 = icmp sgt i32 %13, %conv.i.i.us.2 - %30 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.3 = or i32 %30, 3 - %cmp.i.i.us.3 = icmp sgt i32 %13, %conv.i.i.us.3 - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.4 = or i32 %31, 4 - %cmp.i.i.us.4 = icmp sgt i32 %13, %conv.i.i.us.4 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.5 = or i32 %32, 5 - %cmp.i.i.us.5 = icmp sgt i32 %13, %conv.i.i.us.5 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.6 = or i32 %33, 6 - %cmp.i.i.us.6 = icmp sgt i32 %13, %conv.i.i.us.6 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.7 = or i32 %34, 7 - %cmp.i.i.us.7 = icmp sgt i32 %13, %conv.i.i.us.7 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.8 = or i32 %35, 8 - %cmp.i.i.us.8 = icmp sgt i32 %13, %conv.i.i.us.8 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.9 = or i32 %36, 9 - %cmp.i.i.us.9 = icmp sgt i32 %13, %conv.i.i.us.9 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.10 = or i32 %37, 10 - %cmp.i.i.us.10 = icmp sgt i32 %13, %conv.i.i.us.10 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.11 = or i32 %38, 11 - %cmp.i.i.us.11 = icmp sgt i32 %13, %conv.i.i.us.11 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.12 = or i32 %39, 12 - %cmp.i.i.us.12 = icmp sgt i32 %13, %conv.i.i.us.12 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.13 = or i32 %40, 13 - %cmp.i.i.us.13 = icmp sgt i32 %13, %conv.i.i.us.13 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.14 = or i32 %41, 14 - %cmp.i.i.us.14 = icmp sgt i32 %13, %conv.i.i.us.14 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.15 = or i32 %42, 15 - %cmp.i.i.us.15 = icmp sgt i32 %13, %conv.i.i.us.15 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.16 = or i32 %43, 16 - %cmp.i.i.us.16 = icmp sgt i32 %13, %conv.i.i.us.16 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.17 = or i32 %44, 17 - %cmp.i.i.us.17 = icmp sgt i32 %13, %conv.i.i.us.17 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.18 = or i32 %45, 18 - %cmp.i.i.us.18 = icmp sgt i32 %13, %conv.i.i.us.18 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.19 = or i32 %46, 19 - %cmp.i.i.us.19 = icmp sgt i32 %13, %conv.i.i.us.19 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.20 = or i32 %47, 20 - %cmp.i.i.us.20 = icmp sgt i32 %13, %conv.i.i.us.20 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.21 = or i32 %48, 21 - %cmp.i.i.us.21 = icmp sgt i32 %13, %conv.i.i.us.21 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.22 = or i32 %49, 22 - %cmp.i.i.us.22 = icmp sgt i32 %13, %conv.i.i.us.22 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.23 = or i32 %50, 23 - %cmp.i.i.us.23 = icmp sgt i32 %13, %conv.i.i.us.23 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.24 = or i32 %51, 24 - %cmp.i.i.us.24 = icmp sgt i32 %13, %conv.i.i.us.24 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.25 = or i32 %52, 25 - %cmp.i.i.us.25 = icmp sgt i32 %13, %conv.i.i.us.25 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.26 = or i32 %53, 26 - %cmp.i.i.us.26 = icmp sgt i32 %13, %conv.i.i.us.26 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.27 = or i32 %54, 27 - %cmp.i.i.us.27 = icmp sgt i32 %13, %conv.i.i.us.27 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.28 = or i32 %55, 28 - %cmp.i.i.us.28 = icmp sgt i32 %13, %conv.i.i.us.28 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.29 = or i32 %56, 29 - %cmp.i.i.us.29 = icmp sgt i32 %13, %conv.i.i.us.29 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.30 = or i32 %57, 30 - %cmp.i.i.us.30 = icmp sgt i32 %13, %conv.i.i.us.30 - %58 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.31 = or i32 %58, 31 - %cmp.i.i.us.31 = icmp sgt i32 %13, %conv.i.i.us.31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i.us.preheader: ; preds = %5 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp4.i.i.us = icmp sgt i32 %9, %conv2.i.i.us - %reass.add.i.i.us = add i32 %mul6.i.i, %conv2.i.i.us - %reass.mul.i.i.us = mul i32 %reass.add.i.i.us, %13 - %59 = sext i32 %reass.mul.i.i.us to i64 - br i1 %cmp4.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us.preheader - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us.153 - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us.preheader - %60 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %60, 1 - %cmp4.i.i.us.1 = icmp sgt i32 %9, %conv2.i.i.us.1 - %reass.add.i.i.us.1 = add i32 %mul6.i.i, %conv2.i.i.us.1 - %reass.mul.i.i.us.1 = mul i32 %reass.add.i.i.us.1, %13 - %61 = sext i32 %reass.mul.i.i.us.1 to i64 - br i1 %cmp4.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us.153, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %218, %if.end.i.i.us.us.153 ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp.i.i.us.us = icmp sgt i32 %13, %conv.i.i.us.us - br i1 %cmp.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add8.i.i.us.us = add nsw i32 %reass.mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add8.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us - store float 0.000000e+00, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %62 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %63 = or i64 %_local_id_x.i.0.us.us, 1 - %add1.i.i.i.us.us.138 = add nuw nsw i64 %63, %mul.i.i.i - %conv.i.i.us.us.139 = trunc i64 %add1.i.i.i.us.us.138 to i32 - %cmp.i.i.us.us.140 = icmp sgt i32 %13, %conv.i.i.us.us.139 - br i1 %cmp.i.i.us.us.140, label %if.then.i.i.us.us.146, label %if.end.i.i.us.us.153 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %64 = phi float [ %70, %for.body.i.i.us.us ], [ 0.000000e+00, %if.then.i.i.us.us ] - %65 = add nsw i64 %indvars.iv.next.i.i3.us.us, %59 - %arrayidx24.i.i.us.us = getelementptr inbounds float, float* %16, i64 %65 - %66 = load float, float* %arrayidx24.i.i.us.us, align 4, !tbaa !12 - %67 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us, %27 - %68 = add nsw i64 %67, %62 - %arrayidx28.i.i.us.us = getelementptr inbounds float, float* %19, i64 %68 - %69 = load float, float* %arrayidx28.i.i.us.us, align 4, !tbaa !12 - %70 = tail call float @llvm.fmuladd.f32(float %66, float %69, float %64) #2 - store float %70, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %27 - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !19 - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %pregion_for_entry.pregion_for_init.i.i.preheader - %_local_id_y.i.0 = phi i64 [ %71, %pregion_for_end.i.i ], [ 0, %pregion_for_entry.pregion_for_init.i.i.preheader ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp4.i.i = icmp sgt i32 %9, %conv2.i.i - %reass.add.i.i = add i32 %mul6.i.i, %conv2.i.i - %reass.mul.i.i = mul i32 %reass.add.i.i, %13 - br i1 %cmp4.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us.preheader - %add8.i.i.us = add nsw i32 %reass.mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add8.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us - store float 0.000000e+00, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - br i1 %cmp.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -pregion_for_end.i.i: ; preds = %if.then.i.i.us.31, %if.end.i.i.us.30, %pregion_for_entry.pregion_for_init.i.i - %71 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond33.not = icmp eq i64 %71, 8 - br i1 %exitcond33.not, label %_pocl_kernel_doitgen_kernel1.exit.loopexit54, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !21 - -_pocl_kernel_doitgen_kernel1.exit.loopexit: ; preds = %if.end.i.i.us.us.7.1 - br label %_pocl_kernel_doitgen_kernel1.exit - -_pocl_kernel_doitgen_kernel1.exit.loopexit54: ; preds = %pregion_for_end.i.i - br label %_pocl_kernel_doitgen_kernel1.exit - -_pocl_kernel_doitgen_kernel1.exit: ; preds = %pregion_for_end.i.i.us.6, %_pocl_kernel_doitgen_kernel1.exit.loopexit54, %_pocl_kernel_doitgen_kernel1.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %209, %if.end.i.i.us.us.1.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp.i.i.us.us.1 = icmp sgt i32 %13, %conv.i.i.us.us.1 - br i1 %cmp.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add8.i.i.us.us.1 = add nsw i32 %reass.mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add8.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %72 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %73 = phi float [ %79, %for.body.i.i.us.us.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1 ] - %74 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %61 - %arrayidx24.i.i.us.us.1 = getelementptr inbounds float, float* %16, i64 %74 - %75 = load float, float* %arrayidx24.i.i.us.us.1, align 4, !tbaa !12 - %76 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, %27 - %77 = add nsw i64 %76, %72 - %arrayidx28.i.i.us.us.1 = getelementptr inbounds float, float* %19, i64 %77 - %78 = load float, float* %arrayidx28.i.i.us.us.1, align 4, !tbaa !12 - %79 = tail call float @llvm.fmuladd.f32(float %75, float %78, float %73) #2 - store float %79, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %27 - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !19 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %80 = or i64 %_local_id_x.i.0.us.us.1, 1 - %add1.i.i.i.us.us.1.1 = add nuw nsw i64 %80, %mul.i.i.i - %conv.i.i.us.us.1.1 = trunc i64 %add1.i.i.i.us.us.1.1 to i32 - %cmp.i.i.us.us.1.1 = icmp sgt i32 %13, %conv.i.i.us.us.1.1 - br i1 %cmp.i.i.us.us.1.1, label %if.then.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %81 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %81, 2 - %cmp4.i.i.us.2 = icmp sgt i32 %9, %conv2.i.i.us.2 - %reass.add.i.i.us.2 = add i32 %mul6.i.i, %conv2.i.i.us.2 - %reass.mul.i.i.us.2 = mul i32 %reass.add.i.i.us.2, %13 - %82 = sext i32 %reass.mul.i.i.us.2 to i64 - br i1 %cmp4.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.1, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %200, %if.end.i.i.us.us.2.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp.i.i.us.us.2 = icmp sgt i32 %13, %conv.i.i.us.us.2 - br i1 %cmp.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add8.i.i.us.us.2 = add nsw i32 %reass.mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add8.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %83 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %84 = phi float [ %90, %for.body.i.i.us.us.2 ], [ 0.000000e+00, %if.then.i.i.us.us.2 ] - %85 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %82 - %arrayidx24.i.i.us.us.2 = getelementptr inbounds float, float* %16, i64 %85 - %86 = load float, float* %arrayidx24.i.i.us.us.2, align 4, !tbaa !12 - %87 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, %27 - %88 = add nsw i64 %87, %83 - %arrayidx28.i.i.us.us.2 = getelementptr inbounds float, float* %19, i64 %88 - %89 = load float, float* %arrayidx28.i.i.us.us.2, align 4, !tbaa !12 - %90 = tail call float @llvm.fmuladd.f32(float %86, float %89, float %84) #2 - store float %90, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %27 - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !19 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %91 = or i64 %_local_id_x.i.0.us.us.2, 1 - %add1.i.i.i.us.us.2.1 = add nuw nsw i64 %91, %mul.i.i.i - %conv.i.i.us.us.2.1 = trunc i64 %add1.i.i.i.us.us.2.1 to i32 - %cmp.i.i.us.us.2.1 = icmp sgt i32 %13, %conv.i.i.us.us.2.1 - br i1 %cmp.i.i.us.us.2.1, label %if.then.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2.1 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %92 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %92, 3 - %cmp4.i.i.us.3 = icmp sgt i32 %9, %conv2.i.i.us.3 - %reass.add.i.i.us.3 = add i32 %mul6.i.i, %conv2.i.i.us.3 - %reass.mul.i.i.us.3 = mul i32 %reass.add.i.i.us.3, %13 - %93 = sext i32 %reass.mul.i.i.us.3 to i64 - br i1 %cmp4.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.1, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %191, %if.end.i.i.us.us.3.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp.i.i.us.us.3 = icmp sgt i32 %13, %conv.i.i.us.us.3 - br i1 %cmp.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add8.i.i.us.us.3 = add nsw i32 %reass.mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add8.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %94 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %95 = phi float [ %101, %for.body.i.i.us.us.3 ], [ 0.000000e+00, %if.then.i.i.us.us.3 ] - %96 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %93 - %arrayidx24.i.i.us.us.3 = getelementptr inbounds float, float* %16, i64 %96 - %97 = load float, float* %arrayidx24.i.i.us.us.3, align 4, !tbaa !12 - %98 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, %27 - %99 = add nsw i64 %98, %94 - %arrayidx28.i.i.us.us.3 = getelementptr inbounds float, float* %19, i64 %99 - %100 = load float, float* %arrayidx28.i.i.us.us.3, align 4, !tbaa !12 - %101 = tail call float @llvm.fmuladd.f32(float %97, float %100, float %95) #2 - store float %101, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %27 - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !19 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %102 = or i64 %_local_id_x.i.0.us.us.3, 1 - %add1.i.i.i.us.us.3.1 = add nuw nsw i64 %102, %mul.i.i.i - %conv.i.i.us.us.3.1 = trunc i64 %add1.i.i.i.us.us.3.1 to i32 - %cmp.i.i.us.us.3.1 = icmp sgt i32 %13, %conv.i.i.us.us.3.1 - br i1 %cmp.i.i.us.us.3.1, label %if.then.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3.1 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %103 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %103, 4 - %cmp4.i.i.us.4 = icmp sgt i32 %9, %conv2.i.i.us.4 - %reass.add.i.i.us.4 = add i32 %mul6.i.i, %conv2.i.i.us.4 - %reass.mul.i.i.us.4 = mul i32 %reass.add.i.i.us.4, %13 - %104 = sext i32 %reass.mul.i.i.us.4 to i64 - br i1 %cmp4.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.1, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %182, %if.end.i.i.us.us.4.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp.i.i.us.us.4 = icmp sgt i32 %13, %conv.i.i.us.us.4 - br i1 %cmp.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add8.i.i.us.us.4 = add nsw i32 %reass.mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add8.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %105 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %106 = phi float [ %112, %for.body.i.i.us.us.4 ], [ 0.000000e+00, %if.then.i.i.us.us.4 ] - %107 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %104 - %arrayidx24.i.i.us.us.4 = getelementptr inbounds float, float* %16, i64 %107 - %108 = load float, float* %arrayidx24.i.i.us.us.4, align 4, !tbaa !12 - %109 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, %27 - %110 = add nsw i64 %109, %105 - %arrayidx28.i.i.us.us.4 = getelementptr inbounds float, float* %19, i64 %110 - %111 = load float, float* %arrayidx28.i.i.us.us.4, align 4, !tbaa !12 - %112 = tail call float @llvm.fmuladd.f32(float %108, float %111, float %106) #2 - store float %112, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %27 - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !19 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %113 = or i64 %_local_id_x.i.0.us.us.4, 1 - %add1.i.i.i.us.us.4.1 = add nuw nsw i64 %113, %mul.i.i.i - %conv.i.i.us.us.4.1 = trunc i64 %add1.i.i.i.us.us.4.1 to i32 - %cmp.i.i.us.us.4.1 = icmp sgt i32 %13, %conv.i.i.us.us.4.1 - br i1 %cmp.i.i.us.us.4.1, label %if.then.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4.1 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %114 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %114, 5 - %cmp4.i.i.us.5 = icmp sgt i32 %9, %conv2.i.i.us.5 - %reass.add.i.i.us.5 = add i32 %mul6.i.i, %conv2.i.i.us.5 - %reass.mul.i.i.us.5 = mul i32 %reass.add.i.i.us.5, %13 - %115 = sext i32 %reass.mul.i.i.us.5 to i64 - br i1 %cmp4.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.1, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %173, %if.end.i.i.us.us.5.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp.i.i.us.us.5 = icmp sgt i32 %13, %conv.i.i.us.us.5 - br i1 %cmp.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add8.i.i.us.us.5 = add nsw i32 %reass.mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add8.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %116 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %117 = phi float [ %123, %for.body.i.i.us.us.5 ], [ 0.000000e+00, %if.then.i.i.us.us.5 ] - %118 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %115 - %arrayidx24.i.i.us.us.5 = getelementptr inbounds float, float* %16, i64 %118 - %119 = load float, float* %arrayidx24.i.i.us.us.5, align 4, !tbaa !12 - %120 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, %27 - %121 = add nsw i64 %120, %116 - %arrayidx28.i.i.us.us.5 = getelementptr inbounds float, float* %19, i64 %121 - %122 = load float, float* %arrayidx28.i.i.us.us.5, align 4, !tbaa !12 - %123 = tail call float @llvm.fmuladd.f32(float %119, float %122, float %117) #2 - store float %123, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %27 - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !19 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %124 = or i64 %_local_id_x.i.0.us.us.5, 1 - %add1.i.i.i.us.us.5.1 = add nuw nsw i64 %124, %mul.i.i.i - %conv.i.i.us.us.5.1 = trunc i64 %add1.i.i.i.us.us.5.1 to i32 - %cmp.i.i.us.us.5.1 = icmp sgt i32 %13, %conv.i.i.us.us.5.1 - br i1 %cmp.i.i.us.us.5.1, label %if.then.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5.1 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %125 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %125, 6 - %cmp4.i.i.us.6 = icmp sgt i32 %9, %conv2.i.i.us.6 - %reass.add.i.i.us.6 = add i32 %mul6.i.i, %conv2.i.i.us.6 - %reass.mul.i.i.us.6 = mul i32 %reass.add.i.i.us.6, %13 - %126 = sext i32 %reass.mul.i.i.us.6 to i64 - br i1 %cmp4.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.1, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %164, %if.end.i.i.us.us.6.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp.i.i.us.us.6 = icmp sgt i32 %13, %conv.i.i.us.us.6 - br i1 %cmp.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add8.i.i.us.us.6 = add nsw i32 %reass.mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add8.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %127 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %128 = phi float [ %134, %for.body.i.i.us.us.6 ], [ 0.000000e+00, %if.then.i.i.us.us.6 ] - %129 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %126 - %arrayidx24.i.i.us.us.6 = getelementptr inbounds float, float* %16, i64 %129 - %130 = load float, float* %arrayidx24.i.i.us.us.6, align 4, !tbaa !12 - %131 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, %27 - %132 = add nsw i64 %131, %127 - %arrayidx28.i.i.us.us.6 = getelementptr inbounds float, float* %19, i64 %132 - %133 = load float, float* %arrayidx28.i.i.us.us.6, align 4, !tbaa !12 - %134 = tail call float @llvm.fmuladd.f32(float %130, float %133, float %128) #2 - store float %134, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %27 - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !19 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %135 = or i64 %_local_id_x.i.0.us.us.6, 1 - %add1.i.i.i.us.us.6.1 = add nuw nsw i64 %135, %mul.i.i.i - %conv.i.i.us.us.6.1 = trunc i64 %add1.i.i.i.us.us.6.1 to i32 - %cmp.i.i.us.us.6.1 = icmp sgt i32 %13, %conv.i.i.us.us.6.1 - br i1 %cmp.i.i.us.us.6.1, label %if.then.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6.1 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %136 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %136, 7 - %cmp4.i.i.us.7 = icmp sgt i32 %9, %conv2.i.i.us.7 - %reass.add.i.i.us.7 = add i32 %mul6.i.i, %conv2.i.i.us.7 - %reass.mul.i.i.us.7 = mul i32 %reass.add.i.i.us.7, %13 - %137 = sext i32 %reass.mul.i.i.us.7 to i64 - br i1 %cmp4.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_doitgen_kernel1.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.1, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %155, %if.end.i.i.us.us.7.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp.i.i.us.us.7 = icmp sgt i32 %13, %conv.i.i.us.us.7 - br i1 %cmp.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add8.i.i.us.us.7 = add nsw i32 %reass.mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add8.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %138 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %139 = phi float [ %145, %for.body.i.i.us.us.7 ], [ 0.000000e+00, %if.then.i.i.us.us.7 ] - %140 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %137 - %arrayidx24.i.i.us.us.7 = getelementptr inbounds float, float* %16, i64 %140 - %141 = load float, float* %arrayidx24.i.i.us.us.7, align 4, !tbaa !12 - %142 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, %27 - %143 = add nsw i64 %142, %138 - %arrayidx28.i.i.us.us.7 = getelementptr inbounds float, float* %19, i64 %143 - %144 = load float, float* %arrayidx28.i.i.us.us.7, align 4, !tbaa !12 - %145 = tail call float @llvm.fmuladd.f32(float %141, float %144, float %139) #2 - store float %145, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %27 - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !19 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %146 = or i64 %_local_id_x.i.0.us.us.7, 1 - %add1.i.i.i.us.us.7.1 = add nuw nsw i64 %146, %mul.i.i.i - %conv.i.i.us.us.7.1 = trunc i64 %add1.i.i.i.us.us.7.1 to i32 - %cmp.i.i.us.us.7.1 = icmp sgt i32 %13, %conv.i.i.us.us.7.1 - br i1 %cmp.i.i.us.us.7.1, label %if.then.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1 - -if.then.i.i.us.1: ; preds = %if.end.i.i.us - %add8.i.i.us.1 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add8.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %if.end.i.i.us - br i1 %cmp.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %if.end.i.i.us.1 - %add8.i.i.us.2 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add8.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.2 - store float 0.000000e+00, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %if.end.i.i.us.1 - br i1 %cmp.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %if.end.i.i.us.2 - %add8.i.i.us.3 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add8.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.3 - store float 0.000000e+00, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %if.end.i.i.us.2 - br i1 %cmp.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %if.end.i.i.us.3 - %add8.i.i.us.4 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add8.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.4 - store float 0.000000e+00, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %if.end.i.i.us.3 - br i1 %cmp.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %if.end.i.i.us.4 - %add8.i.i.us.5 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add8.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.5 - store float 0.000000e+00, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %if.end.i.i.us.4 - br i1 %cmp.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %if.end.i.i.us.5 - %add8.i.i.us.6 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add8.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.6 - store float 0.000000e+00, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %if.end.i.i.us.5 - br i1 %cmp.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %if.end.i.i.us.6 - %add8.i.i.us.7 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add8.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.7 - store float 0.000000e+00, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %if.end.i.i.us.6 - br i1 %cmp.i.i.us.8, label %if.then.i.i.us.8, label %if.end.i.i.us.8 - -if.then.i.i.us.8: ; preds = %if.end.i.i.us.7 - %add8.i.i.us.8 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.8 - %idxprom.i.i.us.8 = sext i32 %add8.i.i.us.8 to i64 - %arrayidx.i.i.us.8 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.8 - store float 0.000000e+00, float* %arrayidx.i.i.us.8, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.8 - -if.end.i.i.us.8: ; preds = %if.then.i.i.us.8, %if.end.i.i.us.7 - br i1 %cmp.i.i.us.9, label %if.then.i.i.us.9, label %if.end.i.i.us.9 - -if.then.i.i.us.9: ; preds = %if.end.i.i.us.8 - %add8.i.i.us.9 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.9 - %idxprom.i.i.us.9 = sext i32 %add8.i.i.us.9 to i64 - %arrayidx.i.i.us.9 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.9 - store float 0.000000e+00, float* %arrayidx.i.i.us.9, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.9 - -if.end.i.i.us.9: ; preds = %if.then.i.i.us.9, %if.end.i.i.us.8 - br i1 %cmp.i.i.us.10, label %if.then.i.i.us.10, label %if.end.i.i.us.10 - -if.then.i.i.us.10: ; preds = %if.end.i.i.us.9 - %add8.i.i.us.10 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.10 - %idxprom.i.i.us.10 = sext i32 %add8.i.i.us.10 to i64 - %arrayidx.i.i.us.10 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.10 - store float 0.000000e+00, float* %arrayidx.i.i.us.10, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.10 - -if.end.i.i.us.10: ; preds = %if.then.i.i.us.10, %if.end.i.i.us.9 - br i1 %cmp.i.i.us.11, label %if.then.i.i.us.11, label %if.end.i.i.us.11 - -if.then.i.i.us.11: ; preds = %if.end.i.i.us.10 - %add8.i.i.us.11 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.11 - %idxprom.i.i.us.11 = sext i32 %add8.i.i.us.11 to i64 - %arrayidx.i.i.us.11 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.11 - store float 0.000000e+00, float* %arrayidx.i.i.us.11, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.11 - -if.end.i.i.us.11: ; preds = %if.then.i.i.us.11, %if.end.i.i.us.10 - br i1 %cmp.i.i.us.12, label %if.then.i.i.us.12, label %if.end.i.i.us.12 - -if.then.i.i.us.12: ; preds = %if.end.i.i.us.11 - %add8.i.i.us.12 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.12 - %idxprom.i.i.us.12 = sext i32 %add8.i.i.us.12 to i64 - %arrayidx.i.i.us.12 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.12 - store float 0.000000e+00, float* %arrayidx.i.i.us.12, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.12 - -if.end.i.i.us.12: ; preds = %if.then.i.i.us.12, %if.end.i.i.us.11 - br i1 %cmp.i.i.us.13, label %if.then.i.i.us.13, label %if.end.i.i.us.13 - -if.then.i.i.us.13: ; preds = %if.end.i.i.us.12 - %add8.i.i.us.13 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.13 - %idxprom.i.i.us.13 = sext i32 %add8.i.i.us.13 to i64 - %arrayidx.i.i.us.13 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.13 - store float 0.000000e+00, float* %arrayidx.i.i.us.13, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.13 - -if.end.i.i.us.13: ; preds = %if.then.i.i.us.13, %if.end.i.i.us.12 - br i1 %cmp.i.i.us.14, label %if.then.i.i.us.14, label %if.end.i.i.us.14 - -if.then.i.i.us.14: ; preds = %if.end.i.i.us.13 - %add8.i.i.us.14 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.14 - %idxprom.i.i.us.14 = sext i32 %add8.i.i.us.14 to i64 - %arrayidx.i.i.us.14 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.14 - store float 0.000000e+00, float* %arrayidx.i.i.us.14, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.14 - -if.end.i.i.us.14: ; preds = %if.then.i.i.us.14, %if.end.i.i.us.13 - br i1 %cmp.i.i.us.15, label %if.then.i.i.us.15, label %if.end.i.i.us.15 - -if.then.i.i.us.15: ; preds = %if.end.i.i.us.14 - %add8.i.i.us.15 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.15 - %idxprom.i.i.us.15 = sext i32 %add8.i.i.us.15 to i64 - %arrayidx.i.i.us.15 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.15 - store float 0.000000e+00, float* %arrayidx.i.i.us.15, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.15 - -if.end.i.i.us.15: ; preds = %if.then.i.i.us.15, %if.end.i.i.us.14 - br i1 %cmp.i.i.us.16, label %if.then.i.i.us.16, label %if.end.i.i.us.16 - -if.then.i.i.us.16: ; preds = %if.end.i.i.us.15 - %add8.i.i.us.16 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.16 - %idxprom.i.i.us.16 = sext i32 %add8.i.i.us.16 to i64 - %arrayidx.i.i.us.16 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.16 - store float 0.000000e+00, float* %arrayidx.i.i.us.16, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.16 - -if.end.i.i.us.16: ; preds = %if.then.i.i.us.16, %if.end.i.i.us.15 - br i1 %cmp.i.i.us.17, label %if.then.i.i.us.17, label %if.end.i.i.us.17 - -if.then.i.i.us.17: ; preds = %if.end.i.i.us.16 - %add8.i.i.us.17 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.17 - %idxprom.i.i.us.17 = sext i32 %add8.i.i.us.17 to i64 - %arrayidx.i.i.us.17 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.17 - store float 0.000000e+00, float* %arrayidx.i.i.us.17, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.17 - -if.end.i.i.us.17: ; preds = %if.then.i.i.us.17, %if.end.i.i.us.16 - br i1 %cmp.i.i.us.18, label %if.then.i.i.us.18, label %if.end.i.i.us.18 - -if.then.i.i.us.18: ; preds = %if.end.i.i.us.17 - %add8.i.i.us.18 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.18 - %idxprom.i.i.us.18 = sext i32 %add8.i.i.us.18 to i64 - %arrayidx.i.i.us.18 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.18 - store float 0.000000e+00, float* %arrayidx.i.i.us.18, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.18 - -if.end.i.i.us.18: ; preds = %if.then.i.i.us.18, %if.end.i.i.us.17 - br i1 %cmp.i.i.us.19, label %if.then.i.i.us.19, label %if.end.i.i.us.19 - -if.then.i.i.us.19: ; preds = %if.end.i.i.us.18 - %add8.i.i.us.19 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.19 - %idxprom.i.i.us.19 = sext i32 %add8.i.i.us.19 to i64 - %arrayidx.i.i.us.19 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.19 - store float 0.000000e+00, float* %arrayidx.i.i.us.19, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.19 - -if.end.i.i.us.19: ; preds = %if.then.i.i.us.19, %if.end.i.i.us.18 - br i1 %cmp.i.i.us.20, label %if.then.i.i.us.20, label %if.end.i.i.us.20 - -if.then.i.i.us.20: ; preds = %if.end.i.i.us.19 - %add8.i.i.us.20 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.20 - %idxprom.i.i.us.20 = sext i32 %add8.i.i.us.20 to i64 - %arrayidx.i.i.us.20 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.20 - store float 0.000000e+00, float* %arrayidx.i.i.us.20, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.20 - -if.end.i.i.us.20: ; preds = %if.then.i.i.us.20, %if.end.i.i.us.19 - br i1 %cmp.i.i.us.21, label %if.then.i.i.us.21, label %if.end.i.i.us.21 - -if.then.i.i.us.21: ; preds = %if.end.i.i.us.20 - %add8.i.i.us.21 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.21 - %idxprom.i.i.us.21 = sext i32 %add8.i.i.us.21 to i64 - %arrayidx.i.i.us.21 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.21 - store float 0.000000e+00, float* %arrayidx.i.i.us.21, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.21 - -if.end.i.i.us.21: ; preds = %if.then.i.i.us.21, %if.end.i.i.us.20 - br i1 %cmp.i.i.us.22, label %if.then.i.i.us.22, label %if.end.i.i.us.22 - -if.then.i.i.us.22: ; preds = %if.end.i.i.us.21 - %add8.i.i.us.22 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.22 - %idxprom.i.i.us.22 = sext i32 %add8.i.i.us.22 to i64 - %arrayidx.i.i.us.22 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.22 - store float 0.000000e+00, float* %arrayidx.i.i.us.22, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.22 - -if.end.i.i.us.22: ; preds = %if.then.i.i.us.22, %if.end.i.i.us.21 - br i1 %cmp.i.i.us.23, label %if.then.i.i.us.23, label %if.end.i.i.us.23 - -if.then.i.i.us.23: ; preds = %if.end.i.i.us.22 - %add8.i.i.us.23 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.23 - %idxprom.i.i.us.23 = sext i32 %add8.i.i.us.23 to i64 - %arrayidx.i.i.us.23 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.23 - store float 0.000000e+00, float* %arrayidx.i.i.us.23, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.23 - -if.end.i.i.us.23: ; preds = %if.then.i.i.us.23, %if.end.i.i.us.22 - br i1 %cmp.i.i.us.24, label %if.then.i.i.us.24, label %if.end.i.i.us.24 - -if.then.i.i.us.24: ; preds = %if.end.i.i.us.23 - %add8.i.i.us.24 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.24 - %idxprom.i.i.us.24 = sext i32 %add8.i.i.us.24 to i64 - %arrayidx.i.i.us.24 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.24 - store float 0.000000e+00, float* %arrayidx.i.i.us.24, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.24 - -if.end.i.i.us.24: ; preds = %if.then.i.i.us.24, %if.end.i.i.us.23 - br i1 %cmp.i.i.us.25, label %if.then.i.i.us.25, label %if.end.i.i.us.25 - -if.then.i.i.us.25: ; preds = %if.end.i.i.us.24 - %add8.i.i.us.25 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.25 - %idxprom.i.i.us.25 = sext i32 %add8.i.i.us.25 to i64 - %arrayidx.i.i.us.25 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.25 - store float 0.000000e+00, float* %arrayidx.i.i.us.25, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.25 - -if.end.i.i.us.25: ; preds = %if.then.i.i.us.25, %if.end.i.i.us.24 - br i1 %cmp.i.i.us.26, label %if.then.i.i.us.26, label %if.end.i.i.us.26 - -if.then.i.i.us.26: ; preds = %if.end.i.i.us.25 - %add8.i.i.us.26 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.26 - %idxprom.i.i.us.26 = sext i32 %add8.i.i.us.26 to i64 - %arrayidx.i.i.us.26 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.26 - store float 0.000000e+00, float* %arrayidx.i.i.us.26, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.26 - -if.end.i.i.us.26: ; preds = %if.then.i.i.us.26, %if.end.i.i.us.25 - br i1 %cmp.i.i.us.27, label %if.then.i.i.us.27, label %if.end.i.i.us.27 - -if.then.i.i.us.27: ; preds = %if.end.i.i.us.26 - %add8.i.i.us.27 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.27 - %idxprom.i.i.us.27 = sext i32 %add8.i.i.us.27 to i64 - %arrayidx.i.i.us.27 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.27 - store float 0.000000e+00, float* %arrayidx.i.i.us.27, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.27 - -if.end.i.i.us.27: ; preds = %if.then.i.i.us.27, %if.end.i.i.us.26 - br i1 %cmp.i.i.us.28, label %if.then.i.i.us.28, label %if.end.i.i.us.28 - -if.then.i.i.us.28: ; preds = %if.end.i.i.us.27 - %add8.i.i.us.28 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.28 - %idxprom.i.i.us.28 = sext i32 %add8.i.i.us.28 to i64 - %arrayidx.i.i.us.28 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.28 - store float 0.000000e+00, float* %arrayidx.i.i.us.28, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.28 - -if.end.i.i.us.28: ; preds = %if.then.i.i.us.28, %if.end.i.i.us.27 - br i1 %cmp.i.i.us.29, label %if.then.i.i.us.29, label %if.end.i.i.us.29 - -if.then.i.i.us.29: ; preds = %if.end.i.i.us.28 - %add8.i.i.us.29 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.29 - %idxprom.i.i.us.29 = sext i32 %add8.i.i.us.29 to i64 - %arrayidx.i.i.us.29 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.29 - store float 0.000000e+00, float* %arrayidx.i.i.us.29, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.29 - -if.end.i.i.us.29: ; preds = %if.then.i.i.us.29, %if.end.i.i.us.28 - br i1 %cmp.i.i.us.30, label %if.then.i.i.us.30, label %if.end.i.i.us.30 - -if.then.i.i.us.30: ; preds = %if.end.i.i.us.29 - %add8.i.i.us.30 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.30 - %idxprom.i.i.us.30 = sext i32 %add8.i.i.us.30 to i64 - %arrayidx.i.i.us.30 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.30 - store float 0.000000e+00, float* %arrayidx.i.i.us.30, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.30 - -if.end.i.i.us.30: ; preds = %if.then.i.i.us.30, %if.end.i.i.us.29 - br i1 %cmp.i.i.us.31, label %if.then.i.i.us.31, label %pregion_for_end.i.i - -if.then.i.i.us.31: ; preds = %if.end.i.i.us.30 - %add8.i.i.us.31 = add nsw i32 %reass.mul.i.i, %conv.i.i.us.31 - %idxprom.i.i.us.31 = sext i32 %add8.i.i.us.31 to i64 - %arrayidx.i.i.us.31 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.31 - store float 0.000000e+00, float* %arrayidx.i.i.us.31, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -if.then.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7 - %add8.i.i.us.us.7.1 = add nsw i32 %reass.mul.i.i.us.7, %conv.i.i.us.us.7.1 - %idxprom.i.i.us.us.7.1 = sext i32 %add8.i.i.us.us.7.1 to i64 - %arrayidx.i.i.us.us.7.1 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.7.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7.1 = shl i64 %add1.i.i.i.us.us.7.1, 32 - %147 = ashr exact i64 %sext.i.i.us.us.7.1, 32 - br label %for.body.i.i.us.us.7.1 - -for.body.i.i.us.us.7.1: ; preds = %for.body.i.i.us.us.7.1, %if.then.i.i.us.us.7.1 - %indvars.iv.next.i.i3.us.us.7.1 = phi i64 [ %indvars.iv.next.i.i.us.us.7.1, %for.body.i.i.us.us.7.1 ], [ 0, %if.then.i.i.us.us.7.1 ] - %148 = phi float [ %154, %for.body.i.i.us.us.7.1 ], [ 0.000000e+00, %if.then.i.i.us.us.7.1 ] - %149 = add nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %137 - %arrayidx24.i.i.us.us.7.1 = getelementptr inbounds float, float* %16, i64 %149 - %150 = load float, float* %arrayidx24.i.i.us.us.7.1, align 4, !tbaa !12 - %151 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.7.1, %27 - %152 = add nsw i64 %151, %147 - %arrayidx28.i.i.us.us.7.1 = getelementptr inbounds float, float* %19, i64 %152 - %153 = load float, float* %arrayidx28.i.i.us.us.7.1, align 4, !tbaa !12 - %154 = tail call float @llvm.fmuladd.f32(float %150, float %153, float %148) #2 - store float %154, float* %arrayidx.i.i.us.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7.1, 1 - %exitcond.not.i.i.us.us.7.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.7.1, %27 - br i1 %exitcond.not.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1.loopexit, label %for.body.i.i.us.us.7.1, !llvm.loop !19 - -if.end.i.i.us.us.7.1.loopexit: ; preds = %for.body.i.i.us.us.7.1 - br label %if.end.i.i.us.us.7.1 - -if.end.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7.1.loopexit, %if.end.i.i.us.us.7 - %155 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %155, 32 - br i1 %exitcond.7.not.1, label %_pocl_kernel_doitgen_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !23 - -if.then.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6 - %add8.i.i.us.us.6.1 = add nsw i32 %reass.mul.i.i.us.6, %conv.i.i.us.us.6.1 - %idxprom.i.i.us.us.6.1 = sext i32 %add8.i.i.us.us.6.1 to i64 - %arrayidx.i.i.us.us.6.1 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.6.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6.1 = shl i64 %add1.i.i.i.us.us.6.1, 32 - %156 = ashr exact i64 %sext.i.i.us.us.6.1, 32 - br label %for.body.i.i.us.us.6.1 - -for.body.i.i.us.us.6.1: ; preds = %for.body.i.i.us.us.6.1, %if.then.i.i.us.us.6.1 - %indvars.iv.next.i.i3.us.us.6.1 = phi i64 [ %indvars.iv.next.i.i.us.us.6.1, %for.body.i.i.us.us.6.1 ], [ 0, %if.then.i.i.us.us.6.1 ] - %157 = phi float [ %163, %for.body.i.i.us.us.6.1 ], [ 0.000000e+00, %if.then.i.i.us.us.6.1 ] - %158 = add nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %126 - %arrayidx24.i.i.us.us.6.1 = getelementptr inbounds float, float* %16, i64 %158 - %159 = load float, float* %arrayidx24.i.i.us.us.6.1, align 4, !tbaa !12 - %160 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.6.1, %27 - %161 = add nsw i64 %160, %156 - %arrayidx28.i.i.us.us.6.1 = getelementptr inbounds float, float* %19, i64 %161 - %162 = load float, float* %arrayidx28.i.i.us.us.6.1, align 4, !tbaa !12 - %163 = tail call float @llvm.fmuladd.f32(float %159, float %162, float %157) #2 - store float %163, float* %arrayidx.i.i.us.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6.1, 1 - %exitcond.not.i.i.us.us.6.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.6.1, %27 - br i1 %exitcond.not.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1.loopexit, label %for.body.i.i.us.us.6.1, !llvm.loop !19 - -if.end.i.i.us.us.6.1.loopexit: ; preds = %for.body.i.i.us.us.6.1 - br label %if.end.i.i.us.us.6.1 - -if.end.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6.1.loopexit, %if.end.i.i.us.us.6 - %164 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %164, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !23 - -if.then.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5 - %add8.i.i.us.us.5.1 = add nsw i32 %reass.mul.i.i.us.5, %conv.i.i.us.us.5.1 - %idxprom.i.i.us.us.5.1 = sext i32 %add8.i.i.us.us.5.1 to i64 - %arrayidx.i.i.us.us.5.1 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.5.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5.1 = shl i64 %add1.i.i.i.us.us.5.1, 32 - %165 = ashr exact i64 %sext.i.i.us.us.5.1, 32 - br label %for.body.i.i.us.us.5.1 - -for.body.i.i.us.us.5.1: ; preds = %for.body.i.i.us.us.5.1, %if.then.i.i.us.us.5.1 - %indvars.iv.next.i.i3.us.us.5.1 = phi i64 [ %indvars.iv.next.i.i.us.us.5.1, %for.body.i.i.us.us.5.1 ], [ 0, %if.then.i.i.us.us.5.1 ] - %166 = phi float [ %172, %for.body.i.i.us.us.5.1 ], [ 0.000000e+00, %if.then.i.i.us.us.5.1 ] - %167 = add nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %115 - %arrayidx24.i.i.us.us.5.1 = getelementptr inbounds float, float* %16, i64 %167 - %168 = load float, float* %arrayidx24.i.i.us.us.5.1, align 4, !tbaa !12 - %169 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.5.1, %27 - %170 = add nsw i64 %169, %165 - %arrayidx28.i.i.us.us.5.1 = getelementptr inbounds float, float* %19, i64 %170 - %171 = load float, float* %arrayidx28.i.i.us.us.5.1, align 4, !tbaa !12 - %172 = tail call float @llvm.fmuladd.f32(float %168, float %171, float %166) #2 - store float %172, float* %arrayidx.i.i.us.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5.1, 1 - %exitcond.not.i.i.us.us.5.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.5.1, %27 - br i1 %exitcond.not.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1.loopexit, label %for.body.i.i.us.us.5.1, !llvm.loop !19 - -if.end.i.i.us.us.5.1.loopexit: ; preds = %for.body.i.i.us.us.5.1 - br label %if.end.i.i.us.us.5.1 - -if.end.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5.1.loopexit, %if.end.i.i.us.us.5 - %173 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %173, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !23 - -if.then.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4 - %add8.i.i.us.us.4.1 = add nsw i32 %reass.mul.i.i.us.4, %conv.i.i.us.us.4.1 - %idxprom.i.i.us.us.4.1 = sext i32 %add8.i.i.us.us.4.1 to i64 - %arrayidx.i.i.us.us.4.1 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.4.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4.1 = shl i64 %add1.i.i.i.us.us.4.1, 32 - %174 = ashr exact i64 %sext.i.i.us.us.4.1, 32 - br label %for.body.i.i.us.us.4.1 - -for.body.i.i.us.us.4.1: ; preds = %for.body.i.i.us.us.4.1, %if.then.i.i.us.us.4.1 - %indvars.iv.next.i.i3.us.us.4.1 = phi i64 [ %indvars.iv.next.i.i.us.us.4.1, %for.body.i.i.us.us.4.1 ], [ 0, %if.then.i.i.us.us.4.1 ] - %175 = phi float [ %181, %for.body.i.i.us.us.4.1 ], [ 0.000000e+00, %if.then.i.i.us.us.4.1 ] - %176 = add nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %104 - %arrayidx24.i.i.us.us.4.1 = getelementptr inbounds float, float* %16, i64 %176 - %177 = load float, float* %arrayidx24.i.i.us.us.4.1, align 4, !tbaa !12 - %178 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.4.1, %27 - %179 = add nsw i64 %178, %174 - %arrayidx28.i.i.us.us.4.1 = getelementptr inbounds float, float* %19, i64 %179 - %180 = load float, float* %arrayidx28.i.i.us.us.4.1, align 4, !tbaa !12 - %181 = tail call float @llvm.fmuladd.f32(float %177, float %180, float %175) #2 - store float %181, float* %arrayidx.i.i.us.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4.1, 1 - %exitcond.not.i.i.us.us.4.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.4.1, %27 - br i1 %exitcond.not.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1.loopexit, label %for.body.i.i.us.us.4.1, !llvm.loop !19 - -if.end.i.i.us.us.4.1.loopexit: ; preds = %for.body.i.i.us.us.4.1 - br label %if.end.i.i.us.us.4.1 - -if.end.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4.1.loopexit, %if.end.i.i.us.us.4 - %182 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %182, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !23 - -if.then.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3 - %add8.i.i.us.us.3.1 = add nsw i32 %reass.mul.i.i.us.3, %conv.i.i.us.us.3.1 - %idxprom.i.i.us.us.3.1 = sext i32 %add8.i.i.us.us.3.1 to i64 - %arrayidx.i.i.us.us.3.1 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.3.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3.1 = shl i64 %add1.i.i.i.us.us.3.1, 32 - %183 = ashr exact i64 %sext.i.i.us.us.3.1, 32 - br label %for.body.i.i.us.us.3.1 - -for.body.i.i.us.us.3.1: ; preds = %for.body.i.i.us.us.3.1, %if.then.i.i.us.us.3.1 - %indvars.iv.next.i.i3.us.us.3.1 = phi i64 [ %indvars.iv.next.i.i.us.us.3.1, %for.body.i.i.us.us.3.1 ], [ 0, %if.then.i.i.us.us.3.1 ] - %184 = phi float [ %190, %for.body.i.i.us.us.3.1 ], [ 0.000000e+00, %if.then.i.i.us.us.3.1 ] - %185 = add nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %93 - %arrayidx24.i.i.us.us.3.1 = getelementptr inbounds float, float* %16, i64 %185 - %186 = load float, float* %arrayidx24.i.i.us.us.3.1, align 4, !tbaa !12 - %187 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.3.1, %27 - %188 = add nsw i64 %187, %183 - %arrayidx28.i.i.us.us.3.1 = getelementptr inbounds float, float* %19, i64 %188 - %189 = load float, float* %arrayidx28.i.i.us.us.3.1, align 4, !tbaa !12 - %190 = tail call float @llvm.fmuladd.f32(float %186, float %189, float %184) #2 - store float %190, float* %arrayidx.i.i.us.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3.1, 1 - %exitcond.not.i.i.us.us.3.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.3.1, %27 - br i1 %exitcond.not.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1.loopexit, label %for.body.i.i.us.us.3.1, !llvm.loop !19 - -if.end.i.i.us.us.3.1.loopexit: ; preds = %for.body.i.i.us.us.3.1 - br label %if.end.i.i.us.us.3.1 - -if.end.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3.1.loopexit, %if.end.i.i.us.us.3 - %191 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %191, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !23 - -if.then.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2 - %add8.i.i.us.us.2.1 = add nsw i32 %reass.mul.i.i.us.2, %conv.i.i.us.us.2.1 - %idxprom.i.i.us.us.2.1 = sext i32 %add8.i.i.us.us.2.1 to i64 - %arrayidx.i.i.us.us.2.1 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.2.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2.1 = shl i64 %add1.i.i.i.us.us.2.1, 32 - %192 = ashr exact i64 %sext.i.i.us.us.2.1, 32 - br label %for.body.i.i.us.us.2.1 - -for.body.i.i.us.us.2.1: ; preds = %for.body.i.i.us.us.2.1, %if.then.i.i.us.us.2.1 - %indvars.iv.next.i.i3.us.us.2.1 = phi i64 [ %indvars.iv.next.i.i.us.us.2.1, %for.body.i.i.us.us.2.1 ], [ 0, %if.then.i.i.us.us.2.1 ] - %193 = phi float [ %199, %for.body.i.i.us.us.2.1 ], [ 0.000000e+00, %if.then.i.i.us.us.2.1 ] - %194 = add nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %82 - %arrayidx24.i.i.us.us.2.1 = getelementptr inbounds float, float* %16, i64 %194 - %195 = load float, float* %arrayidx24.i.i.us.us.2.1, align 4, !tbaa !12 - %196 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.2.1, %27 - %197 = add nsw i64 %196, %192 - %arrayidx28.i.i.us.us.2.1 = getelementptr inbounds float, float* %19, i64 %197 - %198 = load float, float* %arrayidx28.i.i.us.us.2.1, align 4, !tbaa !12 - %199 = tail call float @llvm.fmuladd.f32(float %195, float %198, float %193) #2 - store float %199, float* %arrayidx.i.i.us.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2.1, 1 - %exitcond.not.i.i.us.us.2.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.2.1, %27 - br i1 %exitcond.not.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1.loopexit, label %for.body.i.i.us.us.2.1, !llvm.loop !19 - -if.end.i.i.us.us.2.1.loopexit: ; preds = %for.body.i.i.us.us.2.1 - br label %if.end.i.i.us.us.2.1 - -if.end.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2.1.loopexit, %if.end.i.i.us.us.2 - %200 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %200, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !23 - -if.then.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1 - %add8.i.i.us.us.1.1 = add nsw i32 %reass.mul.i.i.us.1, %conv.i.i.us.us.1.1 - %idxprom.i.i.us.us.1.1 = sext i32 %add8.i.i.us.us.1.1 to i64 - %arrayidx.i.i.us.us.1.1 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.1.1 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1.1 = shl i64 %add1.i.i.i.us.us.1.1, 32 - %201 = ashr exact i64 %sext.i.i.us.us.1.1, 32 - br label %for.body.i.i.us.us.1.1 - -for.body.i.i.us.us.1.1: ; preds = %for.body.i.i.us.us.1.1, %if.then.i.i.us.us.1.1 - %indvars.iv.next.i.i3.us.us.1.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1.1, %for.body.i.i.us.us.1.1 ], [ 0, %if.then.i.i.us.us.1.1 ] - %202 = phi float [ %208, %for.body.i.i.us.us.1.1 ], [ 0.000000e+00, %if.then.i.i.us.us.1.1 ] - %203 = add nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %61 - %arrayidx24.i.i.us.us.1.1 = getelementptr inbounds float, float* %16, i64 %203 - %204 = load float, float* %arrayidx24.i.i.us.us.1.1, align 4, !tbaa !12 - %205 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.1.1, %27 - %206 = add nsw i64 %205, %201 - %arrayidx28.i.i.us.us.1.1 = getelementptr inbounds float, float* %19, i64 %206 - %207 = load float, float* %arrayidx28.i.i.us.us.1.1, align 4, !tbaa !12 - %208 = tail call float @llvm.fmuladd.f32(float %204, float %207, float %202) #2 - store float %208, float* %arrayidx.i.i.us.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1.1, 1 - %exitcond.not.i.i.us.us.1.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1.1, %27 - br i1 %exitcond.not.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1.loopexit, label %for.body.i.i.us.us.1.1, !llvm.loop !19 - -if.end.i.i.us.us.1.1.loopexit: ; preds = %for.body.i.i.us.us.1.1 - br label %if.end.i.i.us.us.1.1 - -if.end.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1.1.loopexit, %if.end.i.i.us.us.1 - %209 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %209, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !23 - -if.then.i.i.us.us.146: ; preds = %if.end.i.i.us.us - %add8.i.i.us.us.142 = add nsw i32 %reass.mul.i.i.us, %conv.i.i.us.us.139 - %idxprom.i.i.us.us.143 = sext i32 %add8.i.i.us.us.142 to i64 - %arrayidx.i.i.us.us.144 = getelementptr inbounds float, float* %22, i64 %idxprom.i.i.us.us.143 - store float 0.000000e+00, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.145 = shl i64 %add1.i.i.i.us.us.138, 32 - %210 = ashr exact i64 %sext.i.i.us.us.145, 32 - br label %for.body.i.i.us.us.152 - -for.body.i.i.us.us.152: ; preds = %for.body.i.i.us.us.152, %if.then.i.i.us.us.146 - %indvars.iv.next.i.i3.us.us.147 = phi i64 [ %indvars.iv.next.i.i.us.us.150, %for.body.i.i.us.us.152 ], [ 0, %if.then.i.i.us.us.146 ] - %211 = phi float [ %217, %for.body.i.i.us.us.152 ], [ 0.000000e+00, %if.then.i.i.us.us.146 ] - %212 = add nsw i64 %indvars.iv.next.i.i3.us.us.147, %59 - %arrayidx24.i.i.us.us.148 = getelementptr inbounds float, float* %16, i64 %212 - %213 = load float, float* %arrayidx24.i.i.us.us.148, align 4, !tbaa !12 - %214 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.us.147, %27 - %215 = add nsw i64 %214, %210 - %arrayidx28.i.i.us.us.149 = getelementptr inbounds float, float* %19, i64 %215 - %216 = load float, float* %arrayidx28.i.i.us.us.149, align 4, !tbaa !12 - %217 = tail call float @llvm.fmuladd.f32(float %213, float %216, float %211) #2 - store float %217, float* %arrayidx.i.i.us.us.144, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.150 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.147, 1 - %exitcond.not.i.i.us.us.151 = icmp eq i64 %indvars.iv.next.i.i.us.us.150, %27 - br i1 %exitcond.not.i.i.us.us.151, label %if.end.i.i.us.us.153.loopexit, label %for.body.i.i.us.us.152, !llvm.loop !19 - -if.end.i.i.us.us.153.loopexit: ; preds = %for.body.i.i.us.us.152 - br label %if.end.i.i.us.us.153 - -if.end.i.i.us.us.153: ; preds = %if.end.i.i.us.us.153.loopexit, %if.end.i.i.us.us - %218 = add nuw nsw i64 %_local_id_x.i.0.us.us, 2 - %exitcond.not.1 = icmp eq i64 %218, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !23 -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"int", !"int", !"int", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"int", !"int", !"int", !"float*", !"float*", !"float*", !"int"} -!9 = !{!"", !"", !"", !"", !"", !"", !""} -!10 = !{!"nr", !"nq", !"np", !"A", !"C4", !"sum", !"r"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17, !18} -!17 = distinct !{} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.unroll.disable"} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !18} -!23 = distinct !{!23, !24} -!24 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/doitgen_kernel2.ll b/pocl_irs/doitgen_kernel2.ll deleted file mode 100644 index 2c443ca..0000000 --- a/pocl_irs/doitgen_kernel2.ll +++ /dev/null @@ -1,1776 +0,0 @@ -; ModuleID = './EC/AOFIJPJJDGBDGAAGPPJIBOAFNDFEMPBMCHKBI/doitgen_kernel2/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_doitgen_kernel2(i32 %0, i32 %1, i32 %2, float* nocapture %3, float* nocapture readnone %4, float* nocapture readonly %5, i32 %6, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %7, i64 %8, i64 %9, i64 %10) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %8, 5 - %mul3.i.i = shl i64 %9, 3 - %mul6.i = mul i32 %6, %1 - %conv.i.us = trunc i64 %mul.i.i to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %2 - %12 = trunc i64 %mul.i.i to i32 - %conv.i.us.1 = or i32 %12, 1 - %cmp.i.us.1 = icmp slt i32 %conv.i.us.1, %2 - %13 = trunc i64 %mul.i.i to i32 - %conv.i.us.2 = or i32 %13, 2 - %cmp.i.us.2 = icmp slt i32 %conv.i.us.2, %2 - %14 = trunc i64 %mul.i.i to i32 - %conv.i.us.3 = or i32 %14, 3 - %cmp.i.us.3 = icmp slt i32 %conv.i.us.3, %2 - %15 = trunc i64 %mul.i.i to i32 - %conv.i.us.4 = or i32 %15, 4 - %cmp.i.us.4 = icmp slt i32 %conv.i.us.4, %2 - %16 = trunc i64 %mul.i.i to i32 - %conv.i.us.5 = or i32 %16, 5 - %cmp.i.us.5 = icmp slt i32 %conv.i.us.5, %2 - %17 = trunc i64 %mul.i.i to i32 - %conv.i.us.6 = or i32 %17, 6 - %cmp.i.us.6 = icmp slt i32 %conv.i.us.6, %2 - %18 = trunc i64 %mul.i.i to i32 - %conv.i.us.7 = or i32 %18, 7 - %cmp.i.us.7 = icmp slt i32 %conv.i.us.7, %2 - %19 = trunc i64 %mul.i.i to i32 - %conv.i.us.8 = or i32 %19, 8 - %cmp.i.us.8 = icmp slt i32 %conv.i.us.8, %2 - %20 = trunc i64 %mul.i.i to i32 - %conv.i.us.9 = or i32 %20, 9 - %cmp.i.us.9 = icmp slt i32 %conv.i.us.9, %2 - %21 = trunc i64 %mul.i.i to i32 - %conv.i.us.10 = or i32 %21, 10 - %cmp.i.us.10 = icmp slt i32 %conv.i.us.10, %2 - %22 = trunc i64 %mul.i.i to i32 - %conv.i.us.11 = or i32 %22, 11 - %cmp.i.us.11 = icmp slt i32 %conv.i.us.11, %2 - %23 = trunc i64 %mul.i.i to i32 - %conv.i.us.12 = or i32 %23, 12 - %cmp.i.us.12 = icmp slt i32 %conv.i.us.12, %2 - %24 = trunc i64 %mul.i.i to i32 - %conv.i.us.13 = or i32 %24, 13 - %cmp.i.us.13 = icmp slt i32 %conv.i.us.13, %2 - %25 = trunc i64 %mul.i.i to i32 - %conv.i.us.14 = or i32 %25, 14 - %cmp.i.us.14 = icmp slt i32 %conv.i.us.14, %2 - %26 = trunc i64 %mul.i.i to i32 - %conv.i.us.15 = or i32 %26, 15 - %cmp.i.us.15 = icmp slt i32 %conv.i.us.15, %2 - %27 = trunc i64 %mul.i.i to i32 - %conv.i.us.16 = or i32 %27, 16 - %cmp.i.us.16 = icmp slt i32 %conv.i.us.16, %2 - %28 = trunc i64 %mul.i.i to i32 - %conv.i.us.17 = or i32 %28, 17 - %cmp.i.us.17 = icmp slt i32 %conv.i.us.17, %2 - %29 = trunc i64 %mul.i.i to i32 - %conv.i.us.18 = or i32 %29, 18 - %cmp.i.us.18 = icmp slt i32 %conv.i.us.18, %2 - %30 = trunc i64 %mul.i.i to i32 - %conv.i.us.19 = or i32 %30, 19 - %cmp.i.us.19 = icmp slt i32 %conv.i.us.19, %2 - %31 = trunc i64 %mul.i.i to i32 - %conv.i.us.20 = or i32 %31, 20 - %cmp.i.us.20 = icmp slt i32 %conv.i.us.20, %2 - %32 = trunc i64 %mul.i.i to i32 - %conv.i.us.21 = or i32 %32, 21 - %cmp.i.us.21 = icmp slt i32 %conv.i.us.21, %2 - %33 = trunc i64 %mul.i.i to i32 - %conv.i.us.22 = or i32 %33, 22 - %cmp.i.us.22 = icmp slt i32 %conv.i.us.22, %2 - %34 = trunc i64 %mul.i.i to i32 - %conv.i.us.23 = or i32 %34, 23 - %cmp.i.us.23 = icmp slt i32 %conv.i.us.23, %2 - %35 = trunc i64 %mul.i.i to i32 - %conv.i.us.24 = or i32 %35, 24 - %cmp.i.us.24 = icmp slt i32 %conv.i.us.24, %2 - %36 = trunc i64 %mul.i.i to i32 - %conv.i.us.25 = or i32 %36, 25 - %cmp.i.us.25 = icmp slt i32 %conv.i.us.25, %2 - %37 = trunc i64 %mul.i.i to i32 - %conv.i.us.26 = or i32 %37, 26 - %cmp.i.us.26 = icmp slt i32 %conv.i.us.26, %2 - %38 = trunc i64 %mul.i.i to i32 - %conv.i.us.27 = or i32 %38, 27 - %cmp.i.us.27 = icmp slt i32 %conv.i.us.27, %2 - %39 = trunc i64 %mul.i.i to i32 - %conv.i.us.28 = or i32 %39, 28 - %cmp.i.us.28 = icmp slt i32 %conv.i.us.28, %2 - %40 = trunc i64 %mul.i.i to i32 - %conv.i.us.29 = or i32 %40, 29 - %cmp.i.us.29 = icmp slt i32 %conv.i.us.29, %2 - %41 = trunc i64 %mul.i.i to i32 - %conv.i.us.30 = or i32 %41, 30 - %cmp.i.us.30 = icmp slt i32 %conv.i.us.30, %2 - %42 = trunc i64 %mul.i.i to i32 - %conv.i.us.31 = or i32 %42, 31 - %cmp.i.us.31 = icmp slt i32 %conv.i.us.31, %2 - br label %pregion_for_entry.pregion_for_init.i - -pregion_for_entry.pregion_for_init.i: ; preds = %pregion_for_end.i, %11 - %_local_id_y.0 = phi i64 [ 0, %11 ], [ %46, %pregion_for_end.i ] - %add6.i.i = add nuw nsw i64 %_local_id_y.0, %mul3.i.i - %conv2.i = trunc i64 %add6.i.i to i32 - %cmp4.i = icmp slt i32 %conv2.i, %1 - %reass.add.i = add i32 %mul6.i, %conv2.i - %reass.mul.i = mul i32 %reass.add.i, %2 - br i1 %cmp4.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_end.i - -pregion_for_entry.entry.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i - br i1 %cmp.i.us, label %if.then.i.us, label %if.end.r_exit.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us.preheader - %add8.i.us = add i32 %reass.mul.i, %conv.i.us - %idxprom.i.us = sext i32 %add8.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %5, i64 %idxprom.i.us - %43 = bitcast float* %arrayidx.i.us to i32* - %44 = load i32, i32* %43, align 4, !tbaa !12 - %arrayidx15.i.us = getelementptr inbounds float, float* %3, i64 %idxprom.i.us - %45 = bitcast float* %arrayidx15.i.us to i32* - store i32 %44, i32* %45, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us.preheader - br i1 %cmp.i.us.1, label %if.then.i.us.1, label %if.end.r_exit.i.us.1 - -pregion_for_end.i: ; preds = %if.then.i.us.31, %if.end.r_exit.i.us.30, %pregion_for_entry.pregion_for_init.i - %46 = add nuw nsw i64 %_local_id_y.0, 1 - %exitcond.not = icmp eq i64 %46, 8 - br i1 %exitcond.not, label %doitgen_kernel2.exit, label %pregion_for_entry.pregion_for_init.i, !llvm.loop !19 - -doitgen_kernel2.exit: ; preds = %pregion_for_end.i - ret void - -if.then.i.us.1: ; preds = %if.end.r_exit.i.us - %add8.i.us.1 = add i32 %reass.mul.i, %conv.i.us.1 - %idxprom.i.us.1 = sext i32 %add8.i.us.1 to i64 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.1 - %47 = bitcast float* %arrayidx.i.us.1 to i32* - %48 = load i32, i32* %47, align 4, !tbaa !12 - %arrayidx15.i.us.1 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.1 - %49 = bitcast float* %arrayidx15.i.us.1 to i32* - store i32 %48, i32* %49, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.1 - -if.end.r_exit.i.us.1: ; preds = %if.then.i.us.1, %if.end.r_exit.i.us - br i1 %cmp.i.us.2, label %if.then.i.us.2, label %if.end.r_exit.i.us.2 - -if.then.i.us.2: ; preds = %if.end.r_exit.i.us.1 - %add8.i.us.2 = add i32 %reass.mul.i, %conv.i.us.2 - %idxprom.i.us.2 = sext i32 %add8.i.us.2 to i64 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.2 - %50 = bitcast float* %arrayidx.i.us.2 to i32* - %51 = load i32, i32* %50, align 4, !tbaa !12 - %arrayidx15.i.us.2 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.2 - %52 = bitcast float* %arrayidx15.i.us.2 to i32* - store i32 %51, i32* %52, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.2 - -if.end.r_exit.i.us.2: ; preds = %if.then.i.us.2, %if.end.r_exit.i.us.1 - br i1 %cmp.i.us.3, label %if.then.i.us.3, label %if.end.r_exit.i.us.3 - -if.then.i.us.3: ; preds = %if.end.r_exit.i.us.2 - %add8.i.us.3 = add i32 %reass.mul.i, %conv.i.us.3 - %idxprom.i.us.3 = sext i32 %add8.i.us.3 to i64 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.3 - %53 = bitcast float* %arrayidx.i.us.3 to i32* - %54 = load i32, i32* %53, align 4, !tbaa !12 - %arrayidx15.i.us.3 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.3 - %55 = bitcast float* %arrayidx15.i.us.3 to i32* - store i32 %54, i32* %55, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.3 - -if.end.r_exit.i.us.3: ; preds = %if.then.i.us.3, %if.end.r_exit.i.us.2 - br i1 %cmp.i.us.4, label %if.then.i.us.4, label %if.end.r_exit.i.us.4 - -if.then.i.us.4: ; preds = %if.end.r_exit.i.us.3 - %add8.i.us.4 = add i32 %reass.mul.i, %conv.i.us.4 - %idxprom.i.us.4 = sext i32 %add8.i.us.4 to i64 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.4 - %56 = bitcast float* %arrayidx.i.us.4 to i32* - %57 = load i32, i32* %56, align 4, !tbaa !12 - %arrayidx15.i.us.4 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.4 - %58 = bitcast float* %arrayidx15.i.us.4 to i32* - store i32 %57, i32* %58, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.4 - -if.end.r_exit.i.us.4: ; preds = %if.then.i.us.4, %if.end.r_exit.i.us.3 - br i1 %cmp.i.us.5, label %if.then.i.us.5, label %if.end.r_exit.i.us.5 - -if.then.i.us.5: ; preds = %if.end.r_exit.i.us.4 - %add8.i.us.5 = add i32 %reass.mul.i, %conv.i.us.5 - %idxprom.i.us.5 = sext i32 %add8.i.us.5 to i64 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.5 - %59 = bitcast float* %arrayidx.i.us.5 to i32* - %60 = load i32, i32* %59, align 4, !tbaa !12 - %arrayidx15.i.us.5 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.5 - %61 = bitcast float* %arrayidx15.i.us.5 to i32* - store i32 %60, i32* %61, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.5 - -if.end.r_exit.i.us.5: ; preds = %if.then.i.us.5, %if.end.r_exit.i.us.4 - br i1 %cmp.i.us.6, label %if.then.i.us.6, label %if.end.r_exit.i.us.6 - -if.then.i.us.6: ; preds = %if.end.r_exit.i.us.5 - %add8.i.us.6 = add i32 %reass.mul.i, %conv.i.us.6 - %idxprom.i.us.6 = sext i32 %add8.i.us.6 to i64 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.6 - %62 = bitcast float* %arrayidx.i.us.6 to i32* - %63 = load i32, i32* %62, align 4, !tbaa !12 - %arrayidx15.i.us.6 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.6 - %64 = bitcast float* %arrayidx15.i.us.6 to i32* - store i32 %63, i32* %64, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.6 - -if.end.r_exit.i.us.6: ; preds = %if.then.i.us.6, %if.end.r_exit.i.us.5 - br i1 %cmp.i.us.7, label %if.then.i.us.7, label %if.end.r_exit.i.us.7 - -if.then.i.us.7: ; preds = %if.end.r_exit.i.us.6 - %add8.i.us.7 = add i32 %reass.mul.i, %conv.i.us.7 - %idxprom.i.us.7 = sext i32 %add8.i.us.7 to i64 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.7 - %65 = bitcast float* %arrayidx.i.us.7 to i32* - %66 = load i32, i32* %65, align 4, !tbaa !12 - %arrayidx15.i.us.7 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.7 - %67 = bitcast float* %arrayidx15.i.us.7 to i32* - store i32 %66, i32* %67, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.7 - -if.end.r_exit.i.us.7: ; preds = %if.then.i.us.7, %if.end.r_exit.i.us.6 - br i1 %cmp.i.us.8, label %if.then.i.us.8, label %if.end.r_exit.i.us.8 - -if.then.i.us.8: ; preds = %if.end.r_exit.i.us.7 - %add8.i.us.8 = add i32 %reass.mul.i, %conv.i.us.8 - %idxprom.i.us.8 = sext i32 %add8.i.us.8 to i64 - %arrayidx.i.us.8 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.8 - %68 = bitcast float* %arrayidx.i.us.8 to i32* - %69 = load i32, i32* %68, align 4, !tbaa !12 - %arrayidx15.i.us.8 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.8 - %70 = bitcast float* %arrayidx15.i.us.8 to i32* - store i32 %69, i32* %70, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.8 - -if.end.r_exit.i.us.8: ; preds = %if.then.i.us.8, %if.end.r_exit.i.us.7 - br i1 %cmp.i.us.9, label %if.then.i.us.9, label %if.end.r_exit.i.us.9 - -if.then.i.us.9: ; preds = %if.end.r_exit.i.us.8 - %add8.i.us.9 = add i32 %reass.mul.i, %conv.i.us.9 - %idxprom.i.us.9 = sext i32 %add8.i.us.9 to i64 - %arrayidx.i.us.9 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.9 - %71 = bitcast float* %arrayidx.i.us.9 to i32* - %72 = load i32, i32* %71, align 4, !tbaa !12 - %arrayidx15.i.us.9 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.9 - %73 = bitcast float* %arrayidx15.i.us.9 to i32* - store i32 %72, i32* %73, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.9 - -if.end.r_exit.i.us.9: ; preds = %if.then.i.us.9, %if.end.r_exit.i.us.8 - br i1 %cmp.i.us.10, label %if.then.i.us.10, label %if.end.r_exit.i.us.10 - -if.then.i.us.10: ; preds = %if.end.r_exit.i.us.9 - %add8.i.us.10 = add i32 %reass.mul.i, %conv.i.us.10 - %idxprom.i.us.10 = sext i32 %add8.i.us.10 to i64 - %arrayidx.i.us.10 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.10 - %74 = bitcast float* %arrayidx.i.us.10 to i32* - %75 = load i32, i32* %74, align 4, !tbaa !12 - %arrayidx15.i.us.10 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.10 - %76 = bitcast float* %arrayidx15.i.us.10 to i32* - store i32 %75, i32* %76, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.10 - -if.end.r_exit.i.us.10: ; preds = %if.then.i.us.10, %if.end.r_exit.i.us.9 - br i1 %cmp.i.us.11, label %if.then.i.us.11, label %if.end.r_exit.i.us.11 - -if.then.i.us.11: ; preds = %if.end.r_exit.i.us.10 - %add8.i.us.11 = add i32 %reass.mul.i, %conv.i.us.11 - %idxprom.i.us.11 = sext i32 %add8.i.us.11 to i64 - %arrayidx.i.us.11 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.11 - %77 = bitcast float* %arrayidx.i.us.11 to i32* - %78 = load i32, i32* %77, align 4, !tbaa !12 - %arrayidx15.i.us.11 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.11 - %79 = bitcast float* %arrayidx15.i.us.11 to i32* - store i32 %78, i32* %79, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.11 - -if.end.r_exit.i.us.11: ; preds = %if.then.i.us.11, %if.end.r_exit.i.us.10 - br i1 %cmp.i.us.12, label %if.then.i.us.12, label %if.end.r_exit.i.us.12 - -if.then.i.us.12: ; preds = %if.end.r_exit.i.us.11 - %add8.i.us.12 = add i32 %reass.mul.i, %conv.i.us.12 - %idxprom.i.us.12 = sext i32 %add8.i.us.12 to i64 - %arrayidx.i.us.12 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.12 - %80 = bitcast float* %arrayidx.i.us.12 to i32* - %81 = load i32, i32* %80, align 4, !tbaa !12 - %arrayidx15.i.us.12 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.12 - %82 = bitcast float* %arrayidx15.i.us.12 to i32* - store i32 %81, i32* %82, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.12 - -if.end.r_exit.i.us.12: ; preds = %if.then.i.us.12, %if.end.r_exit.i.us.11 - br i1 %cmp.i.us.13, label %if.then.i.us.13, label %if.end.r_exit.i.us.13 - -if.then.i.us.13: ; preds = %if.end.r_exit.i.us.12 - %add8.i.us.13 = add i32 %reass.mul.i, %conv.i.us.13 - %idxprom.i.us.13 = sext i32 %add8.i.us.13 to i64 - %arrayidx.i.us.13 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.13 - %83 = bitcast float* %arrayidx.i.us.13 to i32* - %84 = load i32, i32* %83, align 4, !tbaa !12 - %arrayidx15.i.us.13 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.13 - %85 = bitcast float* %arrayidx15.i.us.13 to i32* - store i32 %84, i32* %85, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.13 - -if.end.r_exit.i.us.13: ; preds = %if.then.i.us.13, %if.end.r_exit.i.us.12 - br i1 %cmp.i.us.14, label %if.then.i.us.14, label %if.end.r_exit.i.us.14 - -if.then.i.us.14: ; preds = %if.end.r_exit.i.us.13 - %add8.i.us.14 = add i32 %reass.mul.i, %conv.i.us.14 - %idxprom.i.us.14 = sext i32 %add8.i.us.14 to i64 - %arrayidx.i.us.14 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.14 - %86 = bitcast float* %arrayidx.i.us.14 to i32* - %87 = load i32, i32* %86, align 4, !tbaa !12 - %arrayidx15.i.us.14 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.14 - %88 = bitcast float* %arrayidx15.i.us.14 to i32* - store i32 %87, i32* %88, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.14 - -if.end.r_exit.i.us.14: ; preds = %if.then.i.us.14, %if.end.r_exit.i.us.13 - br i1 %cmp.i.us.15, label %if.then.i.us.15, label %if.end.r_exit.i.us.15 - -if.then.i.us.15: ; preds = %if.end.r_exit.i.us.14 - %add8.i.us.15 = add i32 %reass.mul.i, %conv.i.us.15 - %idxprom.i.us.15 = sext i32 %add8.i.us.15 to i64 - %arrayidx.i.us.15 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.15 - %89 = bitcast float* %arrayidx.i.us.15 to i32* - %90 = load i32, i32* %89, align 4, !tbaa !12 - %arrayidx15.i.us.15 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.15 - %91 = bitcast float* %arrayidx15.i.us.15 to i32* - store i32 %90, i32* %91, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.15 - -if.end.r_exit.i.us.15: ; preds = %if.then.i.us.15, %if.end.r_exit.i.us.14 - br i1 %cmp.i.us.16, label %if.then.i.us.16, label %if.end.r_exit.i.us.16 - -if.then.i.us.16: ; preds = %if.end.r_exit.i.us.15 - %add8.i.us.16 = add i32 %reass.mul.i, %conv.i.us.16 - %idxprom.i.us.16 = sext i32 %add8.i.us.16 to i64 - %arrayidx.i.us.16 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.16 - %92 = bitcast float* %arrayidx.i.us.16 to i32* - %93 = load i32, i32* %92, align 4, !tbaa !12 - %arrayidx15.i.us.16 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.16 - %94 = bitcast float* %arrayidx15.i.us.16 to i32* - store i32 %93, i32* %94, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.16 - -if.end.r_exit.i.us.16: ; preds = %if.then.i.us.16, %if.end.r_exit.i.us.15 - br i1 %cmp.i.us.17, label %if.then.i.us.17, label %if.end.r_exit.i.us.17 - -if.then.i.us.17: ; preds = %if.end.r_exit.i.us.16 - %add8.i.us.17 = add i32 %reass.mul.i, %conv.i.us.17 - %idxprom.i.us.17 = sext i32 %add8.i.us.17 to i64 - %arrayidx.i.us.17 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.17 - %95 = bitcast float* %arrayidx.i.us.17 to i32* - %96 = load i32, i32* %95, align 4, !tbaa !12 - %arrayidx15.i.us.17 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.17 - %97 = bitcast float* %arrayidx15.i.us.17 to i32* - store i32 %96, i32* %97, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.17 - -if.end.r_exit.i.us.17: ; preds = %if.then.i.us.17, %if.end.r_exit.i.us.16 - br i1 %cmp.i.us.18, label %if.then.i.us.18, label %if.end.r_exit.i.us.18 - -if.then.i.us.18: ; preds = %if.end.r_exit.i.us.17 - %add8.i.us.18 = add i32 %reass.mul.i, %conv.i.us.18 - %idxprom.i.us.18 = sext i32 %add8.i.us.18 to i64 - %arrayidx.i.us.18 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.18 - %98 = bitcast float* %arrayidx.i.us.18 to i32* - %99 = load i32, i32* %98, align 4, !tbaa !12 - %arrayidx15.i.us.18 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.18 - %100 = bitcast float* %arrayidx15.i.us.18 to i32* - store i32 %99, i32* %100, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.18 - -if.end.r_exit.i.us.18: ; preds = %if.then.i.us.18, %if.end.r_exit.i.us.17 - br i1 %cmp.i.us.19, label %if.then.i.us.19, label %if.end.r_exit.i.us.19 - -if.then.i.us.19: ; preds = %if.end.r_exit.i.us.18 - %add8.i.us.19 = add i32 %reass.mul.i, %conv.i.us.19 - %idxprom.i.us.19 = sext i32 %add8.i.us.19 to i64 - %arrayidx.i.us.19 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.19 - %101 = bitcast float* %arrayidx.i.us.19 to i32* - %102 = load i32, i32* %101, align 4, !tbaa !12 - %arrayidx15.i.us.19 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.19 - %103 = bitcast float* %arrayidx15.i.us.19 to i32* - store i32 %102, i32* %103, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.19 - -if.end.r_exit.i.us.19: ; preds = %if.then.i.us.19, %if.end.r_exit.i.us.18 - br i1 %cmp.i.us.20, label %if.then.i.us.20, label %if.end.r_exit.i.us.20 - -if.then.i.us.20: ; preds = %if.end.r_exit.i.us.19 - %add8.i.us.20 = add i32 %reass.mul.i, %conv.i.us.20 - %idxprom.i.us.20 = sext i32 %add8.i.us.20 to i64 - %arrayidx.i.us.20 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.20 - %104 = bitcast float* %arrayidx.i.us.20 to i32* - %105 = load i32, i32* %104, align 4, !tbaa !12 - %arrayidx15.i.us.20 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.20 - %106 = bitcast float* %arrayidx15.i.us.20 to i32* - store i32 %105, i32* %106, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.20 - -if.end.r_exit.i.us.20: ; preds = %if.then.i.us.20, %if.end.r_exit.i.us.19 - br i1 %cmp.i.us.21, label %if.then.i.us.21, label %if.end.r_exit.i.us.21 - -if.then.i.us.21: ; preds = %if.end.r_exit.i.us.20 - %add8.i.us.21 = add i32 %reass.mul.i, %conv.i.us.21 - %idxprom.i.us.21 = sext i32 %add8.i.us.21 to i64 - %arrayidx.i.us.21 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.21 - %107 = bitcast float* %arrayidx.i.us.21 to i32* - %108 = load i32, i32* %107, align 4, !tbaa !12 - %arrayidx15.i.us.21 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.21 - %109 = bitcast float* %arrayidx15.i.us.21 to i32* - store i32 %108, i32* %109, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.21 - -if.end.r_exit.i.us.21: ; preds = %if.then.i.us.21, %if.end.r_exit.i.us.20 - br i1 %cmp.i.us.22, label %if.then.i.us.22, label %if.end.r_exit.i.us.22 - -if.then.i.us.22: ; preds = %if.end.r_exit.i.us.21 - %add8.i.us.22 = add i32 %reass.mul.i, %conv.i.us.22 - %idxprom.i.us.22 = sext i32 %add8.i.us.22 to i64 - %arrayidx.i.us.22 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.22 - %110 = bitcast float* %arrayidx.i.us.22 to i32* - %111 = load i32, i32* %110, align 4, !tbaa !12 - %arrayidx15.i.us.22 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.22 - %112 = bitcast float* %arrayidx15.i.us.22 to i32* - store i32 %111, i32* %112, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.22 - -if.end.r_exit.i.us.22: ; preds = %if.then.i.us.22, %if.end.r_exit.i.us.21 - br i1 %cmp.i.us.23, label %if.then.i.us.23, label %if.end.r_exit.i.us.23 - -if.then.i.us.23: ; preds = %if.end.r_exit.i.us.22 - %add8.i.us.23 = add i32 %reass.mul.i, %conv.i.us.23 - %idxprom.i.us.23 = sext i32 %add8.i.us.23 to i64 - %arrayidx.i.us.23 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.23 - %113 = bitcast float* %arrayidx.i.us.23 to i32* - %114 = load i32, i32* %113, align 4, !tbaa !12 - %arrayidx15.i.us.23 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.23 - %115 = bitcast float* %arrayidx15.i.us.23 to i32* - store i32 %114, i32* %115, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.23 - -if.end.r_exit.i.us.23: ; preds = %if.then.i.us.23, %if.end.r_exit.i.us.22 - br i1 %cmp.i.us.24, label %if.then.i.us.24, label %if.end.r_exit.i.us.24 - -if.then.i.us.24: ; preds = %if.end.r_exit.i.us.23 - %add8.i.us.24 = add i32 %reass.mul.i, %conv.i.us.24 - %idxprom.i.us.24 = sext i32 %add8.i.us.24 to i64 - %arrayidx.i.us.24 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.24 - %116 = bitcast float* %arrayidx.i.us.24 to i32* - %117 = load i32, i32* %116, align 4, !tbaa !12 - %arrayidx15.i.us.24 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.24 - %118 = bitcast float* %arrayidx15.i.us.24 to i32* - store i32 %117, i32* %118, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.24 - -if.end.r_exit.i.us.24: ; preds = %if.then.i.us.24, %if.end.r_exit.i.us.23 - br i1 %cmp.i.us.25, label %if.then.i.us.25, label %if.end.r_exit.i.us.25 - -if.then.i.us.25: ; preds = %if.end.r_exit.i.us.24 - %add8.i.us.25 = add i32 %reass.mul.i, %conv.i.us.25 - %idxprom.i.us.25 = sext i32 %add8.i.us.25 to i64 - %arrayidx.i.us.25 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.25 - %119 = bitcast float* %arrayidx.i.us.25 to i32* - %120 = load i32, i32* %119, align 4, !tbaa !12 - %arrayidx15.i.us.25 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.25 - %121 = bitcast float* %arrayidx15.i.us.25 to i32* - store i32 %120, i32* %121, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.25 - -if.end.r_exit.i.us.25: ; preds = %if.then.i.us.25, %if.end.r_exit.i.us.24 - br i1 %cmp.i.us.26, label %if.then.i.us.26, label %if.end.r_exit.i.us.26 - -if.then.i.us.26: ; preds = %if.end.r_exit.i.us.25 - %add8.i.us.26 = add i32 %reass.mul.i, %conv.i.us.26 - %idxprom.i.us.26 = sext i32 %add8.i.us.26 to i64 - %arrayidx.i.us.26 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.26 - %122 = bitcast float* %arrayidx.i.us.26 to i32* - %123 = load i32, i32* %122, align 4, !tbaa !12 - %arrayidx15.i.us.26 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.26 - %124 = bitcast float* %arrayidx15.i.us.26 to i32* - store i32 %123, i32* %124, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.26 - -if.end.r_exit.i.us.26: ; preds = %if.then.i.us.26, %if.end.r_exit.i.us.25 - br i1 %cmp.i.us.27, label %if.then.i.us.27, label %if.end.r_exit.i.us.27 - -if.then.i.us.27: ; preds = %if.end.r_exit.i.us.26 - %add8.i.us.27 = add i32 %reass.mul.i, %conv.i.us.27 - %idxprom.i.us.27 = sext i32 %add8.i.us.27 to i64 - %arrayidx.i.us.27 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.27 - %125 = bitcast float* %arrayidx.i.us.27 to i32* - %126 = load i32, i32* %125, align 4, !tbaa !12 - %arrayidx15.i.us.27 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.27 - %127 = bitcast float* %arrayidx15.i.us.27 to i32* - store i32 %126, i32* %127, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.27 - -if.end.r_exit.i.us.27: ; preds = %if.then.i.us.27, %if.end.r_exit.i.us.26 - br i1 %cmp.i.us.28, label %if.then.i.us.28, label %if.end.r_exit.i.us.28 - -if.then.i.us.28: ; preds = %if.end.r_exit.i.us.27 - %add8.i.us.28 = add i32 %reass.mul.i, %conv.i.us.28 - %idxprom.i.us.28 = sext i32 %add8.i.us.28 to i64 - %arrayidx.i.us.28 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.28 - %128 = bitcast float* %arrayidx.i.us.28 to i32* - %129 = load i32, i32* %128, align 4, !tbaa !12 - %arrayidx15.i.us.28 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.28 - %130 = bitcast float* %arrayidx15.i.us.28 to i32* - store i32 %129, i32* %130, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.28 - -if.end.r_exit.i.us.28: ; preds = %if.then.i.us.28, %if.end.r_exit.i.us.27 - br i1 %cmp.i.us.29, label %if.then.i.us.29, label %if.end.r_exit.i.us.29 - -if.then.i.us.29: ; preds = %if.end.r_exit.i.us.28 - %add8.i.us.29 = add i32 %reass.mul.i, %conv.i.us.29 - %idxprom.i.us.29 = sext i32 %add8.i.us.29 to i64 - %arrayidx.i.us.29 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.29 - %131 = bitcast float* %arrayidx.i.us.29 to i32* - %132 = load i32, i32* %131, align 4, !tbaa !12 - %arrayidx15.i.us.29 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.29 - %133 = bitcast float* %arrayidx15.i.us.29 to i32* - store i32 %132, i32* %133, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.29 - -if.end.r_exit.i.us.29: ; preds = %if.then.i.us.29, %if.end.r_exit.i.us.28 - br i1 %cmp.i.us.30, label %if.then.i.us.30, label %if.end.r_exit.i.us.30 - -if.then.i.us.30: ; preds = %if.end.r_exit.i.us.29 - %add8.i.us.30 = add i32 %reass.mul.i, %conv.i.us.30 - %idxprom.i.us.30 = sext i32 %add8.i.us.30 to i64 - %arrayidx.i.us.30 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.30 - %134 = bitcast float* %arrayidx.i.us.30 to i32* - %135 = load i32, i32* %134, align 4, !tbaa !12 - %arrayidx15.i.us.30 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.30 - %136 = bitcast float* %arrayidx15.i.us.30 to i32* - store i32 %135, i32* %136, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.us.30 - -if.end.r_exit.i.us.30: ; preds = %if.then.i.us.30, %if.end.r_exit.i.us.29 - br i1 %cmp.i.us.31, label %if.then.i.us.31, label %pregion_for_end.i - -if.then.i.us.31: ; preds = %if.end.r_exit.i.us.30 - %add8.i.us.31 = add i32 %reass.mul.i, %conv.i.us.31 - %idxprom.i.us.31 = sext i32 %add8.i.us.31 to i64 - %arrayidx.i.us.31 = getelementptr inbounds float, float* %5, i64 %idxprom.i.us.31 - %137 = bitcast float* %arrayidx.i.us.31 to i32* - %138 = load i32, i32* %137, align 4, !tbaa !12 - %arrayidx15.i.us.31 = getelementptr inbounds float, float* %3, i64 %idxprom.i.us.31 - %139 = bitcast float* %arrayidx15.i.us.31 to i32* - store i32 %138, i32* %139, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_doitgen_kernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = getelementptr i8*, i8** %0, i64 1 - %7 = bitcast i8** %6 to i32** - %8 = load i32*, i32** %7, align 8 - %9 = load i32, i32* %8, align 4 - %10 = getelementptr i8*, i8** %0, i64 2 - %11 = bitcast i8** %10 to i32** - %12 = load i32*, i32** %11, align 8 - %13 = load i32, i32* %12, align 4 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to float*** - %16 = load float**, float*** %15, align 8 - %17 = load float*, float** %16, align 8 - %18 = getelementptr i8*, i8** %0, i64 5 - %19 = bitcast i8** %18 to float*** - %20 = load float**, float*** %19, align 8 - %21 = load float*, float** %20, align 8 - %22 = getelementptr i8*, i8** %0, i64 6 - %23 = bitcast i8** %22 to i32** - %24 = load i32*, i32** %23, align 8 - %25 = load i32, i32* %24, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %mul6.i.i = mul i32 %25, %9 - %conv.i.i.us = trunc i64 %mul.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %13, %conv.i.i.us - %26 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.1 = or i32 %26, 1 - %cmp.i.i.us.1 = icmp sgt i32 %13, %conv.i.i.us.1 - %27 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.2 = or i32 %27, 2 - %cmp.i.i.us.2 = icmp sgt i32 %13, %conv.i.i.us.2 - %28 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.3 = or i32 %28, 3 - %cmp.i.i.us.3 = icmp sgt i32 %13, %conv.i.i.us.3 - %29 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.4 = or i32 %29, 4 - %cmp.i.i.us.4 = icmp sgt i32 %13, %conv.i.i.us.4 - %30 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.5 = or i32 %30, 5 - %cmp.i.i.us.5 = icmp sgt i32 %13, %conv.i.i.us.5 - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.6 = or i32 %31, 6 - %cmp.i.i.us.6 = icmp sgt i32 %13, %conv.i.i.us.6 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.7 = or i32 %32, 7 - %cmp.i.i.us.7 = icmp sgt i32 %13, %conv.i.i.us.7 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.8 = or i32 %33, 8 - %cmp.i.i.us.8 = icmp sgt i32 %13, %conv.i.i.us.8 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.9 = or i32 %34, 9 - %cmp.i.i.us.9 = icmp sgt i32 %13, %conv.i.i.us.9 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.10 = or i32 %35, 10 - %cmp.i.i.us.10 = icmp sgt i32 %13, %conv.i.i.us.10 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.11 = or i32 %36, 11 - %cmp.i.i.us.11 = icmp sgt i32 %13, %conv.i.i.us.11 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.12 = or i32 %37, 12 - %cmp.i.i.us.12 = icmp sgt i32 %13, %conv.i.i.us.12 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.13 = or i32 %38, 13 - %cmp.i.i.us.13 = icmp sgt i32 %13, %conv.i.i.us.13 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.14 = or i32 %39, 14 - %cmp.i.i.us.14 = icmp sgt i32 %13, %conv.i.i.us.14 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.15 = or i32 %40, 15 - %cmp.i.i.us.15 = icmp sgt i32 %13, %conv.i.i.us.15 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.16 = or i32 %41, 16 - %cmp.i.i.us.16 = icmp sgt i32 %13, %conv.i.i.us.16 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.17 = or i32 %42, 17 - %cmp.i.i.us.17 = icmp sgt i32 %13, %conv.i.i.us.17 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.18 = or i32 %43, 18 - %cmp.i.i.us.18 = icmp sgt i32 %13, %conv.i.i.us.18 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.19 = or i32 %44, 19 - %cmp.i.i.us.19 = icmp sgt i32 %13, %conv.i.i.us.19 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.20 = or i32 %45, 20 - %cmp.i.i.us.20 = icmp sgt i32 %13, %conv.i.i.us.20 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.21 = or i32 %46, 21 - %cmp.i.i.us.21 = icmp sgt i32 %13, %conv.i.i.us.21 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.22 = or i32 %47, 22 - %cmp.i.i.us.22 = icmp sgt i32 %13, %conv.i.i.us.22 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.23 = or i32 %48, 23 - %cmp.i.i.us.23 = icmp sgt i32 %13, %conv.i.i.us.23 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.24 = or i32 %49, 24 - %cmp.i.i.us.24 = icmp sgt i32 %13, %conv.i.i.us.24 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.25 = or i32 %50, 25 - %cmp.i.i.us.25 = icmp sgt i32 %13, %conv.i.i.us.25 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.26 = or i32 %51, 26 - %cmp.i.i.us.26 = icmp sgt i32 %13, %conv.i.i.us.26 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.27 = or i32 %52, 27 - %cmp.i.i.us.27 = icmp sgt i32 %13, %conv.i.i.us.27 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.28 = or i32 %53, 28 - %cmp.i.i.us.28 = icmp sgt i32 %13, %conv.i.i.us.28 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.29 = or i32 %54, 29 - %cmp.i.i.us.29 = icmp sgt i32 %13, %conv.i.i.us.29 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.30 = or i32 %55, 30 - %cmp.i.i.us.30 = icmp sgt i32 %13, %conv.i.i.us.30 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.31 = or i32 %56, 31 - %cmp.i.i.us.31 = icmp sgt i32 %13, %conv.i.i.us.31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %60, %pregion_for_end.i.i ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp4.i.i = icmp sgt i32 %9, %conv2.i.i - %reass.add.i.i = add i32 %mul6.i.i, %conv2.i.i - %reass.mul.i.i = mul i32 %reass.add.i.i, %13 - br i1 %cmp4.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us.preheader - %add8.i.i.us = add i32 %reass.mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add8.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us - %57 = bitcast float* %arrayidx.i.i.us to i32* - %58 = load i32, i32* %57, align 4, !tbaa !12 - %arrayidx15.i.i.us = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us - %59 = bitcast float* %arrayidx15.i.i.us to i32* - store i32 %58, i32* %59, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - br i1 %cmp.i.i.us.1, label %if.then.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -pregion_for_end.i.i: ; preds = %if.then.i.i.us.31, %if.end.r_exit.i.i.us.30, %pregion_for_entry.pregion_for_init.i.i - %60 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond.not = icmp eq i64 %60, 8 - br i1 %exitcond.not, label %_pocl_kernel_doitgen_kernel2.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !19 - -_pocl_kernel_doitgen_kernel2.exit: ; preds = %pregion_for_end.i.i - ret void - -if.then.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %add8.i.i.us.1 = add i32 %reass.mul.i.i, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add8.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.1 - %61 = bitcast float* %arrayidx.i.i.us.1 to i32* - %62 = load i32, i32* %61, align 4, !tbaa !12 - %arrayidx15.i.i.us.1 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.1 - %63 = bitcast float* %arrayidx15.i.i.us.1 to i32* - store i32 %62, i32* %63, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.then.i.i.us.1, %if.end.r_exit.i.i.us - br i1 %cmp.i.i.us.2, label %if.then.i.i.us.2, label %if.end.r_exit.i.i.us.2 - -if.then.i.i.us.2: ; preds = %if.end.r_exit.i.i.us.1 - %add8.i.i.us.2 = add i32 %reass.mul.i.i, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add8.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.2 - %64 = bitcast float* %arrayidx.i.i.us.2 to i32* - %65 = load i32, i32* %64, align 4, !tbaa !12 - %arrayidx15.i.i.us.2 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.2 - %66 = bitcast float* %arrayidx15.i.i.us.2 to i32* - store i32 %65, i32* %66, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.2 - -if.end.r_exit.i.i.us.2: ; preds = %if.then.i.i.us.2, %if.end.r_exit.i.i.us.1 - br i1 %cmp.i.i.us.3, label %if.then.i.i.us.3, label %if.end.r_exit.i.i.us.3 - -if.then.i.i.us.3: ; preds = %if.end.r_exit.i.i.us.2 - %add8.i.i.us.3 = add i32 %reass.mul.i.i, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add8.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.3 - %67 = bitcast float* %arrayidx.i.i.us.3 to i32* - %68 = load i32, i32* %67, align 4, !tbaa !12 - %arrayidx15.i.i.us.3 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.3 - %69 = bitcast float* %arrayidx15.i.i.us.3 to i32* - store i32 %68, i32* %69, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.3 - -if.end.r_exit.i.i.us.3: ; preds = %if.then.i.i.us.3, %if.end.r_exit.i.i.us.2 - br i1 %cmp.i.i.us.4, label %if.then.i.i.us.4, label %if.end.r_exit.i.i.us.4 - -if.then.i.i.us.4: ; preds = %if.end.r_exit.i.i.us.3 - %add8.i.i.us.4 = add i32 %reass.mul.i.i, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add8.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.4 - %70 = bitcast float* %arrayidx.i.i.us.4 to i32* - %71 = load i32, i32* %70, align 4, !tbaa !12 - %arrayidx15.i.i.us.4 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.4 - %72 = bitcast float* %arrayidx15.i.i.us.4 to i32* - store i32 %71, i32* %72, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.4 - -if.end.r_exit.i.i.us.4: ; preds = %if.then.i.i.us.4, %if.end.r_exit.i.i.us.3 - br i1 %cmp.i.i.us.5, label %if.then.i.i.us.5, label %if.end.r_exit.i.i.us.5 - -if.then.i.i.us.5: ; preds = %if.end.r_exit.i.i.us.4 - %add8.i.i.us.5 = add i32 %reass.mul.i.i, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add8.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.5 - %73 = bitcast float* %arrayidx.i.i.us.5 to i32* - %74 = load i32, i32* %73, align 4, !tbaa !12 - %arrayidx15.i.i.us.5 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.5 - %75 = bitcast float* %arrayidx15.i.i.us.5 to i32* - store i32 %74, i32* %75, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.5 - -if.end.r_exit.i.i.us.5: ; preds = %if.then.i.i.us.5, %if.end.r_exit.i.i.us.4 - br i1 %cmp.i.i.us.6, label %if.then.i.i.us.6, label %if.end.r_exit.i.i.us.6 - -if.then.i.i.us.6: ; preds = %if.end.r_exit.i.i.us.5 - %add8.i.i.us.6 = add i32 %reass.mul.i.i, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add8.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.6 - %76 = bitcast float* %arrayidx.i.i.us.6 to i32* - %77 = load i32, i32* %76, align 4, !tbaa !12 - %arrayidx15.i.i.us.6 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.6 - %78 = bitcast float* %arrayidx15.i.i.us.6 to i32* - store i32 %77, i32* %78, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.6 - -if.end.r_exit.i.i.us.6: ; preds = %if.then.i.i.us.6, %if.end.r_exit.i.i.us.5 - br i1 %cmp.i.i.us.7, label %if.then.i.i.us.7, label %if.end.r_exit.i.i.us.7 - -if.then.i.i.us.7: ; preds = %if.end.r_exit.i.i.us.6 - %add8.i.i.us.7 = add i32 %reass.mul.i.i, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add8.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.7 - %79 = bitcast float* %arrayidx.i.i.us.7 to i32* - %80 = load i32, i32* %79, align 4, !tbaa !12 - %arrayidx15.i.i.us.7 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.7 - %81 = bitcast float* %arrayidx15.i.i.us.7 to i32* - store i32 %80, i32* %81, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.7 - -if.end.r_exit.i.i.us.7: ; preds = %if.then.i.i.us.7, %if.end.r_exit.i.i.us.6 - br i1 %cmp.i.i.us.8, label %if.then.i.i.us.8, label %if.end.r_exit.i.i.us.8 - -if.then.i.i.us.8: ; preds = %if.end.r_exit.i.i.us.7 - %add8.i.i.us.8 = add i32 %reass.mul.i.i, %conv.i.i.us.8 - %idxprom.i.i.us.8 = sext i32 %add8.i.i.us.8 to i64 - %arrayidx.i.i.us.8 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.8 - %82 = bitcast float* %arrayidx.i.i.us.8 to i32* - %83 = load i32, i32* %82, align 4, !tbaa !12 - %arrayidx15.i.i.us.8 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.8 - %84 = bitcast float* %arrayidx15.i.i.us.8 to i32* - store i32 %83, i32* %84, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.8 - -if.end.r_exit.i.i.us.8: ; preds = %if.then.i.i.us.8, %if.end.r_exit.i.i.us.7 - br i1 %cmp.i.i.us.9, label %if.then.i.i.us.9, label %if.end.r_exit.i.i.us.9 - -if.then.i.i.us.9: ; preds = %if.end.r_exit.i.i.us.8 - %add8.i.i.us.9 = add i32 %reass.mul.i.i, %conv.i.i.us.9 - %idxprom.i.i.us.9 = sext i32 %add8.i.i.us.9 to i64 - %arrayidx.i.i.us.9 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.9 - %85 = bitcast float* %arrayidx.i.i.us.9 to i32* - %86 = load i32, i32* %85, align 4, !tbaa !12 - %arrayidx15.i.i.us.9 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.9 - %87 = bitcast float* %arrayidx15.i.i.us.9 to i32* - store i32 %86, i32* %87, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.9 - -if.end.r_exit.i.i.us.9: ; preds = %if.then.i.i.us.9, %if.end.r_exit.i.i.us.8 - br i1 %cmp.i.i.us.10, label %if.then.i.i.us.10, label %if.end.r_exit.i.i.us.10 - -if.then.i.i.us.10: ; preds = %if.end.r_exit.i.i.us.9 - %add8.i.i.us.10 = add i32 %reass.mul.i.i, %conv.i.i.us.10 - %idxprom.i.i.us.10 = sext i32 %add8.i.i.us.10 to i64 - %arrayidx.i.i.us.10 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.10 - %88 = bitcast float* %arrayidx.i.i.us.10 to i32* - %89 = load i32, i32* %88, align 4, !tbaa !12 - %arrayidx15.i.i.us.10 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.10 - %90 = bitcast float* %arrayidx15.i.i.us.10 to i32* - store i32 %89, i32* %90, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.10 - -if.end.r_exit.i.i.us.10: ; preds = %if.then.i.i.us.10, %if.end.r_exit.i.i.us.9 - br i1 %cmp.i.i.us.11, label %if.then.i.i.us.11, label %if.end.r_exit.i.i.us.11 - -if.then.i.i.us.11: ; preds = %if.end.r_exit.i.i.us.10 - %add8.i.i.us.11 = add i32 %reass.mul.i.i, %conv.i.i.us.11 - %idxprom.i.i.us.11 = sext i32 %add8.i.i.us.11 to i64 - %arrayidx.i.i.us.11 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.11 - %91 = bitcast float* %arrayidx.i.i.us.11 to i32* - %92 = load i32, i32* %91, align 4, !tbaa !12 - %arrayidx15.i.i.us.11 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.11 - %93 = bitcast float* %arrayidx15.i.i.us.11 to i32* - store i32 %92, i32* %93, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.11 - -if.end.r_exit.i.i.us.11: ; preds = %if.then.i.i.us.11, %if.end.r_exit.i.i.us.10 - br i1 %cmp.i.i.us.12, label %if.then.i.i.us.12, label %if.end.r_exit.i.i.us.12 - -if.then.i.i.us.12: ; preds = %if.end.r_exit.i.i.us.11 - %add8.i.i.us.12 = add i32 %reass.mul.i.i, %conv.i.i.us.12 - %idxprom.i.i.us.12 = sext i32 %add8.i.i.us.12 to i64 - %arrayidx.i.i.us.12 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.12 - %94 = bitcast float* %arrayidx.i.i.us.12 to i32* - %95 = load i32, i32* %94, align 4, !tbaa !12 - %arrayidx15.i.i.us.12 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.12 - %96 = bitcast float* %arrayidx15.i.i.us.12 to i32* - store i32 %95, i32* %96, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.12 - -if.end.r_exit.i.i.us.12: ; preds = %if.then.i.i.us.12, %if.end.r_exit.i.i.us.11 - br i1 %cmp.i.i.us.13, label %if.then.i.i.us.13, label %if.end.r_exit.i.i.us.13 - -if.then.i.i.us.13: ; preds = %if.end.r_exit.i.i.us.12 - %add8.i.i.us.13 = add i32 %reass.mul.i.i, %conv.i.i.us.13 - %idxprom.i.i.us.13 = sext i32 %add8.i.i.us.13 to i64 - %arrayidx.i.i.us.13 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.13 - %97 = bitcast float* %arrayidx.i.i.us.13 to i32* - %98 = load i32, i32* %97, align 4, !tbaa !12 - %arrayidx15.i.i.us.13 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.13 - %99 = bitcast float* %arrayidx15.i.i.us.13 to i32* - store i32 %98, i32* %99, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.13 - -if.end.r_exit.i.i.us.13: ; preds = %if.then.i.i.us.13, %if.end.r_exit.i.i.us.12 - br i1 %cmp.i.i.us.14, label %if.then.i.i.us.14, label %if.end.r_exit.i.i.us.14 - -if.then.i.i.us.14: ; preds = %if.end.r_exit.i.i.us.13 - %add8.i.i.us.14 = add i32 %reass.mul.i.i, %conv.i.i.us.14 - %idxprom.i.i.us.14 = sext i32 %add8.i.i.us.14 to i64 - %arrayidx.i.i.us.14 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.14 - %100 = bitcast float* %arrayidx.i.i.us.14 to i32* - %101 = load i32, i32* %100, align 4, !tbaa !12 - %arrayidx15.i.i.us.14 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.14 - %102 = bitcast float* %arrayidx15.i.i.us.14 to i32* - store i32 %101, i32* %102, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.14 - -if.end.r_exit.i.i.us.14: ; preds = %if.then.i.i.us.14, %if.end.r_exit.i.i.us.13 - br i1 %cmp.i.i.us.15, label %if.then.i.i.us.15, label %if.end.r_exit.i.i.us.15 - -if.then.i.i.us.15: ; preds = %if.end.r_exit.i.i.us.14 - %add8.i.i.us.15 = add i32 %reass.mul.i.i, %conv.i.i.us.15 - %idxprom.i.i.us.15 = sext i32 %add8.i.i.us.15 to i64 - %arrayidx.i.i.us.15 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.15 - %103 = bitcast float* %arrayidx.i.i.us.15 to i32* - %104 = load i32, i32* %103, align 4, !tbaa !12 - %arrayidx15.i.i.us.15 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.15 - %105 = bitcast float* %arrayidx15.i.i.us.15 to i32* - store i32 %104, i32* %105, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.15 - -if.end.r_exit.i.i.us.15: ; preds = %if.then.i.i.us.15, %if.end.r_exit.i.i.us.14 - br i1 %cmp.i.i.us.16, label %if.then.i.i.us.16, label %if.end.r_exit.i.i.us.16 - -if.then.i.i.us.16: ; preds = %if.end.r_exit.i.i.us.15 - %add8.i.i.us.16 = add i32 %reass.mul.i.i, %conv.i.i.us.16 - %idxprom.i.i.us.16 = sext i32 %add8.i.i.us.16 to i64 - %arrayidx.i.i.us.16 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.16 - %106 = bitcast float* %arrayidx.i.i.us.16 to i32* - %107 = load i32, i32* %106, align 4, !tbaa !12 - %arrayidx15.i.i.us.16 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.16 - %108 = bitcast float* %arrayidx15.i.i.us.16 to i32* - store i32 %107, i32* %108, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.16 - -if.end.r_exit.i.i.us.16: ; preds = %if.then.i.i.us.16, %if.end.r_exit.i.i.us.15 - br i1 %cmp.i.i.us.17, label %if.then.i.i.us.17, label %if.end.r_exit.i.i.us.17 - -if.then.i.i.us.17: ; preds = %if.end.r_exit.i.i.us.16 - %add8.i.i.us.17 = add i32 %reass.mul.i.i, %conv.i.i.us.17 - %idxprom.i.i.us.17 = sext i32 %add8.i.i.us.17 to i64 - %arrayidx.i.i.us.17 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.17 - %109 = bitcast float* %arrayidx.i.i.us.17 to i32* - %110 = load i32, i32* %109, align 4, !tbaa !12 - %arrayidx15.i.i.us.17 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.17 - %111 = bitcast float* %arrayidx15.i.i.us.17 to i32* - store i32 %110, i32* %111, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.17 - -if.end.r_exit.i.i.us.17: ; preds = %if.then.i.i.us.17, %if.end.r_exit.i.i.us.16 - br i1 %cmp.i.i.us.18, label %if.then.i.i.us.18, label %if.end.r_exit.i.i.us.18 - -if.then.i.i.us.18: ; preds = %if.end.r_exit.i.i.us.17 - %add8.i.i.us.18 = add i32 %reass.mul.i.i, %conv.i.i.us.18 - %idxprom.i.i.us.18 = sext i32 %add8.i.i.us.18 to i64 - %arrayidx.i.i.us.18 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.18 - %112 = bitcast float* %arrayidx.i.i.us.18 to i32* - %113 = load i32, i32* %112, align 4, !tbaa !12 - %arrayidx15.i.i.us.18 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.18 - %114 = bitcast float* %arrayidx15.i.i.us.18 to i32* - store i32 %113, i32* %114, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.18 - -if.end.r_exit.i.i.us.18: ; preds = %if.then.i.i.us.18, %if.end.r_exit.i.i.us.17 - br i1 %cmp.i.i.us.19, label %if.then.i.i.us.19, label %if.end.r_exit.i.i.us.19 - -if.then.i.i.us.19: ; preds = %if.end.r_exit.i.i.us.18 - %add8.i.i.us.19 = add i32 %reass.mul.i.i, %conv.i.i.us.19 - %idxprom.i.i.us.19 = sext i32 %add8.i.i.us.19 to i64 - %arrayidx.i.i.us.19 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.19 - %115 = bitcast float* %arrayidx.i.i.us.19 to i32* - %116 = load i32, i32* %115, align 4, !tbaa !12 - %arrayidx15.i.i.us.19 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.19 - %117 = bitcast float* %arrayidx15.i.i.us.19 to i32* - store i32 %116, i32* %117, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.19 - -if.end.r_exit.i.i.us.19: ; preds = %if.then.i.i.us.19, %if.end.r_exit.i.i.us.18 - br i1 %cmp.i.i.us.20, label %if.then.i.i.us.20, label %if.end.r_exit.i.i.us.20 - -if.then.i.i.us.20: ; preds = %if.end.r_exit.i.i.us.19 - %add8.i.i.us.20 = add i32 %reass.mul.i.i, %conv.i.i.us.20 - %idxprom.i.i.us.20 = sext i32 %add8.i.i.us.20 to i64 - %arrayidx.i.i.us.20 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.20 - %118 = bitcast float* %arrayidx.i.i.us.20 to i32* - %119 = load i32, i32* %118, align 4, !tbaa !12 - %arrayidx15.i.i.us.20 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.20 - %120 = bitcast float* %arrayidx15.i.i.us.20 to i32* - store i32 %119, i32* %120, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.20 - -if.end.r_exit.i.i.us.20: ; preds = %if.then.i.i.us.20, %if.end.r_exit.i.i.us.19 - br i1 %cmp.i.i.us.21, label %if.then.i.i.us.21, label %if.end.r_exit.i.i.us.21 - -if.then.i.i.us.21: ; preds = %if.end.r_exit.i.i.us.20 - %add8.i.i.us.21 = add i32 %reass.mul.i.i, %conv.i.i.us.21 - %idxprom.i.i.us.21 = sext i32 %add8.i.i.us.21 to i64 - %arrayidx.i.i.us.21 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.21 - %121 = bitcast float* %arrayidx.i.i.us.21 to i32* - %122 = load i32, i32* %121, align 4, !tbaa !12 - %arrayidx15.i.i.us.21 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.21 - %123 = bitcast float* %arrayidx15.i.i.us.21 to i32* - store i32 %122, i32* %123, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.21 - -if.end.r_exit.i.i.us.21: ; preds = %if.then.i.i.us.21, %if.end.r_exit.i.i.us.20 - br i1 %cmp.i.i.us.22, label %if.then.i.i.us.22, label %if.end.r_exit.i.i.us.22 - -if.then.i.i.us.22: ; preds = %if.end.r_exit.i.i.us.21 - %add8.i.i.us.22 = add i32 %reass.mul.i.i, %conv.i.i.us.22 - %idxprom.i.i.us.22 = sext i32 %add8.i.i.us.22 to i64 - %arrayidx.i.i.us.22 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.22 - %124 = bitcast float* %arrayidx.i.i.us.22 to i32* - %125 = load i32, i32* %124, align 4, !tbaa !12 - %arrayidx15.i.i.us.22 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.22 - %126 = bitcast float* %arrayidx15.i.i.us.22 to i32* - store i32 %125, i32* %126, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.22 - -if.end.r_exit.i.i.us.22: ; preds = %if.then.i.i.us.22, %if.end.r_exit.i.i.us.21 - br i1 %cmp.i.i.us.23, label %if.then.i.i.us.23, label %if.end.r_exit.i.i.us.23 - -if.then.i.i.us.23: ; preds = %if.end.r_exit.i.i.us.22 - %add8.i.i.us.23 = add i32 %reass.mul.i.i, %conv.i.i.us.23 - %idxprom.i.i.us.23 = sext i32 %add8.i.i.us.23 to i64 - %arrayidx.i.i.us.23 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.23 - %127 = bitcast float* %arrayidx.i.i.us.23 to i32* - %128 = load i32, i32* %127, align 4, !tbaa !12 - %arrayidx15.i.i.us.23 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.23 - %129 = bitcast float* %arrayidx15.i.i.us.23 to i32* - store i32 %128, i32* %129, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.23 - -if.end.r_exit.i.i.us.23: ; preds = %if.then.i.i.us.23, %if.end.r_exit.i.i.us.22 - br i1 %cmp.i.i.us.24, label %if.then.i.i.us.24, label %if.end.r_exit.i.i.us.24 - -if.then.i.i.us.24: ; preds = %if.end.r_exit.i.i.us.23 - %add8.i.i.us.24 = add i32 %reass.mul.i.i, %conv.i.i.us.24 - %idxprom.i.i.us.24 = sext i32 %add8.i.i.us.24 to i64 - %arrayidx.i.i.us.24 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.24 - %130 = bitcast float* %arrayidx.i.i.us.24 to i32* - %131 = load i32, i32* %130, align 4, !tbaa !12 - %arrayidx15.i.i.us.24 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.24 - %132 = bitcast float* %arrayidx15.i.i.us.24 to i32* - store i32 %131, i32* %132, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.24 - -if.end.r_exit.i.i.us.24: ; preds = %if.then.i.i.us.24, %if.end.r_exit.i.i.us.23 - br i1 %cmp.i.i.us.25, label %if.then.i.i.us.25, label %if.end.r_exit.i.i.us.25 - -if.then.i.i.us.25: ; preds = %if.end.r_exit.i.i.us.24 - %add8.i.i.us.25 = add i32 %reass.mul.i.i, %conv.i.i.us.25 - %idxprom.i.i.us.25 = sext i32 %add8.i.i.us.25 to i64 - %arrayidx.i.i.us.25 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.25 - %133 = bitcast float* %arrayidx.i.i.us.25 to i32* - %134 = load i32, i32* %133, align 4, !tbaa !12 - %arrayidx15.i.i.us.25 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.25 - %135 = bitcast float* %arrayidx15.i.i.us.25 to i32* - store i32 %134, i32* %135, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.25 - -if.end.r_exit.i.i.us.25: ; preds = %if.then.i.i.us.25, %if.end.r_exit.i.i.us.24 - br i1 %cmp.i.i.us.26, label %if.then.i.i.us.26, label %if.end.r_exit.i.i.us.26 - -if.then.i.i.us.26: ; preds = %if.end.r_exit.i.i.us.25 - %add8.i.i.us.26 = add i32 %reass.mul.i.i, %conv.i.i.us.26 - %idxprom.i.i.us.26 = sext i32 %add8.i.i.us.26 to i64 - %arrayidx.i.i.us.26 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.26 - %136 = bitcast float* %arrayidx.i.i.us.26 to i32* - %137 = load i32, i32* %136, align 4, !tbaa !12 - %arrayidx15.i.i.us.26 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.26 - %138 = bitcast float* %arrayidx15.i.i.us.26 to i32* - store i32 %137, i32* %138, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.26 - -if.end.r_exit.i.i.us.26: ; preds = %if.then.i.i.us.26, %if.end.r_exit.i.i.us.25 - br i1 %cmp.i.i.us.27, label %if.then.i.i.us.27, label %if.end.r_exit.i.i.us.27 - -if.then.i.i.us.27: ; preds = %if.end.r_exit.i.i.us.26 - %add8.i.i.us.27 = add i32 %reass.mul.i.i, %conv.i.i.us.27 - %idxprom.i.i.us.27 = sext i32 %add8.i.i.us.27 to i64 - %arrayidx.i.i.us.27 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.27 - %139 = bitcast float* %arrayidx.i.i.us.27 to i32* - %140 = load i32, i32* %139, align 4, !tbaa !12 - %arrayidx15.i.i.us.27 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.27 - %141 = bitcast float* %arrayidx15.i.i.us.27 to i32* - store i32 %140, i32* %141, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.27 - -if.end.r_exit.i.i.us.27: ; preds = %if.then.i.i.us.27, %if.end.r_exit.i.i.us.26 - br i1 %cmp.i.i.us.28, label %if.then.i.i.us.28, label %if.end.r_exit.i.i.us.28 - -if.then.i.i.us.28: ; preds = %if.end.r_exit.i.i.us.27 - %add8.i.i.us.28 = add i32 %reass.mul.i.i, %conv.i.i.us.28 - %idxprom.i.i.us.28 = sext i32 %add8.i.i.us.28 to i64 - %arrayidx.i.i.us.28 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.28 - %142 = bitcast float* %arrayidx.i.i.us.28 to i32* - %143 = load i32, i32* %142, align 4, !tbaa !12 - %arrayidx15.i.i.us.28 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.28 - %144 = bitcast float* %arrayidx15.i.i.us.28 to i32* - store i32 %143, i32* %144, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.28 - -if.end.r_exit.i.i.us.28: ; preds = %if.then.i.i.us.28, %if.end.r_exit.i.i.us.27 - br i1 %cmp.i.i.us.29, label %if.then.i.i.us.29, label %if.end.r_exit.i.i.us.29 - -if.then.i.i.us.29: ; preds = %if.end.r_exit.i.i.us.28 - %add8.i.i.us.29 = add i32 %reass.mul.i.i, %conv.i.i.us.29 - %idxprom.i.i.us.29 = sext i32 %add8.i.i.us.29 to i64 - %arrayidx.i.i.us.29 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.29 - %145 = bitcast float* %arrayidx.i.i.us.29 to i32* - %146 = load i32, i32* %145, align 4, !tbaa !12 - %arrayidx15.i.i.us.29 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.29 - %147 = bitcast float* %arrayidx15.i.i.us.29 to i32* - store i32 %146, i32* %147, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.29 - -if.end.r_exit.i.i.us.29: ; preds = %if.then.i.i.us.29, %if.end.r_exit.i.i.us.28 - br i1 %cmp.i.i.us.30, label %if.then.i.i.us.30, label %if.end.r_exit.i.i.us.30 - -if.then.i.i.us.30: ; preds = %if.end.r_exit.i.i.us.29 - %add8.i.i.us.30 = add i32 %reass.mul.i.i, %conv.i.i.us.30 - %idxprom.i.i.us.30 = sext i32 %add8.i.i.us.30 to i64 - %arrayidx.i.i.us.30 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.30 - %148 = bitcast float* %arrayidx.i.i.us.30 to i32* - %149 = load i32, i32* %148, align 4, !tbaa !12 - %arrayidx15.i.i.us.30 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.30 - %150 = bitcast float* %arrayidx15.i.i.us.30 to i32* - store i32 %149, i32* %150, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.30 - -if.end.r_exit.i.i.us.30: ; preds = %if.then.i.i.us.30, %if.end.r_exit.i.i.us.29 - br i1 %cmp.i.i.us.31, label %if.then.i.i.us.31, label %pregion_for_end.i.i - -if.then.i.i.us.31: ; preds = %if.end.r_exit.i.i.us.30 - %add8.i.i.us.31 = add i32 %reass.mul.i.i, %conv.i.i.us.31 - %idxprom.i.i.us.31 = sext i32 %add8.i.i.us.31 to i64 - %arrayidx.i.i.us.31 = getelementptr inbounds float, float* %21, i64 %idxprom.i.i.us.31 - %151 = bitcast float* %arrayidx.i.i.us.31 to i32* - %152 = load i32, i32* %151, align 4, !tbaa !12 - %arrayidx15.i.i.us.31 = getelementptr inbounds float, float* %17, i64 %idxprom.i.i.us.31 - %153 = bitcast float* %arrayidx15.i.i.us.31 to i32* - store i32 %152, i32* %153, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_doitgen_kernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = getelementptr i8*, i8** %0, i64 1 - %7 = bitcast i8** %6 to i32** - %8 = load i32*, i32** %7, align 8 - %9 = load i32, i32* %8, align 4 - %10 = getelementptr i8*, i8** %0, i64 2 - %11 = bitcast i8** %10 to i32** - %12 = load i32*, i32** %11, align 8 - %13 = load i32, i32* %12, align 4 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to float** - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 5 - %18 = bitcast i8** %17 to float** - %19 = load float*, float** %18, align 8 - %20 = getelementptr i8*, i8** %0, i64 6 - %21 = bitcast i8** %20 to i32** - %22 = load i32*, i32** %21, align 8 - %23 = load i32, i32* %22, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %mul6.i.i = mul i32 %23, %9 - %conv.i.i.us = trunc i64 %mul.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %13, %conv.i.i.us - %24 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.1 = or i32 %24, 1 - %cmp.i.i.us.1 = icmp sgt i32 %13, %conv.i.i.us.1 - %25 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.2 = or i32 %25, 2 - %cmp.i.i.us.2 = icmp sgt i32 %13, %conv.i.i.us.2 - %26 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.3 = or i32 %26, 3 - %cmp.i.i.us.3 = icmp sgt i32 %13, %conv.i.i.us.3 - %27 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.4 = or i32 %27, 4 - %cmp.i.i.us.4 = icmp sgt i32 %13, %conv.i.i.us.4 - %28 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.5 = or i32 %28, 5 - %cmp.i.i.us.5 = icmp sgt i32 %13, %conv.i.i.us.5 - %29 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.6 = or i32 %29, 6 - %cmp.i.i.us.6 = icmp sgt i32 %13, %conv.i.i.us.6 - %30 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.7 = or i32 %30, 7 - %cmp.i.i.us.7 = icmp sgt i32 %13, %conv.i.i.us.7 - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.8 = or i32 %31, 8 - %cmp.i.i.us.8 = icmp sgt i32 %13, %conv.i.i.us.8 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.9 = or i32 %32, 9 - %cmp.i.i.us.9 = icmp sgt i32 %13, %conv.i.i.us.9 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.10 = or i32 %33, 10 - %cmp.i.i.us.10 = icmp sgt i32 %13, %conv.i.i.us.10 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.11 = or i32 %34, 11 - %cmp.i.i.us.11 = icmp sgt i32 %13, %conv.i.i.us.11 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.12 = or i32 %35, 12 - %cmp.i.i.us.12 = icmp sgt i32 %13, %conv.i.i.us.12 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.13 = or i32 %36, 13 - %cmp.i.i.us.13 = icmp sgt i32 %13, %conv.i.i.us.13 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.14 = or i32 %37, 14 - %cmp.i.i.us.14 = icmp sgt i32 %13, %conv.i.i.us.14 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.15 = or i32 %38, 15 - %cmp.i.i.us.15 = icmp sgt i32 %13, %conv.i.i.us.15 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.16 = or i32 %39, 16 - %cmp.i.i.us.16 = icmp sgt i32 %13, %conv.i.i.us.16 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.17 = or i32 %40, 17 - %cmp.i.i.us.17 = icmp sgt i32 %13, %conv.i.i.us.17 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.18 = or i32 %41, 18 - %cmp.i.i.us.18 = icmp sgt i32 %13, %conv.i.i.us.18 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.19 = or i32 %42, 19 - %cmp.i.i.us.19 = icmp sgt i32 %13, %conv.i.i.us.19 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.20 = or i32 %43, 20 - %cmp.i.i.us.20 = icmp sgt i32 %13, %conv.i.i.us.20 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.21 = or i32 %44, 21 - %cmp.i.i.us.21 = icmp sgt i32 %13, %conv.i.i.us.21 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.22 = or i32 %45, 22 - %cmp.i.i.us.22 = icmp sgt i32 %13, %conv.i.i.us.22 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.23 = or i32 %46, 23 - %cmp.i.i.us.23 = icmp sgt i32 %13, %conv.i.i.us.23 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.24 = or i32 %47, 24 - %cmp.i.i.us.24 = icmp sgt i32 %13, %conv.i.i.us.24 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.25 = or i32 %48, 25 - %cmp.i.i.us.25 = icmp sgt i32 %13, %conv.i.i.us.25 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.26 = or i32 %49, 26 - %cmp.i.i.us.26 = icmp sgt i32 %13, %conv.i.i.us.26 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.27 = or i32 %50, 27 - %cmp.i.i.us.27 = icmp sgt i32 %13, %conv.i.i.us.27 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.28 = or i32 %51, 28 - %cmp.i.i.us.28 = icmp sgt i32 %13, %conv.i.i.us.28 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.29 = or i32 %52, 29 - %cmp.i.i.us.29 = icmp sgt i32 %13, %conv.i.i.us.29 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.30 = or i32 %53, 30 - %cmp.i.i.us.30 = icmp sgt i32 %13, %conv.i.i.us.30 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.31 = or i32 %54, 31 - %cmp.i.i.us.31 = icmp sgt i32 %13, %conv.i.i.us.31 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %58, %pregion_for_end.i.i ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp4.i.i = icmp sgt i32 %9, %conv2.i.i - %reass.add.i.i = add i32 %mul6.i.i, %conv2.i.i - %reass.mul.i.i = mul i32 %reass.add.i.i, %13 - br i1 %cmp4.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br i1 %cmp.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us.preheader - %add8.i.i.us = add i32 %reass.mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add8.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us - %55 = bitcast float* %arrayidx.i.i.us to i32* - %56 = load i32, i32* %55, align 4, !tbaa !12 - %arrayidx15.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us - %57 = bitcast float* %arrayidx15.i.i.us to i32* - store i32 %56, i32* %57, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - br i1 %cmp.i.i.us.1, label %if.then.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -pregion_for_end.i.i: ; preds = %if.then.i.i.us.31, %if.end.r_exit.i.i.us.30, %pregion_for_entry.pregion_for_init.i.i - %58 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond.not = icmp eq i64 %58, 8 - br i1 %exitcond.not, label %_pocl_kernel_doitgen_kernel2.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !19 - -_pocl_kernel_doitgen_kernel2.exit: ; preds = %pregion_for_end.i.i - ret void - -if.then.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %add8.i.i.us.1 = add i32 %reass.mul.i.i, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add8.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.1 - %59 = bitcast float* %arrayidx.i.i.us.1 to i32* - %60 = load i32, i32* %59, align 4, !tbaa !12 - %arrayidx15.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1 - %61 = bitcast float* %arrayidx15.i.i.us.1 to i32* - store i32 %60, i32* %61, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.then.i.i.us.1, %if.end.r_exit.i.i.us - br i1 %cmp.i.i.us.2, label %if.then.i.i.us.2, label %if.end.r_exit.i.i.us.2 - -if.then.i.i.us.2: ; preds = %if.end.r_exit.i.i.us.1 - %add8.i.i.us.2 = add i32 %reass.mul.i.i, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add8.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.2 - %62 = bitcast float* %arrayidx.i.i.us.2 to i32* - %63 = load i32, i32* %62, align 4, !tbaa !12 - %arrayidx15.i.i.us.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2 - %64 = bitcast float* %arrayidx15.i.i.us.2 to i32* - store i32 %63, i32* %64, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.2 - -if.end.r_exit.i.i.us.2: ; preds = %if.then.i.i.us.2, %if.end.r_exit.i.i.us.1 - br i1 %cmp.i.i.us.3, label %if.then.i.i.us.3, label %if.end.r_exit.i.i.us.3 - -if.then.i.i.us.3: ; preds = %if.end.r_exit.i.i.us.2 - %add8.i.i.us.3 = add i32 %reass.mul.i.i, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add8.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.3 - %65 = bitcast float* %arrayidx.i.i.us.3 to i32* - %66 = load i32, i32* %65, align 4, !tbaa !12 - %arrayidx15.i.i.us.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3 - %67 = bitcast float* %arrayidx15.i.i.us.3 to i32* - store i32 %66, i32* %67, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.3 - -if.end.r_exit.i.i.us.3: ; preds = %if.then.i.i.us.3, %if.end.r_exit.i.i.us.2 - br i1 %cmp.i.i.us.4, label %if.then.i.i.us.4, label %if.end.r_exit.i.i.us.4 - -if.then.i.i.us.4: ; preds = %if.end.r_exit.i.i.us.3 - %add8.i.i.us.4 = add i32 %reass.mul.i.i, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add8.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.4 - %68 = bitcast float* %arrayidx.i.i.us.4 to i32* - %69 = load i32, i32* %68, align 4, !tbaa !12 - %arrayidx15.i.i.us.4 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.4 - %70 = bitcast float* %arrayidx15.i.i.us.4 to i32* - store i32 %69, i32* %70, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.4 - -if.end.r_exit.i.i.us.4: ; preds = %if.then.i.i.us.4, %if.end.r_exit.i.i.us.3 - br i1 %cmp.i.i.us.5, label %if.then.i.i.us.5, label %if.end.r_exit.i.i.us.5 - -if.then.i.i.us.5: ; preds = %if.end.r_exit.i.i.us.4 - %add8.i.i.us.5 = add i32 %reass.mul.i.i, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add8.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.5 - %71 = bitcast float* %arrayidx.i.i.us.5 to i32* - %72 = load i32, i32* %71, align 4, !tbaa !12 - %arrayidx15.i.i.us.5 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.5 - %73 = bitcast float* %arrayidx15.i.i.us.5 to i32* - store i32 %72, i32* %73, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.5 - -if.end.r_exit.i.i.us.5: ; preds = %if.then.i.i.us.5, %if.end.r_exit.i.i.us.4 - br i1 %cmp.i.i.us.6, label %if.then.i.i.us.6, label %if.end.r_exit.i.i.us.6 - -if.then.i.i.us.6: ; preds = %if.end.r_exit.i.i.us.5 - %add8.i.i.us.6 = add i32 %reass.mul.i.i, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add8.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.6 - %74 = bitcast float* %arrayidx.i.i.us.6 to i32* - %75 = load i32, i32* %74, align 4, !tbaa !12 - %arrayidx15.i.i.us.6 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.6 - %76 = bitcast float* %arrayidx15.i.i.us.6 to i32* - store i32 %75, i32* %76, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.6 - -if.end.r_exit.i.i.us.6: ; preds = %if.then.i.i.us.6, %if.end.r_exit.i.i.us.5 - br i1 %cmp.i.i.us.7, label %if.then.i.i.us.7, label %if.end.r_exit.i.i.us.7 - -if.then.i.i.us.7: ; preds = %if.end.r_exit.i.i.us.6 - %add8.i.i.us.7 = add i32 %reass.mul.i.i, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add8.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.7 - %77 = bitcast float* %arrayidx.i.i.us.7 to i32* - %78 = load i32, i32* %77, align 4, !tbaa !12 - %arrayidx15.i.i.us.7 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.7 - %79 = bitcast float* %arrayidx15.i.i.us.7 to i32* - store i32 %78, i32* %79, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.7 - -if.end.r_exit.i.i.us.7: ; preds = %if.then.i.i.us.7, %if.end.r_exit.i.i.us.6 - br i1 %cmp.i.i.us.8, label %if.then.i.i.us.8, label %if.end.r_exit.i.i.us.8 - -if.then.i.i.us.8: ; preds = %if.end.r_exit.i.i.us.7 - %add8.i.i.us.8 = add i32 %reass.mul.i.i, %conv.i.i.us.8 - %idxprom.i.i.us.8 = sext i32 %add8.i.i.us.8 to i64 - %arrayidx.i.i.us.8 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.8 - %80 = bitcast float* %arrayidx.i.i.us.8 to i32* - %81 = load i32, i32* %80, align 4, !tbaa !12 - %arrayidx15.i.i.us.8 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.8 - %82 = bitcast float* %arrayidx15.i.i.us.8 to i32* - store i32 %81, i32* %82, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.8 - -if.end.r_exit.i.i.us.8: ; preds = %if.then.i.i.us.8, %if.end.r_exit.i.i.us.7 - br i1 %cmp.i.i.us.9, label %if.then.i.i.us.9, label %if.end.r_exit.i.i.us.9 - -if.then.i.i.us.9: ; preds = %if.end.r_exit.i.i.us.8 - %add8.i.i.us.9 = add i32 %reass.mul.i.i, %conv.i.i.us.9 - %idxprom.i.i.us.9 = sext i32 %add8.i.i.us.9 to i64 - %arrayidx.i.i.us.9 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.9 - %83 = bitcast float* %arrayidx.i.i.us.9 to i32* - %84 = load i32, i32* %83, align 4, !tbaa !12 - %arrayidx15.i.i.us.9 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.9 - %85 = bitcast float* %arrayidx15.i.i.us.9 to i32* - store i32 %84, i32* %85, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.9 - -if.end.r_exit.i.i.us.9: ; preds = %if.then.i.i.us.9, %if.end.r_exit.i.i.us.8 - br i1 %cmp.i.i.us.10, label %if.then.i.i.us.10, label %if.end.r_exit.i.i.us.10 - -if.then.i.i.us.10: ; preds = %if.end.r_exit.i.i.us.9 - %add8.i.i.us.10 = add i32 %reass.mul.i.i, %conv.i.i.us.10 - %idxprom.i.i.us.10 = sext i32 %add8.i.i.us.10 to i64 - %arrayidx.i.i.us.10 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.10 - %86 = bitcast float* %arrayidx.i.i.us.10 to i32* - %87 = load i32, i32* %86, align 4, !tbaa !12 - %arrayidx15.i.i.us.10 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.10 - %88 = bitcast float* %arrayidx15.i.i.us.10 to i32* - store i32 %87, i32* %88, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.10 - -if.end.r_exit.i.i.us.10: ; preds = %if.then.i.i.us.10, %if.end.r_exit.i.i.us.9 - br i1 %cmp.i.i.us.11, label %if.then.i.i.us.11, label %if.end.r_exit.i.i.us.11 - -if.then.i.i.us.11: ; preds = %if.end.r_exit.i.i.us.10 - %add8.i.i.us.11 = add i32 %reass.mul.i.i, %conv.i.i.us.11 - %idxprom.i.i.us.11 = sext i32 %add8.i.i.us.11 to i64 - %arrayidx.i.i.us.11 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.11 - %89 = bitcast float* %arrayidx.i.i.us.11 to i32* - %90 = load i32, i32* %89, align 4, !tbaa !12 - %arrayidx15.i.i.us.11 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.11 - %91 = bitcast float* %arrayidx15.i.i.us.11 to i32* - store i32 %90, i32* %91, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.11 - -if.end.r_exit.i.i.us.11: ; preds = %if.then.i.i.us.11, %if.end.r_exit.i.i.us.10 - br i1 %cmp.i.i.us.12, label %if.then.i.i.us.12, label %if.end.r_exit.i.i.us.12 - -if.then.i.i.us.12: ; preds = %if.end.r_exit.i.i.us.11 - %add8.i.i.us.12 = add i32 %reass.mul.i.i, %conv.i.i.us.12 - %idxprom.i.i.us.12 = sext i32 %add8.i.i.us.12 to i64 - %arrayidx.i.i.us.12 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.12 - %92 = bitcast float* %arrayidx.i.i.us.12 to i32* - %93 = load i32, i32* %92, align 4, !tbaa !12 - %arrayidx15.i.i.us.12 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.12 - %94 = bitcast float* %arrayidx15.i.i.us.12 to i32* - store i32 %93, i32* %94, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.12 - -if.end.r_exit.i.i.us.12: ; preds = %if.then.i.i.us.12, %if.end.r_exit.i.i.us.11 - br i1 %cmp.i.i.us.13, label %if.then.i.i.us.13, label %if.end.r_exit.i.i.us.13 - -if.then.i.i.us.13: ; preds = %if.end.r_exit.i.i.us.12 - %add8.i.i.us.13 = add i32 %reass.mul.i.i, %conv.i.i.us.13 - %idxprom.i.i.us.13 = sext i32 %add8.i.i.us.13 to i64 - %arrayidx.i.i.us.13 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.13 - %95 = bitcast float* %arrayidx.i.i.us.13 to i32* - %96 = load i32, i32* %95, align 4, !tbaa !12 - %arrayidx15.i.i.us.13 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.13 - %97 = bitcast float* %arrayidx15.i.i.us.13 to i32* - store i32 %96, i32* %97, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.13 - -if.end.r_exit.i.i.us.13: ; preds = %if.then.i.i.us.13, %if.end.r_exit.i.i.us.12 - br i1 %cmp.i.i.us.14, label %if.then.i.i.us.14, label %if.end.r_exit.i.i.us.14 - -if.then.i.i.us.14: ; preds = %if.end.r_exit.i.i.us.13 - %add8.i.i.us.14 = add i32 %reass.mul.i.i, %conv.i.i.us.14 - %idxprom.i.i.us.14 = sext i32 %add8.i.i.us.14 to i64 - %arrayidx.i.i.us.14 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.14 - %98 = bitcast float* %arrayidx.i.i.us.14 to i32* - %99 = load i32, i32* %98, align 4, !tbaa !12 - %arrayidx15.i.i.us.14 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.14 - %100 = bitcast float* %arrayidx15.i.i.us.14 to i32* - store i32 %99, i32* %100, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.14 - -if.end.r_exit.i.i.us.14: ; preds = %if.then.i.i.us.14, %if.end.r_exit.i.i.us.13 - br i1 %cmp.i.i.us.15, label %if.then.i.i.us.15, label %if.end.r_exit.i.i.us.15 - -if.then.i.i.us.15: ; preds = %if.end.r_exit.i.i.us.14 - %add8.i.i.us.15 = add i32 %reass.mul.i.i, %conv.i.i.us.15 - %idxprom.i.i.us.15 = sext i32 %add8.i.i.us.15 to i64 - %arrayidx.i.i.us.15 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.15 - %101 = bitcast float* %arrayidx.i.i.us.15 to i32* - %102 = load i32, i32* %101, align 4, !tbaa !12 - %arrayidx15.i.i.us.15 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.15 - %103 = bitcast float* %arrayidx15.i.i.us.15 to i32* - store i32 %102, i32* %103, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.15 - -if.end.r_exit.i.i.us.15: ; preds = %if.then.i.i.us.15, %if.end.r_exit.i.i.us.14 - br i1 %cmp.i.i.us.16, label %if.then.i.i.us.16, label %if.end.r_exit.i.i.us.16 - -if.then.i.i.us.16: ; preds = %if.end.r_exit.i.i.us.15 - %add8.i.i.us.16 = add i32 %reass.mul.i.i, %conv.i.i.us.16 - %idxprom.i.i.us.16 = sext i32 %add8.i.i.us.16 to i64 - %arrayidx.i.i.us.16 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.16 - %104 = bitcast float* %arrayidx.i.i.us.16 to i32* - %105 = load i32, i32* %104, align 4, !tbaa !12 - %arrayidx15.i.i.us.16 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.16 - %106 = bitcast float* %arrayidx15.i.i.us.16 to i32* - store i32 %105, i32* %106, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.16 - -if.end.r_exit.i.i.us.16: ; preds = %if.then.i.i.us.16, %if.end.r_exit.i.i.us.15 - br i1 %cmp.i.i.us.17, label %if.then.i.i.us.17, label %if.end.r_exit.i.i.us.17 - -if.then.i.i.us.17: ; preds = %if.end.r_exit.i.i.us.16 - %add8.i.i.us.17 = add i32 %reass.mul.i.i, %conv.i.i.us.17 - %idxprom.i.i.us.17 = sext i32 %add8.i.i.us.17 to i64 - %arrayidx.i.i.us.17 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.17 - %107 = bitcast float* %arrayidx.i.i.us.17 to i32* - %108 = load i32, i32* %107, align 4, !tbaa !12 - %arrayidx15.i.i.us.17 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.17 - %109 = bitcast float* %arrayidx15.i.i.us.17 to i32* - store i32 %108, i32* %109, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.17 - -if.end.r_exit.i.i.us.17: ; preds = %if.then.i.i.us.17, %if.end.r_exit.i.i.us.16 - br i1 %cmp.i.i.us.18, label %if.then.i.i.us.18, label %if.end.r_exit.i.i.us.18 - -if.then.i.i.us.18: ; preds = %if.end.r_exit.i.i.us.17 - %add8.i.i.us.18 = add i32 %reass.mul.i.i, %conv.i.i.us.18 - %idxprom.i.i.us.18 = sext i32 %add8.i.i.us.18 to i64 - %arrayidx.i.i.us.18 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.18 - %110 = bitcast float* %arrayidx.i.i.us.18 to i32* - %111 = load i32, i32* %110, align 4, !tbaa !12 - %arrayidx15.i.i.us.18 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.18 - %112 = bitcast float* %arrayidx15.i.i.us.18 to i32* - store i32 %111, i32* %112, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.18 - -if.end.r_exit.i.i.us.18: ; preds = %if.then.i.i.us.18, %if.end.r_exit.i.i.us.17 - br i1 %cmp.i.i.us.19, label %if.then.i.i.us.19, label %if.end.r_exit.i.i.us.19 - -if.then.i.i.us.19: ; preds = %if.end.r_exit.i.i.us.18 - %add8.i.i.us.19 = add i32 %reass.mul.i.i, %conv.i.i.us.19 - %idxprom.i.i.us.19 = sext i32 %add8.i.i.us.19 to i64 - %arrayidx.i.i.us.19 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.19 - %113 = bitcast float* %arrayidx.i.i.us.19 to i32* - %114 = load i32, i32* %113, align 4, !tbaa !12 - %arrayidx15.i.i.us.19 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.19 - %115 = bitcast float* %arrayidx15.i.i.us.19 to i32* - store i32 %114, i32* %115, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.19 - -if.end.r_exit.i.i.us.19: ; preds = %if.then.i.i.us.19, %if.end.r_exit.i.i.us.18 - br i1 %cmp.i.i.us.20, label %if.then.i.i.us.20, label %if.end.r_exit.i.i.us.20 - -if.then.i.i.us.20: ; preds = %if.end.r_exit.i.i.us.19 - %add8.i.i.us.20 = add i32 %reass.mul.i.i, %conv.i.i.us.20 - %idxprom.i.i.us.20 = sext i32 %add8.i.i.us.20 to i64 - %arrayidx.i.i.us.20 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.20 - %116 = bitcast float* %arrayidx.i.i.us.20 to i32* - %117 = load i32, i32* %116, align 4, !tbaa !12 - %arrayidx15.i.i.us.20 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.20 - %118 = bitcast float* %arrayidx15.i.i.us.20 to i32* - store i32 %117, i32* %118, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.20 - -if.end.r_exit.i.i.us.20: ; preds = %if.then.i.i.us.20, %if.end.r_exit.i.i.us.19 - br i1 %cmp.i.i.us.21, label %if.then.i.i.us.21, label %if.end.r_exit.i.i.us.21 - -if.then.i.i.us.21: ; preds = %if.end.r_exit.i.i.us.20 - %add8.i.i.us.21 = add i32 %reass.mul.i.i, %conv.i.i.us.21 - %idxprom.i.i.us.21 = sext i32 %add8.i.i.us.21 to i64 - %arrayidx.i.i.us.21 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.21 - %119 = bitcast float* %arrayidx.i.i.us.21 to i32* - %120 = load i32, i32* %119, align 4, !tbaa !12 - %arrayidx15.i.i.us.21 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.21 - %121 = bitcast float* %arrayidx15.i.i.us.21 to i32* - store i32 %120, i32* %121, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.21 - -if.end.r_exit.i.i.us.21: ; preds = %if.then.i.i.us.21, %if.end.r_exit.i.i.us.20 - br i1 %cmp.i.i.us.22, label %if.then.i.i.us.22, label %if.end.r_exit.i.i.us.22 - -if.then.i.i.us.22: ; preds = %if.end.r_exit.i.i.us.21 - %add8.i.i.us.22 = add i32 %reass.mul.i.i, %conv.i.i.us.22 - %idxprom.i.i.us.22 = sext i32 %add8.i.i.us.22 to i64 - %arrayidx.i.i.us.22 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.22 - %122 = bitcast float* %arrayidx.i.i.us.22 to i32* - %123 = load i32, i32* %122, align 4, !tbaa !12 - %arrayidx15.i.i.us.22 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.22 - %124 = bitcast float* %arrayidx15.i.i.us.22 to i32* - store i32 %123, i32* %124, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.22 - -if.end.r_exit.i.i.us.22: ; preds = %if.then.i.i.us.22, %if.end.r_exit.i.i.us.21 - br i1 %cmp.i.i.us.23, label %if.then.i.i.us.23, label %if.end.r_exit.i.i.us.23 - -if.then.i.i.us.23: ; preds = %if.end.r_exit.i.i.us.22 - %add8.i.i.us.23 = add i32 %reass.mul.i.i, %conv.i.i.us.23 - %idxprom.i.i.us.23 = sext i32 %add8.i.i.us.23 to i64 - %arrayidx.i.i.us.23 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.23 - %125 = bitcast float* %arrayidx.i.i.us.23 to i32* - %126 = load i32, i32* %125, align 4, !tbaa !12 - %arrayidx15.i.i.us.23 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.23 - %127 = bitcast float* %arrayidx15.i.i.us.23 to i32* - store i32 %126, i32* %127, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.23 - -if.end.r_exit.i.i.us.23: ; preds = %if.then.i.i.us.23, %if.end.r_exit.i.i.us.22 - br i1 %cmp.i.i.us.24, label %if.then.i.i.us.24, label %if.end.r_exit.i.i.us.24 - -if.then.i.i.us.24: ; preds = %if.end.r_exit.i.i.us.23 - %add8.i.i.us.24 = add i32 %reass.mul.i.i, %conv.i.i.us.24 - %idxprom.i.i.us.24 = sext i32 %add8.i.i.us.24 to i64 - %arrayidx.i.i.us.24 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.24 - %128 = bitcast float* %arrayidx.i.i.us.24 to i32* - %129 = load i32, i32* %128, align 4, !tbaa !12 - %arrayidx15.i.i.us.24 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.24 - %130 = bitcast float* %arrayidx15.i.i.us.24 to i32* - store i32 %129, i32* %130, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.24 - -if.end.r_exit.i.i.us.24: ; preds = %if.then.i.i.us.24, %if.end.r_exit.i.i.us.23 - br i1 %cmp.i.i.us.25, label %if.then.i.i.us.25, label %if.end.r_exit.i.i.us.25 - -if.then.i.i.us.25: ; preds = %if.end.r_exit.i.i.us.24 - %add8.i.i.us.25 = add i32 %reass.mul.i.i, %conv.i.i.us.25 - %idxprom.i.i.us.25 = sext i32 %add8.i.i.us.25 to i64 - %arrayidx.i.i.us.25 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.25 - %131 = bitcast float* %arrayidx.i.i.us.25 to i32* - %132 = load i32, i32* %131, align 4, !tbaa !12 - %arrayidx15.i.i.us.25 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.25 - %133 = bitcast float* %arrayidx15.i.i.us.25 to i32* - store i32 %132, i32* %133, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.25 - -if.end.r_exit.i.i.us.25: ; preds = %if.then.i.i.us.25, %if.end.r_exit.i.i.us.24 - br i1 %cmp.i.i.us.26, label %if.then.i.i.us.26, label %if.end.r_exit.i.i.us.26 - -if.then.i.i.us.26: ; preds = %if.end.r_exit.i.i.us.25 - %add8.i.i.us.26 = add i32 %reass.mul.i.i, %conv.i.i.us.26 - %idxprom.i.i.us.26 = sext i32 %add8.i.i.us.26 to i64 - %arrayidx.i.i.us.26 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.26 - %134 = bitcast float* %arrayidx.i.i.us.26 to i32* - %135 = load i32, i32* %134, align 4, !tbaa !12 - %arrayidx15.i.i.us.26 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.26 - %136 = bitcast float* %arrayidx15.i.i.us.26 to i32* - store i32 %135, i32* %136, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.26 - -if.end.r_exit.i.i.us.26: ; preds = %if.then.i.i.us.26, %if.end.r_exit.i.i.us.25 - br i1 %cmp.i.i.us.27, label %if.then.i.i.us.27, label %if.end.r_exit.i.i.us.27 - -if.then.i.i.us.27: ; preds = %if.end.r_exit.i.i.us.26 - %add8.i.i.us.27 = add i32 %reass.mul.i.i, %conv.i.i.us.27 - %idxprom.i.i.us.27 = sext i32 %add8.i.i.us.27 to i64 - %arrayidx.i.i.us.27 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.27 - %137 = bitcast float* %arrayidx.i.i.us.27 to i32* - %138 = load i32, i32* %137, align 4, !tbaa !12 - %arrayidx15.i.i.us.27 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.27 - %139 = bitcast float* %arrayidx15.i.i.us.27 to i32* - store i32 %138, i32* %139, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.27 - -if.end.r_exit.i.i.us.27: ; preds = %if.then.i.i.us.27, %if.end.r_exit.i.i.us.26 - br i1 %cmp.i.i.us.28, label %if.then.i.i.us.28, label %if.end.r_exit.i.i.us.28 - -if.then.i.i.us.28: ; preds = %if.end.r_exit.i.i.us.27 - %add8.i.i.us.28 = add i32 %reass.mul.i.i, %conv.i.i.us.28 - %idxprom.i.i.us.28 = sext i32 %add8.i.i.us.28 to i64 - %arrayidx.i.i.us.28 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.28 - %140 = bitcast float* %arrayidx.i.i.us.28 to i32* - %141 = load i32, i32* %140, align 4, !tbaa !12 - %arrayidx15.i.i.us.28 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.28 - %142 = bitcast float* %arrayidx15.i.i.us.28 to i32* - store i32 %141, i32* %142, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.28 - -if.end.r_exit.i.i.us.28: ; preds = %if.then.i.i.us.28, %if.end.r_exit.i.i.us.27 - br i1 %cmp.i.i.us.29, label %if.then.i.i.us.29, label %if.end.r_exit.i.i.us.29 - -if.then.i.i.us.29: ; preds = %if.end.r_exit.i.i.us.28 - %add8.i.i.us.29 = add i32 %reass.mul.i.i, %conv.i.i.us.29 - %idxprom.i.i.us.29 = sext i32 %add8.i.i.us.29 to i64 - %arrayidx.i.i.us.29 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.29 - %143 = bitcast float* %arrayidx.i.i.us.29 to i32* - %144 = load i32, i32* %143, align 4, !tbaa !12 - %arrayidx15.i.i.us.29 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.29 - %145 = bitcast float* %arrayidx15.i.i.us.29 to i32* - store i32 %144, i32* %145, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.29 - -if.end.r_exit.i.i.us.29: ; preds = %if.then.i.i.us.29, %if.end.r_exit.i.i.us.28 - br i1 %cmp.i.i.us.30, label %if.then.i.i.us.30, label %if.end.r_exit.i.i.us.30 - -if.then.i.i.us.30: ; preds = %if.end.r_exit.i.i.us.29 - %add8.i.i.us.30 = add i32 %reass.mul.i.i, %conv.i.i.us.30 - %idxprom.i.i.us.30 = sext i32 %add8.i.i.us.30 to i64 - %arrayidx.i.i.us.30 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.30 - %146 = bitcast float* %arrayidx.i.i.us.30 to i32* - %147 = load i32, i32* %146, align 4, !tbaa !12 - %arrayidx15.i.i.us.30 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.30 - %148 = bitcast float* %arrayidx15.i.i.us.30 to i32* - store i32 %147, i32* %148, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.us.30 - -if.end.r_exit.i.i.us.30: ; preds = %if.then.i.i.us.30, %if.end.r_exit.i.i.us.29 - br i1 %cmp.i.i.us.31, label %if.then.i.i.us.31, label %pregion_for_end.i.i - -if.then.i.i.us.31: ; preds = %if.end.r_exit.i.i.us.30 - %add8.i.i.us.31 = add i32 %reass.mul.i.i, %conv.i.i.us.31 - %idxprom.i.i.us.31 = sext i32 %add8.i.i.us.31 to i64 - %arrayidx.i.i.us.31 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.us.31 - %149 = bitcast float* %arrayidx.i.i.us.31 to i32* - %150 = load i32, i32* %149, align 4, !tbaa !12 - %arrayidx15.i.i.us.31 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.31 - %151 = bitcast float* %arrayidx15.i.i.us.31 to i32* - store i32 %150, i32* %151, align 4, !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i -} - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nofree norecurse nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"int", !"int", !"int", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"int", !"int", !"int", !"float*", !"float*", !"float*", !"int"} -!9 = !{!"", !"", !"", !"", !"", !"", !""} -!10 = !{!"nr", !"nq", !"np", !"A", !"C4", !"sum", !"r"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17, !18} -!17 = distinct !{} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.parallel_accesses", !18} diff --git a/pocl_irs/fdtd-2d_kernel1.ll b/pocl_irs/fdtd-2d_kernel1.ll deleted file mode 100644 index 5a4b6b8..0000000 --- a/pocl_irs/fdtd-2d_kernel1.ll +++ /dev/null @@ -1,2324 +0,0 @@ -; ModuleID = './CE/HMLMAPAJJBLPBKGGCCBDEJJLIPIDBFCCIIFBD/fdtd_kernel1/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare double @llvm.fmuladd.f64(double, double, double) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_fdtd_kernel1(float* nocapture readonly %0, float* nocapture readnone %1, float* nocapture %2, float* nocapture readonly %3, i32 %4, i32 %5, i32 %6, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %7, i64 %8, i64 %9, i64 %10) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %8, 5 - %mul3.i.i = shl i64 %9, 3 - %idxprom.i = sext i32 %4 to i64 - %arrayidx.i = getelementptr inbounds float, float* %0, i64 %idxprom.i - %12 = bitcast float* %arrayidx.i to i32* - %conv.i.us.us = trunc i64 %mul.i.i to i32 - %cmp4.i.us.us = icmp slt i32 %conv.i.us.us, %6 - %13 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.1 = or i32 %13, 1 - %cmp4.i.us.us.1 = icmp slt i32 %conv.i.us.us.1, %6 - %14 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.2 = or i32 %14, 2 - %cmp4.i.us.us.2 = icmp slt i32 %conv.i.us.us.2, %6 - %15 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.3 = or i32 %15, 3 - %cmp4.i.us.us.3 = icmp slt i32 %conv.i.us.us.3, %6 - %16 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.4 = or i32 %16, 4 - %cmp4.i.us.us.4 = icmp slt i32 %conv.i.us.us.4, %6 - %17 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.5 = or i32 %17, 5 - %cmp4.i.us.us.5 = icmp slt i32 %conv.i.us.us.5, %6 - %18 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.6 = or i32 %18, 6 - %cmp4.i.us.us.6 = icmp slt i32 %conv.i.us.us.6, %6 - %19 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.7 = or i32 %19, 7 - %cmp4.i.us.us.7 = icmp slt i32 %conv.i.us.us.7, %6 - %20 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.8 = or i32 %20, 8 - %cmp4.i.us.us.8 = icmp slt i32 %conv.i.us.us.8, %6 - %21 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.9 = or i32 %21, 9 - %cmp4.i.us.us.9 = icmp slt i32 %conv.i.us.us.9, %6 - %22 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.10 = or i32 %22, 10 - %cmp4.i.us.us.10 = icmp slt i32 %conv.i.us.us.10, %6 - %23 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.11 = or i32 %23, 11 - %cmp4.i.us.us.11 = icmp slt i32 %conv.i.us.us.11, %6 - %24 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.12 = or i32 %24, 12 - %cmp4.i.us.us.12 = icmp slt i32 %conv.i.us.us.12, %6 - %25 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.13 = or i32 %25, 13 - %cmp4.i.us.us.13 = icmp slt i32 %conv.i.us.us.13, %6 - %26 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.14 = or i32 %26, 14 - %cmp4.i.us.us.14 = icmp slt i32 %conv.i.us.us.14, %6 - %27 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.15 = or i32 %27, 15 - %cmp4.i.us.us.15 = icmp slt i32 %conv.i.us.us.15, %6 - %28 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.16 = or i32 %28, 16 - %cmp4.i.us.us.16 = icmp slt i32 %conv.i.us.us.16, %6 - %29 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.17 = or i32 %29, 17 - %cmp4.i.us.us.17 = icmp slt i32 %conv.i.us.us.17, %6 - %30 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.18 = or i32 %30, 18 - %cmp4.i.us.us.18 = icmp slt i32 %conv.i.us.us.18, %6 - %31 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.19 = or i32 %31, 19 - %cmp4.i.us.us.19 = icmp slt i32 %conv.i.us.us.19, %6 - %32 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.20 = or i32 %32, 20 - %cmp4.i.us.us.20 = icmp slt i32 %conv.i.us.us.20, %6 - %33 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.21 = or i32 %33, 21 - %cmp4.i.us.us.21 = icmp slt i32 %conv.i.us.us.21, %6 - %34 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.22 = or i32 %34, 22 - %cmp4.i.us.us.22 = icmp slt i32 %conv.i.us.us.22, %6 - %35 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.23 = or i32 %35, 23 - %cmp4.i.us.us.23 = icmp slt i32 %conv.i.us.us.23, %6 - %36 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.24 = or i32 %36, 24 - %cmp4.i.us.us.24 = icmp slt i32 %conv.i.us.us.24, %6 - %37 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.25 = or i32 %37, 25 - %cmp4.i.us.us.25 = icmp slt i32 %conv.i.us.us.25, %6 - %38 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.26 = or i32 %38, 26 - %cmp4.i.us.us.26 = icmp slt i32 %conv.i.us.us.26, %6 - %39 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.27 = or i32 %39, 27 - %cmp4.i.us.us.27 = icmp slt i32 %conv.i.us.us.27, %6 - %40 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.28 = or i32 %40, 28 - %cmp4.i.us.us.28 = icmp slt i32 %conv.i.us.us.28, %6 - %41 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.29 = or i32 %41, 29 - %cmp4.i.us.us.29 = icmp slt i32 %conv.i.us.us.29, %6 - %42 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.30 = or i32 %42, 30 - %cmp4.i.us.us.30 = icmp slt i32 %conv.i.us.us.30, %6 - %43 = trunc i64 %mul.i.i to i32 - %conv.i.us.us.31 = or i32 %43, 31 - %cmp4.i.us.us.31 = icmp slt i32 %conv.i.us.us.31, %6 - %44 = trunc i64 %9 to i32 - %45 = mul i32 %44, %6 - %46 = shl i32 %45, 3 - %47 = trunc i64 %8 to i32 - %48 = shl i32 %47, 5 - %49 = add i32 %46, %48 - %50 = zext i32 %6 to i64 - %51 = shl i32 %44, 3 - %52 = add i32 %51, -1 - %53 = mul i32 %52, %6 - %54 = add i32 %53, %48 - %55 = trunc i64 %9 to i32 - %56 = mul i32 %55, %6 - %57 = shl i32 %56, 3 - %58 = trunc i64 %8 to i32 - %59 = shl i32 %58, 5 - %60 = add i32 %57, %59 - %61 = zext i32 %6 to i64 - %scevgep9 = getelementptr float, float* %2, i64 32 - %62 = shl i32 %55, 3 - %63 = add i32 %62, -1 - %64 = mul i32 %63, %6 - %65 = add i32 %64, %59 - %scevgep14 = getelementptr float, float* %3, i64 32 - %scevgep19 = getelementptr float, float* %3, i64 32 - %bound022 = icmp ugt float* %scevgep19, %2 - %bound123 = icmp ugt float* %scevgep9, %3 - %found.conflict24 = and i1 %bound022, %bound123 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert25 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat26 = shufflevector <8 x i32> %broadcast.splatinsert25, <8 x i32> undef, <8 x i32> zeroinitializer - %66 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %67 = or <8 x i32> %66, - %68 = icmp sgt <8 x i32> %broadcast.splat26, %67 - %69 = extractelement <8 x i32> %67, i32 0 - %70 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %71 = or <8 x i32> %70, - %72 = icmp sgt <8 x i32> %broadcast.splat26, %71 - %73 = extractelement <8 x i32> %71, i32 0 - %74 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %75 = or <8 x i32> %74, - %76 = icmp sgt <8 x i32> %broadcast.splat26, %75 - %77 = extractelement <8 x i32> %75, i32 0 - %78 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %79 = or <8 x i32> %78, - %80 = icmp sgt <8 x i32> %broadcast.splat26, %79 - %81 = extractelement <8 x i32> %79, i32 0 - br label %pregion_for_entry.pregion_for_init.i - -pregion_for_entry.pregion_for_init.i: ; preds = %pregion_for_end.i, %11 - %_local_id_y.0 = phi i64 [ 0, %11 ], [ %168, %pregion_for_end.i ] - %82 = mul i64 %_local_id_y.0, %61 - %83 = trunc i64 %82 to i32 - %84 = add i32 %60, %83 - %85 = sext i32 %84 to i64 - %scevgep = getelementptr float, float* %2, i64 %85 - %scevgep10 = getelementptr float, float* %scevgep9, i64 %85 - %86 = trunc i64 %82 to i32 - %87 = add i32 %65, %86 - %88 = sext i32 %87 to i64 - %scevgep12 = getelementptr float, float* %3, i64 %88 - %scevgep15 = getelementptr float, float* %scevgep14, i64 %88 - %89 = mul i64 %_local_id_y.0, %50 - %90 = trunc i64 %89 to i32 - %91 = add i32 %49, %90 - %92 = trunc i64 %89 to i32 - %93 = add i32 %54, %92 - %add6.i.i = add nuw nsw i64 %_local_id_y.0, %mul3.i.i - %conv2.i = trunc i64 %add6.i.i to i32 - %cmp.i = icmp slt i32 %conv2.i, %5 - %mul.i = mul nsw i32 %conv2.i, %6 - %sub.i = add nsw i32 %conv2.i, -1 - %mul22.i = mul nsw i32 %sub.i, %6 - br i1 %cmp.i, label %pregion_for_entry.pregion_for_init.i.split.us, label %pregion_for_end.i - -pregion_for_entry.pregion_for_init.i.split.us: ; preds = %pregion_for_entry.pregion_for_init.i - %cmp6.i = icmp eq i32 %conv2.i, 0 - br i1 %cmp6.i, label %pregion_for_entry.entry.i.us.us.preheader, label %vector.scevcheck - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.split.us - %94 = icmp sgt i32 %91, 2147483616 - %95 = icmp sgt i32 %93, 2147483616 - %96 = or i1 %94, %95 - br i1 %96, label %pregion_for_entry.entry.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %bound0 = icmp ult float* %scevgep, %scevgep15 - %bound1 = icmp ult float* %scevgep12, %scevgep10 - %found.conflict = and i1 %bound0, %bound1 - %conflict.rdx = or i1 %found.conflict, %found.conflict24 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.us.preheader, label %vector.body - -vector.body: ; preds = %vector.memcheck - %97 = add nsw i32 %mul.i, %69 - %98 = sext i32 %97 to i64 - %99 = getelementptr inbounds float, float* %2, i64 %98 - %100 = bitcast float* %99 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %100, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %101 = fpext <8 x float> %wide.masked.load to <8 x double> - %102 = getelementptr inbounds float, float* %3, i64 %98 - %103 = bitcast float* %102 to <8 x float>* - %wide.masked.load27 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %103, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !22 - %104 = add nsw i32 %mul22.i, %69 - %105 = sext i32 %104 to i64 - %106 = getelementptr inbounds float, float* %3, i64 %105 - %107 = bitcast float* %106 to <8 x float>* - %wide.masked.load28 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %107, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !23 - %108 = fsub <8 x float> %wide.masked.load27, %wide.masked.load28 - %109 = fpext <8 x float> %108 to <8 x double> - %110 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %109, <8 x double> , <8 x double> %101) - %111 = fptrunc <8 x double> %110 to <8 x float> - %112 = bitcast float* %99 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %111, <8 x float>* %112, i32 4, <8 x i1> %68), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !24 - %113 = add nsw i32 %mul.i, %73 - %114 = sext i32 %113 to i64 - %115 = getelementptr inbounds float, float* %2, i64 %114 - %116 = bitcast float* %115 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %116, i32 4, <8 x i1> %72, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %117 = fpext <8 x float> %wide.masked.load.1 to <8 x double> - %118 = getelementptr inbounds float, float* %3, i64 %114 - %119 = bitcast float* %118 to <8 x float>* - %wide.masked.load27.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %119, i32 4, <8 x i1> %72, <8 x float> undef), !tbaa !12, !alias.scope !22 - %120 = add nsw i32 %mul22.i, %73 - %121 = sext i32 %120 to i64 - %122 = getelementptr inbounds float, float* %3, i64 %121 - %123 = bitcast float* %122 to <8 x float>* - %wide.masked.load28.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %123, i32 4, <8 x i1> %72, <8 x float> undef), !tbaa !12, !alias.scope !23 - %124 = fsub <8 x float> %wide.masked.load27.1, %wide.masked.load28.1 - %125 = fpext <8 x float> %124 to <8 x double> - %126 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %125, <8 x double> , <8 x double> %117) - %127 = fptrunc <8 x double> %126 to <8 x float> - %128 = bitcast float* %115 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %127, <8 x float>* %128, i32 4, <8 x i1> %72), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !24 - %129 = add nsw i32 %mul.i, %77 - %130 = sext i32 %129 to i64 - %131 = getelementptr inbounds float, float* %2, i64 %130 - %132 = bitcast float* %131 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %132, i32 4, <8 x i1> %76, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %133 = fpext <8 x float> %wide.masked.load.2 to <8 x double> - %134 = getelementptr inbounds float, float* %3, i64 %130 - %135 = bitcast float* %134 to <8 x float>* - %wide.masked.load27.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %135, i32 4, <8 x i1> %76, <8 x float> undef), !tbaa !12, !alias.scope !22 - %136 = add nsw i32 %mul22.i, %77 - %137 = sext i32 %136 to i64 - %138 = getelementptr inbounds float, float* %3, i64 %137 - %139 = bitcast float* %138 to <8 x float>* - %wide.masked.load28.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %139, i32 4, <8 x i1> %76, <8 x float> undef), !tbaa !12, !alias.scope !23 - %140 = fsub <8 x float> %wide.masked.load27.2, %wide.masked.load28.2 - %141 = fpext <8 x float> %140 to <8 x double> - %142 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %141, <8 x double> , <8 x double> %133) - %143 = fptrunc <8 x double> %142 to <8 x float> - %144 = bitcast float* %131 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %143, <8 x float>* %144, i32 4, <8 x i1> %76), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !24 - %145 = add nsw i32 %mul.i, %81 - %146 = sext i32 %145 to i64 - %147 = getelementptr inbounds float, float* %2, i64 %146 - %148 = bitcast float* %147 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %148, i32 4, <8 x i1> %80, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %149 = fpext <8 x float> %wide.masked.load.3 to <8 x double> - %150 = getelementptr inbounds float, float* %3, i64 %146 - %151 = bitcast float* %150 to <8 x float>* - %wide.masked.load27.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %151, i32 4, <8 x i1> %80, <8 x float> undef), !tbaa !12, !alias.scope !22 - %152 = add nsw i32 %mul22.i, %81 - %153 = sext i32 %152 to i64 - %154 = getelementptr inbounds float, float* %3, i64 %153 - %155 = bitcast float* %154 to <8 x float>* - %wide.masked.load28.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %155, i32 4, <8 x i1> %80, <8 x float> undef), !tbaa !12, !alias.scope !23 - %156 = fsub <8 x float> %wide.masked.load27.3, %wide.masked.load28.3 - %157 = fpext <8 x float> %156 to <8 x double> - %158 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %157, <8 x double> , <8 x double> %149) - %159 = fptrunc <8 x double> %158 to <8 x float> - %160 = bitcast float* %147 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %159, <8 x float>* %160, i32 4, <8 x i1> %80), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !24 - br label %pregion_for_end.i - -pregion_for_entry.entry.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.split.us - br i1 %cmp4.i.us.us, label %if.then8.i.us.us, label %if.end34.i.us.us - -if.then8.i.us.us: ; preds = %pregion_for_entry.entry.i.us.us.preheader - %161 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us = add nuw nsw i32 %mul.i, %conv.i.us.us - %idxprom11.i.us.us = sext i32 %add10.i.us.us to i64 - %arrayidx12.i.us.us = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us - %162 = bitcast float* %arrayidx12.i.us.us to i32* - store i32 %161, i32* %162, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us - -if.end34.i.us.us: ; preds = %if.then8.i.us.us, %pregion_for_entry.entry.i.us.us.preheader - br i1 %cmp4.i.us.us.1, label %if.then8.i.us.us.1, label %if.end34.i.us.us.1 - -pregion_for_entry.entry.i.us: ; preds = %if.end34.i.us.1, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.us.preheader ], [ %235, %if.end34.i.us.1 ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %6 - br i1 %cmp4.i.us, label %if.else.i.us, label %if.end34.i.us - -if.else.i.us: ; preds = %pregion_for_entry.entry.i.us - %add14.i.us = add nsw i32 %mul.i, %conv.i.us - %idxprom15.i.us = sext i32 %add14.i.us to i64 - %arrayidx16.i.us = getelementptr inbounds float, float* %2, i64 %idxprom15.i.us - %163 = load float, float* %arrayidx16.i.us, align 4, !tbaa !12 - %conv17.i.us = fpext float %163 to double - %arrayidx21.i.us = getelementptr inbounds float, float* %3, i64 %idxprom15.i.us - %164 = load float, float* %arrayidx21.i.us, align 4, !tbaa !12 - %add23.i.us = add nsw i32 %mul22.i, %conv.i.us - %idxprom24.i.us = sext i32 %add23.i.us to i64 - %arrayidx25.i.us = getelementptr inbounds float, float* %3, i64 %idxprom24.i.us - %165 = load float, float* %arrayidx25.i.us, align 4, !tbaa !12 - %sub26.i.us = fsub float %164, %165 - %conv27.i.us = fpext float %sub26.i.us to double - %166 = tail call double @llvm.fmuladd.f64(double %conv27.i.us, double -5.000000e-01, double %conv17.i.us) #5 - %conv29.i.us = fptrunc double %166 to float - store float %conv29.i.us, float* %arrayidx16.i.us, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us - -if.end34.i.us: ; preds = %if.else.i.us, %pregion_for_entry.entry.i.us - %167 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1 = add nuw nsw i64 %167, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp4.i.us.1 = icmp slt i32 %conv.i.us.1, %6 - br i1 %cmp4.i.us.1, label %if.else.i.us.1, label %if.end34.i.us.1 - -pregion_for_end.i.loopexit: ; preds = %if.end34.i.us.1 - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %if.then8.i.us.us.31, %if.end34.i.us.us.30, %pregion_for_end.i.loopexit, %vector.body, %pregion_for_entry.pregion_for_init.i - %168 = add nuw nsw i64 %_local_id_y.0, 1 - %exitcond4.not = icmp eq i64 %168, 8 - br i1 %exitcond4.not, label %fdtd_kernel1.exit, label %pregion_for_entry.pregion_for_init.i, !llvm.loop !27 - -fdtd_kernel1.exit: ; preds = %pregion_for_end.i - ret void - -if.then8.i.us.us.1: ; preds = %if.end34.i.us.us - %169 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.1 = add nuw nsw i32 %mul.i, %conv.i.us.us.1 - %idxprom11.i.us.us.1 = sext i32 %add10.i.us.us.1 to i64 - %arrayidx12.i.us.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.1 - %170 = bitcast float* %arrayidx12.i.us.us.1 to i32* - store i32 %169, i32* %170, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.1 - -if.end34.i.us.us.1: ; preds = %if.then8.i.us.us.1, %if.end34.i.us.us - br i1 %cmp4.i.us.us.2, label %if.then8.i.us.us.2, label %if.end34.i.us.us.2 - -if.then8.i.us.us.2: ; preds = %if.end34.i.us.us.1 - %171 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.2 = add nuw nsw i32 %mul.i, %conv.i.us.us.2 - %idxprom11.i.us.us.2 = sext i32 %add10.i.us.us.2 to i64 - %arrayidx12.i.us.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.2 - %172 = bitcast float* %arrayidx12.i.us.us.2 to i32* - store i32 %171, i32* %172, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.2 - -if.end34.i.us.us.2: ; preds = %if.then8.i.us.us.2, %if.end34.i.us.us.1 - br i1 %cmp4.i.us.us.3, label %if.then8.i.us.us.3, label %if.end34.i.us.us.3 - -if.then8.i.us.us.3: ; preds = %if.end34.i.us.us.2 - %173 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.3 = add nuw nsw i32 %mul.i, %conv.i.us.us.3 - %idxprom11.i.us.us.3 = sext i32 %add10.i.us.us.3 to i64 - %arrayidx12.i.us.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.3 - %174 = bitcast float* %arrayidx12.i.us.us.3 to i32* - store i32 %173, i32* %174, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.3 - -if.end34.i.us.us.3: ; preds = %if.then8.i.us.us.3, %if.end34.i.us.us.2 - br i1 %cmp4.i.us.us.4, label %if.then8.i.us.us.4, label %if.end34.i.us.us.4 - -if.then8.i.us.us.4: ; preds = %if.end34.i.us.us.3 - %175 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.4 = add nuw nsw i32 %mul.i, %conv.i.us.us.4 - %idxprom11.i.us.us.4 = sext i32 %add10.i.us.us.4 to i64 - %arrayidx12.i.us.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.4 - %176 = bitcast float* %arrayidx12.i.us.us.4 to i32* - store i32 %175, i32* %176, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.4 - -if.end34.i.us.us.4: ; preds = %if.then8.i.us.us.4, %if.end34.i.us.us.3 - br i1 %cmp4.i.us.us.5, label %if.then8.i.us.us.5, label %if.end34.i.us.us.5 - -if.then8.i.us.us.5: ; preds = %if.end34.i.us.us.4 - %177 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.5 = add nuw nsw i32 %mul.i, %conv.i.us.us.5 - %idxprom11.i.us.us.5 = sext i32 %add10.i.us.us.5 to i64 - %arrayidx12.i.us.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.5 - %178 = bitcast float* %arrayidx12.i.us.us.5 to i32* - store i32 %177, i32* %178, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.5 - -if.end34.i.us.us.5: ; preds = %if.then8.i.us.us.5, %if.end34.i.us.us.4 - br i1 %cmp4.i.us.us.6, label %if.then8.i.us.us.6, label %if.end34.i.us.us.6 - -if.then8.i.us.us.6: ; preds = %if.end34.i.us.us.5 - %179 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.6 = add nuw nsw i32 %mul.i, %conv.i.us.us.6 - %idxprom11.i.us.us.6 = sext i32 %add10.i.us.us.6 to i64 - %arrayidx12.i.us.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.6 - %180 = bitcast float* %arrayidx12.i.us.us.6 to i32* - store i32 %179, i32* %180, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.6 - -if.end34.i.us.us.6: ; preds = %if.then8.i.us.us.6, %if.end34.i.us.us.5 - br i1 %cmp4.i.us.us.7, label %if.then8.i.us.us.7, label %if.end34.i.us.us.7 - -if.then8.i.us.us.7: ; preds = %if.end34.i.us.us.6 - %181 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.7 = add nuw nsw i32 %mul.i, %conv.i.us.us.7 - %idxprom11.i.us.us.7 = sext i32 %add10.i.us.us.7 to i64 - %arrayidx12.i.us.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.7 - %182 = bitcast float* %arrayidx12.i.us.us.7 to i32* - store i32 %181, i32* %182, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.7 - -if.end34.i.us.us.7: ; preds = %if.then8.i.us.us.7, %if.end34.i.us.us.6 - br i1 %cmp4.i.us.us.8, label %if.then8.i.us.us.8, label %if.end34.i.us.us.8 - -if.then8.i.us.us.8: ; preds = %if.end34.i.us.us.7 - %183 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.8 = add nuw nsw i32 %mul.i, %conv.i.us.us.8 - %idxprom11.i.us.us.8 = sext i32 %add10.i.us.us.8 to i64 - %arrayidx12.i.us.us.8 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.8 - %184 = bitcast float* %arrayidx12.i.us.us.8 to i32* - store i32 %183, i32* %184, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.8 - -if.end34.i.us.us.8: ; preds = %if.then8.i.us.us.8, %if.end34.i.us.us.7 - br i1 %cmp4.i.us.us.9, label %if.then8.i.us.us.9, label %if.end34.i.us.us.9 - -if.then8.i.us.us.9: ; preds = %if.end34.i.us.us.8 - %185 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.9 = add nuw nsw i32 %mul.i, %conv.i.us.us.9 - %idxprom11.i.us.us.9 = sext i32 %add10.i.us.us.9 to i64 - %arrayidx12.i.us.us.9 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.9 - %186 = bitcast float* %arrayidx12.i.us.us.9 to i32* - store i32 %185, i32* %186, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.9 - -if.end34.i.us.us.9: ; preds = %if.then8.i.us.us.9, %if.end34.i.us.us.8 - br i1 %cmp4.i.us.us.10, label %if.then8.i.us.us.10, label %if.end34.i.us.us.10 - -if.then8.i.us.us.10: ; preds = %if.end34.i.us.us.9 - %187 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.10 = add nuw nsw i32 %mul.i, %conv.i.us.us.10 - %idxprom11.i.us.us.10 = sext i32 %add10.i.us.us.10 to i64 - %arrayidx12.i.us.us.10 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.10 - %188 = bitcast float* %arrayidx12.i.us.us.10 to i32* - store i32 %187, i32* %188, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.10 - -if.end34.i.us.us.10: ; preds = %if.then8.i.us.us.10, %if.end34.i.us.us.9 - br i1 %cmp4.i.us.us.11, label %if.then8.i.us.us.11, label %if.end34.i.us.us.11 - -if.then8.i.us.us.11: ; preds = %if.end34.i.us.us.10 - %189 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.11 = add nuw nsw i32 %mul.i, %conv.i.us.us.11 - %idxprom11.i.us.us.11 = sext i32 %add10.i.us.us.11 to i64 - %arrayidx12.i.us.us.11 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.11 - %190 = bitcast float* %arrayidx12.i.us.us.11 to i32* - store i32 %189, i32* %190, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.11 - -if.end34.i.us.us.11: ; preds = %if.then8.i.us.us.11, %if.end34.i.us.us.10 - br i1 %cmp4.i.us.us.12, label %if.then8.i.us.us.12, label %if.end34.i.us.us.12 - -if.then8.i.us.us.12: ; preds = %if.end34.i.us.us.11 - %191 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.12 = add nuw nsw i32 %mul.i, %conv.i.us.us.12 - %idxprom11.i.us.us.12 = sext i32 %add10.i.us.us.12 to i64 - %arrayidx12.i.us.us.12 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.12 - %192 = bitcast float* %arrayidx12.i.us.us.12 to i32* - store i32 %191, i32* %192, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.12 - -if.end34.i.us.us.12: ; preds = %if.then8.i.us.us.12, %if.end34.i.us.us.11 - br i1 %cmp4.i.us.us.13, label %if.then8.i.us.us.13, label %if.end34.i.us.us.13 - -if.then8.i.us.us.13: ; preds = %if.end34.i.us.us.12 - %193 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.13 = add nuw nsw i32 %mul.i, %conv.i.us.us.13 - %idxprom11.i.us.us.13 = sext i32 %add10.i.us.us.13 to i64 - %arrayidx12.i.us.us.13 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.13 - %194 = bitcast float* %arrayidx12.i.us.us.13 to i32* - store i32 %193, i32* %194, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.13 - -if.end34.i.us.us.13: ; preds = %if.then8.i.us.us.13, %if.end34.i.us.us.12 - br i1 %cmp4.i.us.us.14, label %if.then8.i.us.us.14, label %if.end34.i.us.us.14 - -if.then8.i.us.us.14: ; preds = %if.end34.i.us.us.13 - %195 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.14 = add nuw nsw i32 %mul.i, %conv.i.us.us.14 - %idxprom11.i.us.us.14 = sext i32 %add10.i.us.us.14 to i64 - %arrayidx12.i.us.us.14 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.14 - %196 = bitcast float* %arrayidx12.i.us.us.14 to i32* - store i32 %195, i32* %196, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.14 - -if.end34.i.us.us.14: ; preds = %if.then8.i.us.us.14, %if.end34.i.us.us.13 - br i1 %cmp4.i.us.us.15, label %if.then8.i.us.us.15, label %if.end34.i.us.us.15 - -if.then8.i.us.us.15: ; preds = %if.end34.i.us.us.14 - %197 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.15 = add nuw nsw i32 %mul.i, %conv.i.us.us.15 - %idxprom11.i.us.us.15 = sext i32 %add10.i.us.us.15 to i64 - %arrayidx12.i.us.us.15 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.15 - %198 = bitcast float* %arrayidx12.i.us.us.15 to i32* - store i32 %197, i32* %198, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.15 - -if.end34.i.us.us.15: ; preds = %if.then8.i.us.us.15, %if.end34.i.us.us.14 - br i1 %cmp4.i.us.us.16, label %if.then8.i.us.us.16, label %if.end34.i.us.us.16 - -if.then8.i.us.us.16: ; preds = %if.end34.i.us.us.15 - %199 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.16 = add nuw nsw i32 %mul.i, %conv.i.us.us.16 - %idxprom11.i.us.us.16 = sext i32 %add10.i.us.us.16 to i64 - %arrayidx12.i.us.us.16 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.16 - %200 = bitcast float* %arrayidx12.i.us.us.16 to i32* - store i32 %199, i32* %200, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.16 - -if.end34.i.us.us.16: ; preds = %if.then8.i.us.us.16, %if.end34.i.us.us.15 - br i1 %cmp4.i.us.us.17, label %if.then8.i.us.us.17, label %if.end34.i.us.us.17 - -if.then8.i.us.us.17: ; preds = %if.end34.i.us.us.16 - %201 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.17 = add nuw nsw i32 %mul.i, %conv.i.us.us.17 - %idxprom11.i.us.us.17 = sext i32 %add10.i.us.us.17 to i64 - %arrayidx12.i.us.us.17 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.17 - %202 = bitcast float* %arrayidx12.i.us.us.17 to i32* - store i32 %201, i32* %202, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.17 - -if.end34.i.us.us.17: ; preds = %if.then8.i.us.us.17, %if.end34.i.us.us.16 - br i1 %cmp4.i.us.us.18, label %if.then8.i.us.us.18, label %if.end34.i.us.us.18 - -if.then8.i.us.us.18: ; preds = %if.end34.i.us.us.17 - %203 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.18 = add nuw nsw i32 %mul.i, %conv.i.us.us.18 - %idxprom11.i.us.us.18 = sext i32 %add10.i.us.us.18 to i64 - %arrayidx12.i.us.us.18 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.18 - %204 = bitcast float* %arrayidx12.i.us.us.18 to i32* - store i32 %203, i32* %204, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.18 - -if.end34.i.us.us.18: ; preds = %if.then8.i.us.us.18, %if.end34.i.us.us.17 - br i1 %cmp4.i.us.us.19, label %if.then8.i.us.us.19, label %if.end34.i.us.us.19 - -if.then8.i.us.us.19: ; preds = %if.end34.i.us.us.18 - %205 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.19 = add nuw nsw i32 %mul.i, %conv.i.us.us.19 - %idxprom11.i.us.us.19 = sext i32 %add10.i.us.us.19 to i64 - %arrayidx12.i.us.us.19 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.19 - %206 = bitcast float* %arrayidx12.i.us.us.19 to i32* - store i32 %205, i32* %206, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.19 - -if.end34.i.us.us.19: ; preds = %if.then8.i.us.us.19, %if.end34.i.us.us.18 - br i1 %cmp4.i.us.us.20, label %if.then8.i.us.us.20, label %if.end34.i.us.us.20 - -if.then8.i.us.us.20: ; preds = %if.end34.i.us.us.19 - %207 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.20 = add nuw nsw i32 %mul.i, %conv.i.us.us.20 - %idxprom11.i.us.us.20 = sext i32 %add10.i.us.us.20 to i64 - %arrayidx12.i.us.us.20 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.20 - %208 = bitcast float* %arrayidx12.i.us.us.20 to i32* - store i32 %207, i32* %208, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.20 - -if.end34.i.us.us.20: ; preds = %if.then8.i.us.us.20, %if.end34.i.us.us.19 - br i1 %cmp4.i.us.us.21, label %if.then8.i.us.us.21, label %if.end34.i.us.us.21 - -if.then8.i.us.us.21: ; preds = %if.end34.i.us.us.20 - %209 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.21 = add nuw nsw i32 %mul.i, %conv.i.us.us.21 - %idxprom11.i.us.us.21 = sext i32 %add10.i.us.us.21 to i64 - %arrayidx12.i.us.us.21 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.21 - %210 = bitcast float* %arrayidx12.i.us.us.21 to i32* - store i32 %209, i32* %210, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.21 - -if.end34.i.us.us.21: ; preds = %if.then8.i.us.us.21, %if.end34.i.us.us.20 - br i1 %cmp4.i.us.us.22, label %if.then8.i.us.us.22, label %if.end34.i.us.us.22 - -if.then8.i.us.us.22: ; preds = %if.end34.i.us.us.21 - %211 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.22 = add nuw nsw i32 %mul.i, %conv.i.us.us.22 - %idxprom11.i.us.us.22 = sext i32 %add10.i.us.us.22 to i64 - %arrayidx12.i.us.us.22 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.22 - %212 = bitcast float* %arrayidx12.i.us.us.22 to i32* - store i32 %211, i32* %212, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.22 - -if.end34.i.us.us.22: ; preds = %if.then8.i.us.us.22, %if.end34.i.us.us.21 - br i1 %cmp4.i.us.us.23, label %if.then8.i.us.us.23, label %if.end34.i.us.us.23 - -if.then8.i.us.us.23: ; preds = %if.end34.i.us.us.22 - %213 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.23 = add nuw nsw i32 %mul.i, %conv.i.us.us.23 - %idxprom11.i.us.us.23 = sext i32 %add10.i.us.us.23 to i64 - %arrayidx12.i.us.us.23 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.23 - %214 = bitcast float* %arrayidx12.i.us.us.23 to i32* - store i32 %213, i32* %214, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.23 - -if.end34.i.us.us.23: ; preds = %if.then8.i.us.us.23, %if.end34.i.us.us.22 - br i1 %cmp4.i.us.us.24, label %if.then8.i.us.us.24, label %if.end34.i.us.us.24 - -if.then8.i.us.us.24: ; preds = %if.end34.i.us.us.23 - %215 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.24 = add nuw nsw i32 %mul.i, %conv.i.us.us.24 - %idxprom11.i.us.us.24 = sext i32 %add10.i.us.us.24 to i64 - %arrayidx12.i.us.us.24 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.24 - %216 = bitcast float* %arrayidx12.i.us.us.24 to i32* - store i32 %215, i32* %216, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.24 - -if.end34.i.us.us.24: ; preds = %if.then8.i.us.us.24, %if.end34.i.us.us.23 - br i1 %cmp4.i.us.us.25, label %if.then8.i.us.us.25, label %if.end34.i.us.us.25 - -if.then8.i.us.us.25: ; preds = %if.end34.i.us.us.24 - %217 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.25 = add nuw nsw i32 %mul.i, %conv.i.us.us.25 - %idxprom11.i.us.us.25 = sext i32 %add10.i.us.us.25 to i64 - %arrayidx12.i.us.us.25 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.25 - %218 = bitcast float* %arrayidx12.i.us.us.25 to i32* - store i32 %217, i32* %218, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.25 - -if.end34.i.us.us.25: ; preds = %if.then8.i.us.us.25, %if.end34.i.us.us.24 - br i1 %cmp4.i.us.us.26, label %if.then8.i.us.us.26, label %if.end34.i.us.us.26 - -if.then8.i.us.us.26: ; preds = %if.end34.i.us.us.25 - %219 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.26 = add nuw nsw i32 %mul.i, %conv.i.us.us.26 - %idxprom11.i.us.us.26 = sext i32 %add10.i.us.us.26 to i64 - %arrayidx12.i.us.us.26 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.26 - %220 = bitcast float* %arrayidx12.i.us.us.26 to i32* - store i32 %219, i32* %220, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.26 - -if.end34.i.us.us.26: ; preds = %if.then8.i.us.us.26, %if.end34.i.us.us.25 - br i1 %cmp4.i.us.us.27, label %if.then8.i.us.us.27, label %if.end34.i.us.us.27 - -if.then8.i.us.us.27: ; preds = %if.end34.i.us.us.26 - %221 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.27 = add nuw nsw i32 %mul.i, %conv.i.us.us.27 - %idxprom11.i.us.us.27 = sext i32 %add10.i.us.us.27 to i64 - %arrayidx12.i.us.us.27 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.27 - %222 = bitcast float* %arrayidx12.i.us.us.27 to i32* - store i32 %221, i32* %222, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.27 - -if.end34.i.us.us.27: ; preds = %if.then8.i.us.us.27, %if.end34.i.us.us.26 - br i1 %cmp4.i.us.us.28, label %if.then8.i.us.us.28, label %if.end34.i.us.us.28 - -if.then8.i.us.us.28: ; preds = %if.end34.i.us.us.27 - %223 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.28 = add nuw nsw i32 %mul.i, %conv.i.us.us.28 - %idxprom11.i.us.us.28 = sext i32 %add10.i.us.us.28 to i64 - %arrayidx12.i.us.us.28 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.28 - %224 = bitcast float* %arrayidx12.i.us.us.28 to i32* - store i32 %223, i32* %224, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.28 - -if.end34.i.us.us.28: ; preds = %if.then8.i.us.us.28, %if.end34.i.us.us.27 - br i1 %cmp4.i.us.us.29, label %if.then8.i.us.us.29, label %if.end34.i.us.us.29 - -if.then8.i.us.us.29: ; preds = %if.end34.i.us.us.28 - %225 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.29 = add nuw nsw i32 %mul.i, %conv.i.us.us.29 - %idxprom11.i.us.us.29 = sext i32 %add10.i.us.us.29 to i64 - %arrayidx12.i.us.us.29 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.29 - %226 = bitcast float* %arrayidx12.i.us.us.29 to i32* - store i32 %225, i32* %226, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.29 - -if.end34.i.us.us.29: ; preds = %if.then8.i.us.us.29, %if.end34.i.us.us.28 - br i1 %cmp4.i.us.us.30, label %if.then8.i.us.us.30, label %if.end34.i.us.us.30 - -if.then8.i.us.us.30: ; preds = %if.end34.i.us.us.29 - %227 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.30 = add nuw nsw i32 %mul.i, %conv.i.us.us.30 - %idxprom11.i.us.us.30 = sext i32 %add10.i.us.us.30 to i64 - %arrayidx12.i.us.us.30 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.30 - %228 = bitcast float* %arrayidx12.i.us.us.30 to i32* - store i32 %227, i32* %228, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.us.30 - -if.end34.i.us.us.30: ; preds = %if.then8.i.us.us.30, %if.end34.i.us.us.29 - br i1 %cmp4.i.us.us.31, label %if.then8.i.us.us.31, label %pregion_for_end.i - -if.then8.i.us.us.31: ; preds = %if.end34.i.us.us.30 - %229 = load i32, i32* %12, align 4, !tbaa !12 - %add10.i.us.us.31 = add nuw nsw i32 %mul.i, %conv.i.us.us.31 - %idxprom11.i.us.us.31 = sext i32 %add10.i.us.us.31 to i64 - %arrayidx12.i.us.us.31 = getelementptr inbounds float, float* %2, i64 %idxprom11.i.us.us.31 - %230 = bitcast float* %arrayidx12.i.us.us.31 to i32* - store i32 %229, i32* %230, align 4, !tbaa !12, !llvm.access.group !24 - br label %pregion_for_end.i - -if.else.i.us.1: ; preds = %if.end34.i.us - %add14.i.us.1 = add nsw i32 %mul.i, %conv.i.us.1 - %idxprom15.i.us.1 = sext i32 %add14.i.us.1 to i64 - %arrayidx16.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom15.i.us.1 - %231 = load float, float* %arrayidx16.i.us.1, align 4, !tbaa !12 - %conv17.i.us.1 = fpext float %231 to double - %arrayidx21.i.us.1 = getelementptr inbounds float, float* %3, i64 %idxprom15.i.us.1 - %232 = load float, float* %arrayidx21.i.us.1, align 4, !tbaa !12 - %add23.i.us.1 = add nsw i32 %mul22.i, %conv.i.us.1 - %idxprom24.i.us.1 = sext i32 %add23.i.us.1 to i64 - %arrayidx25.i.us.1 = getelementptr inbounds float, float* %3, i64 %idxprom24.i.us.1 - %233 = load float, float* %arrayidx25.i.us.1, align 4, !tbaa !12 - %sub26.i.us.1 = fsub float %232, %233 - %conv27.i.us.1 = fpext float %sub26.i.us.1 to double - %234 = tail call double @llvm.fmuladd.f64(double %conv27.i.us.1, double -5.000000e-01, double %conv17.i.us.1) #5 - %conv29.i.us.1 = fptrunc double %234 to float - store float %conv29.i.us.1, float* %arrayidx16.i.us.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.us.1 - -if.end34.i.us.1: ; preds = %if.else.i.us.1, %if.end34.i.us - %235 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %235, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !29 -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_fdtd_kernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 2 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 3 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 4 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 5 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 6 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %idxprom.i.i = sext i32 %20 to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %8, i64 %idxprom.i.i - %29 = bitcast float* %arrayidx.i.i to i32* - %conv.i.i.us.us = trunc i64 %mul.i.i.i to i32 - %cmp4.i.i.us.us = icmp sgt i32 %28, %conv.i.i.us.us - %30 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.1 = or i32 %30, 1 - %cmp4.i.i.us.us.1 = icmp sgt i32 %28, %conv.i.i.us.us.1 - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.2 = or i32 %31, 2 - %cmp4.i.i.us.us.2 = icmp sgt i32 %28, %conv.i.i.us.us.2 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.3 = or i32 %32, 3 - %cmp4.i.i.us.us.3 = icmp sgt i32 %28, %conv.i.i.us.us.3 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.4 = or i32 %33, 4 - %cmp4.i.i.us.us.4 = icmp sgt i32 %28, %conv.i.i.us.us.4 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.5 = or i32 %34, 5 - %cmp4.i.i.us.us.5 = icmp sgt i32 %28, %conv.i.i.us.us.5 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.6 = or i32 %35, 6 - %cmp4.i.i.us.us.6 = icmp sgt i32 %28, %conv.i.i.us.us.6 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.7 = or i32 %36, 7 - %cmp4.i.i.us.us.7 = icmp sgt i32 %28, %conv.i.i.us.us.7 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.8 = or i32 %37, 8 - %cmp4.i.i.us.us.8 = icmp sgt i32 %28, %conv.i.i.us.us.8 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.9 = or i32 %38, 9 - %cmp4.i.i.us.us.9 = icmp sgt i32 %28, %conv.i.i.us.us.9 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.10 = or i32 %39, 10 - %cmp4.i.i.us.us.10 = icmp sgt i32 %28, %conv.i.i.us.us.10 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.11 = or i32 %40, 11 - %cmp4.i.i.us.us.11 = icmp sgt i32 %28, %conv.i.i.us.us.11 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.12 = or i32 %41, 12 - %cmp4.i.i.us.us.12 = icmp sgt i32 %28, %conv.i.i.us.us.12 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.13 = or i32 %42, 13 - %cmp4.i.i.us.us.13 = icmp sgt i32 %28, %conv.i.i.us.us.13 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.14 = or i32 %43, 14 - %cmp4.i.i.us.us.14 = icmp sgt i32 %28, %conv.i.i.us.us.14 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.15 = or i32 %44, 15 - %cmp4.i.i.us.us.15 = icmp sgt i32 %28, %conv.i.i.us.us.15 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.16 = or i32 %45, 16 - %cmp4.i.i.us.us.16 = icmp sgt i32 %28, %conv.i.i.us.us.16 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.17 = or i32 %46, 17 - %cmp4.i.i.us.us.17 = icmp sgt i32 %28, %conv.i.i.us.us.17 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.18 = or i32 %47, 18 - %cmp4.i.i.us.us.18 = icmp sgt i32 %28, %conv.i.i.us.us.18 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.19 = or i32 %48, 19 - %cmp4.i.i.us.us.19 = icmp sgt i32 %28, %conv.i.i.us.us.19 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.20 = or i32 %49, 20 - %cmp4.i.i.us.us.20 = icmp sgt i32 %28, %conv.i.i.us.us.20 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.21 = or i32 %50, 21 - %cmp4.i.i.us.us.21 = icmp sgt i32 %28, %conv.i.i.us.us.21 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.22 = or i32 %51, 22 - %cmp4.i.i.us.us.22 = icmp sgt i32 %28, %conv.i.i.us.us.22 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.23 = or i32 %52, 23 - %cmp4.i.i.us.us.23 = icmp sgt i32 %28, %conv.i.i.us.us.23 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.24 = or i32 %53, 24 - %cmp4.i.i.us.us.24 = icmp sgt i32 %28, %conv.i.i.us.us.24 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.25 = or i32 %54, 25 - %cmp4.i.i.us.us.25 = icmp sgt i32 %28, %conv.i.i.us.us.25 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.26 = or i32 %55, 26 - %cmp4.i.i.us.us.26 = icmp sgt i32 %28, %conv.i.i.us.us.26 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.27 = or i32 %56, 27 - %cmp4.i.i.us.us.27 = icmp sgt i32 %28, %conv.i.i.us.us.27 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.28 = or i32 %57, 28 - %cmp4.i.i.us.us.28 = icmp sgt i32 %28, %conv.i.i.us.us.28 - %58 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.29 = or i32 %58, 29 - %cmp4.i.i.us.us.29 = icmp sgt i32 %28, %conv.i.i.us.us.29 - %59 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.30 = or i32 %59, 30 - %cmp4.i.i.us.us.30 = icmp sgt i32 %28, %conv.i.i.us.us.30 - %60 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.31 = or i32 %60, 31 - %cmp4.i.i.us.us.31 = icmp sgt i32 %28, %conv.i.i.us.us.31 - %61 = trunc i64 %3 to i32 - %62 = mul i32 %28, %61 - %63 = shl i32 %62, 3 - %64 = trunc i64 %2 to i32 - %65 = shl i32 %64, 5 - %66 = add i32 %63, %65 - %67 = zext i32 %28 to i64 - %68 = shl i32 %61, 3 - %69 = add i32 %68, -1 - %70 = mul i32 %28, %69 - %71 = add i32 %70, %65 - %72 = trunc i64 %3 to i32 - %73 = mul i32 %28, %72 - %74 = shl i32 %73, 3 - %75 = trunc i64 %2 to i32 - %76 = shl i32 %75, 5 - %77 = add i32 %74, %76 - %78 = zext i32 %28 to i64 - %scevgep9 = getelementptr float, float* %12, i64 32 - %79 = shl i32 %72, 3 - %80 = add i32 %79, -1 - %81 = mul i32 %28, %80 - %82 = add i32 %81, %76 - %scevgep14 = getelementptr float, float* %16, i64 32 - %scevgep19 = getelementptr float, float* %16, i64 32 - %bound022 = icmp ult float* %12, %scevgep19 - %bound123 = icmp ult float* %16, %scevgep9 - %found.conflict24 = and i1 %bound022, %bound123 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert25 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat26 = shufflevector <8 x i32> %broadcast.splatinsert25, <8 x i32> undef, <8 x i32> zeroinitializer - %83 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %84 = or <8 x i32> %83, - %85 = icmp sgt <8 x i32> %broadcast.splat26, %84 - %86 = extractelement <8 x i32> %84, i32 0 - %87 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %88 = or <8 x i32> %87, - %89 = icmp sgt <8 x i32> %broadcast.splat26, %88 - %90 = extractelement <8 x i32> %88, i32 0 - %91 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %92 = or <8 x i32> %91, - %93 = icmp sgt <8 x i32> %broadcast.splat26, %92 - %94 = extractelement <8 x i32> %92, i32 0 - %95 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %96 = or <8 x i32> %95, - %97 = icmp sgt <8 x i32> %broadcast.splat26, %96 - %98 = extractelement <8 x i32> %96, i32 0 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %185, %pregion_for_end.i.i ] - %99 = mul i64 %_local_id_y.i.0, %78 - %100 = trunc i64 %99 to i32 - %101 = add i32 %77, %100 - %102 = sext i32 %101 to i64 - %scevgep = getelementptr float, float* %12, i64 %102 - %scevgep10 = getelementptr float, float* %scevgep9, i64 %102 - %103 = trunc i64 %99 to i32 - %104 = add i32 %82, %103 - %105 = sext i32 %104 to i64 - %scevgep12 = getelementptr float, float* %16, i64 %105 - %scevgep15 = getelementptr float, float* %scevgep14, i64 %105 - %106 = mul i64 %_local_id_y.i.0, %67 - %107 = trunc i64 %106 to i32 - %108 = add i32 %66, %107 - %109 = trunc i64 %106 to i32 - %110 = add i32 %71, %109 - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %24, %conv2.i.i - %mul.i.i = mul nsw i32 %28, %conv2.i.i - %sub.i.i = add nsw i32 %conv2.i.i, -1 - %mul22.i.i = mul nsw i32 %sub.i.i, %28 - br i1 %cmp.i.i, label %pregion_for_entry.pregion_for_init.i.i.split.us, label %pregion_for_end.i.i - -pregion_for_entry.pregion_for_init.i.i.split.us: ; preds = %pregion_for_entry.pregion_for_init.i.i - %cmp6.i.i = icmp eq i32 %conv2.i.i, 0 - br i1 %cmp6.i.i, label %pregion_for_entry.entry.i.i.us.us.preheader, label %vector.scevcheck - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i.split.us - %111 = icmp sgt i32 %108, 2147483616 - %112 = icmp sgt i32 %110, 2147483616 - %113 = or i1 %111, %112 - br i1 %113, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %bound0 = icmp ult float* %scevgep, %scevgep15 - %bound1 = icmp ult float* %scevgep12, %scevgep10 - %found.conflict = and i1 %bound0, %bound1 - %conflict.rdx = or i1 %found.conflict, %found.conflict24 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.body - -vector.body: ; preds = %vector.memcheck - %114 = add nsw i32 %mul.i.i, %86 - %115 = sext i32 %114 to i64 - %116 = getelementptr inbounds float, float* %12, i64 %115 - %117 = bitcast float* %116 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %117, i32 4, <8 x i1> %85, <8 x float> undef), !tbaa !12, !alias.scope !32, !noalias !35 - %118 = fpext <8 x float> %wide.masked.load to <8 x double> - %119 = getelementptr inbounds float, float* %16, i64 %115 - %120 = bitcast float* %119 to <8 x float>* - %wide.masked.load27 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %120, i32 4, <8 x i1> %85, <8 x float> undef), !tbaa !12, !alias.scope !38 - %121 = add nsw i32 %mul22.i.i, %86 - %122 = sext i32 %121 to i64 - %123 = getelementptr inbounds float, float* %16, i64 %122 - %124 = bitcast float* %123 to <8 x float>* - %wide.masked.load28 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %124, i32 4, <8 x i1> %85, <8 x float> undef), !tbaa !12, !alias.scope !39 - %125 = fsub <8 x float> %wide.masked.load27, %wide.masked.load28 - %126 = fpext <8 x float> %125 to <8 x double> - %127 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %126, <8 x double> , <8 x double> %118) - %128 = fptrunc <8 x double> %127 to <8 x float> - %129 = bitcast float* %116 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %128, <8 x float>* %129, i32 4, <8 x i1> %85), !tbaa !12, !alias.scope !32, !noalias !35, !llvm.access.group !24 - %130 = add nsw i32 %mul.i.i, %90 - %131 = sext i32 %130 to i64 - %132 = getelementptr inbounds float, float* %12, i64 %131 - %133 = bitcast float* %132 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %133, i32 4, <8 x i1> %89, <8 x float> undef), !tbaa !12, !alias.scope !32, !noalias !35 - %134 = fpext <8 x float> %wide.masked.load.1 to <8 x double> - %135 = getelementptr inbounds float, float* %16, i64 %131 - %136 = bitcast float* %135 to <8 x float>* - %wide.masked.load27.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %136, i32 4, <8 x i1> %89, <8 x float> undef), !tbaa !12, !alias.scope !38 - %137 = add nsw i32 %mul22.i.i, %90 - %138 = sext i32 %137 to i64 - %139 = getelementptr inbounds float, float* %16, i64 %138 - %140 = bitcast float* %139 to <8 x float>* - %wide.masked.load28.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %140, i32 4, <8 x i1> %89, <8 x float> undef), !tbaa !12, !alias.scope !39 - %141 = fsub <8 x float> %wide.masked.load27.1, %wide.masked.load28.1 - %142 = fpext <8 x float> %141 to <8 x double> - %143 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %142, <8 x double> , <8 x double> %134) - %144 = fptrunc <8 x double> %143 to <8 x float> - %145 = bitcast float* %132 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %144, <8 x float>* %145, i32 4, <8 x i1> %89), !tbaa !12, !alias.scope !32, !noalias !35, !llvm.access.group !24 - %146 = add nsw i32 %mul.i.i, %94 - %147 = sext i32 %146 to i64 - %148 = getelementptr inbounds float, float* %12, i64 %147 - %149 = bitcast float* %148 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %149, i32 4, <8 x i1> %93, <8 x float> undef), !tbaa !12, !alias.scope !32, !noalias !35 - %150 = fpext <8 x float> %wide.masked.load.2 to <8 x double> - %151 = getelementptr inbounds float, float* %16, i64 %147 - %152 = bitcast float* %151 to <8 x float>* - %wide.masked.load27.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %152, i32 4, <8 x i1> %93, <8 x float> undef), !tbaa !12, !alias.scope !38 - %153 = add nsw i32 %mul22.i.i, %94 - %154 = sext i32 %153 to i64 - %155 = getelementptr inbounds float, float* %16, i64 %154 - %156 = bitcast float* %155 to <8 x float>* - %wide.masked.load28.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %156, i32 4, <8 x i1> %93, <8 x float> undef), !tbaa !12, !alias.scope !39 - %157 = fsub <8 x float> %wide.masked.load27.2, %wide.masked.load28.2 - %158 = fpext <8 x float> %157 to <8 x double> - %159 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %158, <8 x double> , <8 x double> %150) - %160 = fptrunc <8 x double> %159 to <8 x float> - %161 = bitcast float* %148 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %160, <8 x float>* %161, i32 4, <8 x i1> %93), !tbaa !12, !alias.scope !32, !noalias !35, !llvm.access.group !24 - %162 = add nsw i32 %mul.i.i, %98 - %163 = sext i32 %162 to i64 - %164 = getelementptr inbounds float, float* %12, i64 %163 - %165 = bitcast float* %164 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %165, i32 4, <8 x i1> %97, <8 x float> undef), !tbaa !12, !alias.scope !32, !noalias !35 - %166 = fpext <8 x float> %wide.masked.load.3 to <8 x double> - %167 = getelementptr inbounds float, float* %16, i64 %163 - %168 = bitcast float* %167 to <8 x float>* - %wide.masked.load27.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %168, i32 4, <8 x i1> %97, <8 x float> undef), !tbaa !12, !alias.scope !38 - %169 = add nsw i32 %mul22.i.i, %98 - %170 = sext i32 %169 to i64 - %171 = getelementptr inbounds float, float* %16, i64 %170 - %172 = bitcast float* %171 to <8 x float>* - %wide.masked.load28.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %172, i32 4, <8 x i1> %97, <8 x float> undef), !tbaa !12, !alias.scope !39 - %173 = fsub <8 x float> %wide.masked.load27.3, %wide.masked.load28.3 - %174 = fpext <8 x float> %173 to <8 x double> - %175 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %174, <8 x double> , <8 x double> %166) - %176 = fptrunc <8 x double> %175 to <8 x float> - %177 = bitcast float* %164 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %176, <8 x float>* %177, i32 4, <8 x i1> %97), !tbaa !12, !alias.scope !32, !noalias !35, !llvm.access.group !24 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.split.us - br i1 %cmp4.i.i.us.us, label %if.then8.i.i.us.us, label %if.end34.i.i.us.us - -if.then8.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us.preheader - %178 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us - %idxprom11.i.i.us.us = sext i32 %add10.i.i.us.us to i64 - %arrayidx12.i.i.us.us = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us - %179 = bitcast float* %arrayidx12.i.i.us.us to i32* - store i32 %178, i32* %179, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us - -if.end34.i.i.us.us: ; preds = %if.then8.i.i.us.us, %pregion_for_entry.entry.i.i.us.us.preheader - br i1 %cmp4.i.i.us.us.1, label %if.then8.i.i.us.us.1, label %if.end34.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end34.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.preheader ], [ %252, %if.end34.i.i.us.1 ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %28, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.else.i.i.us, label %if.end34.i.i.us - -if.else.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add14.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom15.i.i.us = sext i32 %add14.i.i.us to i64 - %arrayidx16.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom15.i.i.us - %180 = load float, float* %arrayidx16.i.i.us, align 4, !tbaa !12 - %conv17.i.i.us = fpext float %180 to double - %arrayidx21.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom15.i.i.us - %181 = load float, float* %arrayidx21.i.i.us, align 4, !tbaa !12 - %add23.i.i.us = add nsw i32 %mul22.i.i, %conv.i.i.us - %idxprom24.i.i.us = sext i32 %add23.i.i.us to i64 - %arrayidx25.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom24.i.i.us - %182 = load float, float* %arrayidx25.i.i.us, align 4, !tbaa !12 - %sub26.i.i.us = fsub float %181, %182 - %conv27.i.i.us = fpext float %sub26.i.i.us to double - %183 = tail call double @llvm.fmuladd.f64(double %conv27.i.i.us, double -5.000000e-01, double %conv17.i.i.us) #5 - %conv29.i.i.us = fptrunc double %183 to float - store float %conv29.i.i.us, float* %arrayidx16.i.i.us, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us - -if.end34.i.i.us: ; preds = %if.else.i.i.us, %pregion_for_entry.entry.i.i.us - %184 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %184, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %28, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.else.i.i.us.1, label %if.end34.i.i.us.1 - -pregion_for_end.i.i.loopexit: ; preds = %if.end34.i.i.us.1 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %if.then8.i.i.us.us.31, %if.end34.i.i.us.us.30, %pregion_for_end.i.i.loopexit, %vector.body, %pregion_for_entry.pregion_for_init.i.i - %185 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond4.not = icmp eq i64 %185, 8 - br i1 %exitcond4.not, label %_pocl_kernel_fdtd_kernel1.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !27 - -_pocl_kernel_fdtd_kernel1.exit: ; preds = %pregion_for_end.i.i - ret void - -if.then8.i.i.us.us.1: ; preds = %if.end34.i.i.us.us - %186 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.1 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.1 - %idxprom11.i.i.us.us.1 = sext i32 %add10.i.i.us.us.1 to i64 - %arrayidx12.i.i.us.us.1 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.1 - %187 = bitcast float* %arrayidx12.i.i.us.us.1 to i32* - store i32 %186, i32* %187, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.1 - -if.end34.i.i.us.us.1: ; preds = %if.then8.i.i.us.us.1, %if.end34.i.i.us.us - br i1 %cmp4.i.i.us.us.2, label %if.then8.i.i.us.us.2, label %if.end34.i.i.us.us.2 - -if.then8.i.i.us.us.2: ; preds = %if.end34.i.i.us.us.1 - %188 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.2 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.2 - %idxprom11.i.i.us.us.2 = sext i32 %add10.i.i.us.us.2 to i64 - %arrayidx12.i.i.us.us.2 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.2 - %189 = bitcast float* %arrayidx12.i.i.us.us.2 to i32* - store i32 %188, i32* %189, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.2 - -if.end34.i.i.us.us.2: ; preds = %if.then8.i.i.us.us.2, %if.end34.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.3, label %if.then8.i.i.us.us.3, label %if.end34.i.i.us.us.3 - -if.then8.i.i.us.us.3: ; preds = %if.end34.i.i.us.us.2 - %190 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.3 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.3 - %idxprom11.i.i.us.us.3 = sext i32 %add10.i.i.us.us.3 to i64 - %arrayidx12.i.i.us.us.3 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.3 - %191 = bitcast float* %arrayidx12.i.i.us.us.3 to i32* - store i32 %190, i32* %191, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.3 - -if.end34.i.i.us.us.3: ; preds = %if.then8.i.i.us.us.3, %if.end34.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.4, label %if.then8.i.i.us.us.4, label %if.end34.i.i.us.us.4 - -if.then8.i.i.us.us.4: ; preds = %if.end34.i.i.us.us.3 - %192 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.4 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.4 - %idxprom11.i.i.us.us.4 = sext i32 %add10.i.i.us.us.4 to i64 - %arrayidx12.i.i.us.us.4 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.4 - %193 = bitcast float* %arrayidx12.i.i.us.us.4 to i32* - store i32 %192, i32* %193, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.4 - -if.end34.i.i.us.us.4: ; preds = %if.then8.i.i.us.us.4, %if.end34.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.5, label %if.then8.i.i.us.us.5, label %if.end34.i.i.us.us.5 - -if.then8.i.i.us.us.5: ; preds = %if.end34.i.i.us.us.4 - %194 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.5 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.5 - %idxprom11.i.i.us.us.5 = sext i32 %add10.i.i.us.us.5 to i64 - %arrayidx12.i.i.us.us.5 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.5 - %195 = bitcast float* %arrayidx12.i.i.us.us.5 to i32* - store i32 %194, i32* %195, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.5 - -if.end34.i.i.us.us.5: ; preds = %if.then8.i.i.us.us.5, %if.end34.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.6, label %if.then8.i.i.us.us.6, label %if.end34.i.i.us.us.6 - -if.then8.i.i.us.us.6: ; preds = %if.end34.i.i.us.us.5 - %196 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.6 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.6 - %idxprom11.i.i.us.us.6 = sext i32 %add10.i.i.us.us.6 to i64 - %arrayidx12.i.i.us.us.6 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.6 - %197 = bitcast float* %arrayidx12.i.i.us.us.6 to i32* - store i32 %196, i32* %197, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.6 - -if.end34.i.i.us.us.6: ; preds = %if.then8.i.i.us.us.6, %if.end34.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.7, label %if.then8.i.i.us.us.7, label %if.end34.i.i.us.us.7 - -if.then8.i.i.us.us.7: ; preds = %if.end34.i.i.us.us.6 - %198 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.7 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.7 - %idxprom11.i.i.us.us.7 = sext i32 %add10.i.i.us.us.7 to i64 - %arrayidx12.i.i.us.us.7 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.7 - %199 = bitcast float* %arrayidx12.i.i.us.us.7 to i32* - store i32 %198, i32* %199, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.7 - -if.end34.i.i.us.us.7: ; preds = %if.then8.i.i.us.us.7, %if.end34.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.8, label %if.then8.i.i.us.us.8, label %if.end34.i.i.us.us.8 - -if.then8.i.i.us.us.8: ; preds = %if.end34.i.i.us.us.7 - %200 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.8 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.8 - %idxprom11.i.i.us.us.8 = sext i32 %add10.i.i.us.us.8 to i64 - %arrayidx12.i.i.us.us.8 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.8 - %201 = bitcast float* %arrayidx12.i.i.us.us.8 to i32* - store i32 %200, i32* %201, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.8 - -if.end34.i.i.us.us.8: ; preds = %if.then8.i.i.us.us.8, %if.end34.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.9, label %if.then8.i.i.us.us.9, label %if.end34.i.i.us.us.9 - -if.then8.i.i.us.us.9: ; preds = %if.end34.i.i.us.us.8 - %202 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.9 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.9 - %idxprom11.i.i.us.us.9 = sext i32 %add10.i.i.us.us.9 to i64 - %arrayidx12.i.i.us.us.9 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.9 - %203 = bitcast float* %arrayidx12.i.i.us.us.9 to i32* - store i32 %202, i32* %203, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.9 - -if.end34.i.i.us.us.9: ; preds = %if.then8.i.i.us.us.9, %if.end34.i.i.us.us.8 - br i1 %cmp4.i.i.us.us.10, label %if.then8.i.i.us.us.10, label %if.end34.i.i.us.us.10 - -if.then8.i.i.us.us.10: ; preds = %if.end34.i.i.us.us.9 - %204 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.10 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.10 - %idxprom11.i.i.us.us.10 = sext i32 %add10.i.i.us.us.10 to i64 - %arrayidx12.i.i.us.us.10 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.10 - %205 = bitcast float* %arrayidx12.i.i.us.us.10 to i32* - store i32 %204, i32* %205, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.10 - -if.end34.i.i.us.us.10: ; preds = %if.then8.i.i.us.us.10, %if.end34.i.i.us.us.9 - br i1 %cmp4.i.i.us.us.11, label %if.then8.i.i.us.us.11, label %if.end34.i.i.us.us.11 - -if.then8.i.i.us.us.11: ; preds = %if.end34.i.i.us.us.10 - %206 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.11 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.11 - %idxprom11.i.i.us.us.11 = sext i32 %add10.i.i.us.us.11 to i64 - %arrayidx12.i.i.us.us.11 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.11 - %207 = bitcast float* %arrayidx12.i.i.us.us.11 to i32* - store i32 %206, i32* %207, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.11 - -if.end34.i.i.us.us.11: ; preds = %if.then8.i.i.us.us.11, %if.end34.i.i.us.us.10 - br i1 %cmp4.i.i.us.us.12, label %if.then8.i.i.us.us.12, label %if.end34.i.i.us.us.12 - -if.then8.i.i.us.us.12: ; preds = %if.end34.i.i.us.us.11 - %208 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.12 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.12 - %idxprom11.i.i.us.us.12 = sext i32 %add10.i.i.us.us.12 to i64 - %arrayidx12.i.i.us.us.12 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.12 - %209 = bitcast float* %arrayidx12.i.i.us.us.12 to i32* - store i32 %208, i32* %209, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.12 - -if.end34.i.i.us.us.12: ; preds = %if.then8.i.i.us.us.12, %if.end34.i.i.us.us.11 - br i1 %cmp4.i.i.us.us.13, label %if.then8.i.i.us.us.13, label %if.end34.i.i.us.us.13 - -if.then8.i.i.us.us.13: ; preds = %if.end34.i.i.us.us.12 - %210 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.13 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.13 - %idxprom11.i.i.us.us.13 = sext i32 %add10.i.i.us.us.13 to i64 - %arrayidx12.i.i.us.us.13 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.13 - %211 = bitcast float* %arrayidx12.i.i.us.us.13 to i32* - store i32 %210, i32* %211, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.13 - -if.end34.i.i.us.us.13: ; preds = %if.then8.i.i.us.us.13, %if.end34.i.i.us.us.12 - br i1 %cmp4.i.i.us.us.14, label %if.then8.i.i.us.us.14, label %if.end34.i.i.us.us.14 - -if.then8.i.i.us.us.14: ; preds = %if.end34.i.i.us.us.13 - %212 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.14 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.14 - %idxprom11.i.i.us.us.14 = sext i32 %add10.i.i.us.us.14 to i64 - %arrayidx12.i.i.us.us.14 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.14 - %213 = bitcast float* %arrayidx12.i.i.us.us.14 to i32* - store i32 %212, i32* %213, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.14 - -if.end34.i.i.us.us.14: ; preds = %if.then8.i.i.us.us.14, %if.end34.i.i.us.us.13 - br i1 %cmp4.i.i.us.us.15, label %if.then8.i.i.us.us.15, label %if.end34.i.i.us.us.15 - -if.then8.i.i.us.us.15: ; preds = %if.end34.i.i.us.us.14 - %214 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.15 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.15 - %idxprom11.i.i.us.us.15 = sext i32 %add10.i.i.us.us.15 to i64 - %arrayidx12.i.i.us.us.15 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.15 - %215 = bitcast float* %arrayidx12.i.i.us.us.15 to i32* - store i32 %214, i32* %215, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.15 - -if.end34.i.i.us.us.15: ; preds = %if.then8.i.i.us.us.15, %if.end34.i.i.us.us.14 - br i1 %cmp4.i.i.us.us.16, label %if.then8.i.i.us.us.16, label %if.end34.i.i.us.us.16 - -if.then8.i.i.us.us.16: ; preds = %if.end34.i.i.us.us.15 - %216 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.16 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.16 - %idxprom11.i.i.us.us.16 = sext i32 %add10.i.i.us.us.16 to i64 - %arrayidx12.i.i.us.us.16 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.16 - %217 = bitcast float* %arrayidx12.i.i.us.us.16 to i32* - store i32 %216, i32* %217, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.16 - -if.end34.i.i.us.us.16: ; preds = %if.then8.i.i.us.us.16, %if.end34.i.i.us.us.15 - br i1 %cmp4.i.i.us.us.17, label %if.then8.i.i.us.us.17, label %if.end34.i.i.us.us.17 - -if.then8.i.i.us.us.17: ; preds = %if.end34.i.i.us.us.16 - %218 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.17 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.17 - %idxprom11.i.i.us.us.17 = sext i32 %add10.i.i.us.us.17 to i64 - %arrayidx12.i.i.us.us.17 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.17 - %219 = bitcast float* %arrayidx12.i.i.us.us.17 to i32* - store i32 %218, i32* %219, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.17 - -if.end34.i.i.us.us.17: ; preds = %if.then8.i.i.us.us.17, %if.end34.i.i.us.us.16 - br i1 %cmp4.i.i.us.us.18, label %if.then8.i.i.us.us.18, label %if.end34.i.i.us.us.18 - -if.then8.i.i.us.us.18: ; preds = %if.end34.i.i.us.us.17 - %220 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.18 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.18 - %idxprom11.i.i.us.us.18 = sext i32 %add10.i.i.us.us.18 to i64 - %arrayidx12.i.i.us.us.18 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.18 - %221 = bitcast float* %arrayidx12.i.i.us.us.18 to i32* - store i32 %220, i32* %221, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.18 - -if.end34.i.i.us.us.18: ; preds = %if.then8.i.i.us.us.18, %if.end34.i.i.us.us.17 - br i1 %cmp4.i.i.us.us.19, label %if.then8.i.i.us.us.19, label %if.end34.i.i.us.us.19 - -if.then8.i.i.us.us.19: ; preds = %if.end34.i.i.us.us.18 - %222 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.19 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.19 - %idxprom11.i.i.us.us.19 = sext i32 %add10.i.i.us.us.19 to i64 - %arrayidx12.i.i.us.us.19 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.19 - %223 = bitcast float* %arrayidx12.i.i.us.us.19 to i32* - store i32 %222, i32* %223, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.19 - -if.end34.i.i.us.us.19: ; preds = %if.then8.i.i.us.us.19, %if.end34.i.i.us.us.18 - br i1 %cmp4.i.i.us.us.20, label %if.then8.i.i.us.us.20, label %if.end34.i.i.us.us.20 - -if.then8.i.i.us.us.20: ; preds = %if.end34.i.i.us.us.19 - %224 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.20 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.20 - %idxprom11.i.i.us.us.20 = sext i32 %add10.i.i.us.us.20 to i64 - %arrayidx12.i.i.us.us.20 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.20 - %225 = bitcast float* %arrayidx12.i.i.us.us.20 to i32* - store i32 %224, i32* %225, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.20 - -if.end34.i.i.us.us.20: ; preds = %if.then8.i.i.us.us.20, %if.end34.i.i.us.us.19 - br i1 %cmp4.i.i.us.us.21, label %if.then8.i.i.us.us.21, label %if.end34.i.i.us.us.21 - -if.then8.i.i.us.us.21: ; preds = %if.end34.i.i.us.us.20 - %226 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.21 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.21 - %idxprom11.i.i.us.us.21 = sext i32 %add10.i.i.us.us.21 to i64 - %arrayidx12.i.i.us.us.21 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.21 - %227 = bitcast float* %arrayidx12.i.i.us.us.21 to i32* - store i32 %226, i32* %227, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.21 - -if.end34.i.i.us.us.21: ; preds = %if.then8.i.i.us.us.21, %if.end34.i.i.us.us.20 - br i1 %cmp4.i.i.us.us.22, label %if.then8.i.i.us.us.22, label %if.end34.i.i.us.us.22 - -if.then8.i.i.us.us.22: ; preds = %if.end34.i.i.us.us.21 - %228 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.22 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.22 - %idxprom11.i.i.us.us.22 = sext i32 %add10.i.i.us.us.22 to i64 - %arrayidx12.i.i.us.us.22 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.22 - %229 = bitcast float* %arrayidx12.i.i.us.us.22 to i32* - store i32 %228, i32* %229, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.22 - -if.end34.i.i.us.us.22: ; preds = %if.then8.i.i.us.us.22, %if.end34.i.i.us.us.21 - br i1 %cmp4.i.i.us.us.23, label %if.then8.i.i.us.us.23, label %if.end34.i.i.us.us.23 - -if.then8.i.i.us.us.23: ; preds = %if.end34.i.i.us.us.22 - %230 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.23 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.23 - %idxprom11.i.i.us.us.23 = sext i32 %add10.i.i.us.us.23 to i64 - %arrayidx12.i.i.us.us.23 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.23 - %231 = bitcast float* %arrayidx12.i.i.us.us.23 to i32* - store i32 %230, i32* %231, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.23 - -if.end34.i.i.us.us.23: ; preds = %if.then8.i.i.us.us.23, %if.end34.i.i.us.us.22 - br i1 %cmp4.i.i.us.us.24, label %if.then8.i.i.us.us.24, label %if.end34.i.i.us.us.24 - -if.then8.i.i.us.us.24: ; preds = %if.end34.i.i.us.us.23 - %232 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.24 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.24 - %idxprom11.i.i.us.us.24 = sext i32 %add10.i.i.us.us.24 to i64 - %arrayidx12.i.i.us.us.24 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.24 - %233 = bitcast float* %arrayidx12.i.i.us.us.24 to i32* - store i32 %232, i32* %233, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.24 - -if.end34.i.i.us.us.24: ; preds = %if.then8.i.i.us.us.24, %if.end34.i.i.us.us.23 - br i1 %cmp4.i.i.us.us.25, label %if.then8.i.i.us.us.25, label %if.end34.i.i.us.us.25 - -if.then8.i.i.us.us.25: ; preds = %if.end34.i.i.us.us.24 - %234 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.25 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.25 - %idxprom11.i.i.us.us.25 = sext i32 %add10.i.i.us.us.25 to i64 - %arrayidx12.i.i.us.us.25 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.25 - %235 = bitcast float* %arrayidx12.i.i.us.us.25 to i32* - store i32 %234, i32* %235, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.25 - -if.end34.i.i.us.us.25: ; preds = %if.then8.i.i.us.us.25, %if.end34.i.i.us.us.24 - br i1 %cmp4.i.i.us.us.26, label %if.then8.i.i.us.us.26, label %if.end34.i.i.us.us.26 - -if.then8.i.i.us.us.26: ; preds = %if.end34.i.i.us.us.25 - %236 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.26 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.26 - %idxprom11.i.i.us.us.26 = sext i32 %add10.i.i.us.us.26 to i64 - %arrayidx12.i.i.us.us.26 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.26 - %237 = bitcast float* %arrayidx12.i.i.us.us.26 to i32* - store i32 %236, i32* %237, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.26 - -if.end34.i.i.us.us.26: ; preds = %if.then8.i.i.us.us.26, %if.end34.i.i.us.us.25 - br i1 %cmp4.i.i.us.us.27, label %if.then8.i.i.us.us.27, label %if.end34.i.i.us.us.27 - -if.then8.i.i.us.us.27: ; preds = %if.end34.i.i.us.us.26 - %238 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.27 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.27 - %idxprom11.i.i.us.us.27 = sext i32 %add10.i.i.us.us.27 to i64 - %arrayidx12.i.i.us.us.27 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.27 - %239 = bitcast float* %arrayidx12.i.i.us.us.27 to i32* - store i32 %238, i32* %239, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.27 - -if.end34.i.i.us.us.27: ; preds = %if.then8.i.i.us.us.27, %if.end34.i.i.us.us.26 - br i1 %cmp4.i.i.us.us.28, label %if.then8.i.i.us.us.28, label %if.end34.i.i.us.us.28 - -if.then8.i.i.us.us.28: ; preds = %if.end34.i.i.us.us.27 - %240 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.28 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.28 - %idxprom11.i.i.us.us.28 = sext i32 %add10.i.i.us.us.28 to i64 - %arrayidx12.i.i.us.us.28 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.28 - %241 = bitcast float* %arrayidx12.i.i.us.us.28 to i32* - store i32 %240, i32* %241, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.28 - -if.end34.i.i.us.us.28: ; preds = %if.then8.i.i.us.us.28, %if.end34.i.i.us.us.27 - br i1 %cmp4.i.i.us.us.29, label %if.then8.i.i.us.us.29, label %if.end34.i.i.us.us.29 - -if.then8.i.i.us.us.29: ; preds = %if.end34.i.i.us.us.28 - %242 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.29 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.29 - %idxprom11.i.i.us.us.29 = sext i32 %add10.i.i.us.us.29 to i64 - %arrayidx12.i.i.us.us.29 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.29 - %243 = bitcast float* %arrayidx12.i.i.us.us.29 to i32* - store i32 %242, i32* %243, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.29 - -if.end34.i.i.us.us.29: ; preds = %if.then8.i.i.us.us.29, %if.end34.i.i.us.us.28 - br i1 %cmp4.i.i.us.us.30, label %if.then8.i.i.us.us.30, label %if.end34.i.i.us.us.30 - -if.then8.i.i.us.us.30: ; preds = %if.end34.i.i.us.us.29 - %244 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.30 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.30 - %idxprom11.i.i.us.us.30 = sext i32 %add10.i.i.us.us.30 to i64 - %arrayidx12.i.i.us.us.30 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.30 - %245 = bitcast float* %arrayidx12.i.i.us.us.30 to i32* - store i32 %244, i32* %245, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.30 - -if.end34.i.i.us.us.30: ; preds = %if.then8.i.i.us.us.30, %if.end34.i.i.us.us.29 - br i1 %cmp4.i.i.us.us.31, label %if.then8.i.i.us.us.31, label %pregion_for_end.i.i - -if.then8.i.i.us.us.31: ; preds = %if.end34.i.i.us.us.30 - %246 = load i32, i32* %29, align 4, !tbaa !12 - %add10.i.i.us.us.31 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.31 - %idxprom11.i.i.us.us.31 = sext i32 %add10.i.i.us.us.31 to i64 - %arrayidx12.i.i.us.us.31 = getelementptr inbounds float, float* %12, i64 %idxprom11.i.i.us.us.31 - %247 = bitcast float* %arrayidx12.i.i.us.us.31 to i32* - store i32 %246, i32* %247, align 4, !tbaa !12, !llvm.access.group !24 - br label %pregion_for_end.i.i - -if.else.i.i.us.1: ; preds = %if.end34.i.i.us - %add14.i.i.us.1 = add nsw i32 %mul.i.i, %conv.i.i.us.1 - %idxprom15.i.i.us.1 = sext i32 %add14.i.i.us.1 to i64 - %arrayidx16.i.i.us.1 = getelementptr inbounds float, float* %12, i64 %idxprom15.i.i.us.1 - %248 = load float, float* %arrayidx16.i.i.us.1, align 4, !tbaa !12 - %conv17.i.i.us.1 = fpext float %248 to double - %arrayidx21.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom15.i.i.us.1 - %249 = load float, float* %arrayidx21.i.i.us.1, align 4, !tbaa !12 - %add23.i.i.us.1 = add nsw i32 %mul22.i.i, %conv.i.i.us.1 - %idxprom24.i.i.us.1 = sext i32 %add23.i.i.us.1 to i64 - %arrayidx25.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom24.i.i.us.1 - %250 = load float, float* %arrayidx25.i.i.us.1, align 4, !tbaa !12 - %sub26.i.i.us.1 = fsub float %249, %250 - %conv27.i.i.us.1 = fpext float %sub26.i.i.us.1 to double - %251 = tail call double @llvm.fmuladd.f64(double %conv27.i.i.us.1, double -5.000000e-01, double %conv17.i.i.us.1) #5 - %conv29.i.i.us.1 = fptrunc double %251 to float - store float %conv29.i.i.us.1, float* %arrayidx16.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.1 - -if.end34.i.i.us.1: ; preds = %if.else.i.i.us.1, %if.end34.i.i.us - %252 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %252, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !40 -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_fdtd_kernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 2 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 3 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 4 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 5 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %22 = getelementptr i8*, i8** %0, i64 6 - %23 = bitcast i8** %22 to i32** - %24 = load i32*, i32** %23, align 8 - %25 = load i32, i32* %24, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %idxprom.i.i = sext i32 %17 to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %7, i64 %idxprom.i.i - %26 = bitcast float* %arrayidx.i.i to i32* - %conv.i.i.us.us = trunc i64 %mul.i.i.i to i32 - %cmp4.i.i.us.us = icmp sgt i32 %25, %conv.i.i.us.us - %27 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.1 = or i32 %27, 1 - %cmp4.i.i.us.us.1 = icmp sgt i32 %25, %conv.i.i.us.us.1 - %28 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.2 = or i32 %28, 2 - %cmp4.i.i.us.us.2 = icmp sgt i32 %25, %conv.i.i.us.us.2 - %29 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.3 = or i32 %29, 3 - %cmp4.i.i.us.us.3 = icmp sgt i32 %25, %conv.i.i.us.us.3 - %30 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.4 = or i32 %30, 4 - %cmp4.i.i.us.us.4 = icmp sgt i32 %25, %conv.i.i.us.us.4 - %31 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.5 = or i32 %31, 5 - %cmp4.i.i.us.us.5 = icmp sgt i32 %25, %conv.i.i.us.us.5 - %32 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.6 = or i32 %32, 6 - %cmp4.i.i.us.us.6 = icmp sgt i32 %25, %conv.i.i.us.us.6 - %33 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.7 = or i32 %33, 7 - %cmp4.i.i.us.us.7 = icmp sgt i32 %25, %conv.i.i.us.us.7 - %34 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.8 = or i32 %34, 8 - %cmp4.i.i.us.us.8 = icmp sgt i32 %25, %conv.i.i.us.us.8 - %35 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.9 = or i32 %35, 9 - %cmp4.i.i.us.us.9 = icmp sgt i32 %25, %conv.i.i.us.us.9 - %36 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.10 = or i32 %36, 10 - %cmp4.i.i.us.us.10 = icmp sgt i32 %25, %conv.i.i.us.us.10 - %37 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.11 = or i32 %37, 11 - %cmp4.i.i.us.us.11 = icmp sgt i32 %25, %conv.i.i.us.us.11 - %38 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.12 = or i32 %38, 12 - %cmp4.i.i.us.us.12 = icmp sgt i32 %25, %conv.i.i.us.us.12 - %39 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.13 = or i32 %39, 13 - %cmp4.i.i.us.us.13 = icmp sgt i32 %25, %conv.i.i.us.us.13 - %40 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.14 = or i32 %40, 14 - %cmp4.i.i.us.us.14 = icmp sgt i32 %25, %conv.i.i.us.us.14 - %41 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.15 = or i32 %41, 15 - %cmp4.i.i.us.us.15 = icmp sgt i32 %25, %conv.i.i.us.us.15 - %42 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.16 = or i32 %42, 16 - %cmp4.i.i.us.us.16 = icmp sgt i32 %25, %conv.i.i.us.us.16 - %43 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.17 = or i32 %43, 17 - %cmp4.i.i.us.us.17 = icmp sgt i32 %25, %conv.i.i.us.us.17 - %44 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.18 = or i32 %44, 18 - %cmp4.i.i.us.us.18 = icmp sgt i32 %25, %conv.i.i.us.us.18 - %45 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.19 = or i32 %45, 19 - %cmp4.i.i.us.us.19 = icmp sgt i32 %25, %conv.i.i.us.us.19 - %46 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.20 = or i32 %46, 20 - %cmp4.i.i.us.us.20 = icmp sgt i32 %25, %conv.i.i.us.us.20 - %47 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.21 = or i32 %47, 21 - %cmp4.i.i.us.us.21 = icmp sgt i32 %25, %conv.i.i.us.us.21 - %48 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.22 = or i32 %48, 22 - %cmp4.i.i.us.us.22 = icmp sgt i32 %25, %conv.i.i.us.us.22 - %49 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.23 = or i32 %49, 23 - %cmp4.i.i.us.us.23 = icmp sgt i32 %25, %conv.i.i.us.us.23 - %50 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.24 = or i32 %50, 24 - %cmp4.i.i.us.us.24 = icmp sgt i32 %25, %conv.i.i.us.us.24 - %51 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.25 = or i32 %51, 25 - %cmp4.i.i.us.us.25 = icmp sgt i32 %25, %conv.i.i.us.us.25 - %52 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.26 = or i32 %52, 26 - %cmp4.i.i.us.us.26 = icmp sgt i32 %25, %conv.i.i.us.us.26 - %53 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.27 = or i32 %53, 27 - %cmp4.i.i.us.us.27 = icmp sgt i32 %25, %conv.i.i.us.us.27 - %54 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.28 = or i32 %54, 28 - %cmp4.i.i.us.us.28 = icmp sgt i32 %25, %conv.i.i.us.us.28 - %55 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.29 = or i32 %55, 29 - %cmp4.i.i.us.us.29 = icmp sgt i32 %25, %conv.i.i.us.us.29 - %56 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.30 = or i32 %56, 30 - %cmp4.i.i.us.us.30 = icmp sgt i32 %25, %conv.i.i.us.us.30 - %57 = trunc i64 %mul.i.i.i to i32 - %conv.i.i.us.us.31 = or i32 %57, 31 - %cmp4.i.i.us.us.31 = icmp sgt i32 %25, %conv.i.i.us.us.31 - %58 = trunc i64 %3 to i32 - %59 = mul i32 %25, %58 - %60 = shl i32 %59, 3 - %61 = trunc i64 %2 to i32 - %62 = shl i32 %61, 5 - %63 = add i32 %60, %62 - %64 = zext i32 %25 to i64 - %65 = shl i32 %58, 3 - %66 = add i32 %65, -1 - %67 = mul i32 %25, %66 - %68 = add i32 %67, %62 - %69 = trunc i64 %3 to i32 - %70 = mul i32 %25, %69 - %71 = shl i32 %70, 3 - %72 = trunc i64 %2 to i32 - %73 = shl i32 %72, 5 - %74 = add i32 %71, %73 - %75 = zext i32 %25 to i64 - %scevgep9 = getelementptr float, float* %10, i64 32 - %76 = shl i32 %69, 3 - %77 = add i32 %76, -1 - %78 = mul i32 %25, %77 - %79 = add i32 %78, %73 - %scevgep14 = getelementptr float, float* %13, i64 32 - %scevgep19 = getelementptr float, float* %13, i64 32 - %bound022 = icmp ult float* %10, %scevgep19 - %bound123 = icmp ult float* %13, %scevgep9 - %found.conflict24 = and i1 %bound022, %bound123 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert25 = insertelement <8 x i32> undef, i32 %25, i32 0 - %broadcast.splat26 = shufflevector <8 x i32> %broadcast.splatinsert25, <8 x i32> undef, <8 x i32> zeroinitializer - %80 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %81 = or <8 x i32> %80, - %82 = icmp sgt <8 x i32> %broadcast.splat26, %81 - %83 = extractelement <8 x i32> %81, i32 0 - %84 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %85 = or <8 x i32> %84, - %86 = icmp sgt <8 x i32> %broadcast.splat26, %85 - %87 = extractelement <8 x i32> %85, i32 0 - %88 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %89 = or <8 x i32> %88, - %90 = icmp sgt <8 x i32> %broadcast.splat26, %89 - %91 = extractelement <8 x i32> %89, i32 0 - %92 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %93 = or <8 x i32> %92, - %94 = icmp sgt <8 x i32> %broadcast.splat26, %93 - %95 = extractelement <8 x i32> %93, i32 0 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %182, %pregion_for_end.i.i ] - %96 = mul i64 %_local_id_y.i.0, %75 - %97 = trunc i64 %96 to i32 - %98 = add i32 %74, %97 - %99 = sext i32 %98 to i64 - %scevgep = getelementptr float, float* %10, i64 %99 - %scevgep10 = getelementptr float, float* %scevgep9, i64 %99 - %100 = trunc i64 %96 to i32 - %101 = add i32 %79, %100 - %102 = sext i32 %101 to i64 - %scevgep12 = getelementptr float, float* %13, i64 %102 - %scevgep15 = getelementptr float, float* %scevgep14, i64 %102 - %103 = mul i64 %_local_id_y.i.0, %64 - %104 = trunc i64 %103 to i32 - %105 = add i32 %63, %104 - %106 = trunc i64 %103 to i32 - %107 = add i32 %68, %106 - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %21, %conv2.i.i - %mul.i.i = mul nsw i32 %25, %conv2.i.i - %sub.i.i = add nsw i32 %conv2.i.i, -1 - %mul22.i.i = mul nsw i32 %sub.i.i, %25 - br i1 %cmp.i.i, label %pregion_for_entry.pregion_for_init.i.i.split.us, label %pregion_for_end.i.i - -pregion_for_entry.pregion_for_init.i.i.split.us: ; preds = %pregion_for_entry.pregion_for_init.i.i - %cmp6.i.i = icmp eq i32 %conv2.i.i, 0 - br i1 %cmp6.i.i, label %pregion_for_entry.entry.i.i.us.us.preheader, label %vector.scevcheck - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i.split.us - %108 = icmp sgt i32 %105, 2147483616 - %109 = icmp sgt i32 %107, 2147483616 - %110 = or i1 %108, %109 - br i1 %110, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %bound0 = icmp ult float* %scevgep, %scevgep15 - %bound1 = icmp ult float* %scevgep12, %scevgep10 - %found.conflict = and i1 %bound0, %bound1 - %conflict.rdx = or i1 %found.conflict, %found.conflict24 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.body - -vector.body: ; preds = %vector.memcheck - %111 = add nsw i32 %mul.i.i, %83 - %112 = sext i32 %111 to i64 - %113 = getelementptr inbounds float, float* %10, i64 %112 - %114 = bitcast float* %113 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %114, i32 4, <8 x i1> %82, <8 x float> undef), !tbaa !12, !alias.scope !41, !noalias !44 - %115 = fpext <8 x float> %wide.masked.load to <8 x double> - %116 = getelementptr inbounds float, float* %13, i64 %112 - %117 = bitcast float* %116 to <8 x float>* - %wide.masked.load27 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %117, i32 4, <8 x i1> %82, <8 x float> undef), !tbaa !12, !alias.scope !47 - %118 = add nsw i32 %mul22.i.i, %83 - %119 = sext i32 %118 to i64 - %120 = getelementptr inbounds float, float* %13, i64 %119 - %121 = bitcast float* %120 to <8 x float>* - %wide.masked.load28 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %121, i32 4, <8 x i1> %82, <8 x float> undef), !tbaa !12, !alias.scope !48 - %122 = fsub <8 x float> %wide.masked.load27, %wide.masked.load28 - %123 = fpext <8 x float> %122 to <8 x double> - %124 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %123, <8 x double> , <8 x double> %115) - %125 = fptrunc <8 x double> %124 to <8 x float> - %126 = bitcast float* %113 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %125, <8 x float>* %126, i32 4, <8 x i1> %82), !tbaa !12, !alias.scope !41, !noalias !44, !llvm.access.group !24 - %127 = add nsw i32 %mul.i.i, %87 - %128 = sext i32 %127 to i64 - %129 = getelementptr inbounds float, float* %10, i64 %128 - %130 = bitcast float* %129 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %130, i32 4, <8 x i1> %86, <8 x float> undef), !tbaa !12, !alias.scope !41, !noalias !44 - %131 = fpext <8 x float> %wide.masked.load.1 to <8 x double> - %132 = getelementptr inbounds float, float* %13, i64 %128 - %133 = bitcast float* %132 to <8 x float>* - %wide.masked.load27.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %133, i32 4, <8 x i1> %86, <8 x float> undef), !tbaa !12, !alias.scope !47 - %134 = add nsw i32 %mul22.i.i, %87 - %135 = sext i32 %134 to i64 - %136 = getelementptr inbounds float, float* %13, i64 %135 - %137 = bitcast float* %136 to <8 x float>* - %wide.masked.load28.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %137, i32 4, <8 x i1> %86, <8 x float> undef), !tbaa !12, !alias.scope !48 - %138 = fsub <8 x float> %wide.masked.load27.1, %wide.masked.load28.1 - %139 = fpext <8 x float> %138 to <8 x double> - %140 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %139, <8 x double> , <8 x double> %131) - %141 = fptrunc <8 x double> %140 to <8 x float> - %142 = bitcast float* %129 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %141, <8 x float>* %142, i32 4, <8 x i1> %86), !tbaa !12, !alias.scope !41, !noalias !44, !llvm.access.group !24 - %143 = add nsw i32 %mul.i.i, %91 - %144 = sext i32 %143 to i64 - %145 = getelementptr inbounds float, float* %10, i64 %144 - %146 = bitcast float* %145 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %146, i32 4, <8 x i1> %90, <8 x float> undef), !tbaa !12, !alias.scope !41, !noalias !44 - %147 = fpext <8 x float> %wide.masked.load.2 to <8 x double> - %148 = getelementptr inbounds float, float* %13, i64 %144 - %149 = bitcast float* %148 to <8 x float>* - %wide.masked.load27.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %149, i32 4, <8 x i1> %90, <8 x float> undef), !tbaa !12, !alias.scope !47 - %150 = add nsw i32 %mul22.i.i, %91 - %151 = sext i32 %150 to i64 - %152 = getelementptr inbounds float, float* %13, i64 %151 - %153 = bitcast float* %152 to <8 x float>* - %wide.masked.load28.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %153, i32 4, <8 x i1> %90, <8 x float> undef), !tbaa !12, !alias.scope !48 - %154 = fsub <8 x float> %wide.masked.load27.2, %wide.masked.load28.2 - %155 = fpext <8 x float> %154 to <8 x double> - %156 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %155, <8 x double> , <8 x double> %147) - %157 = fptrunc <8 x double> %156 to <8 x float> - %158 = bitcast float* %145 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %157, <8 x float>* %158, i32 4, <8 x i1> %90), !tbaa !12, !alias.scope !41, !noalias !44, !llvm.access.group !24 - %159 = add nsw i32 %mul.i.i, %95 - %160 = sext i32 %159 to i64 - %161 = getelementptr inbounds float, float* %10, i64 %160 - %162 = bitcast float* %161 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %162, i32 4, <8 x i1> %94, <8 x float> undef), !tbaa !12, !alias.scope !41, !noalias !44 - %163 = fpext <8 x float> %wide.masked.load.3 to <8 x double> - %164 = getelementptr inbounds float, float* %13, i64 %160 - %165 = bitcast float* %164 to <8 x float>* - %wide.masked.load27.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %165, i32 4, <8 x i1> %94, <8 x float> undef), !tbaa !12, !alias.scope !47 - %166 = add nsw i32 %mul22.i.i, %95 - %167 = sext i32 %166 to i64 - %168 = getelementptr inbounds float, float* %13, i64 %167 - %169 = bitcast float* %168 to <8 x float>* - %wide.masked.load28.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %169, i32 4, <8 x i1> %94, <8 x float> undef), !tbaa !12, !alias.scope !48 - %170 = fsub <8 x float> %wide.masked.load27.3, %wide.masked.load28.3 - %171 = fpext <8 x float> %170 to <8 x double> - %172 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %171, <8 x double> , <8 x double> %163) - %173 = fptrunc <8 x double> %172 to <8 x float> - %174 = bitcast float* %161 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %173, <8 x float>* %174, i32 4, <8 x i1> %94), !tbaa !12, !alias.scope !41, !noalias !44, !llvm.access.group !24 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.split.us - br i1 %cmp4.i.i.us.us, label %if.then8.i.i.us.us, label %if.end34.i.i.us.us - -if.then8.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us.preheader - %175 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us - %idxprom11.i.i.us.us = sext i32 %add10.i.i.us.us to i64 - %arrayidx12.i.i.us.us = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us - %176 = bitcast float* %arrayidx12.i.i.us.us to i32* - store i32 %175, i32* %176, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us - -if.end34.i.i.us.us: ; preds = %if.then8.i.i.us.us, %pregion_for_entry.entry.i.i.us.us.preheader - br i1 %cmp4.i.i.us.us.1, label %if.then8.i.i.us.us.1, label %if.end34.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end34.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.preheader ], [ %249, %if.end34.i.i.us.1 ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %25, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.else.i.i.us, label %if.end34.i.i.us - -if.else.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add14.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom15.i.i.us = sext i32 %add14.i.i.us to i64 - %arrayidx16.i.i.us = getelementptr inbounds float, float* %10, i64 %idxprom15.i.i.us - %177 = load float, float* %arrayidx16.i.i.us, align 4, !tbaa !12 - %conv17.i.i.us = fpext float %177 to double - %arrayidx21.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom15.i.i.us - %178 = load float, float* %arrayidx21.i.i.us, align 4, !tbaa !12 - %add23.i.i.us = add nsw i32 %mul22.i.i, %conv.i.i.us - %idxprom24.i.i.us = sext i32 %add23.i.i.us to i64 - %arrayidx25.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom24.i.i.us - %179 = load float, float* %arrayidx25.i.i.us, align 4, !tbaa !12 - %sub26.i.i.us = fsub float %178, %179 - %conv27.i.i.us = fpext float %sub26.i.i.us to double - %180 = tail call double @llvm.fmuladd.f64(double %conv27.i.i.us, double -5.000000e-01, double %conv17.i.i.us) #5 - %conv29.i.i.us = fptrunc double %180 to float - store float %conv29.i.i.us, float* %arrayidx16.i.i.us, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us - -if.end34.i.i.us: ; preds = %if.else.i.i.us, %pregion_for_entry.entry.i.i.us - %181 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %181, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %25, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.else.i.i.us.1, label %if.end34.i.i.us.1 - -pregion_for_end.i.i.loopexit: ; preds = %if.end34.i.i.us.1 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %if.then8.i.i.us.us.31, %if.end34.i.i.us.us.30, %pregion_for_end.i.i.loopexit, %vector.body, %pregion_for_entry.pregion_for_init.i.i - %182 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond4.not = icmp eq i64 %182, 8 - br i1 %exitcond4.not, label %_pocl_kernel_fdtd_kernel1.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !27 - -_pocl_kernel_fdtd_kernel1.exit: ; preds = %pregion_for_end.i.i - ret void - -if.then8.i.i.us.us.1: ; preds = %if.end34.i.i.us.us - %183 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.1 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.1 - %idxprom11.i.i.us.us.1 = sext i32 %add10.i.i.us.us.1 to i64 - %arrayidx12.i.i.us.us.1 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.1 - %184 = bitcast float* %arrayidx12.i.i.us.us.1 to i32* - store i32 %183, i32* %184, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.1 - -if.end34.i.i.us.us.1: ; preds = %if.then8.i.i.us.us.1, %if.end34.i.i.us.us - br i1 %cmp4.i.i.us.us.2, label %if.then8.i.i.us.us.2, label %if.end34.i.i.us.us.2 - -if.then8.i.i.us.us.2: ; preds = %if.end34.i.i.us.us.1 - %185 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.2 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.2 - %idxprom11.i.i.us.us.2 = sext i32 %add10.i.i.us.us.2 to i64 - %arrayidx12.i.i.us.us.2 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.2 - %186 = bitcast float* %arrayidx12.i.i.us.us.2 to i32* - store i32 %185, i32* %186, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.2 - -if.end34.i.i.us.us.2: ; preds = %if.then8.i.i.us.us.2, %if.end34.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.3, label %if.then8.i.i.us.us.3, label %if.end34.i.i.us.us.3 - -if.then8.i.i.us.us.3: ; preds = %if.end34.i.i.us.us.2 - %187 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.3 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.3 - %idxprom11.i.i.us.us.3 = sext i32 %add10.i.i.us.us.3 to i64 - %arrayidx12.i.i.us.us.3 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.3 - %188 = bitcast float* %arrayidx12.i.i.us.us.3 to i32* - store i32 %187, i32* %188, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.3 - -if.end34.i.i.us.us.3: ; preds = %if.then8.i.i.us.us.3, %if.end34.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.4, label %if.then8.i.i.us.us.4, label %if.end34.i.i.us.us.4 - -if.then8.i.i.us.us.4: ; preds = %if.end34.i.i.us.us.3 - %189 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.4 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.4 - %idxprom11.i.i.us.us.4 = sext i32 %add10.i.i.us.us.4 to i64 - %arrayidx12.i.i.us.us.4 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.4 - %190 = bitcast float* %arrayidx12.i.i.us.us.4 to i32* - store i32 %189, i32* %190, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.4 - -if.end34.i.i.us.us.4: ; preds = %if.then8.i.i.us.us.4, %if.end34.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.5, label %if.then8.i.i.us.us.5, label %if.end34.i.i.us.us.5 - -if.then8.i.i.us.us.5: ; preds = %if.end34.i.i.us.us.4 - %191 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.5 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.5 - %idxprom11.i.i.us.us.5 = sext i32 %add10.i.i.us.us.5 to i64 - %arrayidx12.i.i.us.us.5 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.5 - %192 = bitcast float* %arrayidx12.i.i.us.us.5 to i32* - store i32 %191, i32* %192, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.5 - -if.end34.i.i.us.us.5: ; preds = %if.then8.i.i.us.us.5, %if.end34.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.6, label %if.then8.i.i.us.us.6, label %if.end34.i.i.us.us.6 - -if.then8.i.i.us.us.6: ; preds = %if.end34.i.i.us.us.5 - %193 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.6 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.6 - %idxprom11.i.i.us.us.6 = sext i32 %add10.i.i.us.us.6 to i64 - %arrayidx12.i.i.us.us.6 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.6 - %194 = bitcast float* %arrayidx12.i.i.us.us.6 to i32* - store i32 %193, i32* %194, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.6 - -if.end34.i.i.us.us.6: ; preds = %if.then8.i.i.us.us.6, %if.end34.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.7, label %if.then8.i.i.us.us.7, label %if.end34.i.i.us.us.7 - -if.then8.i.i.us.us.7: ; preds = %if.end34.i.i.us.us.6 - %195 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.7 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.7 - %idxprom11.i.i.us.us.7 = sext i32 %add10.i.i.us.us.7 to i64 - %arrayidx12.i.i.us.us.7 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.7 - %196 = bitcast float* %arrayidx12.i.i.us.us.7 to i32* - store i32 %195, i32* %196, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.7 - -if.end34.i.i.us.us.7: ; preds = %if.then8.i.i.us.us.7, %if.end34.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.8, label %if.then8.i.i.us.us.8, label %if.end34.i.i.us.us.8 - -if.then8.i.i.us.us.8: ; preds = %if.end34.i.i.us.us.7 - %197 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.8 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.8 - %idxprom11.i.i.us.us.8 = sext i32 %add10.i.i.us.us.8 to i64 - %arrayidx12.i.i.us.us.8 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.8 - %198 = bitcast float* %arrayidx12.i.i.us.us.8 to i32* - store i32 %197, i32* %198, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.8 - -if.end34.i.i.us.us.8: ; preds = %if.then8.i.i.us.us.8, %if.end34.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.9, label %if.then8.i.i.us.us.9, label %if.end34.i.i.us.us.9 - -if.then8.i.i.us.us.9: ; preds = %if.end34.i.i.us.us.8 - %199 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.9 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.9 - %idxprom11.i.i.us.us.9 = sext i32 %add10.i.i.us.us.9 to i64 - %arrayidx12.i.i.us.us.9 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.9 - %200 = bitcast float* %arrayidx12.i.i.us.us.9 to i32* - store i32 %199, i32* %200, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.9 - -if.end34.i.i.us.us.9: ; preds = %if.then8.i.i.us.us.9, %if.end34.i.i.us.us.8 - br i1 %cmp4.i.i.us.us.10, label %if.then8.i.i.us.us.10, label %if.end34.i.i.us.us.10 - -if.then8.i.i.us.us.10: ; preds = %if.end34.i.i.us.us.9 - %201 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.10 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.10 - %idxprom11.i.i.us.us.10 = sext i32 %add10.i.i.us.us.10 to i64 - %arrayidx12.i.i.us.us.10 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.10 - %202 = bitcast float* %arrayidx12.i.i.us.us.10 to i32* - store i32 %201, i32* %202, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.10 - -if.end34.i.i.us.us.10: ; preds = %if.then8.i.i.us.us.10, %if.end34.i.i.us.us.9 - br i1 %cmp4.i.i.us.us.11, label %if.then8.i.i.us.us.11, label %if.end34.i.i.us.us.11 - -if.then8.i.i.us.us.11: ; preds = %if.end34.i.i.us.us.10 - %203 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.11 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.11 - %idxprom11.i.i.us.us.11 = sext i32 %add10.i.i.us.us.11 to i64 - %arrayidx12.i.i.us.us.11 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.11 - %204 = bitcast float* %arrayidx12.i.i.us.us.11 to i32* - store i32 %203, i32* %204, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.11 - -if.end34.i.i.us.us.11: ; preds = %if.then8.i.i.us.us.11, %if.end34.i.i.us.us.10 - br i1 %cmp4.i.i.us.us.12, label %if.then8.i.i.us.us.12, label %if.end34.i.i.us.us.12 - -if.then8.i.i.us.us.12: ; preds = %if.end34.i.i.us.us.11 - %205 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.12 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.12 - %idxprom11.i.i.us.us.12 = sext i32 %add10.i.i.us.us.12 to i64 - %arrayidx12.i.i.us.us.12 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.12 - %206 = bitcast float* %arrayidx12.i.i.us.us.12 to i32* - store i32 %205, i32* %206, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.12 - -if.end34.i.i.us.us.12: ; preds = %if.then8.i.i.us.us.12, %if.end34.i.i.us.us.11 - br i1 %cmp4.i.i.us.us.13, label %if.then8.i.i.us.us.13, label %if.end34.i.i.us.us.13 - -if.then8.i.i.us.us.13: ; preds = %if.end34.i.i.us.us.12 - %207 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.13 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.13 - %idxprom11.i.i.us.us.13 = sext i32 %add10.i.i.us.us.13 to i64 - %arrayidx12.i.i.us.us.13 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.13 - %208 = bitcast float* %arrayidx12.i.i.us.us.13 to i32* - store i32 %207, i32* %208, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.13 - -if.end34.i.i.us.us.13: ; preds = %if.then8.i.i.us.us.13, %if.end34.i.i.us.us.12 - br i1 %cmp4.i.i.us.us.14, label %if.then8.i.i.us.us.14, label %if.end34.i.i.us.us.14 - -if.then8.i.i.us.us.14: ; preds = %if.end34.i.i.us.us.13 - %209 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.14 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.14 - %idxprom11.i.i.us.us.14 = sext i32 %add10.i.i.us.us.14 to i64 - %arrayidx12.i.i.us.us.14 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.14 - %210 = bitcast float* %arrayidx12.i.i.us.us.14 to i32* - store i32 %209, i32* %210, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.14 - -if.end34.i.i.us.us.14: ; preds = %if.then8.i.i.us.us.14, %if.end34.i.i.us.us.13 - br i1 %cmp4.i.i.us.us.15, label %if.then8.i.i.us.us.15, label %if.end34.i.i.us.us.15 - -if.then8.i.i.us.us.15: ; preds = %if.end34.i.i.us.us.14 - %211 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.15 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.15 - %idxprom11.i.i.us.us.15 = sext i32 %add10.i.i.us.us.15 to i64 - %arrayidx12.i.i.us.us.15 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.15 - %212 = bitcast float* %arrayidx12.i.i.us.us.15 to i32* - store i32 %211, i32* %212, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.15 - -if.end34.i.i.us.us.15: ; preds = %if.then8.i.i.us.us.15, %if.end34.i.i.us.us.14 - br i1 %cmp4.i.i.us.us.16, label %if.then8.i.i.us.us.16, label %if.end34.i.i.us.us.16 - -if.then8.i.i.us.us.16: ; preds = %if.end34.i.i.us.us.15 - %213 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.16 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.16 - %idxprom11.i.i.us.us.16 = sext i32 %add10.i.i.us.us.16 to i64 - %arrayidx12.i.i.us.us.16 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.16 - %214 = bitcast float* %arrayidx12.i.i.us.us.16 to i32* - store i32 %213, i32* %214, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.16 - -if.end34.i.i.us.us.16: ; preds = %if.then8.i.i.us.us.16, %if.end34.i.i.us.us.15 - br i1 %cmp4.i.i.us.us.17, label %if.then8.i.i.us.us.17, label %if.end34.i.i.us.us.17 - -if.then8.i.i.us.us.17: ; preds = %if.end34.i.i.us.us.16 - %215 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.17 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.17 - %idxprom11.i.i.us.us.17 = sext i32 %add10.i.i.us.us.17 to i64 - %arrayidx12.i.i.us.us.17 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.17 - %216 = bitcast float* %arrayidx12.i.i.us.us.17 to i32* - store i32 %215, i32* %216, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.17 - -if.end34.i.i.us.us.17: ; preds = %if.then8.i.i.us.us.17, %if.end34.i.i.us.us.16 - br i1 %cmp4.i.i.us.us.18, label %if.then8.i.i.us.us.18, label %if.end34.i.i.us.us.18 - -if.then8.i.i.us.us.18: ; preds = %if.end34.i.i.us.us.17 - %217 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.18 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.18 - %idxprom11.i.i.us.us.18 = sext i32 %add10.i.i.us.us.18 to i64 - %arrayidx12.i.i.us.us.18 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.18 - %218 = bitcast float* %arrayidx12.i.i.us.us.18 to i32* - store i32 %217, i32* %218, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.18 - -if.end34.i.i.us.us.18: ; preds = %if.then8.i.i.us.us.18, %if.end34.i.i.us.us.17 - br i1 %cmp4.i.i.us.us.19, label %if.then8.i.i.us.us.19, label %if.end34.i.i.us.us.19 - -if.then8.i.i.us.us.19: ; preds = %if.end34.i.i.us.us.18 - %219 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.19 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.19 - %idxprom11.i.i.us.us.19 = sext i32 %add10.i.i.us.us.19 to i64 - %arrayidx12.i.i.us.us.19 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.19 - %220 = bitcast float* %arrayidx12.i.i.us.us.19 to i32* - store i32 %219, i32* %220, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.19 - -if.end34.i.i.us.us.19: ; preds = %if.then8.i.i.us.us.19, %if.end34.i.i.us.us.18 - br i1 %cmp4.i.i.us.us.20, label %if.then8.i.i.us.us.20, label %if.end34.i.i.us.us.20 - -if.then8.i.i.us.us.20: ; preds = %if.end34.i.i.us.us.19 - %221 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.20 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.20 - %idxprom11.i.i.us.us.20 = sext i32 %add10.i.i.us.us.20 to i64 - %arrayidx12.i.i.us.us.20 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.20 - %222 = bitcast float* %arrayidx12.i.i.us.us.20 to i32* - store i32 %221, i32* %222, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.20 - -if.end34.i.i.us.us.20: ; preds = %if.then8.i.i.us.us.20, %if.end34.i.i.us.us.19 - br i1 %cmp4.i.i.us.us.21, label %if.then8.i.i.us.us.21, label %if.end34.i.i.us.us.21 - -if.then8.i.i.us.us.21: ; preds = %if.end34.i.i.us.us.20 - %223 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.21 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.21 - %idxprom11.i.i.us.us.21 = sext i32 %add10.i.i.us.us.21 to i64 - %arrayidx12.i.i.us.us.21 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.21 - %224 = bitcast float* %arrayidx12.i.i.us.us.21 to i32* - store i32 %223, i32* %224, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.21 - -if.end34.i.i.us.us.21: ; preds = %if.then8.i.i.us.us.21, %if.end34.i.i.us.us.20 - br i1 %cmp4.i.i.us.us.22, label %if.then8.i.i.us.us.22, label %if.end34.i.i.us.us.22 - -if.then8.i.i.us.us.22: ; preds = %if.end34.i.i.us.us.21 - %225 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.22 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.22 - %idxprom11.i.i.us.us.22 = sext i32 %add10.i.i.us.us.22 to i64 - %arrayidx12.i.i.us.us.22 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.22 - %226 = bitcast float* %arrayidx12.i.i.us.us.22 to i32* - store i32 %225, i32* %226, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.22 - -if.end34.i.i.us.us.22: ; preds = %if.then8.i.i.us.us.22, %if.end34.i.i.us.us.21 - br i1 %cmp4.i.i.us.us.23, label %if.then8.i.i.us.us.23, label %if.end34.i.i.us.us.23 - -if.then8.i.i.us.us.23: ; preds = %if.end34.i.i.us.us.22 - %227 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.23 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.23 - %idxprom11.i.i.us.us.23 = sext i32 %add10.i.i.us.us.23 to i64 - %arrayidx12.i.i.us.us.23 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.23 - %228 = bitcast float* %arrayidx12.i.i.us.us.23 to i32* - store i32 %227, i32* %228, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.23 - -if.end34.i.i.us.us.23: ; preds = %if.then8.i.i.us.us.23, %if.end34.i.i.us.us.22 - br i1 %cmp4.i.i.us.us.24, label %if.then8.i.i.us.us.24, label %if.end34.i.i.us.us.24 - -if.then8.i.i.us.us.24: ; preds = %if.end34.i.i.us.us.23 - %229 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.24 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.24 - %idxprom11.i.i.us.us.24 = sext i32 %add10.i.i.us.us.24 to i64 - %arrayidx12.i.i.us.us.24 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.24 - %230 = bitcast float* %arrayidx12.i.i.us.us.24 to i32* - store i32 %229, i32* %230, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.24 - -if.end34.i.i.us.us.24: ; preds = %if.then8.i.i.us.us.24, %if.end34.i.i.us.us.23 - br i1 %cmp4.i.i.us.us.25, label %if.then8.i.i.us.us.25, label %if.end34.i.i.us.us.25 - -if.then8.i.i.us.us.25: ; preds = %if.end34.i.i.us.us.24 - %231 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.25 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.25 - %idxprom11.i.i.us.us.25 = sext i32 %add10.i.i.us.us.25 to i64 - %arrayidx12.i.i.us.us.25 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.25 - %232 = bitcast float* %arrayidx12.i.i.us.us.25 to i32* - store i32 %231, i32* %232, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.25 - -if.end34.i.i.us.us.25: ; preds = %if.then8.i.i.us.us.25, %if.end34.i.i.us.us.24 - br i1 %cmp4.i.i.us.us.26, label %if.then8.i.i.us.us.26, label %if.end34.i.i.us.us.26 - -if.then8.i.i.us.us.26: ; preds = %if.end34.i.i.us.us.25 - %233 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.26 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.26 - %idxprom11.i.i.us.us.26 = sext i32 %add10.i.i.us.us.26 to i64 - %arrayidx12.i.i.us.us.26 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.26 - %234 = bitcast float* %arrayidx12.i.i.us.us.26 to i32* - store i32 %233, i32* %234, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.26 - -if.end34.i.i.us.us.26: ; preds = %if.then8.i.i.us.us.26, %if.end34.i.i.us.us.25 - br i1 %cmp4.i.i.us.us.27, label %if.then8.i.i.us.us.27, label %if.end34.i.i.us.us.27 - -if.then8.i.i.us.us.27: ; preds = %if.end34.i.i.us.us.26 - %235 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.27 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.27 - %idxprom11.i.i.us.us.27 = sext i32 %add10.i.i.us.us.27 to i64 - %arrayidx12.i.i.us.us.27 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.27 - %236 = bitcast float* %arrayidx12.i.i.us.us.27 to i32* - store i32 %235, i32* %236, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.27 - -if.end34.i.i.us.us.27: ; preds = %if.then8.i.i.us.us.27, %if.end34.i.i.us.us.26 - br i1 %cmp4.i.i.us.us.28, label %if.then8.i.i.us.us.28, label %if.end34.i.i.us.us.28 - -if.then8.i.i.us.us.28: ; preds = %if.end34.i.i.us.us.27 - %237 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.28 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.28 - %idxprom11.i.i.us.us.28 = sext i32 %add10.i.i.us.us.28 to i64 - %arrayidx12.i.i.us.us.28 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.28 - %238 = bitcast float* %arrayidx12.i.i.us.us.28 to i32* - store i32 %237, i32* %238, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.28 - -if.end34.i.i.us.us.28: ; preds = %if.then8.i.i.us.us.28, %if.end34.i.i.us.us.27 - br i1 %cmp4.i.i.us.us.29, label %if.then8.i.i.us.us.29, label %if.end34.i.i.us.us.29 - -if.then8.i.i.us.us.29: ; preds = %if.end34.i.i.us.us.28 - %239 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.29 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.29 - %idxprom11.i.i.us.us.29 = sext i32 %add10.i.i.us.us.29 to i64 - %arrayidx12.i.i.us.us.29 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.29 - %240 = bitcast float* %arrayidx12.i.i.us.us.29 to i32* - store i32 %239, i32* %240, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.29 - -if.end34.i.i.us.us.29: ; preds = %if.then8.i.i.us.us.29, %if.end34.i.i.us.us.28 - br i1 %cmp4.i.i.us.us.30, label %if.then8.i.i.us.us.30, label %if.end34.i.i.us.us.30 - -if.then8.i.i.us.us.30: ; preds = %if.end34.i.i.us.us.29 - %241 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.30 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.30 - %idxprom11.i.i.us.us.30 = sext i32 %add10.i.i.us.us.30 to i64 - %arrayidx12.i.i.us.us.30 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.30 - %242 = bitcast float* %arrayidx12.i.i.us.us.30 to i32* - store i32 %241, i32* %242, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.us.30 - -if.end34.i.i.us.us.30: ; preds = %if.then8.i.i.us.us.30, %if.end34.i.i.us.us.29 - br i1 %cmp4.i.i.us.us.31, label %if.then8.i.i.us.us.31, label %pregion_for_end.i.i - -if.then8.i.i.us.us.31: ; preds = %if.end34.i.i.us.us.30 - %243 = load i32, i32* %26, align 4, !tbaa !12 - %add10.i.i.us.us.31 = add nuw nsw i32 %mul.i.i, %conv.i.i.us.us.31 - %idxprom11.i.i.us.us.31 = sext i32 %add10.i.i.us.us.31 to i64 - %arrayidx12.i.i.us.us.31 = getelementptr inbounds float, float* %10, i64 %idxprom11.i.i.us.us.31 - %244 = bitcast float* %arrayidx12.i.i.us.us.31 to i32* - store i32 %243, i32* %244, align 4, !tbaa !12, !llvm.access.group !24 - br label %pregion_for_end.i.i - -if.else.i.i.us.1: ; preds = %if.end34.i.i.us - %add14.i.i.us.1 = add nsw i32 %mul.i.i, %conv.i.i.us.1 - %idxprom15.i.i.us.1 = sext i32 %add14.i.i.us.1 to i64 - %arrayidx16.i.i.us.1 = getelementptr inbounds float, float* %10, i64 %idxprom15.i.i.us.1 - %245 = load float, float* %arrayidx16.i.i.us.1, align 4, !tbaa !12 - %conv17.i.i.us.1 = fpext float %245 to double - %arrayidx21.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom15.i.i.us.1 - %246 = load float, float* %arrayidx21.i.i.us.1, align 4, !tbaa !12 - %add23.i.i.us.1 = add nsw i32 %mul22.i.i, %conv.i.i.us.1 - %idxprom24.i.i.us.1 = sext i32 %add23.i.i.us.1 to i64 - %arrayidx25.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom24.i.i.us.1 - %247 = load float, float* %arrayidx25.i.i.us.1, align 4, !tbaa !12 - %sub26.i.i.us.1 = fsub float %246, %247 - %conv27.i.i.us.1 = fpext float %sub26.i.i.us.1 to double - %248 = tail call double @llvm.fmuladd.f64(double %conv27.i.i.us.1, double -5.000000e-01, double %conv17.i.i.us.1) #5 - %conv29.i.i.us.1 = fptrunc double %248 to float - store float %conv29.i.i.us.1, float* %arrayidx16.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end34.i.i.us.1 - -if.end34.i.i.us.1: ; preds = %if.else.i.i.us.1, %if.end34.i.i.us - %249 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %249, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !49 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <8 x double> @llvm.fmuladd.v8f64(<8 x double>, <8 x double>, <8 x double>) #0 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #4 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nofree nounwind } -attributes #3 = { argmemonly nounwind readonly willreturn } -attributes #4 = { argmemonly nounwind willreturn } -attributes #5 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"float*", !"int", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !"", !""} -!10 = !{!"_fict_", !"ex", !"ey", !"hz", !"t", !"nx", !"ny"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20, !21} -!20 = distinct !{!20, !18} -!21 = distinct !{!21, !18} -!22 = !{!21} -!23 = !{!20} -!24 = !{!25, !26} -!25 = distinct !{} -!26 = distinct !{} -!27 = distinct !{!27, !28} -!28 = !{!"llvm.loop.parallel_accesses", !26} -!29 = distinct !{!29, !30, !31} -!30 = !{!"llvm.loop.parallel_accesses", !25} -!31 = !{!"llvm.loop.isvectorized", i32 1} -!32 = !{!33} -!33 = distinct !{!33, !34} -!34 = distinct !{!34, !"LVerDomain"} -!35 = !{!36, !37} -!36 = distinct !{!36, !34} -!37 = distinct !{!37, !34} -!38 = !{!37} -!39 = !{!36} -!40 = distinct !{!40, !30, !31} -!41 = !{!42} -!42 = distinct !{!42, !43} -!43 = distinct !{!43, !"LVerDomain"} -!44 = !{!45, !46} -!45 = distinct !{!45, !43} -!46 = distinct !{!46, !43} -!47 = !{!46} -!48 = !{!45} -!49 = distinct !{!49, !30, !31} diff --git a/pocl_irs/fdtd-2d_kernel2.ll b/pocl_irs/fdtd-2d_kernel2.ll deleted file mode 100644 index 2acd958..0000000 --- a/pocl_irs/fdtd-2d_kernel2.ll +++ /dev/null @@ -1,5715 +0,0 @@ -; ModuleID = './CE/HMLMAPAJJBLPBKGGCCBDEJJLIPIDBFCCIIFBD/fdtd_kernel2/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare double @llvm.fmuladd.f64(double, double, double) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_fdtd_kernel2(float* nocapture %0, float* nocapture readnone %1, float* nocapture readonly %2, i32 %3, i32 %4, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %5, i64 %6, i64 %7, i64 %8) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { -pregion_for_entry.pregion_for_init.i: - %mul.i.i = shl i64 %6, 5 - %mul3.i.i = shl i64 %7, 3 - %conv2.i = trunc i64 %mul3.i.i to i32 - %cmp.i = icmp slt i32 %conv2.i, %3 - %mul.i = mul nsw i32 %conv2.i, %4 - br i1 %cmp.i, label %vector.scevcheck, label %pregion_for_end.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i - %9 = trunc i64 %7 to i32 - %10 = mul i32 %9, %4 - %11 = shl i32 %10, 3 - %12 = trunc i64 %6 to i32 - %13 = shl i32 %12, 5 - %14 = add i32 %11, %13 - %15 = icmp sgt i32 %14, 2147483616 - %16 = add i32 %11, %13 - %17 = add i32 %16, -1 - %18 = add i32 %16, 30 - %19 = icmp slt i32 %18, %17 - %20 = or i1 %15, %19 - br i1 %20, label %pregion_for_entry.entry.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %21 = trunc i64 %7 to i32 - %22 = mul i32 %21, %4 - %23 = shl i32 %22, 3 - %24 = trunc i64 %6 to i32 - %25 = shl i32 %24, 5 - %26 = add i32 %23, %25 - %27 = sext i32 %26 to i64 - %scevgep = getelementptr float, float* %0, i64 %27 - %28 = add nsw i64 %27, 32 - %scevgep7 = getelementptr float, float* %0, i64 %28 - %29 = add i32 %23, %25 - %30 = add i32 %29, -8 - %31 = sext i32 %30 to i64 - %32 = or i64 %31, 7 - %scevgep9 = getelementptr float, float* %2, i64 %32 - %33 = add nsw i64 %31, 39 - %scevgep11 = getelementptr float, float* %2, i64 %33 - %scevgep13 = getelementptr float, float* %2, i64 %27 - %scevgep15 = getelementptr float, float* %2, i64 %28 - %bound0 = icmp ult float* %scevgep, %scevgep11 - %bound1 = icmp ult float* %scevgep9, %scevgep7 - %found.conflict = and i1 %bound0, %bound1 - %bound017 = icmp ult float* %scevgep, %scevgep15 - %bound118 = icmp ult float* %scevgep13, %scevgep7 - %found.conflict19 = and i1 %bound017, %bound118 - %conflict.rdx = or i1 %found.conflict, %found.conflict19 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert20 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat21 = shufflevector <8 x i32> %broadcast.splatinsert20, <8 x i32> undef, <8 x i32> zeroinitializer - %34 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %35 = or <8 x i32> %34, - %36 = icmp sgt <8 x i32> %broadcast.splat21, %35 - %37 = icmp sgt <8 x i32> %35, zeroinitializer - %38 = and <8 x i1> %36, %37 - %39 = extractelement <8 x i32> %35, i32 0 - %40 = add i32 %mul.i, %39 - %41 = sext i32 %40 to i64 - %42 = getelementptr inbounds float, float* %0, i64 %41 - %43 = bitcast float* %42 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %43, i32 4, <8 x i1> %38, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %44 = fpext <8 x float> %wide.masked.load to <8 x double> - %45 = getelementptr inbounds float, float* %2, i64 %41 - %46 = bitcast float* %45 to <8 x float>* - %wide.masked.load22 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %46, i32 4, <8 x i1> %38, <8 x float> undef), !tbaa !12, !alias.scope !22 - %47 = add i32 %40, -1 - %48 = sext i32 %47 to i64 - %49 = getelementptr inbounds float, float* %2, i64 %48 - %50 = bitcast float* %49 to <8 x float>* - %wide.masked.load23 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %50, i32 4, <8 x i1> %38, <8 x float> undef), !tbaa !12, !alias.scope !23 - %51 = fsub <8 x float> %wide.masked.load22, %wide.masked.load23 - %52 = fpext <8 x float> %51 to <8 x double> - %53 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %52, <8 x double> , <8 x double> %44) - %54 = fptrunc <8 x double> %53 to <8 x float> - %55 = bitcast float* %42 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %54, <8 x float>* %55, i32 4, <8 x i1> %38), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !24 - %56 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %57 = or <8 x i32> %56, - %58 = icmp sgt <8 x i32> %broadcast.splat21, %57 - %59 = icmp sgt <8 x i32> %57, zeroinitializer - %60 = and <8 x i1> %58, %59 - %61 = extractelement <8 x i32> %57, i32 0 - %62 = add i32 %mul.i, %61 - %63 = sext i32 %62 to i64 - %64 = getelementptr inbounds float, float* %0, i64 %63 - %65 = bitcast float* %64 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %65, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %66 = fpext <8 x float> %wide.masked.load.1 to <8 x double> - %67 = getelementptr inbounds float, float* %2, i64 %63 - %68 = bitcast float* %67 to <8 x float>* - %wide.masked.load22.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %68, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !22 - %69 = add i32 %62, -1 - %70 = sext i32 %69 to i64 - %71 = getelementptr inbounds float, float* %2, i64 %70 - %72 = bitcast float* %71 to <8 x float>* - %wide.masked.load23.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %72, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !23 - %73 = fsub <8 x float> %wide.masked.load22.1, %wide.masked.load23.1 - %74 = fpext <8 x float> %73 to <8 x double> - %75 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %74, <8 x double> , <8 x double> %66) - %76 = fptrunc <8 x double> %75 to <8 x float> - %77 = bitcast float* %64 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %76, <8 x float>* %77, i32 4, <8 x i1> %60), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !24 - %78 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %79 = or <8 x i32> %78, - %80 = icmp sgt <8 x i32> %broadcast.splat21, %79 - %81 = icmp sgt <8 x i32> %79, zeroinitializer - %82 = and <8 x i1> %80, %81 - %83 = extractelement <8 x i32> %79, i32 0 - %84 = add i32 %mul.i, %83 - %85 = sext i32 %84 to i64 - %86 = getelementptr inbounds float, float* %0, i64 %85 - %87 = bitcast float* %86 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %87, i32 4, <8 x i1> %82, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %88 = fpext <8 x float> %wide.masked.load.2 to <8 x double> - %89 = getelementptr inbounds float, float* %2, i64 %85 - %90 = bitcast float* %89 to <8 x float>* - %wide.masked.load22.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %90, i32 4, <8 x i1> %82, <8 x float> undef), !tbaa !12, !alias.scope !22 - %91 = add i32 %84, -1 - %92 = sext i32 %91 to i64 - %93 = getelementptr inbounds float, float* %2, i64 %92 - %94 = bitcast float* %93 to <8 x float>* - %wide.masked.load23.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %94, i32 4, <8 x i1> %82, <8 x float> undef), !tbaa !12, !alias.scope !23 - %95 = fsub <8 x float> %wide.masked.load22.2, %wide.masked.load23.2 - %96 = fpext <8 x float> %95 to <8 x double> - %97 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %96, <8 x double> , <8 x double> %88) - %98 = fptrunc <8 x double> %97 to <8 x float> - %99 = bitcast float* %86 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %98, <8 x float>* %99, i32 4, <8 x i1> %82), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !24 - %100 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %101 = or <8 x i32> %100, - %102 = icmp sgt <8 x i32> %broadcast.splat21, %101 - %103 = icmp sgt <8 x i32> %101, zeroinitializer - %104 = and <8 x i1> %102, %103 - %105 = extractelement <8 x i32> %101, i32 0 - %106 = add i32 %mul.i, %105 - %107 = sext i32 %106 to i64 - %108 = getelementptr inbounds float, float* %0, i64 %107 - %109 = bitcast float* %108 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %109, i32 4, <8 x i1> %104, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %110 = fpext <8 x float> %wide.masked.load.3 to <8 x double> - %111 = getelementptr inbounds float, float* %2, i64 %107 - %112 = bitcast float* %111 to <8 x float>* - %wide.masked.load22.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %112, i32 4, <8 x i1> %104, <8 x float> undef), !tbaa !12, !alias.scope !22 - %113 = add i32 %106, -1 - %114 = sext i32 %113 to i64 - %115 = getelementptr inbounds float, float* %2, i64 %114 - %116 = bitcast float* %115 to <8 x float>* - %wide.masked.load23.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %116, i32 4, <8 x i1> %104, <8 x float> undef), !tbaa !12, !alias.scope !23 - %117 = fsub <8 x float> %wide.masked.load22.3, %wide.masked.load23.3 - %118 = fpext <8 x float> %117 to <8 x double> - %119 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %118, <8 x double> , <8 x double> %110) - %120 = fptrunc <8 x double> %119 to <8 x float> - %121 = bitcast float* %108 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %120, <8 x float>* %121, i32 4, <8 x i1> %104), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !24 - br label %pregion_for_end.i - -pregion_for_entry.entry.i.us: ; preds = %if.end.i.us.1380, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.us.preheader ], [ %967, %if.end.i.us.1380 ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %4 - %cmp7.i.us = icmp sgt i32 %conv.i.us, 0 - %or.cond.i.us = and i1 %cmp4.i.us, %cmp7.i.us - br i1 %or.cond.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %add.i.us = add i32 %mul.i, %conv.i.us - %idxprom.i.us = sext i32 %add.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us - %122 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %conv9.i.us = fpext float %122 to double - %arrayidx13.i.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us - %123 = load float, float* %arrayidx13.i.us, align 4, !tbaa !12 - %add15.i.us = add i32 %add.i.us, -1 - %idxprom16.i.us = sext i32 %add15.i.us to i64 - %arrayidx17.i.us = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us - %124 = load float, float* %arrayidx17.i.us, align 4, !tbaa !12 - %sub18.i.us = fsub float %123, %124 - %conv19.i.us = fpext float %sub18.i.us to double - %125 = tail call double @llvm.fmuladd.f64(double %conv19.i.us, double -5.000000e-01, double %conv9.i.us) #5 - %conv21.i.us = fptrunc double %125 to float - store float %conv21.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us - -if.end.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us - %126 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1362 = add nuw nsw i64 %126, %mul.i.i - %conv.i.us.1363 = trunc i64 %add1.i.i.us.1362 to i32 - %cmp4.i.us.1364 = icmp slt i32 %conv.i.us.1363, %4 - %cmp7.i.us.1365 = icmp sgt i32 %conv.i.us.1363, 0 - %or.cond.i.us.1366 = and i1 %cmp4.i.us.1364, %cmp7.i.us.1365 - br i1 %or.cond.i.us.1366, label %if.then.i.us.1379, label %if.end.i.us.1380 - -pregion_for_end.i.loopexit: ; preds = %if.end.i.us.1380 - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i - %127 = trunc i64 %mul3.i.i to i32 - %conv2.i.1 = or i32 %127, 1 - %cmp.i.1 = icmp slt i32 %conv2.i.1, %3 - %mul.i.1 = mul nsw i32 %conv2.i.1, %4 - br i1 %cmp.i.1, label %vector.scevcheck34, label %pregion_for_end.i.1 - -vector.scevcheck34: ; preds = %pregion_for_end.i - %128 = mul i32 %conv2.i.1, %4 - %129 = trunc i64 %6 to i32 - %130 = shl i32 %129, 5 - %131 = add i32 %128, %130 - %132 = icmp sgt i32 %131, 2147483616 - %133 = add i32 %128, %130 - %134 = add i32 %133, -1 - %135 = add i32 %133, 30 - %136 = icmp slt i32 %135, %134 - %137 = or i1 %132, %136 - br i1 %137, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.memcheck56 - -pregion_for_entry.entry.i.us.1.preheader: ; preds = %vector.memcheck56, %vector.scevcheck34 - br label %pregion_for_entry.entry.i.us.1 - -vector.memcheck56: ; preds = %vector.scevcheck34 - %138 = mul i32 %conv2.i.1, %4 - %139 = trunc i64 %6 to i32 - %140 = shl i32 %139, 5 - %141 = add i32 %138, %140 - %142 = sext i32 %141 to i64 - %scevgep36 = getelementptr float, float* %0, i64 %142 - %143 = add nsw i64 %142, 32 - %scevgep38 = getelementptr float, float* %0, i64 %143 - %144 = add i32 %138, %140 - %145 = add i32 %144, -1 - %146 = sext i32 %145 to i64 - %scevgep40 = getelementptr float, float* %2, i64 %146 - %147 = add nsw i64 %146, 32 - %scevgep42 = getelementptr float, float* %2, i64 %147 - %scevgep44 = getelementptr float, float* %2, i64 %142 - %scevgep46 = getelementptr float, float* %2, i64 %143 - %bound048 = icmp ult float* %scevgep36, %scevgep42 - %bound149 = icmp ult float* %scevgep40, %scevgep38 - %found.conflict50 = and i1 %bound048, %bound149 - %bound051 = icmp ult float* %scevgep36, %scevgep46 - %bound152 = icmp ult float* %scevgep44, %scevgep38 - %found.conflict53 = and i1 %bound051, %bound152 - %conflict.rdx54 = or i1 %found.conflict50, %found.conflict53 - br i1 %conflict.rdx54, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.ph57 - -vector.ph57: ; preds = %vector.memcheck56 - %broadcast.splatinsert64 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat65 = shufflevector <8 x i64> %broadcast.splatinsert64, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert66 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat67 = shufflevector <8 x i32> %broadcast.splatinsert66, <8 x i32> undef, <8 x i32> zeroinitializer - %148 = trunc <8 x i64> %broadcast.splat65 to <8 x i32> - %149 = or <8 x i32> %148, - %150 = icmp sgt <8 x i32> %broadcast.splat67, %149 - %151 = icmp sgt <8 x i32> %149, zeroinitializer - %152 = and <8 x i1> %150, %151 - %153 = extractelement <8 x i32> %149, i32 0 - %154 = add i32 %mul.i.1, %153 - %155 = sext i32 %154 to i64 - %156 = getelementptr inbounds float, float* %0, i64 %155 - %157 = bitcast float* %156 to <8 x float>* - %wide.masked.load68 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %157, i32 4, <8 x i1> %152, <8 x float> undef), !tbaa !12, !alias.scope !27, !noalias !30 - %158 = fpext <8 x float> %wide.masked.load68 to <8 x double> - %159 = getelementptr inbounds float, float* %2, i64 %155 - %160 = bitcast float* %159 to <8 x float>* - %wide.masked.load69 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %160, i32 4, <8 x i1> %152, <8 x float> undef), !tbaa !12, !alias.scope !33 - %161 = add i32 %154, -1 - %162 = sext i32 %161 to i64 - %163 = getelementptr inbounds float, float* %2, i64 %162 - %164 = bitcast float* %163 to <8 x float>* - %wide.masked.load70 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %164, i32 4, <8 x i1> %152, <8 x float> undef), !tbaa !12, !alias.scope !34 - %165 = fsub <8 x float> %wide.masked.load69, %wide.masked.load70 - %166 = fpext <8 x float> %165 to <8 x double> - %167 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %166, <8 x double> , <8 x double> %158) - %168 = fptrunc <8 x double> %167 to <8 x float> - %169 = bitcast float* %156 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %168, <8 x float>* %169, i32 4, <8 x i1> %152), !tbaa !12, !alias.scope !27, !noalias !30, !llvm.access.group !24 - %170 = trunc <8 x i64> %broadcast.splat65 to <8 x i32> - %171 = or <8 x i32> %170, - %172 = icmp sgt <8 x i32> %broadcast.splat67, %171 - %173 = icmp sgt <8 x i32> %171, zeroinitializer - %174 = and <8 x i1> %172, %173 - %175 = extractelement <8 x i32> %171, i32 0 - %176 = add i32 %mul.i.1, %175 - %177 = sext i32 %176 to i64 - %178 = getelementptr inbounds float, float* %0, i64 %177 - %179 = bitcast float* %178 to <8 x float>* - %wide.masked.load68.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %179, i32 4, <8 x i1> %174, <8 x float> undef), !tbaa !12, !alias.scope !27, !noalias !30 - %180 = fpext <8 x float> %wide.masked.load68.1 to <8 x double> - %181 = getelementptr inbounds float, float* %2, i64 %177 - %182 = bitcast float* %181 to <8 x float>* - %wide.masked.load69.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %182, i32 4, <8 x i1> %174, <8 x float> undef), !tbaa !12, !alias.scope !33 - %183 = add i32 %176, -1 - %184 = sext i32 %183 to i64 - %185 = getelementptr inbounds float, float* %2, i64 %184 - %186 = bitcast float* %185 to <8 x float>* - %wide.masked.load70.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %186, i32 4, <8 x i1> %174, <8 x float> undef), !tbaa !12, !alias.scope !34 - %187 = fsub <8 x float> %wide.masked.load69.1, %wide.masked.load70.1 - %188 = fpext <8 x float> %187 to <8 x double> - %189 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %188, <8 x double> , <8 x double> %180) - %190 = fptrunc <8 x double> %189 to <8 x float> - %191 = bitcast float* %178 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %190, <8 x float>* %191, i32 4, <8 x i1> %174), !tbaa !12, !alias.scope !27, !noalias !30, !llvm.access.group !24 - %192 = trunc <8 x i64> %broadcast.splat65 to <8 x i32> - %193 = or <8 x i32> %192, - %194 = icmp sgt <8 x i32> %broadcast.splat67, %193 - %195 = icmp sgt <8 x i32> %193, zeroinitializer - %196 = and <8 x i1> %194, %195 - %197 = extractelement <8 x i32> %193, i32 0 - %198 = add i32 %mul.i.1, %197 - %199 = sext i32 %198 to i64 - %200 = getelementptr inbounds float, float* %0, i64 %199 - %201 = bitcast float* %200 to <8 x float>* - %wide.masked.load68.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %201, i32 4, <8 x i1> %196, <8 x float> undef), !tbaa !12, !alias.scope !27, !noalias !30 - %202 = fpext <8 x float> %wide.masked.load68.2 to <8 x double> - %203 = getelementptr inbounds float, float* %2, i64 %199 - %204 = bitcast float* %203 to <8 x float>* - %wide.masked.load69.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %204, i32 4, <8 x i1> %196, <8 x float> undef), !tbaa !12, !alias.scope !33 - %205 = add i32 %198, -1 - %206 = sext i32 %205 to i64 - %207 = getelementptr inbounds float, float* %2, i64 %206 - %208 = bitcast float* %207 to <8 x float>* - %wide.masked.load70.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %208, i32 4, <8 x i1> %196, <8 x float> undef), !tbaa !12, !alias.scope !34 - %209 = fsub <8 x float> %wide.masked.load69.2, %wide.masked.load70.2 - %210 = fpext <8 x float> %209 to <8 x double> - %211 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %210, <8 x double> , <8 x double> %202) - %212 = fptrunc <8 x double> %211 to <8 x float> - %213 = bitcast float* %200 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %212, <8 x float>* %213, i32 4, <8 x i1> %196), !tbaa !12, !alias.scope !27, !noalias !30, !llvm.access.group !24 - %214 = trunc <8 x i64> %broadcast.splat65 to <8 x i32> - %215 = or <8 x i32> %214, - %216 = icmp sgt <8 x i32> %broadcast.splat67, %215 - %217 = icmp sgt <8 x i32> %215, zeroinitializer - %218 = and <8 x i1> %216, %217 - %219 = extractelement <8 x i32> %215, i32 0 - %220 = add i32 %mul.i.1, %219 - %221 = sext i32 %220 to i64 - %222 = getelementptr inbounds float, float* %0, i64 %221 - %223 = bitcast float* %222 to <8 x float>* - %wide.masked.load68.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %223, i32 4, <8 x i1> %218, <8 x float> undef), !tbaa !12, !alias.scope !27, !noalias !30 - %224 = fpext <8 x float> %wide.masked.load68.3 to <8 x double> - %225 = getelementptr inbounds float, float* %2, i64 %221 - %226 = bitcast float* %225 to <8 x float>* - %wide.masked.load69.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %226, i32 4, <8 x i1> %218, <8 x float> undef), !tbaa !12, !alias.scope !33 - %227 = add i32 %220, -1 - %228 = sext i32 %227 to i64 - %229 = getelementptr inbounds float, float* %2, i64 %228 - %230 = bitcast float* %229 to <8 x float>* - %wide.masked.load70.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %230, i32 4, <8 x i1> %218, <8 x float> undef), !tbaa !12, !alias.scope !34 - %231 = fsub <8 x float> %wide.masked.load69.3, %wide.masked.load70.3 - %232 = fpext <8 x float> %231 to <8 x double> - %233 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %232, <8 x double> , <8 x double> %224) - %234 = fptrunc <8 x double> %233 to <8 x float> - %235 = bitcast float* %222 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %234, <8 x float>* %235, i32 4, <8 x i1> %218), !tbaa !12, !alias.scope !27, !noalias !30, !llvm.access.group !24 - br label %pregion_for_end.i.1 - -pregion_for_entry.entry.i.us.1: ; preds = %if.end.i.us.1.1, %pregion_for_entry.entry.i.us.1.preheader - %_local_id_x.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.us.1.preheader ], [ %962, %if.end.i.us.1.1 ] - %add1.i.i.us.1 = add nuw nsw i64 %_local_id_x.0.us.1, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp4.i.us.1 = icmp slt i32 %conv.i.us.1, %4 - %cmp7.i.us.1 = icmp sgt i32 %conv.i.us.1, 0 - %or.cond.i.us.1 = and i1 %cmp4.i.us.1, %cmp7.i.us.1 - br i1 %or.cond.i.us.1, label %if.then.i.us.1, label %if.end.i.us.1 - -if.then.i.us.1: ; preds = %pregion_for_entry.entry.i.us.1 - %add.i.us.1 = add i32 %mul.i.1, %conv.i.us.1 - %idxprom.i.us.1 = sext i32 %add.i.us.1 to i64 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1 - %236 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %conv9.i.us.1 = fpext float %236 to double - %arrayidx13.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1 - %237 = load float, float* %arrayidx13.i.us.1, align 4, !tbaa !12 - %add15.i.us.1 = add i32 %add.i.us.1, -1 - %idxprom16.i.us.1 = sext i32 %add15.i.us.1 to i64 - %arrayidx17.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.1 - %238 = load float, float* %arrayidx17.i.us.1, align 4, !tbaa !12 - %sub18.i.us.1 = fsub float %237, %238 - %conv19.i.us.1 = fpext float %sub18.i.us.1 to double - %239 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.1, double -5.000000e-01, double %conv9.i.us.1) #5 - %conv21.i.us.1 = fptrunc double %239 to float - store float %conv21.i.us.1, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.1 - -if.end.i.us.1: ; preds = %if.then.i.us.1, %pregion_for_entry.entry.i.us.1 - %240 = or i64 %_local_id_x.0.us.1, 1 - %add1.i.i.us.1.1 = add nuw nsw i64 %240, %mul.i.i - %conv.i.us.1.1 = trunc i64 %add1.i.i.us.1.1 to i32 - %cmp4.i.us.1.1 = icmp slt i32 %conv.i.us.1.1, %4 - %cmp7.i.us.1.1 = icmp sgt i32 %conv.i.us.1.1, 0 - %or.cond.i.us.1.1 = and i1 %cmp4.i.us.1.1, %cmp7.i.us.1.1 - br i1 %or.cond.i.us.1.1, label %if.then.i.us.1.1, label %if.end.i.us.1.1 - -pregion_for_end.i.1.loopexit: ; preds = %if.end.i.us.1.1 - br label %pregion_for_end.i.1 - -pregion_for_end.i.1: ; preds = %pregion_for_end.i.1.loopexit, %vector.ph57, %pregion_for_end.i - %241 = trunc i64 %mul3.i.i to i32 - %conv2.i.2 = or i32 %241, 2 - %cmp.i.2 = icmp slt i32 %conv2.i.2, %3 - %mul.i.2 = mul nsw i32 %conv2.i.2, %4 - br i1 %cmp.i.2, label %vector.scevcheck81, label %pregion_for_end.i.2 - -vector.scevcheck81: ; preds = %pregion_for_end.i.1 - %242 = mul i32 %conv2.i.2, %4 - %243 = trunc i64 %6 to i32 - %244 = shl i32 %243, 5 - %245 = add i32 %242, %244 - %246 = icmp sgt i32 %245, 2147483616 - %247 = add i32 %242, %244 - %248 = add i32 %247, -1 - %249 = add i32 %247, 30 - %250 = icmp slt i32 %249, %248 - %251 = or i1 %246, %250 - br i1 %251, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.memcheck103 - -pregion_for_entry.entry.i.us.2.preheader: ; preds = %vector.memcheck103, %vector.scevcheck81 - br label %pregion_for_entry.entry.i.us.2 - -vector.memcheck103: ; preds = %vector.scevcheck81 - %252 = mul i32 %conv2.i.2, %4 - %253 = trunc i64 %6 to i32 - %254 = shl i32 %253, 5 - %255 = add i32 %252, %254 - %256 = sext i32 %255 to i64 - %scevgep83 = getelementptr float, float* %0, i64 %256 - %257 = add nsw i64 %256, 32 - %scevgep85 = getelementptr float, float* %0, i64 %257 - %258 = add i32 %252, %254 - %259 = add i32 %258, -2 - %260 = sext i32 %259 to i64 - %261 = add nuw nsw i64 %260, 1 - %scevgep87 = getelementptr float, float* %2, i64 %261 - %262 = add nsw i64 %260, 33 - %scevgep89 = getelementptr float, float* %2, i64 %262 - %scevgep91 = getelementptr float, float* %2, i64 %256 - %scevgep93 = getelementptr float, float* %2, i64 %257 - %bound095 = icmp ult float* %scevgep83, %scevgep89 - %bound196 = icmp ult float* %scevgep87, %scevgep85 - %found.conflict97 = and i1 %bound095, %bound196 - %bound098 = icmp ult float* %scevgep83, %scevgep93 - %bound199 = icmp ult float* %scevgep91, %scevgep85 - %found.conflict100 = and i1 %bound098, %bound199 - %conflict.rdx101 = or i1 %found.conflict97, %found.conflict100 - br i1 %conflict.rdx101, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.ph104 - -vector.ph104: ; preds = %vector.memcheck103 - %broadcast.splatinsert111 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat112 = shufflevector <8 x i64> %broadcast.splatinsert111, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert113 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat114 = shufflevector <8 x i32> %broadcast.splatinsert113, <8 x i32> undef, <8 x i32> zeroinitializer - %263 = trunc <8 x i64> %broadcast.splat112 to <8 x i32> - %264 = or <8 x i32> %263, - %265 = icmp sgt <8 x i32> %broadcast.splat114, %264 - %266 = icmp sgt <8 x i32> %264, zeroinitializer - %267 = and <8 x i1> %265, %266 - %268 = extractelement <8 x i32> %264, i32 0 - %269 = add i32 %mul.i.2, %268 - %270 = sext i32 %269 to i64 - %271 = getelementptr inbounds float, float* %0, i64 %270 - %272 = bitcast float* %271 to <8 x float>* - %wide.masked.load115 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %272, i32 4, <8 x i1> %267, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !38 - %273 = fpext <8 x float> %wide.masked.load115 to <8 x double> - %274 = getelementptr inbounds float, float* %2, i64 %270 - %275 = bitcast float* %274 to <8 x float>* - %wide.masked.load116 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %275, i32 4, <8 x i1> %267, <8 x float> undef), !tbaa !12, !alias.scope !41 - %276 = add i32 %269, -1 - %277 = sext i32 %276 to i64 - %278 = getelementptr inbounds float, float* %2, i64 %277 - %279 = bitcast float* %278 to <8 x float>* - %wide.masked.load117 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %279, i32 4, <8 x i1> %267, <8 x float> undef), !tbaa !12, !alias.scope !42 - %280 = fsub <8 x float> %wide.masked.load116, %wide.masked.load117 - %281 = fpext <8 x float> %280 to <8 x double> - %282 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %281, <8 x double> , <8 x double> %273) - %283 = fptrunc <8 x double> %282 to <8 x float> - %284 = bitcast float* %271 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %283, <8 x float>* %284, i32 4, <8 x i1> %267), !tbaa !12, !alias.scope !35, !noalias !38, !llvm.access.group !24 - %285 = trunc <8 x i64> %broadcast.splat112 to <8 x i32> - %286 = or <8 x i32> %285, - %287 = icmp sgt <8 x i32> %broadcast.splat114, %286 - %288 = icmp sgt <8 x i32> %286, zeroinitializer - %289 = and <8 x i1> %287, %288 - %290 = extractelement <8 x i32> %286, i32 0 - %291 = add i32 %mul.i.2, %290 - %292 = sext i32 %291 to i64 - %293 = getelementptr inbounds float, float* %0, i64 %292 - %294 = bitcast float* %293 to <8 x float>* - %wide.masked.load115.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %294, i32 4, <8 x i1> %289, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !38 - %295 = fpext <8 x float> %wide.masked.load115.1 to <8 x double> - %296 = getelementptr inbounds float, float* %2, i64 %292 - %297 = bitcast float* %296 to <8 x float>* - %wide.masked.load116.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %297, i32 4, <8 x i1> %289, <8 x float> undef), !tbaa !12, !alias.scope !41 - %298 = add i32 %291, -1 - %299 = sext i32 %298 to i64 - %300 = getelementptr inbounds float, float* %2, i64 %299 - %301 = bitcast float* %300 to <8 x float>* - %wide.masked.load117.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %301, i32 4, <8 x i1> %289, <8 x float> undef), !tbaa !12, !alias.scope !42 - %302 = fsub <8 x float> %wide.masked.load116.1, %wide.masked.load117.1 - %303 = fpext <8 x float> %302 to <8 x double> - %304 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %303, <8 x double> , <8 x double> %295) - %305 = fptrunc <8 x double> %304 to <8 x float> - %306 = bitcast float* %293 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %305, <8 x float>* %306, i32 4, <8 x i1> %289), !tbaa !12, !alias.scope !35, !noalias !38, !llvm.access.group !24 - %307 = trunc <8 x i64> %broadcast.splat112 to <8 x i32> - %308 = or <8 x i32> %307, - %309 = icmp sgt <8 x i32> %broadcast.splat114, %308 - %310 = icmp sgt <8 x i32> %308, zeroinitializer - %311 = and <8 x i1> %309, %310 - %312 = extractelement <8 x i32> %308, i32 0 - %313 = add i32 %mul.i.2, %312 - %314 = sext i32 %313 to i64 - %315 = getelementptr inbounds float, float* %0, i64 %314 - %316 = bitcast float* %315 to <8 x float>* - %wide.masked.load115.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %316, i32 4, <8 x i1> %311, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !38 - %317 = fpext <8 x float> %wide.masked.load115.2 to <8 x double> - %318 = getelementptr inbounds float, float* %2, i64 %314 - %319 = bitcast float* %318 to <8 x float>* - %wide.masked.load116.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %319, i32 4, <8 x i1> %311, <8 x float> undef), !tbaa !12, !alias.scope !41 - %320 = add i32 %313, -1 - %321 = sext i32 %320 to i64 - %322 = getelementptr inbounds float, float* %2, i64 %321 - %323 = bitcast float* %322 to <8 x float>* - %wide.masked.load117.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %323, i32 4, <8 x i1> %311, <8 x float> undef), !tbaa !12, !alias.scope !42 - %324 = fsub <8 x float> %wide.masked.load116.2, %wide.masked.load117.2 - %325 = fpext <8 x float> %324 to <8 x double> - %326 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %325, <8 x double> , <8 x double> %317) - %327 = fptrunc <8 x double> %326 to <8 x float> - %328 = bitcast float* %315 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %327, <8 x float>* %328, i32 4, <8 x i1> %311), !tbaa !12, !alias.scope !35, !noalias !38, !llvm.access.group !24 - %329 = trunc <8 x i64> %broadcast.splat112 to <8 x i32> - %330 = or <8 x i32> %329, - %331 = icmp sgt <8 x i32> %broadcast.splat114, %330 - %332 = icmp sgt <8 x i32> %330, zeroinitializer - %333 = and <8 x i1> %331, %332 - %334 = extractelement <8 x i32> %330, i32 0 - %335 = add i32 %mul.i.2, %334 - %336 = sext i32 %335 to i64 - %337 = getelementptr inbounds float, float* %0, i64 %336 - %338 = bitcast float* %337 to <8 x float>* - %wide.masked.load115.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %338, i32 4, <8 x i1> %333, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !38 - %339 = fpext <8 x float> %wide.masked.load115.3 to <8 x double> - %340 = getelementptr inbounds float, float* %2, i64 %336 - %341 = bitcast float* %340 to <8 x float>* - %wide.masked.load116.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %341, i32 4, <8 x i1> %333, <8 x float> undef), !tbaa !12, !alias.scope !41 - %342 = add i32 %335, -1 - %343 = sext i32 %342 to i64 - %344 = getelementptr inbounds float, float* %2, i64 %343 - %345 = bitcast float* %344 to <8 x float>* - %wide.masked.load117.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %345, i32 4, <8 x i1> %333, <8 x float> undef), !tbaa !12, !alias.scope !42 - %346 = fsub <8 x float> %wide.masked.load116.3, %wide.masked.load117.3 - %347 = fpext <8 x float> %346 to <8 x double> - %348 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %347, <8 x double> , <8 x double> %339) - %349 = fptrunc <8 x double> %348 to <8 x float> - %350 = bitcast float* %337 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %349, <8 x float>* %350, i32 4, <8 x i1> %333), !tbaa !12, !alias.scope !35, !noalias !38, !llvm.access.group !24 - br label %pregion_for_end.i.2 - -pregion_for_entry.entry.i.us.2: ; preds = %if.end.i.us.2.1, %pregion_for_entry.entry.i.us.2.preheader - %_local_id_x.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.us.2.preheader ], [ %957, %if.end.i.us.2.1 ] - %add1.i.i.us.2 = add nuw nsw i64 %_local_id_x.0.us.2, %mul.i.i - %conv.i.us.2 = trunc i64 %add1.i.i.us.2 to i32 - %cmp4.i.us.2 = icmp slt i32 %conv.i.us.2, %4 - %cmp7.i.us.2 = icmp sgt i32 %conv.i.us.2, 0 - %or.cond.i.us.2 = and i1 %cmp4.i.us.2, %cmp7.i.us.2 - br i1 %or.cond.i.us.2, label %if.then.i.us.2, label %if.end.i.us.2 - -if.then.i.us.2: ; preds = %pregion_for_entry.entry.i.us.2 - %add.i.us.2 = add i32 %mul.i.2, %conv.i.us.2 - %idxprom.i.us.2 = sext i32 %add.i.us.2 to i64 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.2 - %351 = load float, float* %arrayidx.i.us.2, align 4, !tbaa !12 - %conv9.i.us.2 = fpext float %351 to double - %arrayidx13.i.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2 - %352 = load float, float* %arrayidx13.i.us.2, align 4, !tbaa !12 - %add15.i.us.2 = add i32 %add.i.us.2, -1 - %idxprom16.i.us.2 = sext i32 %add15.i.us.2 to i64 - %arrayidx17.i.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.2 - %353 = load float, float* %arrayidx17.i.us.2, align 4, !tbaa !12 - %sub18.i.us.2 = fsub float %352, %353 - %conv19.i.us.2 = fpext float %sub18.i.us.2 to double - %354 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.2, double -5.000000e-01, double %conv9.i.us.2) #5 - %conv21.i.us.2 = fptrunc double %354 to float - store float %conv21.i.us.2, float* %arrayidx.i.us.2, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.2 - -if.end.i.us.2: ; preds = %if.then.i.us.2, %pregion_for_entry.entry.i.us.2 - %355 = or i64 %_local_id_x.0.us.2, 1 - %add1.i.i.us.2.1 = add nuw nsw i64 %355, %mul.i.i - %conv.i.us.2.1 = trunc i64 %add1.i.i.us.2.1 to i32 - %cmp4.i.us.2.1 = icmp slt i32 %conv.i.us.2.1, %4 - %cmp7.i.us.2.1 = icmp sgt i32 %conv.i.us.2.1, 0 - %or.cond.i.us.2.1 = and i1 %cmp4.i.us.2.1, %cmp7.i.us.2.1 - br i1 %or.cond.i.us.2.1, label %if.then.i.us.2.1, label %if.end.i.us.2.1 - -pregion_for_end.i.2.loopexit: ; preds = %if.end.i.us.2.1 - br label %pregion_for_end.i.2 - -pregion_for_end.i.2: ; preds = %pregion_for_end.i.2.loopexit, %vector.ph104, %pregion_for_end.i.1 - %356 = trunc i64 %mul3.i.i to i32 - %conv2.i.3 = or i32 %356, 3 - %cmp.i.3 = icmp slt i32 %conv2.i.3, %3 - %mul.i.3 = mul nsw i32 %conv2.i.3, %4 - br i1 %cmp.i.3, label %vector.scevcheck128, label %pregion_for_end.i.3 - -vector.scevcheck128: ; preds = %pregion_for_end.i.2 - %357 = mul i32 %conv2.i.3, %4 - %358 = trunc i64 %6 to i32 - %359 = shl i32 %358, 5 - %360 = add i32 %357, %359 - %361 = icmp sgt i32 %360, 2147483616 - %362 = add i32 %357, %359 - %363 = add i32 %362, -1 - %364 = add i32 %362, 30 - %365 = icmp slt i32 %364, %363 - %366 = or i1 %361, %365 - br i1 %366, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.memcheck150 - -pregion_for_entry.entry.i.us.3.preheader: ; preds = %vector.memcheck150, %vector.scevcheck128 - br label %pregion_for_entry.entry.i.us.3 - -vector.memcheck150: ; preds = %vector.scevcheck128 - %367 = mul i32 %conv2.i.3, %4 - %368 = trunc i64 %6 to i32 - %369 = shl i32 %368, 5 - %370 = add i32 %367, %369 - %371 = sext i32 %370 to i64 - %scevgep130 = getelementptr float, float* %0, i64 %371 - %372 = add nsw i64 %371, 32 - %scevgep132 = getelementptr float, float* %0, i64 %372 - %373 = add i32 %367, %369 - %374 = add i32 %373, -1 - %375 = sext i32 %374 to i64 - %scevgep134 = getelementptr float, float* %2, i64 %375 - %376 = add nsw i64 %375, 32 - %scevgep136 = getelementptr float, float* %2, i64 %376 - %scevgep138 = getelementptr float, float* %2, i64 %371 - %scevgep140 = getelementptr float, float* %2, i64 %372 - %bound0142 = icmp ult float* %scevgep130, %scevgep136 - %bound1143 = icmp ult float* %scevgep134, %scevgep132 - %found.conflict144 = and i1 %bound0142, %bound1143 - %bound0145 = icmp ult float* %scevgep130, %scevgep140 - %bound1146 = icmp ult float* %scevgep138, %scevgep132 - %found.conflict147 = and i1 %bound0145, %bound1146 - %conflict.rdx148 = or i1 %found.conflict144, %found.conflict147 - br i1 %conflict.rdx148, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.ph151 - -vector.ph151: ; preds = %vector.memcheck150 - %broadcast.splatinsert158 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat159 = shufflevector <8 x i64> %broadcast.splatinsert158, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert160 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat161 = shufflevector <8 x i32> %broadcast.splatinsert160, <8 x i32> undef, <8 x i32> zeroinitializer - %377 = trunc <8 x i64> %broadcast.splat159 to <8 x i32> - %378 = or <8 x i32> %377, - %379 = icmp sgt <8 x i32> %broadcast.splat161, %378 - %380 = icmp sgt <8 x i32> %378, zeroinitializer - %381 = and <8 x i1> %379, %380 - %382 = extractelement <8 x i32> %378, i32 0 - %383 = add i32 %mul.i.3, %382 - %384 = sext i32 %383 to i64 - %385 = getelementptr inbounds float, float* %0, i64 %384 - %386 = bitcast float* %385 to <8 x float>* - %wide.masked.load162 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %386, i32 4, <8 x i1> %381, <8 x float> undef), !tbaa !12, !alias.scope !43, !noalias !46 - %387 = fpext <8 x float> %wide.masked.load162 to <8 x double> - %388 = getelementptr inbounds float, float* %2, i64 %384 - %389 = bitcast float* %388 to <8 x float>* - %wide.masked.load163 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %389, i32 4, <8 x i1> %381, <8 x float> undef), !tbaa !12, !alias.scope !49 - %390 = add i32 %383, -1 - %391 = sext i32 %390 to i64 - %392 = getelementptr inbounds float, float* %2, i64 %391 - %393 = bitcast float* %392 to <8 x float>* - %wide.masked.load164 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %393, i32 4, <8 x i1> %381, <8 x float> undef), !tbaa !12, !alias.scope !50 - %394 = fsub <8 x float> %wide.masked.load163, %wide.masked.load164 - %395 = fpext <8 x float> %394 to <8 x double> - %396 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %395, <8 x double> , <8 x double> %387) - %397 = fptrunc <8 x double> %396 to <8 x float> - %398 = bitcast float* %385 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %397, <8 x float>* %398, i32 4, <8 x i1> %381), !tbaa !12, !alias.scope !43, !noalias !46, !llvm.access.group !24 - %399 = trunc <8 x i64> %broadcast.splat159 to <8 x i32> - %400 = or <8 x i32> %399, - %401 = icmp sgt <8 x i32> %broadcast.splat161, %400 - %402 = icmp sgt <8 x i32> %400, zeroinitializer - %403 = and <8 x i1> %401, %402 - %404 = extractelement <8 x i32> %400, i32 0 - %405 = add i32 %mul.i.3, %404 - %406 = sext i32 %405 to i64 - %407 = getelementptr inbounds float, float* %0, i64 %406 - %408 = bitcast float* %407 to <8 x float>* - %wide.masked.load162.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %408, i32 4, <8 x i1> %403, <8 x float> undef), !tbaa !12, !alias.scope !43, !noalias !46 - %409 = fpext <8 x float> %wide.masked.load162.1 to <8 x double> - %410 = getelementptr inbounds float, float* %2, i64 %406 - %411 = bitcast float* %410 to <8 x float>* - %wide.masked.load163.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %411, i32 4, <8 x i1> %403, <8 x float> undef), !tbaa !12, !alias.scope !49 - %412 = add i32 %405, -1 - %413 = sext i32 %412 to i64 - %414 = getelementptr inbounds float, float* %2, i64 %413 - %415 = bitcast float* %414 to <8 x float>* - %wide.masked.load164.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %415, i32 4, <8 x i1> %403, <8 x float> undef), !tbaa !12, !alias.scope !50 - %416 = fsub <8 x float> %wide.masked.load163.1, %wide.masked.load164.1 - %417 = fpext <8 x float> %416 to <8 x double> - %418 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %417, <8 x double> , <8 x double> %409) - %419 = fptrunc <8 x double> %418 to <8 x float> - %420 = bitcast float* %407 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %419, <8 x float>* %420, i32 4, <8 x i1> %403), !tbaa !12, !alias.scope !43, !noalias !46, !llvm.access.group !24 - %421 = trunc <8 x i64> %broadcast.splat159 to <8 x i32> - %422 = or <8 x i32> %421, - %423 = icmp sgt <8 x i32> %broadcast.splat161, %422 - %424 = icmp sgt <8 x i32> %422, zeroinitializer - %425 = and <8 x i1> %423, %424 - %426 = extractelement <8 x i32> %422, i32 0 - %427 = add i32 %mul.i.3, %426 - %428 = sext i32 %427 to i64 - %429 = getelementptr inbounds float, float* %0, i64 %428 - %430 = bitcast float* %429 to <8 x float>* - %wide.masked.load162.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %430, i32 4, <8 x i1> %425, <8 x float> undef), !tbaa !12, !alias.scope !43, !noalias !46 - %431 = fpext <8 x float> %wide.masked.load162.2 to <8 x double> - %432 = getelementptr inbounds float, float* %2, i64 %428 - %433 = bitcast float* %432 to <8 x float>* - %wide.masked.load163.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %433, i32 4, <8 x i1> %425, <8 x float> undef), !tbaa !12, !alias.scope !49 - %434 = add i32 %427, -1 - %435 = sext i32 %434 to i64 - %436 = getelementptr inbounds float, float* %2, i64 %435 - %437 = bitcast float* %436 to <8 x float>* - %wide.masked.load164.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %437, i32 4, <8 x i1> %425, <8 x float> undef), !tbaa !12, !alias.scope !50 - %438 = fsub <8 x float> %wide.masked.load163.2, %wide.masked.load164.2 - %439 = fpext <8 x float> %438 to <8 x double> - %440 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %439, <8 x double> , <8 x double> %431) - %441 = fptrunc <8 x double> %440 to <8 x float> - %442 = bitcast float* %429 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %441, <8 x float>* %442, i32 4, <8 x i1> %425), !tbaa !12, !alias.scope !43, !noalias !46, !llvm.access.group !24 - %443 = trunc <8 x i64> %broadcast.splat159 to <8 x i32> - %444 = or <8 x i32> %443, - %445 = icmp sgt <8 x i32> %broadcast.splat161, %444 - %446 = icmp sgt <8 x i32> %444, zeroinitializer - %447 = and <8 x i1> %445, %446 - %448 = extractelement <8 x i32> %444, i32 0 - %449 = add i32 %mul.i.3, %448 - %450 = sext i32 %449 to i64 - %451 = getelementptr inbounds float, float* %0, i64 %450 - %452 = bitcast float* %451 to <8 x float>* - %wide.masked.load162.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %452, i32 4, <8 x i1> %447, <8 x float> undef), !tbaa !12, !alias.scope !43, !noalias !46 - %453 = fpext <8 x float> %wide.masked.load162.3 to <8 x double> - %454 = getelementptr inbounds float, float* %2, i64 %450 - %455 = bitcast float* %454 to <8 x float>* - %wide.masked.load163.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %455, i32 4, <8 x i1> %447, <8 x float> undef), !tbaa !12, !alias.scope !49 - %456 = add i32 %449, -1 - %457 = sext i32 %456 to i64 - %458 = getelementptr inbounds float, float* %2, i64 %457 - %459 = bitcast float* %458 to <8 x float>* - %wide.masked.load164.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %459, i32 4, <8 x i1> %447, <8 x float> undef), !tbaa !12, !alias.scope !50 - %460 = fsub <8 x float> %wide.masked.load163.3, %wide.masked.load164.3 - %461 = fpext <8 x float> %460 to <8 x double> - %462 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %461, <8 x double> , <8 x double> %453) - %463 = fptrunc <8 x double> %462 to <8 x float> - %464 = bitcast float* %451 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %463, <8 x float>* %464, i32 4, <8 x i1> %447), !tbaa !12, !alias.scope !43, !noalias !46, !llvm.access.group !24 - br label %pregion_for_end.i.3 - -pregion_for_entry.entry.i.us.3: ; preds = %if.end.i.us.3.1, %pregion_for_entry.entry.i.us.3.preheader - %_local_id_x.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.us.3.preheader ], [ %952, %if.end.i.us.3.1 ] - %add1.i.i.us.3 = add nuw nsw i64 %_local_id_x.0.us.3, %mul.i.i - %conv.i.us.3 = trunc i64 %add1.i.i.us.3 to i32 - %cmp4.i.us.3 = icmp slt i32 %conv.i.us.3, %4 - %cmp7.i.us.3 = icmp sgt i32 %conv.i.us.3, 0 - %or.cond.i.us.3 = and i1 %cmp4.i.us.3, %cmp7.i.us.3 - br i1 %or.cond.i.us.3, label %if.then.i.us.3, label %if.end.i.us.3 - -if.then.i.us.3: ; preds = %pregion_for_entry.entry.i.us.3 - %add.i.us.3 = add i32 %mul.i.3, %conv.i.us.3 - %idxprom.i.us.3 = sext i32 %add.i.us.3 to i64 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.3 - %465 = load float, float* %arrayidx.i.us.3, align 4, !tbaa !12 - %conv9.i.us.3 = fpext float %465 to double - %arrayidx13.i.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3 - %466 = load float, float* %arrayidx13.i.us.3, align 4, !tbaa !12 - %add15.i.us.3 = add i32 %add.i.us.3, -1 - %idxprom16.i.us.3 = sext i32 %add15.i.us.3 to i64 - %arrayidx17.i.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.3 - %467 = load float, float* %arrayidx17.i.us.3, align 4, !tbaa !12 - %sub18.i.us.3 = fsub float %466, %467 - %conv19.i.us.3 = fpext float %sub18.i.us.3 to double - %468 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.3, double -5.000000e-01, double %conv9.i.us.3) #5 - %conv21.i.us.3 = fptrunc double %468 to float - store float %conv21.i.us.3, float* %arrayidx.i.us.3, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.3 - -if.end.i.us.3: ; preds = %if.then.i.us.3, %pregion_for_entry.entry.i.us.3 - %469 = or i64 %_local_id_x.0.us.3, 1 - %add1.i.i.us.3.1 = add nuw nsw i64 %469, %mul.i.i - %conv.i.us.3.1 = trunc i64 %add1.i.i.us.3.1 to i32 - %cmp4.i.us.3.1 = icmp slt i32 %conv.i.us.3.1, %4 - %cmp7.i.us.3.1 = icmp sgt i32 %conv.i.us.3.1, 0 - %or.cond.i.us.3.1 = and i1 %cmp4.i.us.3.1, %cmp7.i.us.3.1 - br i1 %or.cond.i.us.3.1, label %if.then.i.us.3.1, label %if.end.i.us.3.1 - -pregion_for_end.i.3.loopexit: ; preds = %if.end.i.us.3.1 - br label %pregion_for_end.i.3 - -pregion_for_end.i.3: ; preds = %pregion_for_end.i.3.loopexit, %vector.ph151, %pregion_for_end.i.2 - %470 = trunc i64 %mul3.i.i to i32 - %conv2.i.4 = or i32 %470, 4 - %cmp.i.4 = icmp slt i32 %conv2.i.4, %3 - %mul.i.4 = mul nsw i32 %conv2.i.4, %4 - br i1 %cmp.i.4, label %vector.scevcheck175, label %pregion_for_end.i.4 - -vector.scevcheck175: ; preds = %pregion_for_end.i.3 - %471 = mul i32 %conv2.i.4, %4 - %472 = trunc i64 %6 to i32 - %473 = shl i32 %472, 5 - %474 = add i32 %471, %473 - %475 = icmp sgt i32 %474, 2147483616 - %476 = add i32 %471, %473 - %477 = add i32 %476, -1 - %478 = add i32 %476, 30 - %479 = icmp slt i32 %478, %477 - %480 = or i1 %475, %479 - br i1 %480, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.memcheck197 - -pregion_for_entry.entry.i.us.4.preheader: ; preds = %vector.memcheck197, %vector.scevcheck175 - br label %pregion_for_entry.entry.i.us.4 - -vector.memcheck197: ; preds = %vector.scevcheck175 - %481 = mul i32 %conv2.i.4, %4 - %482 = trunc i64 %6 to i32 - %483 = shl i32 %482, 5 - %484 = add i32 %481, %483 - %485 = sext i32 %484 to i64 - %scevgep177 = getelementptr float, float* %0, i64 %485 - %486 = add nsw i64 %485, 32 - %scevgep179 = getelementptr float, float* %0, i64 %486 - %487 = add i32 %481, %483 - %488 = add i32 %487, -4 - %489 = sext i32 %488 to i64 - %490 = add nuw nsw i64 %489, 3 - %scevgep181 = getelementptr float, float* %2, i64 %490 - %491 = add nsw i64 %489, 35 - %scevgep183 = getelementptr float, float* %2, i64 %491 - %scevgep185 = getelementptr float, float* %2, i64 %485 - %scevgep187 = getelementptr float, float* %2, i64 %486 - %bound0189 = icmp ult float* %scevgep177, %scevgep183 - %bound1190 = icmp ult float* %scevgep181, %scevgep179 - %found.conflict191 = and i1 %bound0189, %bound1190 - %bound0192 = icmp ult float* %scevgep177, %scevgep187 - %bound1193 = icmp ult float* %scevgep185, %scevgep179 - %found.conflict194 = and i1 %bound0192, %bound1193 - %conflict.rdx195 = or i1 %found.conflict191, %found.conflict194 - br i1 %conflict.rdx195, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.ph198 - -vector.ph198: ; preds = %vector.memcheck197 - %broadcast.splatinsert205 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat206 = shufflevector <8 x i64> %broadcast.splatinsert205, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert207 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat208 = shufflevector <8 x i32> %broadcast.splatinsert207, <8 x i32> undef, <8 x i32> zeroinitializer - %492 = trunc <8 x i64> %broadcast.splat206 to <8 x i32> - %493 = or <8 x i32> %492, - %494 = icmp sgt <8 x i32> %broadcast.splat208, %493 - %495 = icmp sgt <8 x i32> %493, zeroinitializer - %496 = and <8 x i1> %494, %495 - %497 = extractelement <8 x i32> %493, i32 0 - %498 = add i32 %mul.i.4, %497 - %499 = sext i32 %498 to i64 - %500 = getelementptr inbounds float, float* %0, i64 %499 - %501 = bitcast float* %500 to <8 x float>* - %wide.masked.load209 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %501, i32 4, <8 x i1> %496, <8 x float> undef), !tbaa !12, !alias.scope !51, !noalias !54 - %502 = fpext <8 x float> %wide.masked.load209 to <8 x double> - %503 = getelementptr inbounds float, float* %2, i64 %499 - %504 = bitcast float* %503 to <8 x float>* - %wide.masked.load210 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %504, i32 4, <8 x i1> %496, <8 x float> undef), !tbaa !12, !alias.scope !57 - %505 = add i32 %498, -1 - %506 = sext i32 %505 to i64 - %507 = getelementptr inbounds float, float* %2, i64 %506 - %508 = bitcast float* %507 to <8 x float>* - %wide.masked.load211 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %508, i32 4, <8 x i1> %496, <8 x float> undef), !tbaa !12, !alias.scope !58 - %509 = fsub <8 x float> %wide.masked.load210, %wide.masked.load211 - %510 = fpext <8 x float> %509 to <8 x double> - %511 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %510, <8 x double> , <8 x double> %502) - %512 = fptrunc <8 x double> %511 to <8 x float> - %513 = bitcast float* %500 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %512, <8 x float>* %513, i32 4, <8 x i1> %496), !tbaa !12, !alias.scope !51, !noalias !54, !llvm.access.group !24 - %514 = trunc <8 x i64> %broadcast.splat206 to <8 x i32> - %515 = or <8 x i32> %514, - %516 = icmp sgt <8 x i32> %broadcast.splat208, %515 - %517 = icmp sgt <8 x i32> %515, zeroinitializer - %518 = and <8 x i1> %516, %517 - %519 = extractelement <8 x i32> %515, i32 0 - %520 = add i32 %mul.i.4, %519 - %521 = sext i32 %520 to i64 - %522 = getelementptr inbounds float, float* %0, i64 %521 - %523 = bitcast float* %522 to <8 x float>* - %wide.masked.load209.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %523, i32 4, <8 x i1> %518, <8 x float> undef), !tbaa !12, !alias.scope !51, !noalias !54 - %524 = fpext <8 x float> %wide.masked.load209.1 to <8 x double> - %525 = getelementptr inbounds float, float* %2, i64 %521 - %526 = bitcast float* %525 to <8 x float>* - %wide.masked.load210.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %526, i32 4, <8 x i1> %518, <8 x float> undef), !tbaa !12, !alias.scope !57 - %527 = add i32 %520, -1 - %528 = sext i32 %527 to i64 - %529 = getelementptr inbounds float, float* %2, i64 %528 - %530 = bitcast float* %529 to <8 x float>* - %wide.masked.load211.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %530, i32 4, <8 x i1> %518, <8 x float> undef), !tbaa !12, !alias.scope !58 - %531 = fsub <8 x float> %wide.masked.load210.1, %wide.masked.load211.1 - %532 = fpext <8 x float> %531 to <8 x double> - %533 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %532, <8 x double> , <8 x double> %524) - %534 = fptrunc <8 x double> %533 to <8 x float> - %535 = bitcast float* %522 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %534, <8 x float>* %535, i32 4, <8 x i1> %518), !tbaa !12, !alias.scope !51, !noalias !54, !llvm.access.group !24 - %536 = trunc <8 x i64> %broadcast.splat206 to <8 x i32> - %537 = or <8 x i32> %536, - %538 = icmp sgt <8 x i32> %broadcast.splat208, %537 - %539 = icmp sgt <8 x i32> %537, zeroinitializer - %540 = and <8 x i1> %538, %539 - %541 = extractelement <8 x i32> %537, i32 0 - %542 = add i32 %mul.i.4, %541 - %543 = sext i32 %542 to i64 - %544 = getelementptr inbounds float, float* %0, i64 %543 - %545 = bitcast float* %544 to <8 x float>* - %wide.masked.load209.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %545, i32 4, <8 x i1> %540, <8 x float> undef), !tbaa !12, !alias.scope !51, !noalias !54 - %546 = fpext <8 x float> %wide.masked.load209.2 to <8 x double> - %547 = getelementptr inbounds float, float* %2, i64 %543 - %548 = bitcast float* %547 to <8 x float>* - %wide.masked.load210.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %548, i32 4, <8 x i1> %540, <8 x float> undef), !tbaa !12, !alias.scope !57 - %549 = add i32 %542, -1 - %550 = sext i32 %549 to i64 - %551 = getelementptr inbounds float, float* %2, i64 %550 - %552 = bitcast float* %551 to <8 x float>* - %wide.masked.load211.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %552, i32 4, <8 x i1> %540, <8 x float> undef), !tbaa !12, !alias.scope !58 - %553 = fsub <8 x float> %wide.masked.load210.2, %wide.masked.load211.2 - %554 = fpext <8 x float> %553 to <8 x double> - %555 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %554, <8 x double> , <8 x double> %546) - %556 = fptrunc <8 x double> %555 to <8 x float> - %557 = bitcast float* %544 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %556, <8 x float>* %557, i32 4, <8 x i1> %540), !tbaa !12, !alias.scope !51, !noalias !54, !llvm.access.group !24 - %558 = trunc <8 x i64> %broadcast.splat206 to <8 x i32> - %559 = or <8 x i32> %558, - %560 = icmp sgt <8 x i32> %broadcast.splat208, %559 - %561 = icmp sgt <8 x i32> %559, zeroinitializer - %562 = and <8 x i1> %560, %561 - %563 = extractelement <8 x i32> %559, i32 0 - %564 = add i32 %mul.i.4, %563 - %565 = sext i32 %564 to i64 - %566 = getelementptr inbounds float, float* %0, i64 %565 - %567 = bitcast float* %566 to <8 x float>* - %wide.masked.load209.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %567, i32 4, <8 x i1> %562, <8 x float> undef), !tbaa !12, !alias.scope !51, !noalias !54 - %568 = fpext <8 x float> %wide.masked.load209.3 to <8 x double> - %569 = getelementptr inbounds float, float* %2, i64 %565 - %570 = bitcast float* %569 to <8 x float>* - %wide.masked.load210.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %570, i32 4, <8 x i1> %562, <8 x float> undef), !tbaa !12, !alias.scope !57 - %571 = add i32 %564, -1 - %572 = sext i32 %571 to i64 - %573 = getelementptr inbounds float, float* %2, i64 %572 - %574 = bitcast float* %573 to <8 x float>* - %wide.masked.load211.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %574, i32 4, <8 x i1> %562, <8 x float> undef), !tbaa !12, !alias.scope !58 - %575 = fsub <8 x float> %wide.masked.load210.3, %wide.masked.load211.3 - %576 = fpext <8 x float> %575 to <8 x double> - %577 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %576, <8 x double> , <8 x double> %568) - %578 = fptrunc <8 x double> %577 to <8 x float> - %579 = bitcast float* %566 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %578, <8 x float>* %579, i32 4, <8 x i1> %562), !tbaa !12, !alias.scope !51, !noalias !54, !llvm.access.group !24 - br label %pregion_for_end.i.4 - -pregion_for_entry.entry.i.us.4: ; preds = %if.end.i.us.4.1, %pregion_for_entry.entry.i.us.4.preheader - %_local_id_x.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.us.4.preheader ], [ %947, %if.end.i.us.4.1 ] - %add1.i.i.us.4 = add nuw nsw i64 %_local_id_x.0.us.4, %mul.i.i - %conv.i.us.4 = trunc i64 %add1.i.i.us.4 to i32 - %cmp4.i.us.4 = icmp slt i32 %conv.i.us.4, %4 - %cmp7.i.us.4 = icmp sgt i32 %conv.i.us.4, 0 - %or.cond.i.us.4 = and i1 %cmp4.i.us.4, %cmp7.i.us.4 - br i1 %or.cond.i.us.4, label %if.then.i.us.4, label %if.end.i.us.4 - -if.then.i.us.4: ; preds = %pregion_for_entry.entry.i.us.4 - %add.i.us.4 = add i32 %mul.i.4, %conv.i.us.4 - %idxprom.i.us.4 = sext i32 %add.i.us.4 to i64 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.4 - %580 = load float, float* %arrayidx.i.us.4, align 4, !tbaa !12 - %conv9.i.us.4 = fpext float %580 to double - %arrayidx13.i.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4 - %581 = load float, float* %arrayidx13.i.us.4, align 4, !tbaa !12 - %add15.i.us.4 = add i32 %add.i.us.4, -1 - %idxprom16.i.us.4 = sext i32 %add15.i.us.4 to i64 - %arrayidx17.i.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.4 - %582 = load float, float* %arrayidx17.i.us.4, align 4, !tbaa !12 - %sub18.i.us.4 = fsub float %581, %582 - %conv19.i.us.4 = fpext float %sub18.i.us.4 to double - %583 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.4, double -5.000000e-01, double %conv9.i.us.4) #5 - %conv21.i.us.4 = fptrunc double %583 to float - store float %conv21.i.us.4, float* %arrayidx.i.us.4, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.4 - -if.end.i.us.4: ; preds = %if.then.i.us.4, %pregion_for_entry.entry.i.us.4 - %584 = or i64 %_local_id_x.0.us.4, 1 - %add1.i.i.us.4.1 = add nuw nsw i64 %584, %mul.i.i - %conv.i.us.4.1 = trunc i64 %add1.i.i.us.4.1 to i32 - %cmp4.i.us.4.1 = icmp slt i32 %conv.i.us.4.1, %4 - %cmp7.i.us.4.1 = icmp sgt i32 %conv.i.us.4.1, 0 - %or.cond.i.us.4.1 = and i1 %cmp4.i.us.4.1, %cmp7.i.us.4.1 - br i1 %or.cond.i.us.4.1, label %if.then.i.us.4.1, label %if.end.i.us.4.1 - -pregion_for_end.i.4.loopexit: ; preds = %if.end.i.us.4.1 - br label %pregion_for_end.i.4 - -pregion_for_end.i.4: ; preds = %pregion_for_end.i.4.loopexit, %vector.ph198, %pregion_for_end.i.3 - %585 = trunc i64 %mul3.i.i to i32 - %conv2.i.5 = or i32 %585, 5 - %cmp.i.5 = icmp slt i32 %conv2.i.5, %3 - %mul.i.5 = mul nsw i32 %conv2.i.5, %4 - br i1 %cmp.i.5, label %vector.scevcheck222, label %pregion_for_end.i.5 - -vector.scevcheck222: ; preds = %pregion_for_end.i.4 - %586 = mul i32 %conv2.i.5, %4 - %587 = trunc i64 %6 to i32 - %588 = shl i32 %587, 5 - %589 = add i32 %586, %588 - %590 = icmp sgt i32 %589, 2147483616 - %591 = add i32 %586, %588 - %592 = add i32 %591, -1 - %593 = add i32 %591, 30 - %594 = icmp slt i32 %593, %592 - %595 = or i1 %590, %594 - br i1 %595, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.memcheck244 - -pregion_for_entry.entry.i.us.5.preheader: ; preds = %vector.memcheck244, %vector.scevcheck222 - br label %pregion_for_entry.entry.i.us.5 - -vector.memcheck244: ; preds = %vector.scevcheck222 - %596 = mul i32 %conv2.i.5, %4 - %597 = trunc i64 %6 to i32 - %598 = shl i32 %597, 5 - %599 = add i32 %596, %598 - %600 = sext i32 %599 to i64 - %scevgep224 = getelementptr float, float* %0, i64 %600 - %601 = add nsw i64 %600, 32 - %scevgep226 = getelementptr float, float* %0, i64 %601 - %602 = add i32 %596, %598 - %603 = add i32 %602, -1 - %604 = sext i32 %603 to i64 - %scevgep228 = getelementptr float, float* %2, i64 %604 - %605 = add nsw i64 %604, 32 - %scevgep230 = getelementptr float, float* %2, i64 %605 - %scevgep232 = getelementptr float, float* %2, i64 %600 - %scevgep234 = getelementptr float, float* %2, i64 %601 - %bound0236 = icmp ult float* %scevgep224, %scevgep230 - %bound1237 = icmp ult float* %scevgep228, %scevgep226 - %found.conflict238 = and i1 %bound0236, %bound1237 - %bound0239 = icmp ult float* %scevgep224, %scevgep234 - %bound1240 = icmp ult float* %scevgep232, %scevgep226 - %found.conflict241 = and i1 %bound0239, %bound1240 - %conflict.rdx242 = or i1 %found.conflict238, %found.conflict241 - br i1 %conflict.rdx242, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.ph245 - -vector.ph245: ; preds = %vector.memcheck244 - %broadcast.splatinsert252 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat253 = shufflevector <8 x i64> %broadcast.splatinsert252, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert254 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat255 = shufflevector <8 x i32> %broadcast.splatinsert254, <8 x i32> undef, <8 x i32> zeroinitializer - %606 = trunc <8 x i64> %broadcast.splat253 to <8 x i32> - %607 = or <8 x i32> %606, - %608 = icmp sgt <8 x i32> %broadcast.splat255, %607 - %609 = icmp sgt <8 x i32> %607, zeroinitializer - %610 = and <8 x i1> %608, %609 - %611 = extractelement <8 x i32> %607, i32 0 - %612 = add i32 %mul.i.5, %611 - %613 = sext i32 %612 to i64 - %614 = getelementptr inbounds float, float* %0, i64 %613 - %615 = bitcast float* %614 to <8 x float>* - %wide.masked.load256 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %615, i32 4, <8 x i1> %610, <8 x float> undef), !tbaa !12, !alias.scope !59, !noalias !62 - %616 = fpext <8 x float> %wide.masked.load256 to <8 x double> - %617 = getelementptr inbounds float, float* %2, i64 %613 - %618 = bitcast float* %617 to <8 x float>* - %wide.masked.load257 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %618, i32 4, <8 x i1> %610, <8 x float> undef), !tbaa !12, !alias.scope !65 - %619 = add i32 %612, -1 - %620 = sext i32 %619 to i64 - %621 = getelementptr inbounds float, float* %2, i64 %620 - %622 = bitcast float* %621 to <8 x float>* - %wide.masked.load258 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %622, i32 4, <8 x i1> %610, <8 x float> undef), !tbaa !12, !alias.scope !66 - %623 = fsub <8 x float> %wide.masked.load257, %wide.masked.load258 - %624 = fpext <8 x float> %623 to <8 x double> - %625 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %624, <8 x double> , <8 x double> %616) - %626 = fptrunc <8 x double> %625 to <8 x float> - %627 = bitcast float* %614 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %626, <8 x float>* %627, i32 4, <8 x i1> %610), !tbaa !12, !alias.scope !59, !noalias !62, !llvm.access.group !24 - %628 = trunc <8 x i64> %broadcast.splat253 to <8 x i32> - %629 = or <8 x i32> %628, - %630 = icmp sgt <8 x i32> %broadcast.splat255, %629 - %631 = icmp sgt <8 x i32> %629, zeroinitializer - %632 = and <8 x i1> %630, %631 - %633 = extractelement <8 x i32> %629, i32 0 - %634 = add i32 %mul.i.5, %633 - %635 = sext i32 %634 to i64 - %636 = getelementptr inbounds float, float* %0, i64 %635 - %637 = bitcast float* %636 to <8 x float>* - %wide.masked.load256.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %637, i32 4, <8 x i1> %632, <8 x float> undef), !tbaa !12, !alias.scope !59, !noalias !62 - %638 = fpext <8 x float> %wide.masked.load256.1 to <8 x double> - %639 = getelementptr inbounds float, float* %2, i64 %635 - %640 = bitcast float* %639 to <8 x float>* - %wide.masked.load257.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %640, i32 4, <8 x i1> %632, <8 x float> undef), !tbaa !12, !alias.scope !65 - %641 = add i32 %634, -1 - %642 = sext i32 %641 to i64 - %643 = getelementptr inbounds float, float* %2, i64 %642 - %644 = bitcast float* %643 to <8 x float>* - %wide.masked.load258.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %644, i32 4, <8 x i1> %632, <8 x float> undef), !tbaa !12, !alias.scope !66 - %645 = fsub <8 x float> %wide.masked.load257.1, %wide.masked.load258.1 - %646 = fpext <8 x float> %645 to <8 x double> - %647 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %646, <8 x double> , <8 x double> %638) - %648 = fptrunc <8 x double> %647 to <8 x float> - %649 = bitcast float* %636 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %648, <8 x float>* %649, i32 4, <8 x i1> %632), !tbaa !12, !alias.scope !59, !noalias !62, !llvm.access.group !24 - %650 = trunc <8 x i64> %broadcast.splat253 to <8 x i32> - %651 = or <8 x i32> %650, - %652 = icmp sgt <8 x i32> %broadcast.splat255, %651 - %653 = icmp sgt <8 x i32> %651, zeroinitializer - %654 = and <8 x i1> %652, %653 - %655 = extractelement <8 x i32> %651, i32 0 - %656 = add i32 %mul.i.5, %655 - %657 = sext i32 %656 to i64 - %658 = getelementptr inbounds float, float* %0, i64 %657 - %659 = bitcast float* %658 to <8 x float>* - %wide.masked.load256.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %659, i32 4, <8 x i1> %654, <8 x float> undef), !tbaa !12, !alias.scope !59, !noalias !62 - %660 = fpext <8 x float> %wide.masked.load256.2 to <8 x double> - %661 = getelementptr inbounds float, float* %2, i64 %657 - %662 = bitcast float* %661 to <8 x float>* - %wide.masked.load257.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %662, i32 4, <8 x i1> %654, <8 x float> undef), !tbaa !12, !alias.scope !65 - %663 = add i32 %656, -1 - %664 = sext i32 %663 to i64 - %665 = getelementptr inbounds float, float* %2, i64 %664 - %666 = bitcast float* %665 to <8 x float>* - %wide.masked.load258.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %666, i32 4, <8 x i1> %654, <8 x float> undef), !tbaa !12, !alias.scope !66 - %667 = fsub <8 x float> %wide.masked.load257.2, %wide.masked.load258.2 - %668 = fpext <8 x float> %667 to <8 x double> - %669 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %668, <8 x double> , <8 x double> %660) - %670 = fptrunc <8 x double> %669 to <8 x float> - %671 = bitcast float* %658 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %670, <8 x float>* %671, i32 4, <8 x i1> %654), !tbaa !12, !alias.scope !59, !noalias !62, !llvm.access.group !24 - %672 = trunc <8 x i64> %broadcast.splat253 to <8 x i32> - %673 = or <8 x i32> %672, - %674 = icmp sgt <8 x i32> %broadcast.splat255, %673 - %675 = icmp sgt <8 x i32> %673, zeroinitializer - %676 = and <8 x i1> %674, %675 - %677 = extractelement <8 x i32> %673, i32 0 - %678 = add i32 %mul.i.5, %677 - %679 = sext i32 %678 to i64 - %680 = getelementptr inbounds float, float* %0, i64 %679 - %681 = bitcast float* %680 to <8 x float>* - %wide.masked.load256.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %681, i32 4, <8 x i1> %676, <8 x float> undef), !tbaa !12, !alias.scope !59, !noalias !62 - %682 = fpext <8 x float> %wide.masked.load256.3 to <8 x double> - %683 = getelementptr inbounds float, float* %2, i64 %679 - %684 = bitcast float* %683 to <8 x float>* - %wide.masked.load257.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %684, i32 4, <8 x i1> %676, <8 x float> undef), !tbaa !12, !alias.scope !65 - %685 = add i32 %678, -1 - %686 = sext i32 %685 to i64 - %687 = getelementptr inbounds float, float* %2, i64 %686 - %688 = bitcast float* %687 to <8 x float>* - %wide.masked.load258.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %688, i32 4, <8 x i1> %676, <8 x float> undef), !tbaa !12, !alias.scope !66 - %689 = fsub <8 x float> %wide.masked.load257.3, %wide.masked.load258.3 - %690 = fpext <8 x float> %689 to <8 x double> - %691 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %690, <8 x double> , <8 x double> %682) - %692 = fptrunc <8 x double> %691 to <8 x float> - %693 = bitcast float* %680 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %692, <8 x float>* %693, i32 4, <8 x i1> %676), !tbaa !12, !alias.scope !59, !noalias !62, !llvm.access.group !24 - br label %pregion_for_end.i.5 - -pregion_for_entry.entry.i.us.5: ; preds = %if.end.i.us.5.1, %pregion_for_entry.entry.i.us.5.preheader - %_local_id_x.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.us.5.preheader ], [ %942, %if.end.i.us.5.1 ] - %add1.i.i.us.5 = add nuw nsw i64 %_local_id_x.0.us.5, %mul.i.i - %conv.i.us.5 = trunc i64 %add1.i.i.us.5 to i32 - %cmp4.i.us.5 = icmp slt i32 %conv.i.us.5, %4 - %cmp7.i.us.5 = icmp sgt i32 %conv.i.us.5, 0 - %or.cond.i.us.5 = and i1 %cmp4.i.us.5, %cmp7.i.us.5 - br i1 %or.cond.i.us.5, label %if.then.i.us.5, label %if.end.i.us.5 - -if.then.i.us.5: ; preds = %pregion_for_entry.entry.i.us.5 - %add.i.us.5 = add i32 %mul.i.5, %conv.i.us.5 - %idxprom.i.us.5 = sext i32 %add.i.us.5 to i64 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.5 - %694 = load float, float* %arrayidx.i.us.5, align 4, !tbaa !12 - %conv9.i.us.5 = fpext float %694 to double - %arrayidx13.i.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5 - %695 = load float, float* %arrayidx13.i.us.5, align 4, !tbaa !12 - %add15.i.us.5 = add i32 %add.i.us.5, -1 - %idxprom16.i.us.5 = sext i32 %add15.i.us.5 to i64 - %arrayidx17.i.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.5 - %696 = load float, float* %arrayidx17.i.us.5, align 4, !tbaa !12 - %sub18.i.us.5 = fsub float %695, %696 - %conv19.i.us.5 = fpext float %sub18.i.us.5 to double - %697 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.5, double -5.000000e-01, double %conv9.i.us.5) #5 - %conv21.i.us.5 = fptrunc double %697 to float - store float %conv21.i.us.5, float* %arrayidx.i.us.5, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.5 - -if.end.i.us.5: ; preds = %if.then.i.us.5, %pregion_for_entry.entry.i.us.5 - %698 = or i64 %_local_id_x.0.us.5, 1 - %add1.i.i.us.5.1 = add nuw nsw i64 %698, %mul.i.i - %conv.i.us.5.1 = trunc i64 %add1.i.i.us.5.1 to i32 - %cmp4.i.us.5.1 = icmp slt i32 %conv.i.us.5.1, %4 - %cmp7.i.us.5.1 = icmp sgt i32 %conv.i.us.5.1, 0 - %or.cond.i.us.5.1 = and i1 %cmp4.i.us.5.1, %cmp7.i.us.5.1 - br i1 %or.cond.i.us.5.1, label %if.then.i.us.5.1, label %if.end.i.us.5.1 - -pregion_for_end.i.5.loopexit: ; preds = %if.end.i.us.5.1 - br label %pregion_for_end.i.5 - -pregion_for_end.i.5: ; preds = %pregion_for_end.i.5.loopexit, %vector.ph245, %pregion_for_end.i.4 - %699 = trunc i64 %mul3.i.i to i32 - %conv2.i.6 = or i32 %699, 6 - %cmp.i.6 = icmp slt i32 %conv2.i.6, %3 - %mul.i.6 = mul nsw i32 %conv2.i.6, %4 - br i1 %cmp.i.6, label %vector.scevcheck269, label %pregion_for_end.i.6 - -vector.scevcheck269: ; preds = %pregion_for_end.i.5 - %700 = mul i32 %conv2.i.6, %4 - %701 = trunc i64 %6 to i32 - %702 = shl i32 %701, 5 - %703 = add i32 %700, %702 - %704 = icmp sgt i32 %703, 2147483616 - %705 = add i32 %700, %702 - %706 = add i32 %705, -1 - %707 = add i32 %705, 30 - %708 = icmp slt i32 %707, %706 - %709 = or i1 %704, %708 - br i1 %709, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.memcheck291 - -pregion_for_entry.entry.i.us.6.preheader: ; preds = %vector.memcheck291, %vector.scevcheck269 - br label %pregion_for_entry.entry.i.us.6 - -vector.memcheck291: ; preds = %vector.scevcheck269 - %710 = mul i32 %conv2.i.6, %4 - %711 = trunc i64 %6 to i32 - %712 = shl i32 %711, 5 - %713 = add i32 %710, %712 - %714 = sext i32 %713 to i64 - %scevgep271 = getelementptr float, float* %0, i64 %714 - %715 = add nsw i64 %714, 32 - %scevgep273 = getelementptr float, float* %0, i64 %715 - %716 = add i32 %710, %712 - %717 = add i32 %716, -2 - %718 = sext i32 %717 to i64 - %719 = add nuw nsw i64 %718, 1 - %scevgep275 = getelementptr float, float* %2, i64 %719 - %720 = add nsw i64 %718, 33 - %scevgep277 = getelementptr float, float* %2, i64 %720 - %scevgep279 = getelementptr float, float* %2, i64 %714 - %scevgep281 = getelementptr float, float* %2, i64 %715 - %bound0283 = icmp ult float* %scevgep271, %scevgep277 - %bound1284 = icmp ult float* %scevgep275, %scevgep273 - %found.conflict285 = and i1 %bound0283, %bound1284 - %bound0286 = icmp ult float* %scevgep271, %scevgep281 - %bound1287 = icmp ult float* %scevgep279, %scevgep273 - %found.conflict288 = and i1 %bound0286, %bound1287 - %conflict.rdx289 = or i1 %found.conflict285, %found.conflict288 - br i1 %conflict.rdx289, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.ph292 - -vector.ph292: ; preds = %vector.memcheck291 - %broadcast.splatinsert299 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat300 = shufflevector <8 x i64> %broadcast.splatinsert299, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert301 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat302 = shufflevector <8 x i32> %broadcast.splatinsert301, <8 x i32> undef, <8 x i32> zeroinitializer - %721 = trunc <8 x i64> %broadcast.splat300 to <8 x i32> - %722 = or <8 x i32> %721, - %723 = icmp sgt <8 x i32> %broadcast.splat302, %722 - %724 = icmp sgt <8 x i32> %722, zeroinitializer - %725 = and <8 x i1> %723, %724 - %726 = extractelement <8 x i32> %722, i32 0 - %727 = add i32 %mul.i.6, %726 - %728 = sext i32 %727 to i64 - %729 = getelementptr inbounds float, float* %0, i64 %728 - %730 = bitcast float* %729 to <8 x float>* - %wide.masked.load303 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %730, i32 4, <8 x i1> %725, <8 x float> undef), !tbaa !12, !alias.scope !67, !noalias !70 - %731 = fpext <8 x float> %wide.masked.load303 to <8 x double> - %732 = getelementptr inbounds float, float* %2, i64 %728 - %733 = bitcast float* %732 to <8 x float>* - %wide.masked.load304 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %733, i32 4, <8 x i1> %725, <8 x float> undef), !tbaa !12, !alias.scope !73 - %734 = add i32 %727, -1 - %735 = sext i32 %734 to i64 - %736 = getelementptr inbounds float, float* %2, i64 %735 - %737 = bitcast float* %736 to <8 x float>* - %wide.masked.load305 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %737, i32 4, <8 x i1> %725, <8 x float> undef), !tbaa !12, !alias.scope !74 - %738 = fsub <8 x float> %wide.masked.load304, %wide.masked.load305 - %739 = fpext <8 x float> %738 to <8 x double> - %740 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %739, <8 x double> , <8 x double> %731) - %741 = fptrunc <8 x double> %740 to <8 x float> - %742 = bitcast float* %729 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %741, <8 x float>* %742, i32 4, <8 x i1> %725), !tbaa !12, !alias.scope !67, !noalias !70, !llvm.access.group !24 - %743 = trunc <8 x i64> %broadcast.splat300 to <8 x i32> - %744 = or <8 x i32> %743, - %745 = icmp sgt <8 x i32> %broadcast.splat302, %744 - %746 = icmp sgt <8 x i32> %744, zeroinitializer - %747 = and <8 x i1> %745, %746 - %748 = extractelement <8 x i32> %744, i32 0 - %749 = add i32 %mul.i.6, %748 - %750 = sext i32 %749 to i64 - %751 = getelementptr inbounds float, float* %0, i64 %750 - %752 = bitcast float* %751 to <8 x float>* - %wide.masked.load303.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %752, i32 4, <8 x i1> %747, <8 x float> undef), !tbaa !12, !alias.scope !67, !noalias !70 - %753 = fpext <8 x float> %wide.masked.load303.1 to <8 x double> - %754 = getelementptr inbounds float, float* %2, i64 %750 - %755 = bitcast float* %754 to <8 x float>* - %wide.masked.load304.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %755, i32 4, <8 x i1> %747, <8 x float> undef), !tbaa !12, !alias.scope !73 - %756 = add i32 %749, -1 - %757 = sext i32 %756 to i64 - %758 = getelementptr inbounds float, float* %2, i64 %757 - %759 = bitcast float* %758 to <8 x float>* - %wide.masked.load305.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %759, i32 4, <8 x i1> %747, <8 x float> undef), !tbaa !12, !alias.scope !74 - %760 = fsub <8 x float> %wide.masked.load304.1, %wide.masked.load305.1 - %761 = fpext <8 x float> %760 to <8 x double> - %762 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %761, <8 x double> , <8 x double> %753) - %763 = fptrunc <8 x double> %762 to <8 x float> - %764 = bitcast float* %751 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %763, <8 x float>* %764, i32 4, <8 x i1> %747), !tbaa !12, !alias.scope !67, !noalias !70, !llvm.access.group !24 - %765 = trunc <8 x i64> %broadcast.splat300 to <8 x i32> - %766 = or <8 x i32> %765, - %767 = icmp sgt <8 x i32> %broadcast.splat302, %766 - %768 = icmp sgt <8 x i32> %766, zeroinitializer - %769 = and <8 x i1> %767, %768 - %770 = extractelement <8 x i32> %766, i32 0 - %771 = add i32 %mul.i.6, %770 - %772 = sext i32 %771 to i64 - %773 = getelementptr inbounds float, float* %0, i64 %772 - %774 = bitcast float* %773 to <8 x float>* - %wide.masked.load303.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %774, i32 4, <8 x i1> %769, <8 x float> undef), !tbaa !12, !alias.scope !67, !noalias !70 - %775 = fpext <8 x float> %wide.masked.load303.2 to <8 x double> - %776 = getelementptr inbounds float, float* %2, i64 %772 - %777 = bitcast float* %776 to <8 x float>* - %wide.masked.load304.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %777, i32 4, <8 x i1> %769, <8 x float> undef), !tbaa !12, !alias.scope !73 - %778 = add i32 %771, -1 - %779 = sext i32 %778 to i64 - %780 = getelementptr inbounds float, float* %2, i64 %779 - %781 = bitcast float* %780 to <8 x float>* - %wide.masked.load305.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %781, i32 4, <8 x i1> %769, <8 x float> undef), !tbaa !12, !alias.scope !74 - %782 = fsub <8 x float> %wide.masked.load304.2, %wide.masked.load305.2 - %783 = fpext <8 x float> %782 to <8 x double> - %784 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %783, <8 x double> , <8 x double> %775) - %785 = fptrunc <8 x double> %784 to <8 x float> - %786 = bitcast float* %773 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %785, <8 x float>* %786, i32 4, <8 x i1> %769), !tbaa !12, !alias.scope !67, !noalias !70, !llvm.access.group !24 - %787 = trunc <8 x i64> %broadcast.splat300 to <8 x i32> - %788 = or <8 x i32> %787, - %789 = icmp sgt <8 x i32> %broadcast.splat302, %788 - %790 = icmp sgt <8 x i32> %788, zeroinitializer - %791 = and <8 x i1> %789, %790 - %792 = extractelement <8 x i32> %788, i32 0 - %793 = add i32 %mul.i.6, %792 - %794 = sext i32 %793 to i64 - %795 = getelementptr inbounds float, float* %0, i64 %794 - %796 = bitcast float* %795 to <8 x float>* - %wide.masked.load303.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %796, i32 4, <8 x i1> %791, <8 x float> undef), !tbaa !12, !alias.scope !67, !noalias !70 - %797 = fpext <8 x float> %wide.masked.load303.3 to <8 x double> - %798 = getelementptr inbounds float, float* %2, i64 %794 - %799 = bitcast float* %798 to <8 x float>* - %wide.masked.load304.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %799, i32 4, <8 x i1> %791, <8 x float> undef), !tbaa !12, !alias.scope !73 - %800 = add i32 %793, -1 - %801 = sext i32 %800 to i64 - %802 = getelementptr inbounds float, float* %2, i64 %801 - %803 = bitcast float* %802 to <8 x float>* - %wide.masked.load305.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %803, i32 4, <8 x i1> %791, <8 x float> undef), !tbaa !12, !alias.scope !74 - %804 = fsub <8 x float> %wide.masked.load304.3, %wide.masked.load305.3 - %805 = fpext <8 x float> %804 to <8 x double> - %806 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %805, <8 x double> , <8 x double> %797) - %807 = fptrunc <8 x double> %806 to <8 x float> - %808 = bitcast float* %795 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %807, <8 x float>* %808, i32 4, <8 x i1> %791), !tbaa !12, !alias.scope !67, !noalias !70, !llvm.access.group !24 - br label %pregion_for_end.i.6 - -pregion_for_entry.entry.i.us.6: ; preds = %if.end.i.us.6.1, %pregion_for_entry.entry.i.us.6.preheader - %_local_id_x.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.us.6.preheader ], [ %937, %if.end.i.us.6.1 ] - %add1.i.i.us.6 = add nuw nsw i64 %_local_id_x.0.us.6, %mul.i.i - %conv.i.us.6 = trunc i64 %add1.i.i.us.6 to i32 - %cmp4.i.us.6 = icmp slt i32 %conv.i.us.6, %4 - %cmp7.i.us.6 = icmp sgt i32 %conv.i.us.6, 0 - %or.cond.i.us.6 = and i1 %cmp4.i.us.6, %cmp7.i.us.6 - br i1 %or.cond.i.us.6, label %if.then.i.us.6, label %if.end.i.us.6 - -if.then.i.us.6: ; preds = %pregion_for_entry.entry.i.us.6 - %add.i.us.6 = add i32 %mul.i.6, %conv.i.us.6 - %idxprom.i.us.6 = sext i32 %add.i.us.6 to i64 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.6 - %809 = load float, float* %arrayidx.i.us.6, align 4, !tbaa !12 - %conv9.i.us.6 = fpext float %809 to double - %arrayidx13.i.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6 - %810 = load float, float* %arrayidx13.i.us.6, align 4, !tbaa !12 - %add15.i.us.6 = add i32 %add.i.us.6, -1 - %idxprom16.i.us.6 = sext i32 %add15.i.us.6 to i64 - %arrayidx17.i.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.6 - %811 = load float, float* %arrayidx17.i.us.6, align 4, !tbaa !12 - %sub18.i.us.6 = fsub float %810, %811 - %conv19.i.us.6 = fpext float %sub18.i.us.6 to double - %812 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.6, double -5.000000e-01, double %conv9.i.us.6) #5 - %conv21.i.us.6 = fptrunc double %812 to float - store float %conv21.i.us.6, float* %arrayidx.i.us.6, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.6 - -if.end.i.us.6: ; preds = %if.then.i.us.6, %pregion_for_entry.entry.i.us.6 - %813 = or i64 %_local_id_x.0.us.6, 1 - %add1.i.i.us.6.1 = add nuw nsw i64 %813, %mul.i.i - %conv.i.us.6.1 = trunc i64 %add1.i.i.us.6.1 to i32 - %cmp4.i.us.6.1 = icmp slt i32 %conv.i.us.6.1, %4 - %cmp7.i.us.6.1 = icmp sgt i32 %conv.i.us.6.1, 0 - %or.cond.i.us.6.1 = and i1 %cmp4.i.us.6.1, %cmp7.i.us.6.1 - br i1 %or.cond.i.us.6.1, label %if.then.i.us.6.1, label %if.end.i.us.6.1 - -pregion_for_end.i.6.loopexit: ; preds = %if.end.i.us.6.1 - br label %pregion_for_end.i.6 - -pregion_for_end.i.6: ; preds = %pregion_for_end.i.6.loopexit, %vector.ph292, %pregion_for_end.i.5 - %814 = trunc i64 %mul3.i.i to i32 - %conv2.i.7 = or i32 %814, 7 - %cmp.i.7 = icmp slt i32 %conv2.i.7, %3 - %mul.i.7 = mul nsw i32 %conv2.i.7, %4 - br i1 %cmp.i.7, label %vector.scevcheck316, label %pregion_for_end.i.7 - -vector.scevcheck316: ; preds = %pregion_for_end.i.6 - %815 = mul i32 %conv2.i.7, %4 - %816 = trunc i64 %6 to i32 - %817 = shl i32 %816, 5 - %818 = add i32 %815, %817 - %819 = icmp sgt i32 %818, 2147483616 - %820 = add i32 %815, %817 - %821 = add i32 %820, -1 - %822 = add i32 %820, 30 - %823 = icmp slt i32 %822, %821 - %824 = or i1 %819, %823 - br i1 %824, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.memcheck338 - -pregion_for_entry.entry.i.us.7.preheader: ; preds = %vector.memcheck338, %vector.scevcheck316 - br label %pregion_for_entry.entry.i.us.7 - -vector.memcheck338: ; preds = %vector.scevcheck316 - %825 = mul i32 %conv2.i.7, %4 - %826 = trunc i64 %6 to i32 - %827 = shl i32 %826, 5 - %828 = add i32 %825, %827 - %829 = sext i32 %828 to i64 - %scevgep318 = getelementptr float, float* %0, i64 %829 - %830 = add nsw i64 %829, 32 - %scevgep320 = getelementptr float, float* %0, i64 %830 - %831 = add i32 %825, %827 - %832 = add i32 %831, -1 - %833 = sext i32 %832 to i64 - %scevgep322 = getelementptr float, float* %2, i64 %833 - %834 = add nsw i64 %833, 32 - %scevgep324 = getelementptr float, float* %2, i64 %834 - %scevgep326 = getelementptr float, float* %2, i64 %829 - %scevgep328 = getelementptr float, float* %2, i64 %830 - %bound0330 = icmp ult float* %scevgep318, %scevgep324 - %bound1331 = icmp ult float* %scevgep322, %scevgep320 - %found.conflict332 = and i1 %bound0330, %bound1331 - %bound0333 = icmp ult float* %scevgep318, %scevgep328 - %bound1334 = icmp ult float* %scevgep326, %scevgep320 - %found.conflict335 = and i1 %bound0333, %bound1334 - %conflict.rdx336 = or i1 %found.conflict332, %found.conflict335 - br i1 %conflict.rdx336, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.ph339 - -vector.ph339: ; preds = %vector.memcheck338 - %broadcast.splatinsert346 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat347 = shufflevector <8 x i64> %broadcast.splatinsert346, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert348 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat349 = shufflevector <8 x i32> %broadcast.splatinsert348, <8 x i32> undef, <8 x i32> zeroinitializer - %835 = trunc <8 x i64> %broadcast.splat347 to <8 x i32> - %836 = or <8 x i32> %835, - %837 = icmp sgt <8 x i32> %broadcast.splat349, %836 - %838 = icmp sgt <8 x i32> %836, zeroinitializer - %839 = and <8 x i1> %837, %838 - %840 = extractelement <8 x i32> %836, i32 0 - %841 = add i32 %mul.i.7, %840 - %842 = sext i32 %841 to i64 - %843 = getelementptr inbounds float, float* %0, i64 %842 - %844 = bitcast float* %843 to <8 x float>* - %wide.masked.load350 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %844, i32 4, <8 x i1> %839, <8 x float> undef), !tbaa !12, !alias.scope !75, !noalias !78 - %845 = fpext <8 x float> %wide.masked.load350 to <8 x double> - %846 = getelementptr inbounds float, float* %2, i64 %842 - %847 = bitcast float* %846 to <8 x float>* - %wide.masked.load351 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %847, i32 4, <8 x i1> %839, <8 x float> undef), !tbaa !12, !alias.scope !81 - %848 = add i32 %841, -1 - %849 = sext i32 %848 to i64 - %850 = getelementptr inbounds float, float* %2, i64 %849 - %851 = bitcast float* %850 to <8 x float>* - %wide.masked.load352 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %851, i32 4, <8 x i1> %839, <8 x float> undef), !tbaa !12, !alias.scope !82 - %852 = fsub <8 x float> %wide.masked.load351, %wide.masked.load352 - %853 = fpext <8 x float> %852 to <8 x double> - %854 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %853, <8 x double> , <8 x double> %845) - %855 = fptrunc <8 x double> %854 to <8 x float> - %856 = bitcast float* %843 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %855, <8 x float>* %856, i32 4, <8 x i1> %839), !tbaa !12, !alias.scope !75, !noalias !78, !llvm.access.group !24 - %857 = trunc <8 x i64> %broadcast.splat347 to <8 x i32> - %858 = or <8 x i32> %857, - %859 = icmp sgt <8 x i32> %broadcast.splat349, %858 - %860 = icmp sgt <8 x i32> %858, zeroinitializer - %861 = and <8 x i1> %859, %860 - %862 = extractelement <8 x i32> %858, i32 0 - %863 = add i32 %mul.i.7, %862 - %864 = sext i32 %863 to i64 - %865 = getelementptr inbounds float, float* %0, i64 %864 - %866 = bitcast float* %865 to <8 x float>* - %wide.masked.load350.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %866, i32 4, <8 x i1> %861, <8 x float> undef), !tbaa !12, !alias.scope !75, !noalias !78 - %867 = fpext <8 x float> %wide.masked.load350.1 to <8 x double> - %868 = getelementptr inbounds float, float* %2, i64 %864 - %869 = bitcast float* %868 to <8 x float>* - %wide.masked.load351.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %869, i32 4, <8 x i1> %861, <8 x float> undef), !tbaa !12, !alias.scope !81 - %870 = add i32 %863, -1 - %871 = sext i32 %870 to i64 - %872 = getelementptr inbounds float, float* %2, i64 %871 - %873 = bitcast float* %872 to <8 x float>* - %wide.masked.load352.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %873, i32 4, <8 x i1> %861, <8 x float> undef), !tbaa !12, !alias.scope !82 - %874 = fsub <8 x float> %wide.masked.load351.1, %wide.masked.load352.1 - %875 = fpext <8 x float> %874 to <8 x double> - %876 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %875, <8 x double> , <8 x double> %867) - %877 = fptrunc <8 x double> %876 to <8 x float> - %878 = bitcast float* %865 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %877, <8 x float>* %878, i32 4, <8 x i1> %861), !tbaa !12, !alias.scope !75, !noalias !78, !llvm.access.group !24 - %879 = trunc <8 x i64> %broadcast.splat347 to <8 x i32> - %880 = or <8 x i32> %879, - %881 = icmp sgt <8 x i32> %broadcast.splat349, %880 - %882 = icmp sgt <8 x i32> %880, zeroinitializer - %883 = and <8 x i1> %881, %882 - %884 = extractelement <8 x i32> %880, i32 0 - %885 = add i32 %mul.i.7, %884 - %886 = sext i32 %885 to i64 - %887 = getelementptr inbounds float, float* %0, i64 %886 - %888 = bitcast float* %887 to <8 x float>* - %wide.masked.load350.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %888, i32 4, <8 x i1> %883, <8 x float> undef), !tbaa !12, !alias.scope !75, !noalias !78 - %889 = fpext <8 x float> %wide.masked.load350.2 to <8 x double> - %890 = getelementptr inbounds float, float* %2, i64 %886 - %891 = bitcast float* %890 to <8 x float>* - %wide.masked.load351.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %891, i32 4, <8 x i1> %883, <8 x float> undef), !tbaa !12, !alias.scope !81 - %892 = add i32 %885, -1 - %893 = sext i32 %892 to i64 - %894 = getelementptr inbounds float, float* %2, i64 %893 - %895 = bitcast float* %894 to <8 x float>* - %wide.masked.load352.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %895, i32 4, <8 x i1> %883, <8 x float> undef), !tbaa !12, !alias.scope !82 - %896 = fsub <8 x float> %wide.masked.load351.2, %wide.masked.load352.2 - %897 = fpext <8 x float> %896 to <8 x double> - %898 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %897, <8 x double> , <8 x double> %889) - %899 = fptrunc <8 x double> %898 to <8 x float> - %900 = bitcast float* %887 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %899, <8 x float>* %900, i32 4, <8 x i1> %883), !tbaa !12, !alias.scope !75, !noalias !78, !llvm.access.group !24 - %901 = trunc <8 x i64> %broadcast.splat347 to <8 x i32> - %902 = or <8 x i32> %901, - %903 = icmp sgt <8 x i32> %broadcast.splat349, %902 - %904 = icmp sgt <8 x i32> %902, zeroinitializer - %905 = and <8 x i1> %903, %904 - %906 = extractelement <8 x i32> %902, i32 0 - %907 = add i32 %mul.i.7, %906 - %908 = sext i32 %907 to i64 - %909 = getelementptr inbounds float, float* %0, i64 %908 - %910 = bitcast float* %909 to <8 x float>* - %wide.masked.load350.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %910, i32 4, <8 x i1> %905, <8 x float> undef), !tbaa !12, !alias.scope !75, !noalias !78 - %911 = fpext <8 x float> %wide.masked.load350.3 to <8 x double> - %912 = getelementptr inbounds float, float* %2, i64 %908 - %913 = bitcast float* %912 to <8 x float>* - %wide.masked.load351.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %913, i32 4, <8 x i1> %905, <8 x float> undef), !tbaa !12, !alias.scope !81 - %914 = add i32 %907, -1 - %915 = sext i32 %914 to i64 - %916 = getelementptr inbounds float, float* %2, i64 %915 - %917 = bitcast float* %916 to <8 x float>* - %wide.masked.load352.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %917, i32 4, <8 x i1> %905, <8 x float> undef), !tbaa !12, !alias.scope !82 - %918 = fsub <8 x float> %wide.masked.load351.3, %wide.masked.load352.3 - %919 = fpext <8 x float> %918 to <8 x double> - %920 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %919, <8 x double> , <8 x double> %911) - %921 = fptrunc <8 x double> %920 to <8 x float> - %922 = bitcast float* %909 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %921, <8 x float>* %922, i32 4, <8 x i1> %905), !tbaa !12, !alias.scope !75, !noalias !78, !llvm.access.group !24 - br label %pregion_for_end.i.7 - -pregion_for_entry.entry.i.us.7: ; preds = %if.end.i.us.7.1, %pregion_for_entry.entry.i.us.7.preheader - %_local_id_x.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.us.7.preheader ], [ %932, %if.end.i.us.7.1 ] - %add1.i.i.us.7 = add nuw nsw i64 %_local_id_x.0.us.7, %mul.i.i - %conv.i.us.7 = trunc i64 %add1.i.i.us.7 to i32 - %cmp4.i.us.7 = icmp slt i32 %conv.i.us.7, %4 - %cmp7.i.us.7 = icmp sgt i32 %conv.i.us.7, 0 - %or.cond.i.us.7 = and i1 %cmp4.i.us.7, %cmp7.i.us.7 - br i1 %or.cond.i.us.7, label %if.then.i.us.7, label %if.end.i.us.7 - -if.then.i.us.7: ; preds = %pregion_for_entry.entry.i.us.7 - %add.i.us.7 = add i32 %mul.i.7, %conv.i.us.7 - %idxprom.i.us.7 = sext i32 %add.i.us.7 to i64 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.7 - %923 = load float, float* %arrayidx.i.us.7, align 4, !tbaa !12 - %conv9.i.us.7 = fpext float %923 to double - %arrayidx13.i.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7 - %924 = load float, float* %arrayidx13.i.us.7, align 4, !tbaa !12 - %add15.i.us.7 = add i32 %add.i.us.7, -1 - %idxprom16.i.us.7 = sext i32 %add15.i.us.7 to i64 - %arrayidx17.i.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.7 - %925 = load float, float* %arrayidx17.i.us.7, align 4, !tbaa !12 - %sub18.i.us.7 = fsub float %924, %925 - %conv19.i.us.7 = fpext float %sub18.i.us.7 to double - %926 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.7, double -5.000000e-01, double %conv9.i.us.7) #5 - %conv21.i.us.7 = fptrunc double %926 to float - store float %conv21.i.us.7, float* %arrayidx.i.us.7, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.7 - -if.end.i.us.7: ; preds = %if.then.i.us.7, %pregion_for_entry.entry.i.us.7 - %927 = or i64 %_local_id_x.0.us.7, 1 - %add1.i.i.us.7.1 = add nuw nsw i64 %927, %mul.i.i - %conv.i.us.7.1 = trunc i64 %add1.i.i.us.7.1 to i32 - %cmp4.i.us.7.1 = icmp slt i32 %conv.i.us.7.1, %4 - %cmp7.i.us.7.1 = icmp sgt i32 %conv.i.us.7.1, 0 - %or.cond.i.us.7.1 = and i1 %cmp4.i.us.7.1, %cmp7.i.us.7.1 - br i1 %or.cond.i.us.7.1, label %if.then.i.us.7.1, label %if.end.i.us.7.1 - -pregion_for_end.i.7.loopexit: ; preds = %if.end.i.us.7.1 - br label %pregion_for_end.i.7 - -pregion_for_end.i.7: ; preds = %pregion_for_end.i.7.loopexit, %vector.ph339, %pregion_for_end.i.6 - ret void - -if.then.i.us.7.1: ; preds = %if.end.i.us.7 - %add.i.us.7.1 = add i32 %mul.i.7, %conv.i.us.7.1 - %idxprom.i.us.7.1 = sext i32 %add.i.us.7.1 to i64 - %arrayidx.i.us.7.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.7.1 - %928 = load float, float* %arrayidx.i.us.7.1, align 4, !tbaa !12 - %conv9.i.us.7.1 = fpext float %928 to double - %arrayidx13.i.us.7.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7.1 - %929 = load float, float* %arrayidx13.i.us.7.1, align 4, !tbaa !12 - %add15.i.us.7.1 = add i32 %add.i.us.7.1, -1 - %idxprom16.i.us.7.1 = sext i32 %add15.i.us.7.1 to i64 - %arrayidx17.i.us.7.1 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.7.1 - %930 = load float, float* %arrayidx17.i.us.7.1, align 4, !tbaa !12 - %sub18.i.us.7.1 = fsub float %929, %930 - %conv19.i.us.7.1 = fpext float %sub18.i.us.7.1 to double - %931 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.7.1, double -5.000000e-01, double %conv9.i.us.7.1) #5 - %conv21.i.us.7.1 = fptrunc double %931 to float - store float %conv21.i.us.7.1, float* %arrayidx.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.7.1 - -if.end.i.us.7.1: ; preds = %if.then.i.us.7.1, %if.end.i.us.7 - %932 = add nuw nsw i64 %_local_id_x.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %932, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.7.loopexit, label %pregion_for_entry.entry.i.us.7, !llvm.loop !83 - -if.then.i.us.6.1: ; preds = %if.end.i.us.6 - %add.i.us.6.1 = add i32 %mul.i.6, %conv.i.us.6.1 - %idxprom.i.us.6.1 = sext i32 %add.i.us.6.1 to i64 - %arrayidx.i.us.6.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.6.1 - %933 = load float, float* %arrayidx.i.us.6.1, align 4, !tbaa !12 - %conv9.i.us.6.1 = fpext float %933 to double - %arrayidx13.i.us.6.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6.1 - %934 = load float, float* %arrayidx13.i.us.6.1, align 4, !tbaa !12 - %add15.i.us.6.1 = add nsw i32 %add.i.us.6.1, -1 - %idxprom16.i.us.6.1 = sext i32 %add15.i.us.6.1 to i64 - %arrayidx17.i.us.6.1 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.6.1 - %935 = load float, float* %arrayidx17.i.us.6.1, align 4, !tbaa !12 - %sub18.i.us.6.1 = fsub float %934, %935 - %conv19.i.us.6.1 = fpext float %sub18.i.us.6.1 to double - %936 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.6.1, double -5.000000e-01, double %conv9.i.us.6.1) #5 - %conv21.i.us.6.1 = fptrunc double %936 to float - store float %conv21.i.us.6.1, float* %arrayidx.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.6.1 - -if.end.i.us.6.1: ; preds = %if.then.i.us.6.1, %if.end.i.us.6 - %937 = add nuw nsw i64 %_local_id_x.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %937, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.6.loopexit, label %pregion_for_entry.entry.i.us.6, !llvm.loop !86 - -if.then.i.us.5.1: ; preds = %if.end.i.us.5 - %add.i.us.5.1 = add i32 %mul.i.5, %conv.i.us.5.1 - %idxprom.i.us.5.1 = sext i32 %add.i.us.5.1 to i64 - %arrayidx.i.us.5.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.5.1 - %938 = load float, float* %arrayidx.i.us.5.1, align 4, !tbaa !12 - %conv9.i.us.5.1 = fpext float %938 to double - %arrayidx13.i.us.5.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5.1 - %939 = load float, float* %arrayidx13.i.us.5.1, align 4, !tbaa !12 - %add15.i.us.5.1 = add i32 %add.i.us.5.1, -1 - %idxprom16.i.us.5.1 = sext i32 %add15.i.us.5.1 to i64 - %arrayidx17.i.us.5.1 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.5.1 - %940 = load float, float* %arrayidx17.i.us.5.1, align 4, !tbaa !12 - %sub18.i.us.5.1 = fsub float %939, %940 - %conv19.i.us.5.1 = fpext float %sub18.i.us.5.1 to double - %941 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.5.1, double -5.000000e-01, double %conv9.i.us.5.1) #5 - %conv21.i.us.5.1 = fptrunc double %941 to float - store float %conv21.i.us.5.1, float* %arrayidx.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.5.1 - -if.end.i.us.5.1: ; preds = %if.then.i.us.5.1, %if.end.i.us.5 - %942 = add nuw nsw i64 %_local_id_x.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %942, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.5.loopexit, label %pregion_for_entry.entry.i.us.5, !llvm.loop !87 - -if.then.i.us.4.1: ; preds = %if.end.i.us.4 - %add.i.us.4.1 = add i32 %mul.i.4, %conv.i.us.4.1 - %idxprom.i.us.4.1 = sext i32 %add.i.us.4.1 to i64 - %arrayidx.i.us.4.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.4.1 - %943 = load float, float* %arrayidx.i.us.4.1, align 4, !tbaa !12 - %conv9.i.us.4.1 = fpext float %943 to double - %arrayidx13.i.us.4.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4.1 - %944 = load float, float* %arrayidx13.i.us.4.1, align 4, !tbaa !12 - %add15.i.us.4.1 = add nsw i32 %add.i.us.4.1, -1 - %idxprom16.i.us.4.1 = sext i32 %add15.i.us.4.1 to i64 - %arrayidx17.i.us.4.1 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.4.1 - %945 = load float, float* %arrayidx17.i.us.4.1, align 4, !tbaa !12 - %sub18.i.us.4.1 = fsub float %944, %945 - %conv19.i.us.4.1 = fpext float %sub18.i.us.4.1 to double - %946 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.4.1, double -5.000000e-01, double %conv9.i.us.4.1) #5 - %conv21.i.us.4.1 = fptrunc double %946 to float - store float %conv21.i.us.4.1, float* %arrayidx.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.4.1 - -if.end.i.us.4.1: ; preds = %if.then.i.us.4.1, %if.end.i.us.4 - %947 = add nuw nsw i64 %_local_id_x.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %947, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.4.loopexit, label %pregion_for_entry.entry.i.us.4, !llvm.loop !88 - -if.then.i.us.3.1: ; preds = %if.end.i.us.3 - %add.i.us.3.1 = add i32 %mul.i.3, %conv.i.us.3.1 - %idxprom.i.us.3.1 = sext i32 %add.i.us.3.1 to i64 - %arrayidx.i.us.3.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.3.1 - %948 = load float, float* %arrayidx.i.us.3.1, align 4, !tbaa !12 - %conv9.i.us.3.1 = fpext float %948 to double - %arrayidx13.i.us.3.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3.1 - %949 = load float, float* %arrayidx13.i.us.3.1, align 4, !tbaa !12 - %add15.i.us.3.1 = add i32 %add.i.us.3.1, -1 - %idxprom16.i.us.3.1 = sext i32 %add15.i.us.3.1 to i64 - %arrayidx17.i.us.3.1 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.3.1 - %950 = load float, float* %arrayidx17.i.us.3.1, align 4, !tbaa !12 - %sub18.i.us.3.1 = fsub float %949, %950 - %conv19.i.us.3.1 = fpext float %sub18.i.us.3.1 to double - %951 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.3.1, double -5.000000e-01, double %conv9.i.us.3.1) #5 - %conv21.i.us.3.1 = fptrunc double %951 to float - store float %conv21.i.us.3.1, float* %arrayidx.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.3.1 - -if.end.i.us.3.1: ; preds = %if.then.i.us.3.1, %if.end.i.us.3 - %952 = add nuw nsw i64 %_local_id_x.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %952, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.3.loopexit, label %pregion_for_entry.entry.i.us.3, !llvm.loop !89 - -if.then.i.us.2.1: ; preds = %if.end.i.us.2 - %add.i.us.2.1 = add i32 %mul.i.2, %conv.i.us.2.1 - %idxprom.i.us.2.1 = sext i32 %add.i.us.2.1 to i64 - %arrayidx.i.us.2.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.2.1 - %953 = load float, float* %arrayidx.i.us.2.1, align 4, !tbaa !12 - %conv9.i.us.2.1 = fpext float %953 to double - %arrayidx13.i.us.2.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2.1 - %954 = load float, float* %arrayidx13.i.us.2.1, align 4, !tbaa !12 - %add15.i.us.2.1 = add nsw i32 %add.i.us.2.1, -1 - %idxprom16.i.us.2.1 = sext i32 %add15.i.us.2.1 to i64 - %arrayidx17.i.us.2.1 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.2.1 - %955 = load float, float* %arrayidx17.i.us.2.1, align 4, !tbaa !12 - %sub18.i.us.2.1 = fsub float %954, %955 - %conv19.i.us.2.1 = fpext float %sub18.i.us.2.1 to double - %956 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.2.1, double -5.000000e-01, double %conv9.i.us.2.1) #5 - %conv21.i.us.2.1 = fptrunc double %956 to float - store float %conv21.i.us.2.1, float* %arrayidx.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.2.1 - -if.end.i.us.2.1: ; preds = %if.then.i.us.2.1, %if.end.i.us.2 - %957 = add nuw nsw i64 %_local_id_x.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %957, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.2.loopexit, label %pregion_for_entry.entry.i.us.2, !llvm.loop !90 - -if.then.i.us.1.1: ; preds = %if.end.i.us.1 - %add.i.us.1.1 = add i32 %mul.i.1, %conv.i.us.1.1 - %idxprom.i.us.1.1 = sext i32 %add.i.us.1.1 to i64 - %arrayidx.i.us.1.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1.1 - %958 = load float, float* %arrayidx.i.us.1.1, align 4, !tbaa !12 - %conv9.i.us.1.1 = fpext float %958 to double - %arrayidx13.i.us.1.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1.1 - %959 = load float, float* %arrayidx13.i.us.1.1, align 4, !tbaa !12 - %add15.i.us.1.1 = add i32 %add.i.us.1.1, -1 - %idxprom16.i.us.1.1 = sext i32 %add15.i.us.1.1 to i64 - %arrayidx17.i.us.1.1 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.1.1 - %960 = load float, float* %arrayidx17.i.us.1.1, align 4, !tbaa !12 - %sub18.i.us.1.1 = fsub float %959, %960 - %conv19.i.us.1.1 = fpext float %sub18.i.us.1.1 to double - %961 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.1.1, double -5.000000e-01, double %conv9.i.us.1.1) #5 - %conv21.i.us.1.1 = fptrunc double %961 to float - store float %conv21.i.us.1.1, float* %arrayidx.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.1.1 - -if.end.i.us.1.1: ; preds = %if.then.i.us.1.1, %if.end.i.us.1 - %962 = add nuw nsw i64 %_local_id_x.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %962, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.1.loopexit, label %pregion_for_entry.entry.i.us.1, !llvm.loop !91 - -if.then.i.us.1379: ; preds = %if.end.i.us - %add.i.us.1368 = add i32 %mul.i, %conv.i.us.1363 - %idxprom.i.us.1369 = sext i32 %add.i.us.1368 to i64 - %arrayidx.i.us.1370 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1369 - %963 = load float, float* %arrayidx.i.us.1370, align 4, !tbaa !12 - %conv9.i.us.1371 = fpext float %963 to double - %arrayidx13.i.us.1372 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1369 - %964 = load float, float* %arrayidx13.i.us.1372, align 4, !tbaa !12 - %add15.i.us.1373 = add nsw i32 %add.i.us.1368, -1 - %idxprom16.i.us.1374 = sext i32 %add15.i.us.1373 to i64 - %arrayidx17.i.us.1375 = getelementptr inbounds float, float* %2, i64 %idxprom16.i.us.1374 - %965 = load float, float* %arrayidx17.i.us.1375, align 4, !tbaa !12 - %sub18.i.us.1376 = fsub float %964, %965 - %conv19.i.us.1377 = fpext float %sub18.i.us.1376 to double - %966 = tail call double @llvm.fmuladd.f64(double %conv19.i.us.1377, double -5.000000e-01, double %conv9.i.us.1371) #5 - %conv21.i.us.1378 = fptrunc double %966 to float - store float %conv21.i.us.1378, float* %arrayidx.i.us.1370, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.us.1380 - -if.end.i.us.1380: ; preds = %if.then.i.us.1379, %if.end.i.us - %967 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %967, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !92 -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_fdtd_kernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { -pregion_for_entry.pregion_for_init.i.i: - %5 = bitcast i8** %0 to float*** - %6 = load float**, float*** %5, align 8 - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 2 - %9 = bitcast i8** %8 to float*** - %10 = load float**, float*** %9, align 8 - %11 = load float*, float** %10, align 8 - %12 = getelementptr i8*, i8** %0, i64 3 - %13 = bitcast i8** %12 to i32** - %14 = load i32*, i32** %13, align 8 - %15 = load i32, i32* %14, align 4 - %16 = getelementptr i8*, i8** %0, i64 4 - %17 = bitcast i8** %16 to i32** - %18 = load i32*, i32** %17, align 8 - %19 = load i32, i32* %18, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %conv2.i.i = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %15, %conv2.i.i - %mul.i.i = mul nsw i32 %19, %conv2.i.i - br i1 %cmp.i.i, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %20 = trunc i64 %3 to i32 - %21 = mul i32 %19, %20 - %22 = shl i32 %21, 3 - %23 = trunc i64 %2 to i32 - %24 = shl i32 %23, 5 - %25 = add i32 %22, %24 - %26 = icmp sgt i32 %25, 2147483616 - %27 = add i32 %22, %24 - %28 = add i32 %27, -1 - %29 = add i32 %27, 30 - %30 = icmp slt i32 %29, %28 - %31 = or i1 %26, %30 - br i1 %31, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %32 = trunc i64 %3 to i32 - %33 = mul i32 %19, %32 - %34 = shl i32 %33, 3 - %35 = trunc i64 %2 to i32 - %36 = shl i32 %35, 5 - %37 = add i32 %34, %36 - %38 = sext i32 %37 to i64 - %scevgep = getelementptr float, float* %7, i64 %38 - %39 = add nsw i64 %38, 32 - %scevgep7 = getelementptr float, float* %7, i64 %39 - %40 = add i32 %34, %36 - %41 = add i32 %40, -8 - %42 = sext i32 %41 to i64 - %43 = or i64 %42, 7 - %scevgep9 = getelementptr float, float* %11, i64 %43 - %44 = add nsw i64 %42, 39 - %scevgep11 = getelementptr float, float* %11, i64 %44 - %scevgep13 = getelementptr float, float* %11, i64 %38 - %scevgep15 = getelementptr float, float* %11, i64 %39 - %bound0 = icmp ult float* %scevgep, %scevgep11 - %bound1 = icmp ult float* %scevgep9, %scevgep7 - %found.conflict = and i1 %bound0, %bound1 - %bound017 = icmp ult float* %scevgep, %scevgep15 - %bound118 = icmp ult float* %scevgep13, %scevgep7 - %found.conflict19 = and i1 %bound017, %bound118 - %conflict.rdx = or i1 %found.conflict, %found.conflict19 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert20 = insertelement <8 x i32> undef, i32 %19, i32 0 - %broadcast.splat21 = shufflevector <8 x i32> %broadcast.splatinsert20, <8 x i32> undef, <8 x i32> zeroinitializer - %45 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %46 = or <8 x i32> %45, - %47 = icmp sgt <8 x i32> %broadcast.splat21, %46 - %48 = icmp sgt <8 x i32> %46, zeroinitializer - %49 = and <8 x i1> %47, %48 - %50 = extractelement <8 x i32> %46, i32 0 - %51 = add i32 %mul.i.i, %50 - %52 = sext i32 %51 to i64 - %53 = getelementptr inbounds float, float* %7, i64 %52 - %54 = bitcast float* %53 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %54, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !93, !noalias !96 - %55 = fpext <8 x float> %wide.masked.load to <8 x double> - %56 = getelementptr inbounds float, float* %11, i64 %52 - %57 = bitcast float* %56 to <8 x float>* - %wide.masked.load22 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %57, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !99 - %58 = add i32 %51, -1 - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds float, float* %11, i64 %59 - %61 = bitcast float* %60 to <8 x float>* - %wide.masked.load23 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %61, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !100 - %62 = fsub <8 x float> %wide.masked.load22, %wide.masked.load23 - %63 = fpext <8 x float> %62 to <8 x double> - %64 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %63, <8 x double> , <8 x double> %55) - %65 = fptrunc <8 x double> %64 to <8 x float> - %66 = bitcast float* %53 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %65, <8 x float>* %66, i32 4, <8 x i1> %49), !tbaa !12, !alias.scope !93, !noalias !96, !llvm.access.group !24 - %67 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %68 = or <8 x i32> %67, - %69 = icmp sgt <8 x i32> %broadcast.splat21, %68 - %70 = icmp sgt <8 x i32> %68, zeroinitializer - %71 = and <8 x i1> %69, %70 - %72 = extractelement <8 x i32> %68, i32 0 - %73 = add i32 %mul.i.i, %72 - %74 = sext i32 %73 to i64 - %75 = getelementptr inbounds float, float* %7, i64 %74 - %76 = bitcast float* %75 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %76, i32 4, <8 x i1> %71, <8 x float> undef), !tbaa !12, !alias.scope !93, !noalias !96 - %77 = fpext <8 x float> %wide.masked.load.1 to <8 x double> - %78 = getelementptr inbounds float, float* %11, i64 %74 - %79 = bitcast float* %78 to <8 x float>* - %wide.masked.load22.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %79, i32 4, <8 x i1> %71, <8 x float> undef), !tbaa !12, !alias.scope !99 - %80 = add i32 %73, -1 - %81 = sext i32 %80 to i64 - %82 = getelementptr inbounds float, float* %11, i64 %81 - %83 = bitcast float* %82 to <8 x float>* - %wide.masked.load23.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %83, i32 4, <8 x i1> %71, <8 x float> undef), !tbaa !12, !alias.scope !100 - %84 = fsub <8 x float> %wide.masked.load22.1, %wide.masked.load23.1 - %85 = fpext <8 x float> %84 to <8 x double> - %86 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %85, <8 x double> , <8 x double> %77) - %87 = fptrunc <8 x double> %86 to <8 x float> - %88 = bitcast float* %75 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %87, <8 x float>* %88, i32 4, <8 x i1> %71), !tbaa !12, !alias.scope !93, !noalias !96, !llvm.access.group !24 - %89 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %90 = or <8 x i32> %89, - %91 = icmp sgt <8 x i32> %broadcast.splat21, %90 - %92 = icmp sgt <8 x i32> %90, zeroinitializer - %93 = and <8 x i1> %91, %92 - %94 = extractelement <8 x i32> %90, i32 0 - %95 = add i32 %mul.i.i, %94 - %96 = sext i32 %95 to i64 - %97 = getelementptr inbounds float, float* %7, i64 %96 - %98 = bitcast float* %97 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %98, i32 4, <8 x i1> %93, <8 x float> undef), !tbaa !12, !alias.scope !93, !noalias !96 - %99 = fpext <8 x float> %wide.masked.load.2 to <8 x double> - %100 = getelementptr inbounds float, float* %11, i64 %96 - %101 = bitcast float* %100 to <8 x float>* - %wide.masked.load22.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %101, i32 4, <8 x i1> %93, <8 x float> undef), !tbaa !12, !alias.scope !99 - %102 = add i32 %95, -1 - %103 = sext i32 %102 to i64 - %104 = getelementptr inbounds float, float* %11, i64 %103 - %105 = bitcast float* %104 to <8 x float>* - %wide.masked.load23.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %105, i32 4, <8 x i1> %93, <8 x float> undef), !tbaa !12, !alias.scope !100 - %106 = fsub <8 x float> %wide.masked.load22.2, %wide.masked.load23.2 - %107 = fpext <8 x float> %106 to <8 x double> - %108 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %107, <8 x double> , <8 x double> %99) - %109 = fptrunc <8 x double> %108 to <8 x float> - %110 = bitcast float* %97 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %109, <8 x float>* %110, i32 4, <8 x i1> %93), !tbaa !12, !alias.scope !93, !noalias !96, !llvm.access.group !24 - %111 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %112 = or <8 x i32> %111, - %113 = icmp sgt <8 x i32> %broadcast.splat21, %112 - %114 = icmp sgt <8 x i32> %112, zeroinitializer - %115 = and <8 x i1> %113, %114 - %116 = extractelement <8 x i32> %112, i32 0 - %117 = add i32 %mul.i.i, %116 - %118 = sext i32 %117 to i64 - %119 = getelementptr inbounds float, float* %7, i64 %118 - %120 = bitcast float* %119 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %120, i32 4, <8 x i1> %115, <8 x float> undef), !tbaa !12, !alias.scope !93, !noalias !96 - %121 = fpext <8 x float> %wide.masked.load.3 to <8 x double> - %122 = getelementptr inbounds float, float* %11, i64 %118 - %123 = bitcast float* %122 to <8 x float>* - %wide.masked.load22.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %123, i32 4, <8 x i1> %115, <8 x float> undef), !tbaa !12, !alias.scope !99 - %124 = add i32 %117, -1 - %125 = sext i32 %124 to i64 - %126 = getelementptr inbounds float, float* %11, i64 %125 - %127 = bitcast float* %126 to <8 x float>* - %wide.masked.load23.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %127, i32 4, <8 x i1> %115, <8 x float> undef), !tbaa !12, !alias.scope !100 - %128 = fsub <8 x float> %wide.masked.load22.3, %wide.masked.load23.3 - %129 = fpext <8 x float> %128 to <8 x double> - %130 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %129, <8 x double> , <8 x double> %121) - %131 = fptrunc <8 x double> %130 to <8 x float> - %132 = bitcast float* %119 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %131, <8 x float>* %132, i32 4, <8 x i1> %115), !tbaa !12, !alias.scope !93, !noalias !96, !llvm.access.group !24 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us.1380, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.preheader ], [ %978, %if.end.i.i.us.1380 ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %19, %conv.i.i.us - %cmp7.i.i.us = icmp sgt i32 %conv.i.i.us, 0 - %or.cond.i.i.us = and i1 %cmp4.i.i.us, %cmp7.i.i.us - br i1 %or.cond.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add.i.i.us = add i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us - %133 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %conv9.i.i.us = fpext float %133 to double - %arrayidx13.i.i.us = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us - %134 = load float, float* %arrayidx13.i.i.us, align 4, !tbaa !12 - %add15.i.i.us = add i32 %add.i.i.us, -1 - %idxprom16.i.i.us = sext i32 %add15.i.i.us to i64 - %arrayidx17.i.i.us = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us - %135 = load float, float* %arrayidx17.i.i.us, align 4, !tbaa !12 - %sub18.i.i.us = fsub float %134, %135 - %conv19.i.i.us = fpext float %sub18.i.i.us to double - %136 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us, double -5.000000e-01, double %conv9.i.i.us) #5 - %conv21.i.i.us = fptrunc double %136 to float - store float %conv21.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %137 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1362 = add nuw nsw i64 %137, %mul.i.i.i - %conv.i.i.us.1363 = trunc i64 %add1.i.i.i.us.1362 to i32 - %cmp4.i.i.us.1364 = icmp sgt i32 %19, %conv.i.i.us.1363 - %cmp7.i.i.us.1365 = icmp sgt i32 %conv.i.i.us.1363, 0 - %or.cond.i.i.us.1366 = and i1 %cmp4.i.i.us.1364, %cmp7.i.i.us.1365 - br i1 %or.cond.i.i.us.1366, label %if.then.i.i.us.1379, label %if.end.i.i.us.1380 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us.1380 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i - %138 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.1 = or i32 %138, 1 - %cmp.i.i.1 = icmp sgt i32 %15, %conv2.i.i.1 - %mul.i.i.1 = mul nsw i32 %19, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck34, label %pregion_for_end.i.i.1 - -vector.scevcheck34: ; preds = %pregion_for_end.i.i - %139 = mul i32 %19, %conv2.i.i.1 - %140 = trunc i64 %2 to i32 - %141 = shl i32 %140, 5 - %142 = add i32 %139, %141 - %143 = icmp sgt i32 %142, 2147483616 - %144 = add i32 %139, %141 - %145 = add i32 %144, -1 - %146 = add i32 %144, 30 - %147 = icmp slt i32 %146, %145 - %148 = or i1 %143, %147 - br i1 %148, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.memcheck56 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.memcheck56, %vector.scevcheck34 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.memcheck56: ; preds = %vector.scevcheck34 - %149 = mul i32 %19, %conv2.i.i.1 - %150 = trunc i64 %2 to i32 - %151 = shl i32 %150, 5 - %152 = add i32 %149, %151 - %153 = sext i32 %152 to i64 - %scevgep36 = getelementptr float, float* %7, i64 %153 - %154 = add nsw i64 %153, 32 - %scevgep38 = getelementptr float, float* %7, i64 %154 - %155 = add i32 %149, %151 - %156 = add i32 %155, -1 - %157 = sext i32 %156 to i64 - %scevgep40 = getelementptr float, float* %11, i64 %157 - %158 = add nsw i64 %157, 32 - %scevgep42 = getelementptr float, float* %11, i64 %158 - %scevgep44 = getelementptr float, float* %11, i64 %153 - %scevgep46 = getelementptr float, float* %11, i64 %154 - %bound048 = icmp ult float* %scevgep36, %scevgep42 - %bound149 = icmp ult float* %scevgep40, %scevgep38 - %found.conflict50 = and i1 %bound048, %bound149 - %bound051 = icmp ult float* %scevgep36, %scevgep46 - %bound152 = icmp ult float* %scevgep44, %scevgep38 - %found.conflict53 = and i1 %bound051, %bound152 - %conflict.rdx54 = or i1 %found.conflict50, %found.conflict53 - br i1 %conflict.rdx54, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph57 - -vector.ph57: ; preds = %vector.memcheck56 - %broadcast.splatinsert64 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat65 = shufflevector <8 x i64> %broadcast.splatinsert64, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert66 = insertelement <8 x i32> undef, i32 %19, i32 0 - %broadcast.splat67 = shufflevector <8 x i32> %broadcast.splatinsert66, <8 x i32> undef, <8 x i32> zeroinitializer - %159 = trunc <8 x i64> %broadcast.splat65 to <8 x i32> - %160 = or <8 x i32> %159, - %161 = icmp sgt <8 x i32> %broadcast.splat67, %160 - %162 = icmp sgt <8 x i32> %160, zeroinitializer - %163 = and <8 x i1> %161, %162 - %164 = extractelement <8 x i32> %160, i32 0 - %165 = add i32 %mul.i.i.1, %164 - %166 = sext i32 %165 to i64 - %167 = getelementptr inbounds float, float* %7, i64 %166 - %168 = bitcast float* %167 to <8 x float>* - %wide.masked.load68 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %168, i32 4, <8 x i1> %163, <8 x float> undef), !tbaa !12, !alias.scope !101, !noalias !104 - %169 = fpext <8 x float> %wide.masked.load68 to <8 x double> - %170 = getelementptr inbounds float, float* %11, i64 %166 - %171 = bitcast float* %170 to <8 x float>* - %wide.masked.load69 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %171, i32 4, <8 x i1> %163, <8 x float> undef), !tbaa !12, !alias.scope !107 - %172 = add i32 %165, -1 - %173 = sext i32 %172 to i64 - %174 = getelementptr inbounds float, float* %11, i64 %173 - %175 = bitcast float* %174 to <8 x float>* - %wide.masked.load70 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %175, i32 4, <8 x i1> %163, <8 x float> undef), !tbaa !12, !alias.scope !108 - %176 = fsub <8 x float> %wide.masked.load69, %wide.masked.load70 - %177 = fpext <8 x float> %176 to <8 x double> - %178 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %177, <8 x double> , <8 x double> %169) - %179 = fptrunc <8 x double> %178 to <8 x float> - %180 = bitcast float* %167 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %179, <8 x float>* %180, i32 4, <8 x i1> %163), !tbaa !12, !alias.scope !101, !noalias !104, !llvm.access.group !24 - %181 = trunc <8 x i64> %broadcast.splat65 to <8 x i32> - %182 = or <8 x i32> %181, - %183 = icmp sgt <8 x i32> %broadcast.splat67, %182 - %184 = icmp sgt <8 x i32> %182, zeroinitializer - %185 = and <8 x i1> %183, %184 - %186 = extractelement <8 x i32> %182, i32 0 - %187 = add i32 %mul.i.i.1, %186 - %188 = sext i32 %187 to i64 - %189 = getelementptr inbounds float, float* %7, i64 %188 - %190 = bitcast float* %189 to <8 x float>* - %wide.masked.load68.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %190, i32 4, <8 x i1> %185, <8 x float> undef), !tbaa !12, !alias.scope !101, !noalias !104 - %191 = fpext <8 x float> %wide.masked.load68.1 to <8 x double> - %192 = getelementptr inbounds float, float* %11, i64 %188 - %193 = bitcast float* %192 to <8 x float>* - %wide.masked.load69.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %193, i32 4, <8 x i1> %185, <8 x float> undef), !tbaa !12, !alias.scope !107 - %194 = add i32 %187, -1 - %195 = sext i32 %194 to i64 - %196 = getelementptr inbounds float, float* %11, i64 %195 - %197 = bitcast float* %196 to <8 x float>* - %wide.masked.load70.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %197, i32 4, <8 x i1> %185, <8 x float> undef), !tbaa !12, !alias.scope !108 - %198 = fsub <8 x float> %wide.masked.load69.1, %wide.masked.load70.1 - %199 = fpext <8 x float> %198 to <8 x double> - %200 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %199, <8 x double> , <8 x double> %191) - %201 = fptrunc <8 x double> %200 to <8 x float> - %202 = bitcast float* %189 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %201, <8 x float>* %202, i32 4, <8 x i1> %185), !tbaa !12, !alias.scope !101, !noalias !104, !llvm.access.group !24 - %203 = trunc <8 x i64> %broadcast.splat65 to <8 x i32> - %204 = or <8 x i32> %203, - %205 = icmp sgt <8 x i32> %broadcast.splat67, %204 - %206 = icmp sgt <8 x i32> %204, zeroinitializer - %207 = and <8 x i1> %205, %206 - %208 = extractelement <8 x i32> %204, i32 0 - %209 = add i32 %mul.i.i.1, %208 - %210 = sext i32 %209 to i64 - %211 = getelementptr inbounds float, float* %7, i64 %210 - %212 = bitcast float* %211 to <8 x float>* - %wide.masked.load68.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %212, i32 4, <8 x i1> %207, <8 x float> undef), !tbaa !12, !alias.scope !101, !noalias !104 - %213 = fpext <8 x float> %wide.masked.load68.2 to <8 x double> - %214 = getelementptr inbounds float, float* %11, i64 %210 - %215 = bitcast float* %214 to <8 x float>* - %wide.masked.load69.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %215, i32 4, <8 x i1> %207, <8 x float> undef), !tbaa !12, !alias.scope !107 - %216 = add i32 %209, -1 - %217 = sext i32 %216 to i64 - %218 = getelementptr inbounds float, float* %11, i64 %217 - %219 = bitcast float* %218 to <8 x float>* - %wide.masked.load70.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %219, i32 4, <8 x i1> %207, <8 x float> undef), !tbaa !12, !alias.scope !108 - %220 = fsub <8 x float> %wide.masked.load69.2, %wide.masked.load70.2 - %221 = fpext <8 x float> %220 to <8 x double> - %222 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %221, <8 x double> , <8 x double> %213) - %223 = fptrunc <8 x double> %222 to <8 x float> - %224 = bitcast float* %211 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %223, <8 x float>* %224, i32 4, <8 x i1> %207), !tbaa !12, !alias.scope !101, !noalias !104, !llvm.access.group !24 - %225 = trunc <8 x i64> %broadcast.splat65 to <8 x i32> - %226 = or <8 x i32> %225, - %227 = icmp sgt <8 x i32> %broadcast.splat67, %226 - %228 = icmp sgt <8 x i32> %226, zeroinitializer - %229 = and <8 x i1> %227, %228 - %230 = extractelement <8 x i32> %226, i32 0 - %231 = add i32 %mul.i.i.1, %230 - %232 = sext i32 %231 to i64 - %233 = getelementptr inbounds float, float* %7, i64 %232 - %234 = bitcast float* %233 to <8 x float>* - %wide.masked.load68.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %234, i32 4, <8 x i1> %229, <8 x float> undef), !tbaa !12, !alias.scope !101, !noalias !104 - %235 = fpext <8 x float> %wide.masked.load68.3 to <8 x double> - %236 = getelementptr inbounds float, float* %11, i64 %232 - %237 = bitcast float* %236 to <8 x float>* - %wide.masked.load69.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %237, i32 4, <8 x i1> %229, <8 x float> undef), !tbaa !12, !alias.scope !107 - %238 = add i32 %231, -1 - %239 = sext i32 %238 to i64 - %240 = getelementptr inbounds float, float* %11, i64 %239 - %241 = bitcast float* %240 to <8 x float>* - %wide.masked.load70.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %241, i32 4, <8 x i1> %229, <8 x float> undef), !tbaa !12, !alias.scope !108 - %242 = fsub <8 x float> %wide.masked.load69.3, %wide.masked.load70.3 - %243 = fpext <8 x float> %242 to <8 x double> - %244 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %243, <8 x double> , <8 x double> %235) - %245 = fptrunc <8 x double> %244 to <8 x float> - %246 = bitcast float* %233 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %245, <8 x float>* %246, i32 4, <8 x i1> %229), !tbaa !12, !alias.scope !101, !noalias !104, !llvm.access.group !24 - br label %pregion_for_end.i.i.1 - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.i.i.us.1.1, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ], [ %973, %if.end.i.i.us.1.1 ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %19, %conv.i.i.us.1 - %cmp7.i.i.us.1 = icmp sgt i32 %conv.i.i.us.1, 0 - %or.cond.i.i.us.1 = and i1 %cmp4.i.i.us.1, %cmp7.i.i.us.1 - br i1 %or.cond.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %add.i.i.us.1 = add i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1 - %247 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %conv9.i.i.us.1 = fpext float %247 to double - %arrayidx13.i.i.us.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.1 - %248 = load float, float* %arrayidx13.i.i.us.1, align 4, !tbaa !12 - %add15.i.i.us.1 = add i32 %add.i.i.us.1, -1 - %idxprom16.i.i.us.1 = sext i32 %add15.i.i.us.1 to i64 - %arrayidx17.i.i.us.1 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.1 - %249 = load float, float* %arrayidx17.i.i.us.1, align 4, !tbaa !12 - %sub18.i.i.us.1 = fsub float %248, %249 - %conv19.i.i.us.1 = fpext float %sub18.i.i.us.1 to double - %250 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.1, double -5.000000e-01, double %conv9.i.i.us.1) #5 - %conv21.i.i.us.1 = fptrunc double %250 to float - store float %conv21.i.i.us.1, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %251 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %251, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %19, %conv.i.i.us.1.1 - %cmp7.i.i.us.1.1 = icmp sgt i32 %conv.i.i.us.1.1, 0 - %or.cond.i.i.us.1.1 = and i1 %cmp4.i.i.us.1.1, %cmp7.i.i.us.1.1 - br i1 %or.cond.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.i.i.us.1.1 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph57, %pregion_for_end.i.i - %252 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.2 = or i32 %252, 2 - %cmp.i.i.2 = icmp sgt i32 %15, %conv2.i.i.2 - %mul.i.i.2 = mul nsw i32 %19, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck81, label %pregion_for_end.i.i.2 - -vector.scevcheck81: ; preds = %pregion_for_end.i.i.1 - %253 = mul i32 %19, %conv2.i.i.2 - %254 = trunc i64 %2 to i32 - %255 = shl i32 %254, 5 - %256 = add i32 %253, %255 - %257 = icmp sgt i32 %256, 2147483616 - %258 = add i32 %253, %255 - %259 = add i32 %258, -1 - %260 = add i32 %258, 30 - %261 = icmp slt i32 %260, %259 - %262 = or i1 %257, %261 - br i1 %262, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.memcheck103 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.memcheck103, %vector.scevcheck81 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.memcheck103: ; preds = %vector.scevcheck81 - %263 = mul i32 %19, %conv2.i.i.2 - %264 = trunc i64 %2 to i32 - %265 = shl i32 %264, 5 - %266 = add i32 %263, %265 - %267 = sext i32 %266 to i64 - %scevgep83 = getelementptr float, float* %7, i64 %267 - %268 = add nsw i64 %267, 32 - %scevgep85 = getelementptr float, float* %7, i64 %268 - %269 = add i32 %263, %265 - %270 = add i32 %269, -2 - %271 = sext i32 %270 to i64 - %272 = add nuw nsw i64 %271, 1 - %scevgep87 = getelementptr float, float* %11, i64 %272 - %273 = add nsw i64 %271, 33 - %scevgep89 = getelementptr float, float* %11, i64 %273 - %scevgep91 = getelementptr float, float* %11, i64 %267 - %scevgep93 = getelementptr float, float* %11, i64 %268 - %bound095 = icmp ult float* %scevgep83, %scevgep89 - %bound196 = icmp ult float* %scevgep87, %scevgep85 - %found.conflict97 = and i1 %bound095, %bound196 - %bound098 = icmp ult float* %scevgep83, %scevgep93 - %bound199 = icmp ult float* %scevgep91, %scevgep85 - %found.conflict100 = and i1 %bound098, %bound199 - %conflict.rdx101 = or i1 %found.conflict97, %found.conflict100 - br i1 %conflict.rdx101, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph104 - -vector.ph104: ; preds = %vector.memcheck103 - %broadcast.splatinsert111 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat112 = shufflevector <8 x i64> %broadcast.splatinsert111, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert113 = insertelement <8 x i32> undef, i32 %19, i32 0 - %broadcast.splat114 = shufflevector <8 x i32> %broadcast.splatinsert113, <8 x i32> undef, <8 x i32> zeroinitializer - %274 = trunc <8 x i64> %broadcast.splat112 to <8 x i32> - %275 = or <8 x i32> %274, - %276 = icmp sgt <8 x i32> %broadcast.splat114, %275 - %277 = icmp sgt <8 x i32> %275, zeroinitializer - %278 = and <8 x i1> %276, %277 - %279 = extractelement <8 x i32> %275, i32 0 - %280 = add i32 %mul.i.i.2, %279 - %281 = sext i32 %280 to i64 - %282 = getelementptr inbounds float, float* %7, i64 %281 - %283 = bitcast float* %282 to <8 x float>* - %wide.masked.load115 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %283, i32 4, <8 x i1> %278, <8 x float> undef), !tbaa !12, !alias.scope !109, !noalias !112 - %284 = fpext <8 x float> %wide.masked.load115 to <8 x double> - %285 = getelementptr inbounds float, float* %11, i64 %281 - %286 = bitcast float* %285 to <8 x float>* - %wide.masked.load116 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %286, i32 4, <8 x i1> %278, <8 x float> undef), !tbaa !12, !alias.scope !115 - %287 = add i32 %280, -1 - %288 = sext i32 %287 to i64 - %289 = getelementptr inbounds float, float* %11, i64 %288 - %290 = bitcast float* %289 to <8 x float>* - %wide.masked.load117 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %290, i32 4, <8 x i1> %278, <8 x float> undef), !tbaa !12, !alias.scope !116 - %291 = fsub <8 x float> %wide.masked.load116, %wide.masked.load117 - %292 = fpext <8 x float> %291 to <8 x double> - %293 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %292, <8 x double> , <8 x double> %284) - %294 = fptrunc <8 x double> %293 to <8 x float> - %295 = bitcast float* %282 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %294, <8 x float>* %295, i32 4, <8 x i1> %278), !tbaa !12, !alias.scope !109, !noalias !112, !llvm.access.group !24 - %296 = trunc <8 x i64> %broadcast.splat112 to <8 x i32> - %297 = or <8 x i32> %296, - %298 = icmp sgt <8 x i32> %broadcast.splat114, %297 - %299 = icmp sgt <8 x i32> %297, zeroinitializer - %300 = and <8 x i1> %298, %299 - %301 = extractelement <8 x i32> %297, i32 0 - %302 = add i32 %mul.i.i.2, %301 - %303 = sext i32 %302 to i64 - %304 = getelementptr inbounds float, float* %7, i64 %303 - %305 = bitcast float* %304 to <8 x float>* - %wide.masked.load115.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %305, i32 4, <8 x i1> %300, <8 x float> undef), !tbaa !12, !alias.scope !109, !noalias !112 - %306 = fpext <8 x float> %wide.masked.load115.1 to <8 x double> - %307 = getelementptr inbounds float, float* %11, i64 %303 - %308 = bitcast float* %307 to <8 x float>* - %wide.masked.load116.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %308, i32 4, <8 x i1> %300, <8 x float> undef), !tbaa !12, !alias.scope !115 - %309 = add i32 %302, -1 - %310 = sext i32 %309 to i64 - %311 = getelementptr inbounds float, float* %11, i64 %310 - %312 = bitcast float* %311 to <8 x float>* - %wide.masked.load117.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %312, i32 4, <8 x i1> %300, <8 x float> undef), !tbaa !12, !alias.scope !116 - %313 = fsub <8 x float> %wide.masked.load116.1, %wide.masked.load117.1 - %314 = fpext <8 x float> %313 to <8 x double> - %315 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %314, <8 x double> , <8 x double> %306) - %316 = fptrunc <8 x double> %315 to <8 x float> - %317 = bitcast float* %304 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %316, <8 x float>* %317, i32 4, <8 x i1> %300), !tbaa !12, !alias.scope !109, !noalias !112, !llvm.access.group !24 - %318 = trunc <8 x i64> %broadcast.splat112 to <8 x i32> - %319 = or <8 x i32> %318, - %320 = icmp sgt <8 x i32> %broadcast.splat114, %319 - %321 = icmp sgt <8 x i32> %319, zeroinitializer - %322 = and <8 x i1> %320, %321 - %323 = extractelement <8 x i32> %319, i32 0 - %324 = add i32 %mul.i.i.2, %323 - %325 = sext i32 %324 to i64 - %326 = getelementptr inbounds float, float* %7, i64 %325 - %327 = bitcast float* %326 to <8 x float>* - %wide.masked.load115.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %327, i32 4, <8 x i1> %322, <8 x float> undef), !tbaa !12, !alias.scope !109, !noalias !112 - %328 = fpext <8 x float> %wide.masked.load115.2 to <8 x double> - %329 = getelementptr inbounds float, float* %11, i64 %325 - %330 = bitcast float* %329 to <8 x float>* - %wide.masked.load116.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %330, i32 4, <8 x i1> %322, <8 x float> undef), !tbaa !12, !alias.scope !115 - %331 = add i32 %324, -1 - %332 = sext i32 %331 to i64 - %333 = getelementptr inbounds float, float* %11, i64 %332 - %334 = bitcast float* %333 to <8 x float>* - %wide.masked.load117.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %334, i32 4, <8 x i1> %322, <8 x float> undef), !tbaa !12, !alias.scope !116 - %335 = fsub <8 x float> %wide.masked.load116.2, %wide.masked.load117.2 - %336 = fpext <8 x float> %335 to <8 x double> - %337 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %336, <8 x double> , <8 x double> %328) - %338 = fptrunc <8 x double> %337 to <8 x float> - %339 = bitcast float* %326 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %338, <8 x float>* %339, i32 4, <8 x i1> %322), !tbaa !12, !alias.scope !109, !noalias !112, !llvm.access.group !24 - %340 = trunc <8 x i64> %broadcast.splat112 to <8 x i32> - %341 = or <8 x i32> %340, - %342 = icmp sgt <8 x i32> %broadcast.splat114, %341 - %343 = icmp sgt <8 x i32> %341, zeroinitializer - %344 = and <8 x i1> %342, %343 - %345 = extractelement <8 x i32> %341, i32 0 - %346 = add i32 %mul.i.i.2, %345 - %347 = sext i32 %346 to i64 - %348 = getelementptr inbounds float, float* %7, i64 %347 - %349 = bitcast float* %348 to <8 x float>* - %wide.masked.load115.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %349, i32 4, <8 x i1> %344, <8 x float> undef), !tbaa !12, !alias.scope !109, !noalias !112 - %350 = fpext <8 x float> %wide.masked.load115.3 to <8 x double> - %351 = getelementptr inbounds float, float* %11, i64 %347 - %352 = bitcast float* %351 to <8 x float>* - %wide.masked.load116.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %352, i32 4, <8 x i1> %344, <8 x float> undef), !tbaa !12, !alias.scope !115 - %353 = add i32 %346, -1 - %354 = sext i32 %353 to i64 - %355 = getelementptr inbounds float, float* %11, i64 %354 - %356 = bitcast float* %355 to <8 x float>* - %wide.masked.load117.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %356, i32 4, <8 x i1> %344, <8 x float> undef), !tbaa !12, !alias.scope !116 - %357 = fsub <8 x float> %wide.masked.load116.3, %wide.masked.load117.3 - %358 = fpext <8 x float> %357 to <8 x double> - %359 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %358, <8 x double> , <8 x double> %350) - %360 = fptrunc <8 x double> %359 to <8 x float> - %361 = bitcast float* %348 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %360, <8 x float>* %361, i32 4, <8 x i1> %344), !tbaa !12, !alias.scope !109, !noalias !112, !llvm.access.group !24 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.i.i.us.2.1, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ], [ %968, %if.end.i.i.us.2.1 ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %19, %conv.i.i.us.2 - %cmp7.i.i.us.2 = icmp sgt i32 %conv.i.i.us.2, 0 - %or.cond.i.i.us.2 = and i1 %cmp4.i.i.us.2, %cmp7.i.i.us.2 - br i1 %or.cond.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %add.i.i.us.2 = add i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.2 - %362 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %conv9.i.i.us.2 = fpext float %362 to double - %arrayidx13.i.i.us.2 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.2 - %363 = load float, float* %arrayidx13.i.i.us.2, align 4, !tbaa !12 - %add15.i.i.us.2 = add i32 %add.i.i.us.2, -1 - %idxprom16.i.i.us.2 = sext i32 %add15.i.i.us.2 to i64 - %arrayidx17.i.i.us.2 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.2 - %364 = load float, float* %arrayidx17.i.i.us.2, align 4, !tbaa !12 - %sub18.i.i.us.2 = fsub float %363, %364 - %conv19.i.i.us.2 = fpext float %sub18.i.i.us.2 to double - %365 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.2, double -5.000000e-01, double %conv9.i.i.us.2) #5 - %conv21.i.i.us.2 = fptrunc double %365 to float - store float %conv21.i.i.us.2, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %366 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %366, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %19, %conv.i.i.us.2.1 - %cmp7.i.i.us.2.1 = icmp sgt i32 %conv.i.i.us.2.1, 0 - %or.cond.i.i.us.2.1 = and i1 %cmp4.i.i.us.2.1, %cmp7.i.i.us.2.1 - br i1 %or.cond.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.i.i.us.2.1 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph104, %pregion_for_end.i.i.1 - %367 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.3 = or i32 %367, 3 - %cmp.i.i.3 = icmp sgt i32 %15, %conv2.i.i.3 - %mul.i.i.3 = mul nsw i32 %19, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck128, label %pregion_for_end.i.i.3 - -vector.scevcheck128: ; preds = %pregion_for_end.i.i.2 - %368 = mul i32 %19, %conv2.i.i.3 - %369 = trunc i64 %2 to i32 - %370 = shl i32 %369, 5 - %371 = add i32 %368, %370 - %372 = icmp sgt i32 %371, 2147483616 - %373 = add i32 %368, %370 - %374 = add i32 %373, -1 - %375 = add i32 %373, 30 - %376 = icmp slt i32 %375, %374 - %377 = or i1 %372, %376 - br i1 %377, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.memcheck150 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.memcheck150, %vector.scevcheck128 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.memcheck150: ; preds = %vector.scevcheck128 - %378 = mul i32 %19, %conv2.i.i.3 - %379 = trunc i64 %2 to i32 - %380 = shl i32 %379, 5 - %381 = add i32 %378, %380 - %382 = sext i32 %381 to i64 - %scevgep130 = getelementptr float, float* %7, i64 %382 - %383 = add nsw i64 %382, 32 - %scevgep132 = getelementptr float, float* %7, i64 %383 - %384 = add i32 %378, %380 - %385 = add i32 %384, -1 - %386 = sext i32 %385 to i64 - %scevgep134 = getelementptr float, float* %11, i64 %386 - %387 = add nsw i64 %386, 32 - %scevgep136 = getelementptr float, float* %11, i64 %387 - %scevgep138 = getelementptr float, float* %11, i64 %382 - %scevgep140 = getelementptr float, float* %11, i64 %383 - %bound0142 = icmp ult float* %scevgep130, %scevgep136 - %bound1143 = icmp ult float* %scevgep134, %scevgep132 - %found.conflict144 = and i1 %bound0142, %bound1143 - %bound0145 = icmp ult float* %scevgep130, %scevgep140 - %bound1146 = icmp ult float* %scevgep138, %scevgep132 - %found.conflict147 = and i1 %bound0145, %bound1146 - %conflict.rdx148 = or i1 %found.conflict144, %found.conflict147 - br i1 %conflict.rdx148, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph151 - -vector.ph151: ; preds = %vector.memcheck150 - %broadcast.splatinsert158 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat159 = shufflevector <8 x i64> %broadcast.splatinsert158, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert160 = insertelement <8 x i32> undef, i32 %19, i32 0 - %broadcast.splat161 = shufflevector <8 x i32> %broadcast.splatinsert160, <8 x i32> undef, <8 x i32> zeroinitializer - %388 = trunc <8 x i64> %broadcast.splat159 to <8 x i32> - %389 = or <8 x i32> %388, - %390 = icmp sgt <8 x i32> %broadcast.splat161, %389 - %391 = icmp sgt <8 x i32> %389, zeroinitializer - %392 = and <8 x i1> %390, %391 - %393 = extractelement <8 x i32> %389, i32 0 - %394 = add i32 %mul.i.i.3, %393 - %395 = sext i32 %394 to i64 - %396 = getelementptr inbounds float, float* %7, i64 %395 - %397 = bitcast float* %396 to <8 x float>* - %wide.masked.load162 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %397, i32 4, <8 x i1> %392, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %398 = fpext <8 x float> %wide.masked.load162 to <8 x double> - %399 = getelementptr inbounds float, float* %11, i64 %395 - %400 = bitcast float* %399 to <8 x float>* - %wide.masked.load163 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %400, i32 4, <8 x i1> %392, <8 x float> undef), !tbaa !12, !alias.scope !123 - %401 = add i32 %394, -1 - %402 = sext i32 %401 to i64 - %403 = getelementptr inbounds float, float* %11, i64 %402 - %404 = bitcast float* %403 to <8 x float>* - %wide.masked.load164 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %404, i32 4, <8 x i1> %392, <8 x float> undef), !tbaa !12, !alias.scope !124 - %405 = fsub <8 x float> %wide.masked.load163, %wide.masked.load164 - %406 = fpext <8 x float> %405 to <8 x double> - %407 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %406, <8 x double> , <8 x double> %398) - %408 = fptrunc <8 x double> %407 to <8 x float> - %409 = bitcast float* %396 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %408, <8 x float>* %409, i32 4, <8 x i1> %392), !tbaa !12, !alias.scope !117, !noalias !120, !llvm.access.group !24 - %410 = trunc <8 x i64> %broadcast.splat159 to <8 x i32> - %411 = or <8 x i32> %410, - %412 = icmp sgt <8 x i32> %broadcast.splat161, %411 - %413 = icmp sgt <8 x i32> %411, zeroinitializer - %414 = and <8 x i1> %412, %413 - %415 = extractelement <8 x i32> %411, i32 0 - %416 = add i32 %mul.i.i.3, %415 - %417 = sext i32 %416 to i64 - %418 = getelementptr inbounds float, float* %7, i64 %417 - %419 = bitcast float* %418 to <8 x float>* - %wide.masked.load162.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %419, i32 4, <8 x i1> %414, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %420 = fpext <8 x float> %wide.masked.load162.1 to <8 x double> - %421 = getelementptr inbounds float, float* %11, i64 %417 - %422 = bitcast float* %421 to <8 x float>* - %wide.masked.load163.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %422, i32 4, <8 x i1> %414, <8 x float> undef), !tbaa !12, !alias.scope !123 - %423 = add i32 %416, -1 - %424 = sext i32 %423 to i64 - %425 = getelementptr inbounds float, float* %11, i64 %424 - %426 = bitcast float* %425 to <8 x float>* - %wide.masked.load164.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %426, i32 4, <8 x i1> %414, <8 x float> undef), !tbaa !12, !alias.scope !124 - %427 = fsub <8 x float> %wide.masked.load163.1, %wide.masked.load164.1 - %428 = fpext <8 x float> %427 to <8 x double> - %429 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %428, <8 x double> , <8 x double> %420) - %430 = fptrunc <8 x double> %429 to <8 x float> - %431 = bitcast float* %418 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %430, <8 x float>* %431, i32 4, <8 x i1> %414), !tbaa !12, !alias.scope !117, !noalias !120, !llvm.access.group !24 - %432 = trunc <8 x i64> %broadcast.splat159 to <8 x i32> - %433 = or <8 x i32> %432, - %434 = icmp sgt <8 x i32> %broadcast.splat161, %433 - %435 = icmp sgt <8 x i32> %433, zeroinitializer - %436 = and <8 x i1> %434, %435 - %437 = extractelement <8 x i32> %433, i32 0 - %438 = add i32 %mul.i.i.3, %437 - %439 = sext i32 %438 to i64 - %440 = getelementptr inbounds float, float* %7, i64 %439 - %441 = bitcast float* %440 to <8 x float>* - %wide.masked.load162.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %441, i32 4, <8 x i1> %436, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %442 = fpext <8 x float> %wide.masked.load162.2 to <8 x double> - %443 = getelementptr inbounds float, float* %11, i64 %439 - %444 = bitcast float* %443 to <8 x float>* - %wide.masked.load163.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %444, i32 4, <8 x i1> %436, <8 x float> undef), !tbaa !12, !alias.scope !123 - %445 = add i32 %438, -1 - %446 = sext i32 %445 to i64 - %447 = getelementptr inbounds float, float* %11, i64 %446 - %448 = bitcast float* %447 to <8 x float>* - %wide.masked.load164.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %448, i32 4, <8 x i1> %436, <8 x float> undef), !tbaa !12, !alias.scope !124 - %449 = fsub <8 x float> %wide.masked.load163.2, %wide.masked.load164.2 - %450 = fpext <8 x float> %449 to <8 x double> - %451 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %450, <8 x double> , <8 x double> %442) - %452 = fptrunc <8 x double> %451 to <8 x float> - %453 = bitcast float* %440 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %452, <8 x float>* %453, i32 4, <8 x i1> %436), !tbaa !12, !alias.scope !117, !noalias !120, !llvm.access.group !24 - %454 = trunc <8 x i64> %broadcast.splat159 to <8 x i32> - %455 = or <8 x i32> %454, - %456 = icmp sgt <8 x i32> %broadcast.splat161, %455 - %457 = icmp sgt <8 x i32> %455, zeroinitializer - %458 = and <8 x i1> %456, %457 - %459 = extractelement <8 x i32> %455, i32 0 - %460 = add i32 %mul.i.i.3, %459 - %461 = sext i32 %460 to i64 - %462 = getelementptr inbounds float, float* %7, i64 %461 - %463 = bitcast float* %462 to <8 x float>* - %wide.masked.load162.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %463, i32 4, <8 x i1> %458, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %464 = fpext <8 x float> %wide.masked.load162.3 to <8 x double> - %465 = getelementptr inbounds float, float* %11, i64 %461 - %466 = bitcast float* %465 to <8 x float>* - %wide.masked.load163.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %466, i32 4, <8 x i1> %458, <8 x float> undef), !tbaa !12, !alias.scope !123 - %467 = add i32 %460, -1 - %468 = sext i32 %467 to i64 - %469 = getelementptr inbounds float, float* %11, i64 %468 - %470 = bitcast float* %469 to <8 x float>* - %wide.masked.load164.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %470, i32 4, <8 x i1> %458, <8 x float> undef), !tbaa !12, !alias.scope !124 - %471 = fsub <8 x float> %wide.masked.load163.3, %wide.masked.load164.3 - %472 = fpext <8 x float> %471 to <8 x double> - %473 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %472, <8 x double> , <8 x double> %464) - %474 = fptrunc <8 x double> %473 to <8 x float> - %475 = bitcast float* %462 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %474, <8 x float>* %475, i32 4, <8 x i1> %458), !tbaa !12, !alias.scope !117, !noalias !120, !llvm.access.group !24 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.i.i.us.3.1, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ], [ %963, %if.end.i.i.us.3.1 ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %19, %conv.i.i.us.3 - %cmp7.i.i.us.3 = icmp sgt i32 %conv.i.i.us.3, 0 - %or.cond.i.i.us.3 = and i1 %cmp4.i.i.us.3, %cmp7.i.i.us.3 - br i1 %or.cond.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %add.i.i.us.3 = add i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.3 - %476 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %conv9.i.i.us.3 = fpext float %476 to double - %arrayidx13.i.i.us.3 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.3 - %477 = load float, float* %arrayidx13.i.i.us.3, align 4, !tbaa !12 - %add15.i.i.us.3 = add i32 %add.i.i.us.3, -1 - %idxprom16.i.i.us.3 = sext i32 %add15.i.i.us.3 to i64 - %arrayidx17.i.i.us.3 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.3 - %478 = load float, float* %arrayidx17.i.i.us.3, align 4, !tbaa !12 - %sub18.i.i.us.3 = fsub float %477, %478 - %conv19.i.i.us.3 = fpext float %sub18.i.i.us.3 to double - %479 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.3, double -5.000000e-01, double %conv9.i.i.us.3) #5 - %conv21.i.i.us.3 = fptrunc double %479 to float - store float %conv21.i.i.us.3, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %480 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %480, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %19, %conv.i.i.us.3.1 - %cmp7.i.i.us.3.1 = icmp sgt i32 %conv.i.i.us.3.1, 0 - %or.cond.i.i.us.3.1 = and i1 %cmp4.i.i.us.3.1, %cmp7.i.i.us.3.1 - br i1 %or.cond.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.i.i.us.3.1 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph151, %pregion_for_end.i.i.2 - %481 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.4 = or i32 %481, 4 - %cmp.i.i.4 = icmp sgt i32 %15, %conv2.i.i.4 - %mul.i.i.4 = mul nsw i32 %19, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck175, label %pregion_for_end.i.i.4 - -vector.scevcheck175: ; preds = %pregion_for_end.i.i.3 - %482 = mul i32 %19, %conv2.i.i.4 - %483 = trunc i64 %2 to i32 - %484 = shl i32 %483, 5 - %485 = add i32 %482, %484 - %486 = icmp sgt i32 %485, 2147483616 - %487 = add i32 %482, %484 - %488 = add i32 %487, -1 - %489 = add i32 %487, 30 - %490 = icmp slt i32 %489, %488 - %491 = or i1 %486, %490 - br i1 %491, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.memcheck197 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.memcheck197, %vector.scevcheck175 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.memcheck197: ; preds = %vector.scevcheck175 - %492 = mul i32 %19, %conv2.i.i.4 - %493 = trunc i64 %2 to i32 - %494 = shl i32 %493, 5 - %495 = add i32 %492, %494 - %496 = sext i32 %495 to i64 - %scevgep177 = getelementptr float, float* %7, i64 %496 - %497 = add nsw i64 %496, 32 - %scevgep179 = getelementptr float, float* %7, i64 %497 - %498 = add i32 %492, %494 - %499 = add i32 %498, -4 - %500 = sext i32 %499 to i64 - %501 = add nuw nsw i64 %500, 3 - %scevgep181 = getelementptr float, float* %11, i64 %501 - %502 = add nsw i64 %500, 35 - %scevgep183 = getelementptr float, float* %11, i64 %502 - %scevgep185 = getelementptr float, float* %11, i64 %496 - %scevgep187 = getelementptr float, float* %11, i64 %497 - %bound0189 = icmp ult float* %scevgep177, %scevgep183 - %bound1190 = icmp ult float* %scevgep181, %scevgep179 - %found.conflict191 = and i1 %bound0189, %bound1190 - %bound0192 = icmp ult float* %scevgep177, %scevgep187 - %bound1193 = icmp ult float* %scevgep185, %scevgep179 - %found.conflict194 = and i1 %bound0192, %bound1193 - %conflict.rdx195 = or i1 %found.conflict191, %found.conflict194 - br i1 %conflict.rdx195, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph198 - -vector.ph198: ; preds = %vector.memcheck197 - %broadcast.splatinsert205 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat206 = shufflevector <8 x i64> %broadcast.splatinsert205, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert207 = insertelement <8 x i32> undef, i32 %19, i32 0 - %broadcast.splat208 = shufflevector <8 x i32> %broadcast.splatinsert207, <8 x i32> undef, <8 x i32> zeroinitializer - %503 = trunc <8 x i64> %broadcast.splat206 to <8 x i32> - %504 = or <8 x i32> %503, - %505 = icmp sgt <8 x i32> %broadcast.splat208, %504 - %506 = icmp sgt <8 x i32> %504, zeroinitializer - %507 = and <8 x i1> %505, %506 - %508 = extractelement <8 x i32> %504, i32 0 - %509 = add i32 %mul.i.i.4, %508 - %510 = sext i32 %509 to i64 - %511 = getelementptr inbounds float, float* %7, i64 %510 - %512 = bitcast float* %511 to <8 x float>* - %wide.masked.load209 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %512, i32 4, <8 x i1> %507, <8 x float> undef), !tbaa !12, !alias.scope !125, !noalias !128 - %513 = fpext <8 x float> %wide.masked.load209 to <8 x double> - %514 = getelementptr inbounds float, float* %11, i64 %510 - %515 = bitcast float* %514 to <8 x float>* - %wide.masked.load210 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %515, i32 4, <8 x i1> %507, <8 x float> undef), !tbaa !12, !alias.scope !131 - %516 = add i32 %509, -1 - %517 = sext i32 %516 to i64 - %518 = getelementptr inbounds float, float* %11, i64 %517 - %519 = bitcast float* %518 to <8 x float>* - %wide.masked.load211 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %519, i32 4, <8 x i1> %507, <8 x float> undef), !tbaa !12, !alias.scope !132 - %520 = fsub <8 x float> %wide.masked.load210, %wide.masked.load211 - %521 = fpext <8 x float> %520 to <8 x double> - %522 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %521, <8 x double> , <8 x double> %513) - %523 = fptrunc <8 x double> %522 to <8 x float> - %524 = bitcast float* %511 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %523, <8 x float>* %524, i32 4, <8 x i1> %507), !tbaa !12, !alias.scope !125, !noalias !128, !llvm.access.group !24 - %525 = trunc <8 x i64> %broadcast.splat206 to <8 x i32> - %526 = or <8 x i32> %525, - %527 = icmp sgt <8 x i32> %broadcast.splat208, %526 - %528 = icmp sgt <8 x i32> %526, zeroinitializer - %529 = and <8 x i1> %527, %528 - %530 = extractelement <8 x i32> %526, i32 0 - %531 = add i32 %mul.i.i.4, %530 - %532 = sext i32 %531 to i64 - %533 = getelementptr inbounds float, float* %7, i64 %532 - %534 = bitcast float* %533 to <8 x float>* - %wide.masked.load209.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %534, i32 4, <8 x i1> %529, <8 x float> undef), !tbaa !12, !alias.scope !125, !noalias !128 - %535 = fpext <8 x float> %wide.masked.load209.1 to <8 x double> - %536 = getelementptr inbounds float, float* %11, i64 %532 - %537 = bitcast float* %536 to <8 x float>* - %wide.masked.load210.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %537, i32 4, <8 x i1> %529, <8 x float> undef), !tbaa !12, !alias.scope !131 - %538 = add i32 %531, -1 - %539 = sext i32 %538 to i64 - %540 = getelementptr inbounds float, float* %11, i64 %539 - %541 = bitcast float* %540 to <8 x float>* - %wide.masked.load211.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %541, i32 4, <8 x i1> %529, <8 x float> undef), !tbaa !12, !alias.scope !132 - %542 = fsub <8 x float> %wide.masked.load210.1, %wide.masked.load211.1 - %543 = fpext <8 x float> %542 to <8 x double> - %544 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %543, <8 x double> , <8 x double> %535) - %545 = fptrunc <8 x double> %544 to <8 x float> - %546 = bitcast float* %533 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %545, <8 x float>* %546, i32 4, <8 x i1> %529), !tbaa !12, !alias.scope !125, !noalias !128, !llvm.access.group !24 - %547 = trunc <8 x i64> %broadcast.splat206 to <8 x i32> - %548 = or <8 x i32> %547, - %549 = icmp sgt <8 x i32> %broadcast.splat208, %548 - %550 = icmp sgt <8 x i32> %548, zeroinitializer - %551 = and <8 x i1> %549, %550 - %552 = extractelement <8 x i32> %548, i32 0 - %553 = add i32 %mul.i.i.4, %552 - %554 = sext i32 %553 to i64 - %555 = getelementptr inbounds float, float* %7, i64 %554 - %556 = bitcast float* %555 to <8 x float>* - %wide.masked.load209.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %556, i32 4, <8 x i1> %551, <8 x float> undef), !tbaa !12, !alias.scope !125, !noalias !128 - %557 = fpext <8 x float> %wide.masked.load209.2 to <8 x double> - %558 = getelementptr inbounds float, float* %11, i64 %554 - %559 = bitcast float* %558 to <8 x float>* - %wide.masked.load210.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %559, i32 4, <8 x i1> %551, <8 x float> undef), !tbaa !12, !alias.scope !131 - %560 = add i32 %553, -1 - %561 = sext i32 %560 to i64 - %562 = getelementptr inbounds float, float* %11, i64 %561 - %563 = bitcast float* %562 to <8 x float>* - %wide.masked.load211.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %563, i32 4, <8 x i1> %551, <8 x float> undef), !tbaa !12, !alias.scope !132 - %564 = fsub <8 x float> %wide.masked.load210.2, %wide.masked.load211.2 - %565 = fpext <8 x float> %564 to <8 x double> - %566 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %565, <8 x double> , <8 x double> %557) - %567 = fptrunc <8 x double> %566 to <8 x float> - %568 = bitcast float* %555 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %567, <8 x float>* %568, i32 4, <8 x i1> %551), !tbaa !12, !alias.scope !125, !noalias !128, !llvm.access.group !24 - %569 = trunc <8 x i64> %broadcast.splat206 to <8 x i32> - %570 = or <8 x i32> %569, - %571 = icmp sgt <8 x i32> %broadcast.splat208, %570 - %572 = icmp sgt <8 x i32> %570, zeroinitializer - %573 = and <8 x i1> %571, %572 - %574 = extractelement <8 x i32> %570, i32 0 - %575 = add i32 %mul.i.i.4, %574 - %576 = sext i32 %575 to i64 - %577 = getelementptr inbounds float, float* %7, i64 %576 - %578 = bitcast float* %577 to <8 x float>* - %wide.masked.load209.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %578, i32 4, <8 x i1> %573, <8 x float> undef), !tbaa !12, !alias.scope !125, !noalias !128 - %579 = fpext <8 x float> %wide.masked.load209.3 to <8 x double> - %580 = getelementptr inbounds float, float* %11, i64 %576 - %581 = bitcast float* %580 to <8 x float>* - %wide.masked.load210.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %581, i32 4, <8 x i1> %573, <8 x float> undef), !tbaa !12, !alias.scope !131 - %582 = add i32 %575, -1 - %583 = sext i32 %582 to i64 - %584 = getelementptr inbounds float, float* %11, i64 %583 - %585 = bitcast float* %584 to <8 x float>* - %wide.masked.load211.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %585, i32 4, <8 x i1> %573, <8 x float> undef), !tbaa !12, !alias.scope !132 - %586 = fsub <8 x float> %wide.masked.load210.3, %wide.masked.load211.3 - %587 = fpext <8 x float> %586 to <8 x double> - %588 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %587, <8 x double> , <8 x double> %579) - %589 = fptrunc <8 x double> %588 to <8 x float> - %590 = bitcast float* %577 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %589, <8 x float>* %590, i32 4, <8 x i1> %573), !tbaa !12, !alias.scope !125, !noalias !128, !llvm.access.group !24 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.i.i.us.4.1, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ], [ %958, %if.end.i.i.us.4.1 ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %19, %conv.i.i.us.4 - %cmp7.i.i.us.4 = icmp sgt i32 %conv.i.i.us.4, 0 - %or.cond.i.i.us.4 = and i1 %cmp4.i.i.us.4, %cmp7.i.i.us.4 - br i1 %or.cond.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %add.i.i.us.4 = add i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.4 - %591 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %conv9.i.i.us.4 = fpext float %591 to double - %arrayidx13.i.i.us.4 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.4 - %592 = load float, float* %arrayidx13.i.i.us.4, align 4, !tbaa !12 - %add15.i.i.us.4 = add i32 %add.i.i.us.4, -1 - %idxprom16.i.i.us.4 = sext i32 %add15.i.i.us.4 to i64 - %arrayidx17.i.i.us.4 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.4 - %593 = load float, float* %arrayidx17.i.i.us.4, align 4, !tbaa !12 - %sub18.i.i.us.4 = fsub float %592, %593 - %conv19.i.i.us.4 = fpext float %sub18.i.i.us.4 to double - %594 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.4, double -5.000000e-01, double %conv9.i.i.us.4) #5 - %conv21.i.i.us.4 = fptrunc double %594 to float - store float %conv21.i.i.us.4, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %595 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %595, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %19, %conv.i.i.us.4.1 - %cmp7.i.i.us.4.1 = icmp sgt i32 %conv.i.i.us.4.1, 0 - %or.cond.i.i.us.4.1 = and i1 %cmp4.i.i.us.4.1, %cmp7.i.i.us.4.1 - br i1 %or.cond.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.i.i.us.4.1 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph198, %pregion_for_end.i.i.3 - %596 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.5 = or i32 %596, 5 - %cmp.i.i.5 = icmp sgt i32 %15, %conv2.i.i.5 - %mul.i.i.5 = mul nsw i32 %19, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck222, label %pregion_for_end.i.i.5 - -vector.scevcheck222: ; preds = %pregion_for_end.i.i.4 - %597 = mul i32 %19, %conv2.i.i.5 - %598 = trunc i64 %2 to i32 - %599 = shl i32 %598, 5 - %600 = add i32 %597, %599 - %601 = icmp sgt i32 %600, 2147483616 - %602 = add i32 %597, %599 - %603 = add i32 %602, -1 - %604 = add i32 %602, 30 - %605 = icmp slt i32 %604, %603 - %606 = or i1 %601, %605 - br i1 %606, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.memcheck244 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.memcheck244, %vector.scevcheck222 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.memcheck244: ; preds = %vector.scevcheck222 - %607 = mul i32 %19, %conv2.i.i.5 - %608 = trunc i64 %2 to i32 - %609 = shl i32 %608, 5 - %610 = add i32 %607, %609 - %611 = sext i32 %610 to i64 - %scevgep224 = getelementptr float, float* %7, i64 %611 - %612 = add nsw i64 %611, 32 - %scevgep226 = getelementptr float, float* %7, i64 %612 - %613 = add i32 %607, %609 - %614 = add i32 %613, -1 - %615 = sext i32 %614 to i64 - %scevgep228 = getelementptr float, float* %11, i64 %615 - %616 = add nsw i64 %615, 32 - %scevgep230 = getelementptr float, float* %11, i64 %616 - %scevgep232 = getelementptr float, float* %11, i64 %611 - %scevgep234 = getelementptr float, float* %11, i64 %612 - %bound0236 = icmp ult float* %scevgep224, %scevgep230 - %bound1237 = icmp ult float* %scevgep228, %scevgep226 - %found.conflict238 = and i1 %bound0236, %bound1237 - %bound0239 = icmp ult float* %scevgep224, %scevgep234 - %bound1240 = icmp ult float* %scevgep232, %scevgep226 - %found.conflict241 = and i1 %bound0239, %bound1240 - %conflict.rdx242 = or i1 %found.conflict238, %found.conflict241 - br i1 %conflict.rdx242, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph245 - -vector.ph245: ; preds = %vector.memcheck244 - %broadcast.splatinsert252 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat253 = shufflevector <8 x i64> %broadcast.splatinsert252, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert254 = insertelement <8 x i32> undef, i32 %19, i32 0 - %broadcast.splat255 = shufflevector <8 x i32> %broadcast.splatinsert254, <8 x i32> undef, <8 x i32> zeroinitializer - %617 = trunc <8 x i64> %broadcast.splat253 to <8 x i32> - %618 = or <8 x i32> %617, - %619 = icmp sgt <8 x i32> %broadcast.splat255, %618 - %620 = icmp sgt <8 x i32> %618, zeroinitializer - %621 = and <8 x i1> %619, %620 - %622 = extractelement <8 x i32> %618, i32 0 - %623 = add i32 %mul.i.i.5, %622 - %624 = sext i32 %623 to i64 - %625 = getelementptr inbounds float, float* %7, i64 %624 - %626 = bitcast float* %625 to <8 x float>* - %wide.masked.load256 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %626, i32 4, <8 x i1> %621, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !136 - %627 = fpext <8 x float> %wide.masked.load256 to <8 x double> - %628 = getelementptr inbounds float, float* %11, i64 %624 - %629 = bitcast float* %628 to <8 x float>* - %wide.masked.load257 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %629, i32 4, <8 x i1> %621, <8 x float> undef), !tbaa !12, !alias.scope !139 - %630 = add i32 %623, -1 - %631 = sext i32 %630 to i64 - %632 = getelementptr inbounds float, float* %11, i64 %631 - %633 = bitcast float* %632 to <8 x float>* - %wide.masked.load258 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %633, i32 4, <8 x i1> %621, <8 x float> undef), !tbaa !12, !alias.scope !140 - %634 = fsub <8 x float> %wide.masked.load257, %wide.masked.load258 - %635 = fpext <8 x float> %634 to <8 x double> - %636 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %635, <8 x double> , <8 x double> %627) - %637 = fptrunc <8 x double> %636 to <8 x float> - %638 = bitcast float* %625 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %637, <8 x float>* %638, i32 4, <8 x i1> %621), !tbaa !12, !alias.scope !133, !noalias !136, !llvm.access.group !24 - %639 = trunc <8 x i64> %broadcast.splat253 to <8 x i32> - %640 = or <8 x i32> %639, - %641 = icmp sgt <8 x i32> %broadcast.splat255, %640 - %642 = icmp sgt <8 x i32> %640, zeroinitializer - %643 = and <8 x i1> %641, %642 - %644 = extractelement <8 x i32> %640, i32 0 - %645 = add i32 %mul.i.i.5, %644 - %646 = sext i32 %645 to i64 - %647 = getelementptr inbounds float, float* %7, i64 %646 - %648 = bitcast float* %647 to <8 x float>* - %wide.masked.load256.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %648, i32 4, <8 x i1> %643, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !136 - %649 = fpext <8 x float> %wide.masked.load256.1 to <8 x double> - %650 = getelementptr inbounds float, float* %11, i64 %646 - %651 = bitcast float* %650 to <8 x float>* - %wide.masked.load257.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %651, i32 4, <8 x i1> %643, <8 x float> undef), !tbaa !12, !alias.scope !139 - %652 = add i32 %645, -1 - %653 = sext i32 %652 to i64 - %654 = getelementptr inbounds float, float* %11, i64 %653 - %655 = bitcast float* %654 to <8 x float>* - %wide.masked.load258.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %655, i32 4, <8 x i1> %643, <8 x float> undef), !tbaa !12, !alias.scope !140 - %656 = fsub <8 x float> %wide.masked.load257.1, %wide.masked.load258.1 - %657 = fpext <8 x float> %656 to <8 x double> - %658 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %657, <8 x double> , <8 x double> %649) - %659 = fptrunc <8 x double> %658 to <8 x float> - %660 = bitcast float* %647 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %659, <8 x float>* %660, i32 4, <8 x i1> %643), !tbaa !12, !alias.scope !133, !noalias !136, !llvm.access.group !24 - %661 = trunc <8 x i64> %broadcast.splat253 to <8 x i32> - %662 = or <8 x i32> %661, - %663 = icmp sgt <8 x i32> %broadcast.splat255, %662 - %664 = icmp sgt <8 x i32> %662, zeroinitializer - %665 = and <8 x i1> %663, %664 - %666 = extractelement <8 x i32> %662, i32 0 - %667 = add i32 %mul.i.i.5, %666 - %668 = sext i32 %667 to i64 - %669 = getelementptr inbounds float, float* %7, i64 %668 - %670 = bitcast float* %669 to <8 x float>* - %wide.masked.load256.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %670, i32 4, <8 x i1> %665, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !136 - %671 = fpext <8 x float> %wide.masked.load256.2 to <8 x double> - %672 = getelementptr inbounds float, float* %11, i64 %668 - %673 = bitcast float* %672 to <8 x float>* - %wide.masked.load257.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %673, i32 4, <8 x i1> %665, <8 x float> undef), !tbaa !12, !alias.scope !139 - %674 = add i32 %667, -1 - %675 = sext i32 %674 to i64 - %676 = getelementptr inbounds float, float* %11, i64 %675 - %677 = bitcast float* %676 to <8 x float>* - %wide.masked.load258.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %677, i32 4, <8 x i1> %665, <8 x float> undef), !tbaa !12, !alias.scope !140 - %678 = fsub <8 x float> %wide.masked.load257.2, %wide.masked.load258.2 - %679 = fpext <8 x float> %678 to <8 x double> - %680 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %679, <8 x double> , <8 x double> %671) - %681 = fptrunc <8 x double> %680 to <8 x float> - %682 = bitcast float* %669 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %681, <8 x float>* %682, i32 4, <8 x i1> %665), !tbaa !12, !alias.scope !133, !noalias !136, !llvm.access.group !24 - %683 = trunc <8 x i64> %broadcast.splat253 to <8 x i32> - %684 = or <8 x i32> %683, - %685 = icmp sgt <8 x i32> %broadcast.splat255, %684 - %686 = icmp sgt <8 x i32> %684, zeroinitializer - %687 = and <8 x i1> %685, %686 - %688 = extractelement <8 x i32> %684, i32 0 - %689 = add i32 %mul.i.i.5, %688 - %690 = sext i32 %689 to i64 - %691 = getelementptr inbounds float, float* %7, i64 %690 - %692 = bitcast float* %691 to <8 x float>* - %wide.masked.load256.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %692, i32 4, <8 x i1> %687, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !136 - %693 = fpext <8 x float> %wide.masked.load256.3 to <8 x double> - %694 = getelementptr inbounds float, float* %11, i64 %690 - %695 = bitcast float* %694 to <8 x float>* - %wide.masked.load257.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %695, i32 4, <8 x i1> %687, <8 x float> undef), !tbaa !12, !alias.scope !139 - %696 = add i32 %689, -1 - %697 = sext i32 %696 to i64 - %698 = getelementptr inbounds float, float* %11, i64 %697 - %699 = bitcast float* %698 to <8 x float>* - %wide.masked.load258.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %699, i32 4, <8 x i1> %687, <8 x float> undef), !tbaa !12, !alias.scope !140 - %700 = fsub <8 x float> %wide.masked.load257.3, %wide.masked.load258.3 - %701 = fpext <8 x float> %700 to <8 x double> - %702 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %701, <8 x double> , <8 x double> %693) - %703 = fptrunc <8 x double> %702 to <8 x float> - %704 = bitcast float* %691 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %703, <8 x float>* %704, i32 4, <8 x i1> %687), !tbaa !12, !alias.scope !133, !noalias !136, !llvm.access.group !24 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.i.i.us.5.1, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ], [ %953, %if.end.i.i.us.5.1 ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %19, %conv.i.i.us.5 - %cmp7.i.i.us.5 = icmp sgt i32 %conv.i.i.us.5, 0 - %or.cond.i.i.us.5 = and i1 %cmp4.i.i.us.5, %cmp7.i.i.us.5 - br i1 %or.cond.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %add.i.i.us.5 = add i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.5 - %705 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %conv9.i.i.us.5 = fpext float %705 to double - %arrayidx13.i.i.us.5 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.5 - %706 = load float, float* %arrayidx13.i.i.us.5, align 4, !tbaa !12 - %add15.i.i.us.5 = add i32 %add.i.i.us.5, -1 - %idxprom16.i.i.us.5 = sext i32 %add15.i.i.us.5 to i64 - %arrayidx17.i.i.us.5 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.5 - %707 = load float, float* %arrayidx17.i.i.us.5, align 4, !tbaa !12 - %sub18.i.i.us.5 = fsub float %706, %707 - %conv19.i.i.us.5 = fpext float %sub18.i.i.us.5 to double - %708 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.5, double -5.000000e-01, double %conv9.i.i.us.5) #5 - %conv21.i.i.us.5 = fptrunc double %708 to float - store float %conv21.i.i.us.5, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %709 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %709, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %19, %conv.i.i.us.5.1 - %cmp7.i.i.us.5.1 = icmp sgt i32 %conv.i.i.us.5.1, 0 - %or.cond.i.i.us.5.1 = and i1 %cmp4.i.i.us.5.1, %cmp7.i.i.us.5.1 - br i1 %or.cond.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.i.i.us.5.1 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph245, %pregion_for_end.i.i.4 - %710 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.6 = or i32 %710, 6 - %cmp.i.i.6 = icmp sgt i32 %15, %conv2.i.i.6 - %mul.i.i.6 = mul nsw i32 %19, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck269, label %pregion_for_end.i.i.6 - -vector.scevcheck269: ; preds = %pregion_for_end.i.i.5 - %711 = mul i32 %19, %conv2.i.i.6 - %712 = trunc i64 %2 to i32 - %713 = shl i32 %712, 5 - %714 = add i32 %711, %713 - %715 = icmp sgt i32 %714, 2147483616 - %716 = add i32 %711, %713 - %717 = add i32 %716, -1 - %718 = add i32 %716, 30 - %719 = icmp slt i32 %718, %717 - %720 = or i1 %715, %719 - br i1 %720, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.memcheck291 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.memcheck291, %vector.scevcheck269 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.memcheck291: ; preds = %vector.scevcheck269 - %721 = mul i32 %19, %conv2.i.i.6 - %722 = trunc i64 %2 to i32 - %723 = shl i32 %722, 5 - %724 = add i32 %721, %723 - %725 = sext i32 %724 to i64 - %scevgep271 = getelementptr float, float* %7, i64 %725 - %726 = add nsw i64 %725, 32 - %scevgep273 = getelementptr float, float* %7, i64 %726 - %727 = add i32 %721, %723 - %728 = add i32 %727, -2 - %729 = sext i32 %728 to i64 - %730 = add nuw nsw i64 %729, 1 - %scevgep275 = getelementptr float, float* %11, i64 %730 - %731 = add nsw i64 %729, 33 - %scevgep277 = getelementptr float, float* %11, i64 %731 - %scevgep279 = getelementptr float, float* %11, i64 %725 - %scevgep281 = getelementptr float, float* %11, i64 %726 - %bound0283 = icmp ult float* %scevgep271, %scevgep277 - %bound1284 = icmp ult float* %scevgep275, %scevgep273 - %found.conflict285 = and i1 %bound0283, %bound1284 - %bound0286 = icmp ult float* %scevgep271, %scevgep281 - %bound1287 = icmp ult float* %scevgep279, %scevgep273 - %found.conflict288 = and i1 %bound0286, %bound1287 - %conflict.rdx289 = or i1 %found.conflict285, %found.conflict288 - br i1 %conflict.rdx289, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph292 - -vector.ph292: ; preds = %vector.memcheck291 - %broadcast.splatinsert299 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat300 = shufflevector <8 x i64> %broadcast.splatinsert299, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert301 = insertelement <8 x i32> undef, i32 %19, i32 0 - %broadcast.splat302 = shufflevector <8 x i32> %broadcast.splatinsert301, <8 x i32> undef, <8 x i32> zeroinitializer - %732 = trunc <8 x i64> %broadcast.splat300 to <8 x i32> - %733 = or <8 x i32> %732, - %734 = icmp sgt <8 x i32> %broadcast.splat302, %733 - %735 = icmp sgt <8 x i32> %733, zeroinitializer - %736 = and <8 x i1> %734, %735 - %737 = extractelement <8 x i32> %733, i32 0 - %738 = add i32 %mul.i.i.6, %737 - %739 = sext i32 %738 to i64 - %740 = getelementptr inbounds float, float* %7, i64 %739 - %741 = bitcast float* %740 to <8 x float>* - %wide.masked.load303 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %741, i32 4, <8 x i1> %736, <8 x float> undef), !tbaa !12, !alias.scope !141, !noalias !144 - %742 = fpext <8 x float> %wide.masked.load303 to <8 x double> - %743 = getelementptr inbounds float, float* %11, i64 %739 - %744 = bitcast float* %743 to <8 x float>* - %wide.masked.load304 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %744, i32 4, <8 x i1> %736, <8 x float> undef), !tbaa !12, !alias.scope !147 - %745 = add i32 %738, -1 - %746 = sext i32 %745 to i64 - %747 = getelementptr inbounds float, float* %11, i64 %746 - %748 = bitcast float* %747 to <8 x float>* - %wide.masked.load305 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %748, i32 4, <8 x i1> %736, <8 x float> undef), !tbaa !12, !alias.scope !148 - %749 = fsub <8 x float> %wide.masked.load304, %wide.masked.load305 - %750 = fpext <8 x float> %749 to <8 x double> - %751 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %750, <8 x double> , <8 x double> %742) - %752 = fptrunc <8 x double> %751 to <8 x float> - %753 = bitcast float* %740 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %752, <8 x float>* %753, i32 4, <8 x i1> %736), !tbaa !12, !alias.scope !141, !noalias !144, !llvm.access.group !24 - %754 = trunc <8 x i64> %broadcast.splat300 to <8 x i32> - %755 = or <8 x i32> %754, - %756 = icmp sgt <8 x i32> %broadcast.splat302, %755 - %757 = icmp sgt <8 x i32> %755, zeroinitializer - %758 = and <8 x i1> %756, %757 - %759 = extractelement <8 x i32> %755, i32 0 - %760 = add i32 %mul.i.i.6, %759 - %761 = sext i32 %760 to i64 - %762 = getelementptr inbounds float, float* %7, i64 %761 - %763 = bitcast float* %762 to <8 x float>* - %wide.masked.load303.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %763, i32 4, <8 x i1> %758, <8 x float> undef), !tbaa !12, !alias.scope !141, !noalias !144 - %764 = fpext <8 x float> %wide.masked.load303.1 to <8 x double> - %765 = getelementptr inbounds float, float* %11, i64 %761 - %766 = bitcast float* %765 to <8 x float>* - %wide.masked.load304.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %766, i32 4, <8 x i1> %758, <8 x float> undef), !tbaa !12, !alias.scope !147 - %767 = add i32 %760, -1 - %768 = sext i32 %767 to i64 - %769 = getelementptr inbounds float, float* %11, i64 %768 - %770 = bitcast float* %769 to <8 x float>* - %wide.masked.load305.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %770, i32 4, <8 x i1> %758, <8 x float> undef), !tbaa !12, !alias.scope !148 - %771 = fsub <8 x float> %wide.masked.load304.1, %wide.masked.load305.1 - %772 = fpext <8 x float> %771 to <8 x double> - %773 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %772, <8 x double> , <8 x double> %764) - %774 = fptrunc <8 x double> %773 to <8 x float> - %775 = bitcast float* %762 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %774, <8 x float>* %775, i32 4, <8 x i1> %758), !tbaa !12, !alias.scope !141, !noalias !144, !llvm.access.group !24 - %776 = trunc <8 x i64> %broadcast.splat300 to <8 x i32> - %777 = or <8 x i32> %776, - %778 = icmp sgt <8 x i32> %broadcast.splat302, %777 - %779 = icmp sgt <8 x i32> %777, zeroinitializer - %780 = and <8 x i1> %778, %779 - %781 = extractelement <8 x i32> %777, i32 0 - %782 = add i32 %mul.i.i.6, %781 - %783 = sext i32 %782 to i64 - %784 = getelementptr inbounds float, float* %7, i64 %783 - %785 = bitcast float* %784 to <8 x float>* - %wide.masked.load303.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %785, i32 4, <8 x i1> %780, <8 x float> undef), !tbaa !12, !alias.scope !141, !noalias !144 - %786 = fpext <8 x float> %wide.masked.load303.2 to <8 x double> - %787 = getelementptr inbounds float, float* %11, i64 %783 - %788 = bitcast float* %787 to <8 x float>* - %wide.masked.load304.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %788, i32 4, <8 x i1> %780, <8 x float> undef), !tbaa !12, !alias.scope !147 - %789 = add i32 %782, -1 - %790 = sext i32 %789 to i64 - %791 = getelementptr inbounds float, float* %11, i64 %790 - %792 = bitcast float* %791 to <8 x float>* - %wide.masked.load305.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %792, i32 4, <8 x i1> %780, <8 x float> undef), !tbaa !12, !alias.scope !148 - %793 = fsub <8 x float> %wide.masked.load304.2, %wide.masked.load305.2 - %794 = fpext <8 x float> %793 to <8 x double> - %795 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %794, <8 x double> , <8 x double> %786) - %796 = fptrunc <8 x double> %795 to <8 x float> - %797 = bitcast float* %784 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %796, <8 x float>* %797, i32 4, <8 x i1> %780), !tbaa !12, !alias.scope !141, !noalias !144, !llvm.access.group !24 - %798 = trunc <8 x i64> %broadcast.splat300 to <8 x i32> - %799 = or <8 x i32> %798, - %800 = icmp sgt <8 x i32> %broadcast.splat302, %799 - %801 = icmp sgt <8 x i32> %799, zeroinitializer - %802 = and <8 x i1> %800, %801 - %803 = extractelement <8 x i32> %799, i32 0 - %804 = add i32 %mul.i.i.6, %803 - %805 = sext i32 %804 to i64 - %806 = getelementptr inbounds float, float* %7, i64 %805 - %807 = bitcast float* %806 to <8 x float>* - %wide.masked.load303.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %807, i32 4, <8 x i1> %802, <8 x float> undef), !tbaa !12, !alias.scope !141, !noalias !144 - %808 = fpext <8 x float> %wide.masked.load303.3 to <8 x double> - %809 = getelementptr inbounds float, float* %11, i64 %805 - %810 = bitcast float* %809 to <8 x float>* - %wide.masked.load304.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %810, i32 4, <8 x i1> %802, <8 x float> undef), !tbaa !12, !alias.scope !147 - %811 = add i32 %804, -1 - %812 = sext i32 %811 to i64 - %813 = getelementptr inbounds float, float* %11, i64 %812 - %814 = bitcast float* %813 to <8 x float>* - %wide.masked.load305.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %814, i32 4, <8 x i1> %802, <8 x float> undef), !tbaa !12, !alias.scope !148 - %815 = fsub <8 x float> %wide.masked.load304.3, %wide.masked.load305.3 - %816 = fpext <8 x float> %815 to <8 x double> - %817 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %816, <8 x double> , <8 x double> %808) - %818 = fptrunc <8 x double> %817 to <8 x float> - %819 = bitcast float* %806 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %818, <8 x float>* %819, i32 4, <8 x i1> %802), !tbaa !12, !alias.scope !141, !noalias !144, !llvm.access.group !24 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.i.i.us.6.1, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ], [ %948, %if.end.i.i.us.6.1 ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %19, %conv.i.i.us.6 - %cmp7.i.i.us.6 = icmp sgt i32 %conv.i.i.us.6, 0 - %or.cond.i.i.us.6 = and i1 %cmp4.i.i.us.6, %cmp7.i.i.us.6 - br i1 %or.cond.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %add.i.i.us.6 = add i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.6 - %820 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %conv9.i.i.us.6 = fpext float %820 to double - %arrayidx13.i.i.us.6 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.6 - %821 = load float, float* %arrayidx13.i.i.us.6, align 4, !tbaa !12 - %add15.i.i.us.6 = add i32 %add.i.i.us.6, -1 - %idxprom16.i.i.us.6 = sext i32 %add15.i.i.us.6 to i64 - %arrayidx17.i.i.us.6 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.6 - %822 = load float, float* %arrayidx17.i.i.us.6, align 4, !tbaa !12 - %sub18.i.i.us.6 = fsub float %821, %822 - %conv19.i.i.us.6 = fpext float %sub18.i.i.us.6 to double - %823 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.6, double -5.000000e-01, double %conv9.i.i.us.6) #5 - %conv21.i.i.us.6 = fptrunc double %823 to float - store float %conv21.i.i.us.6, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %824 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %824, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %19, %conv.i.i.us.6.1 - %cmp7.i.i.us.6.1 = icmp sgt i32 %conv.i.i.us.6.1, 0 - %or.cond.i.i.us.6.1 = and i1 %cmp4.i.i.us.6.1, %cmp7.i.i.us.6.1 - br i1 %or.cond.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.i.i.us.6.1 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph292, %pregion_for_end.i.i.5 - %825 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.7 = or i32 %825, 7 - %cmp.i.i.7 = icmp sgt i32 %15, %conv2.i.i.7 - %mul.i.i.7 = mul nsw i32 %19, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck316, label %pregion_for_end.i.i.7 - -vector.scevcheck316: ; preds = %pregion_for_end.i.i.6 - %826 = mul i32 %19, %conv2.i.i.7 - %827 = trunc i64 %2 to i32 - %828 = shl i32 %827, 5 - %829 = add i32 %826, %828 - %830 = icmp sgt i32 %829, 2147483616 - %831 = add i32 %826, %828 - %832 = add i32 %831, -1 - %833 = add i32 %831, 30 - %834 = icmp slt i32 %833, %832 - %835 = or i1 %830, %834 - br i1 %835, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.memcheck338 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.memcheck338, %vector.scevcheck316 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.memcheck338: ; preds = %vector.scevcheck316 - %836 = mul i32 %19, %conv2.i.i.7 - %837 = trunc i64 %2 to i32 - %838 = shl i32 %837, 5 - %839 = add i32 %836, %838 - %840 = sext i32 %839 to i64 - %scevgep318 = getelementptr float, float* %7, i64 %840 - %841 = add nsw i64 %840, 32 - %scevgep320 = getelementptr float, float* %7, i64 %841 - %842 = add i32 %836, %838 - %843 = add i32 %842, -1 - %844 = sext i32 %843 to i64 - %scevgep322 = getelementptr float, float* %11, i64 %844 - %845 = add nsw i64 %844, 32 - %scevgep324 = getelementptr float, float* %11, i64 %845 - %scevgep326 = getelementptr float, float* %11, i64 %840 - %scevgep328 = getelementptr float, float* %11, i64 %841 - %bound0330 = icmp ult float* %scevgep318, %scevgep324 - %bound1331 = icmp ult float* %scevgep322, %scevgep320 - %found.conflict332 = and i1 %bound0330, %bound1331 - %bound0333 = icmp ult float* %scevgep318, %scevgep328 - %bound1334 = icmp ult float* %scevgep326, %scevgep320 - %found.conflict335 = and i1 %bound0333, %bound1334 - %conflict.rdx336 = or i1 %found.conflict332, %found.conflict335 - br i1 %conflict.rdx336, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph339 - -vector.ph339: ; preds = %vector.memcheck338 - %broadcast.splatinsert346 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat347 = shufflevector <8 x i64> %broadcast.splatinsert346, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert348 = insertelement <8 x i32> undef, i32 %19, i32 0 - %broadcast.splat349 = shufflevector <8 x i32> %broadcast.splatinsert348, <8 x i32> undef, <8 x i32> zeroinitializer - %846 = trunc <8 x i64> %broadcast.splat347 to <8 x i32> - %847 = or <8 x i32> %846, - %848 = icmp sgt <8 x i32> %broadcast.splat349, %847 - %849 = icmp sgt <8 x i32> %847, zeroinitializer - %850 = and <8 x i1> %848, %849 - %851 = extractelement <8 x i32> %847, i32 0 - %852 = add i32 %mul.i.i.7, %851 - %853 = sext i32 %852 to i64 - %854 = getelementptr inbounds float, float* %7, i64 %853 - %855 = bitcast float* %854 to <8 x float>* - %wide.masked.load350 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %855, i32 4, <8 x i1> %850, <8 x float> undef), !tbaa !12, !alias.scope !149, !noalias !152 - %856 = fpext <8 x float> %wide.masked.load350 to <8 x double> - %857 = getelementptr inbounds float, float* %11, i64 %853 - %858 = bitcast float* %857 to <8 x float>* - %wide.masked.load351 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %858, i32 4, <8 x i1> %850, <8 x float> undef), !tbaa !12, !alias.scope !155 - %859 = add i32 %852, -1 - %860 = sext i32 %859 to i64 - %861 = getelementptr inbounds float, float* %11, i64 %860 - %862 = bitcast float* %861 to <8 x float>* - %wide.masked.load352 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %862, i32 4, <8 x i1> %850, <8 x float> undef), !tbaa !12, !alias.scope !156 - %863 = fsub <8 x float> %wide.masked.load351, %wide.masked.load352 - %864 = fpext <8 x float> %863 to <8 x double> - %865 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %864, <8 x double> , <8 x double> %856) - %866 = fptrunc <8 x double> %865 to <8 x float> - %867 = bitcast float* %854 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %866, <8 x float>* %867, i32 4, <8 x i1> %850), !tbaa !12, !alias.scope !149, !noalias !152, !llvm.access.group !24 - %868 = trunc <8 x i64> %broadcast.splat347 to <8 x i32> - %869 = or <8 x i32> %868, - %870 = icmp sgt <8 x i32> %broadcast.splat349, %869 - %871 = icmp sgt <8 x i32> %869, zeroinitializer - %872 = and <8 x i1> %870, %871 - %873 = extractelement <8 x i32> %869, i32 0 - %874 = add i32 %mul.i.i.7, %873 - %875 = sext i32 %874 to i64 - %876 = getelementptr inbounds float, float* %7, i64 %875 - %877 = bitcast float* %876 to <8 x float>* - %wide.masked.load350.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %877, i32 4, <8 x i1> %872, <8 x float> undef), !tbaa !12, !alias.scope !149, !noalias !152 - %878 = fpext <8 x float> %wide.masked.load350.1 to <8 x double> - %879 = getelementptr inbounds float, float* %11, i64 %875 - %880 = bitcast float* %879 to <8 x float>* - %wide.masked.load351.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %880, i32 4, <8 x i1> %872, <8 x float> undef), !tbaa !12, !alias.scope !155 - %881 = add i32 %874, -1 - %882 = sext i32 %881 to i64 - %883 = getelementptr inbounds float, float* %11, i64 %882 - %884 = bitcast float* %883 to <8 x float>* - %wide.masked.load352.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %884, i32 4, <8 x i1> %872, <8 x float> undef), !tbaa !12, !alias.scope !156 - %885 = fsub <8 x float> %wide.masked.load351.1, %wide.masked.load352.1 - %886 = fpext <8 x float> %885 to <8 x double> - %887 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %886, <8 x double> , <8 x double> %878) - %888 = fptrunc <8 x double> %887 to <8 x float> - %889 = bitcast float* %876 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %888, <8 x float>* %889, i32 4, <8 x i1> %872), !tbaa !12, !alias.scope !149, !noalias !152, !llvm.access.group !24 - %890 = trunc <8 x i64> %broadcast.splat347 to <8 x i32> - %891 = or <8 x i32> %890, - %892 = icmp sgt <8 x i32> %broadcast.splat349, %891 - %893 = icmp sgt <8 x i32> %891, zeroinitializer - %894 = and <8 x i1> %892, %893 - %895 = extractelement <8 x i32> %891, i32 0 - %896 = add i32 %mul.i.i.7, %895 - %897 = sext i32 %896 to i64 - %898 = getelementptr inbounds float, float* %7, i64 %897 - %899 = bitcast float* %898 to <8 x float>* - %wide.masked.load350.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %899, i32 4, <8 x i1> %894, <8 x float> undef), !tbaa !12, !alias.scope !149, !noalias !152 - %900 = fpext <8 x float> %wide.masked.load350.2 to <8 x double> - %901 = getelementptr inbounds float, float* %11, i64 %897 - %902 = bitcast float* %901 to <8 x float>* - %wide.masked.load351.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %902, i32 4, <8 x i1> %894, <8 x float> undef), !tbaa !12, !alias.scope !155 - %903 = add i32 %896, -1 - %904 = sext i32 %903 to i64 - %905 = getelementptr inbounds float, float* %11, i64 %904 - %906 = bitcast float* %905 to <8 x float>* - %wide.masked.load352.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %906, i32 4, <8 x i1> %894, <8 x float> undef), !tbaa !12, !alias.scope !156 - %907 = fsub <8 x float> %wide.masked.load351.2, %wide.masked.load352.2 - %908 = fpext <8 x float> %907 to <8 x double> - %909 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %908, <8 x double> , <8 x double> %900) - %910 = fptrunc <8 x double> %909 to <8 x float> - %911 = bitcast float* %898 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %910, <8 x float>* %911, i32 4, <8 x i1> %894), !tbaa !12, !alias.scope !149, !noalias !152, !llvm.access.group !24 - %912 = trunc <8 x i64> %broadcast.splat347 to <8 x i32> - %913 = or <8 x i32> %912, - %914 = icmp sgt <8 x i32> %broadcast.splat349, %913 - %915 = icmp sgt <8 x i32> %913, zeroinitializer - %916 = and <8 x i1> %914, %915 - %917 = extractelement <8 x i32> %913, i32 0 - %918 = add i32 %mul.i.i.7, %917 - %919 = sext i32 %918 to i64 - %920 = getelementptr inbounds float, float* %7, i64 %919 - %921 = bitcast float* %920 to <8 x float>* - %wide.masked.load350.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %921, i32 4, <8 x i1> %916, <8 x float> undef), !tbaa !12, !alias.scope !149, !noalias !152 - %922 = fpext <8 x float> %wide.masked.load350.3 to <8 x double> - %923 = getelementptr inbounds float, float* %11, i64 %919 - %924 = bitcast float* %923 to <8 x float>* - %wide.masked.load351.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %924, i32 4, <8 x i1> %916, <8 x float> undef), !tbaa !12, !alias.scope !155 - %925 = add i32 %918, -1 - %926 = sext i32 %925 to i64 - %927 = getelementptr inbounds float, float* %11, i64 %926 - %928 = bitcast float* %927 to <8 x float>* - %wide.masked.load352.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %928, i32 4, <8 x i1> %916, <8 x float> undef), !tbaa !12, !alias.scope !156 - %929 = fsub <8 x float> %wide.masked.load351.3, %wide.masked.load352.3 - %930 = fpext <8 x float> %929 to <8 x double> - %931 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %930, <8 x double> , <8 x double> %922) - %932 = fptrunc <8 x double> %931 to <8 x float> - %933 = bitcast float* %920 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %932, <8 x float>* %933, i32 4, <8 x i1> %916), !tbaa !12, !alias.scope !149, !noalias !152, !llvm.access.group !24 - br label %pregion_for_end.i.i.7 - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.i.i.us.7.1, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ], [ %943, %if.end.i.i.us.7.1 ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %19, %conv.i.i.us.7 - %cmp7.i.i.us.7 = icmp sgt i32 %conv.i.i.us.7, 0 - %or.cond.i.i.us.7 = and i1 %cmp4.i.i.us.7, %cmp7.i.i.us.7 - br i1 %or.cond.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %add.i.i.us.7 = add i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.7 - %934 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %conv9.i.i.us.7 = fpext float %934 to double - %arrayidx13.i.i.us.7 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.7 - %935 = load float, float* %arrayidx13.i.i.us.7, align 4, !tbaa !12 - %add15.i.i.us.7 = add i32 %add.i.i.us.7, -1 - %idxprom16.i.i.us.7 = sext i32 %add15.i.i.us.7 to i64 - %arrayidx17.i.i.us.7 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.7 - %936 = load float, float* %arrayidx17.i.i.us.7, align 4, !tbaa !12 - %sub18.i.i.us.7 = fsub float %935, %936 - %conv19.i.i.us.7 = fpext float %sub18.i.i.us.7 to double - %937 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.7, double -5.000000e-01, double %conv9.i.i.us.7) #5 - %conv21.i.i.us.7 = fptrunc double %937 to float - store float %conv21.i.i.us.7, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %938 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %938, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %19, %conv.i.i.us.7.1 - %cmp7.i.i.us.7.1 = icmp sgt i32 %conv.i.i.us.7.1, 0 - %or.cond.i.i.us.7.1 = and i1 %cmp4.i.i.us.7.1, %cmp7.i.i.us.7.1 - br i1 %or.cond.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.i.i.us.7.1 - -pregion_for_end.i.i.7.loopexit: ; preds = %if.end.i.i.us.7.1 - br label %pregion_for_end.i.i.7 - -pregion_for_end.i.i.7: ; preds = %pregion_for_end.i.i.7.loopexit, %vector.ph339, %pregion_for_end.i.i.6 - ret void - -if.then.i.i.us.7.1: ; preds = %if.end.i.i.us.7 - %add.i.i.us.7.1 = add i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.7.1 - %939 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %conv9.i.i.us.7.1 = fpext float %939 to double - %arrayidx13.i.i.us.7.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.7.1 - %940 = load float, float* %arrayidx13.i.i.us.7.1, align 4, !tbaa !12 - %add15.i.i.us.7.1 = add i32 %add.i.i.us.7.1, -1 - %idxprom16.i.i.us.7.1 = sext i32 %add15.i.i.us.7.1 to i64 - %arrayidx17.i.i.us.7.1 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.7.1 - %941 = load float, float* %arrayidx17.i.i.us.7.1, align 4, !tbaa !12 - %sub18.i.i.us.7.1 = fsub float %940, %941 - %conv19.i.i.us.7.1 = fpext float %sub18.i.i.us.7.1 to double - %942 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.7.1, double -5.000000e-01, double %conv9.i.i.us.7.1) #5 - %conv21.i.i.us.7.1 = fptrunc double %942 to float - store float %conv21.i.i.us.7.1, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.7.1 - -if.end.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.i.i.us.7 - %943 = add nuw nsw i64 %_local_id_x.i.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %943, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.i.7.loopexit, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !157 - -if.then.i.i.us.6.1: ; preds = %if.end.i.i.us.6 - %add.i.i.us.6.1 = add i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.6.1 - %944 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %conv9.i.i.us.6.1 = fpext float %944 to double - %arrayidx13.i.i.us.6.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.6.1 - %945 = load float, float* %arrayidx13.i.i.us.6.1, align 4, !tbaa !12 - %add15.i.i.us.6.1 = add nsw i32 %add.i.i.us.6.1, -1 - %idxprom16.i.i.us.6.1 = sext i32 %add15.i.i.us.6.1 to i64 - %arrayidx17.i.i.us.6.1 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.6.1 - %946 = load float, float* %arrayidx17.i.i.us.6.1, align 4, !tbaa !12 - %sub18.i.i.us.6.1 = fsub float %945, %946 - %conv19.i.i.us.6.1 = fpext float %sub18.i.i.us.6.1 to double - %947 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.6.1, double -5.000000e-01, double %conv9.i.i.us.6.1) #5 - %conv21.i.i.us.6.1 = fptrunc double %947 to float - store float %conv21.i.i.us.6.1, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.6.1 - -if.end.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.i.i.us.6 - %948 = add nuw nsw i64 %_local_id_x.i.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %948, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !158 - -if.then.i.i.us.5.1: ; preds = %if.end.i.i.us.5 - %add.i.i.us.5.1 = add i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.5.1 - %949 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %conv9.i.i.us.5.1 = fpext float %949 to double - %arrayidx13.i.i.us.5.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.5.1 - %950 = load float, float* %arrayidx13.i.i.us.5.1, align 4, !tbaa !12 - %add15.i.i.us.5.1 = add i32 %add.i.i.us.5.1, -1 - %idxprom16.i.i.us.5.1 = sext i32 %add15.i.i.us.5.1 to i64 - %arrayidx17.i.i.us.5.1 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.5.1 - %951 = load float, float* %arrayidx17.i.i.us.5.1, align 4, !tbaa !12 - %sub18.i.i.us.5.1 = fsub float %950, %951 - %conv19.i.i.us.5.1 = fpext float %sub18.i.i.us.5.1 to double - %952 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.5.1, double -5.000000e-01, double %conv9.i.i.us.5.1) #5 - %conv21.i.i.us.5.1 = fptrunc double %952 to float - store float %conv21.i.i.us.5.1, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.5.1 - -if.end.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.i.i.us.5 - %953 = add nuw nsw i64 %_local_id_x.i.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %953, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !159 - -if.then.i.i.us.4.1: ; preds = %if.end.i.i.us.4 - %add.i.i.us.4.1 = add i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.4.1 - %954 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %conv9.i.i.us.4.1 = fpext float %954 to double - %arrayidx13.i.i.us.4.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.4.1 - %955 = load float, float* %arrayidx13.i.i.us.4.1, align 4, !tbaa !12 - %add15.i.i.us.4.1 = add nsw i32 %add.i.i.us.4.1, -1 - %idxprom16.i.i.us.4.1 = sext i32 %add15.i.i.us.4.1 to i64 - %arrayidx17.i.i.us.4.1 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.4.1 - %956 = load float, float* %arrayidx17.i.i.us.4.1, align 4, !tbaa !12 - %sub18.i.i.us.4.1 = fsub float %955, %956 - %conv19.i.i.us.4.1 = fpext float %sub18.i.i.us.4.1 to double - %957 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.4.1, double -5.000000e-01, double %conv9.i.i.us.4.1) #5 - %conv21.i.i.us.4.1 = fptrunc double %957 to float - store float %conv21.i.i.us.4.1, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.4.1 - -if.end.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.i.i.us.4 - %958 = add nuw nsw i64 %_local_id_x.i.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %958, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !160 - -if.then.i.i.us.3.1: ; preds = %if.end.i.i.us.3 - %add.i.i.us.3.1 = add i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.3.1 - %959 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %conv9.i.i.us.3.1 = fpext float %959 to double - %arrayidx13.i.i.us.3.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.3.1 - %960 = load float, float* %arrayidx13.i.i.us.3.1, align 4, !tbaa !12 - %add15.i.i.us.3.1 = add i32 %add.i.i.us.3.1, -1 - %idxprom16.i.i.us.3.1 = sext i32 %add15.i.i.us.3.1 to i64 - %arrayidx17.i.i.us.3.1 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.3.1 - %961 = load float, float* %arrayidx17.i.i.us.3.1, align 4, !tbaa !12 - %sub18.i.i.us.3.1 = fsub float %960, %961 - %conv19.i.i.us.3.1 = fpext float %sub18.i.i.us.3.1 to double - %962 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.3.1, double -5.000000e-01, double %conv9.i.i.us.3.1) #5 - %conv21.i.i.us.3.1 = fptrunc double %962 to float - store float %conv21.i.i.us.3.1, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.3.1 - -if.end.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.i.i.us.3 - %963 = add nuw nsw i64 %_local_id_x.i.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %963, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !161 - -if.then.i.i.us.2.1: ; preds = %if.end.i.i.us.2 - %add.i.i.us.2.1 = add i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.2.1 - %964 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %conv9.i.i.us.2.1 = fpext float %964 to double - %arrayidx13.i.i.us.2.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.2.1 - %965 = load float, float* %arrayidx13.i.i.us.2.1, align 4, !tbaa !12 - %add15.i.i.us.2.1 = add nsw i32 %add.i.i.us.2.1, -1 - %idxprom16.i.i.us.2.1 = sext i32 %add15.i.i.us.2.1 to i64 - %arrayidx17.i.i.us.2.1 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.2.1 - %966 = load float, float* %arrayidx17.i.i.us.2.1, align 4, !tbaa !12 - %sub18.i.i.us.2.1 = fsub float %965, %966 - %conv19.i.i.us.2.1 = fpext float %sub18.i.i.us.2.1 to double - %967 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.2.1, double -5.000000e-01, double %conv9.i.i.us.2.1) #5 - %conv21.i.i.us.2.1 = fptrunc double %967 to float - store float %conv21.i.i.us.2.1, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.2.1 - -if.end.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.i.i.us.2 - %968 = add nuw nsw i64 %_local_id_x.i.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %968, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !162 - -if.then.i.i.us.1.1: ; preds = %if.end.i.i.us.1 - %add.i.i.us.1.1 = add i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1.1 - %969 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %conv9.i.i.us.1.1 = fpext float %969 to double - %arrayidx13.i.i.us.1.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.1.1 - %970 = load float, float* %arrayidx13.i.i.us.1.1, align 4, !tbaa !12 - %add15.i.i.us.1.1 = add i32 %add.i.i.us.1.1, -1 - %idxprom16.i.i.us.1.1 = sext i32 %add15.i.i.us.1.1 to i64 - %arrayidx17.i.i.us.1.1 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.1.1 - %971 = load float, float* %arrayidx17.i.i.us.1.1, align 4, !tbaa !12 - %sub18.i.i.us.1.1 = fsub float %970, %971 - %conv19.i.i.us.1.1 = fpext float %sub18.i.i.us.1.1 to double - %972 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.1.1, double -5.000000e-01, double %conv9.i.i.us.1.1) #5 - %conv21.i.i.us.1.1 = fptrunc double %972 to float - store float %conv21.i.i.us.1.1, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.1.1 - -if.end.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.i.i.us.1 - %973 = add nuw nsw i64 %_local_id_x.i.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %973, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !163 - -if.then.i.i.us.1379: ; preds = %if.end.i.i.us - %add.i.i.us.1368 = add i32 %mul.i.i, %conv.i.i.us.1363 - %idxprom.i.i.us.1369 = sext i32 %add.i.i.us.1368 to i64 - %arrayidx.i.i.us.1370 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1369 - %974 = load float, float* %arrayidx.i.i.us.1370, align 4, !tbaa !12 - %conv9.i.i.us.1371 = fpext float %974 to double - %arrayidx13.i.i.us.1372 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.1369 - %975 = load float, float* %arrayidx13.i.i.us.1372, align 4, !tbaa !12 - %add15.i.i.us.1373 = add nsw i32 %add.i.i.us.1368, -1 - %idxprom16.i.i.us.1374 = sext i32 %add15.i.i.us.1373 to i64 - %arrayidx17.i.i.us.1375 = getelementptr inbounds float, float* %11, i64 %idxprom16.i.i.us.1374 - %976 = load float, float* %arrayidx17.i.i.us.1375, align 4, !tbaa !12 - %sub18.i.i.us.1376 = fsub float %975, %976 - %conv19.i.i.us.1377 = fpext float %sub18.i.i.us.1376 to double - %977 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.1377, double -5.000000e-01, double %conv9.i.i.us.1371) #5 - %conv21.i.i.us.1378 = fptrunc double %977 to float - store float %conv21.i.i.us.1378, float* %arrayidx.i.i.us.1370, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.1380 - -if.end.i.i.us.1380: ; preds = %if.then.i.i.us.1379, %if.end.i.i.us - %978 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %978, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !164 -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_fdtd_kernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { -pregion_for_entry.pregion_for_init.i.i: - %5 = bitcast i8** %0 to float** - %6 = load float*, float** %5, align 8 - %7 = getelementptr i8*, i8** %0, i64 2 - %8 = bitcast i8** %7 to float** - %9 = load float*, float** %8, align 8 - %10 = getelementptr i8*, i8** %0, i64 3 - %11 = bitcast i8** %10 to i32** - %12 = load i32*, i32** %11, align 8 - %13 = load i32, i32* %12, align 4 - %14 = getelementptr i8*, i8** %0, i64 4 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %conv2.i.i = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %13, %conv2.i.i - %mul.i.i = mul nsw i32 %17, %conv2.i.i - br i1 %cmp.i.i, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %18 = trunc i64 %3 to i32 - %19 = mul i32 %17, %18 - %20 = shl i32 %19, 3 - %21 = trunc i64 %2 to i32 - %22 = shl i32 %21, 5 - %23 = add i32 %20, %22 - %24 = icmp sgt i32 %23, 2147483616 - %25 = add i32 %20, %22 - %26 = add i32 %25, -1 - %27 = add i32 %25, 30 - %28 = icmp slt i32 %27, %26 - %29 = or i1 %24, %28 - br i1 %29, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %30 = trunc i64 %3 to i32 - %31 = mul i32 %17, %30 - %32 = shl i32 %31, 3 - %33 = trunc i64 %2 to i32 - %34 = shl i32 %33, 5 - %35 = add i32 %32, %34 - %36 = sext i32 %35 to i64 - %scevgep = getelementptr float, float* %6, i64 %36 - %37 = add nsw i64 %36, 32 - %scevgep7 = getelementptr float, float* %6, i64 %37 - %38 = add i32 %32, %34 - %39 = add i32 %38, -8 - %40 = sext i32 %39 to i64 - %41 = or i64 %40, 7 - %scevgep9 = getelementptr float, float* %9, i64 %41 - %42 = add nsw i64 %40, 39 - %scevgep11 = getelementptr float, float* %9, i64 %42 - %scevgep13 = getelementptr float, float* %9, i64 %36 - %scevgep15 = getelementptr float, float* %9, i64 %37 - %bound0 = icmp ult float* %scevgep, %scevgep11 - %bound1 = icmp ult float* %scevgep9, %scevgep7 - %found.conflict = and i1 %bound0, %bound1 - %bound017 = icmp ult float* %scevgep, %scevgep15 - %bound118 = icmp ult float* %scevgep13, %scevgep7 - %found.conflict19 = and i1 %bound017, %bound118 - %conflict.rdx = or i1 %found.conflict, %found.conflict19 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert20 = insertelement <8 x i32> undef, i32 %17, i32 0 - %broadcast.splat21 = shufflevector <8 x i32> %broadcast.splatinsert20, <8 x i32> undef, <8 x i32> zeroinitializer - %43 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %44 = or <8 x i32> %43, - %45 = icmp sgt <8 x i32> %broadcast.splat21, %44 - %46 = icmp sgt <8 x i32> %44, zeroinitializer - %47 = and <8 x i1> %45, %46 - %48 = extractelement <8 x i32> %44, i32 0 - %49 = add i32 %mul.i.i, %48 - %50 = sext i32 %49 to i64 - %51 = getelementptr inbounds float, float* %6, i64 %50 - %52 = bitcast float* %51 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %52, i32 4, <8 x i1> %47, <8 x float> undef), !tbaa !12, !alias.scope !165, !noalias !168 - %53 = fpext <8 x float> %wide.masked.load to <8 x double> - %54 = getelementptr inbounds float, float* %9, i64 %50 - %55 = bitcast float* %54 to <8 x float>* - %wide.masked.load22 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %55, i32 4, <8 x i1> %47, <8 x float> undef), !tbaa !12, !alias.scope !171 - %56 = add i32 %49, -1 - %57 = sext i32 %56 to i64 - %58 = getelementptr inbounds float, float* %9, i64 %57 - %59 = bitcast float* %58 to <8 x float>* - %wide.masked.load23 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %59, i32 4, <8 x i1> %47, <8 x float> undef), !tbaa !12, !alias.scope !172 - %60 = fsub <8 x float> %wide.masked.load22, %wide.masked.load23 - %61 = fpext <8 x float> %60 to <8 x double> - %62 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %61, <8 x double> , <8 x double> %53) - %63 = fptrunc <8 x double> %62 to <8 x float> - %64 = bitcast float* %51 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %63, <8 x float>* %64, i32 4, <8 x i1> %47), !tbaa !12, !alias.scope !165, !noalias !168, !llvm.access.group !24 - %65 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %66 = or <8 x i32> %65, - %67 = icmp sgt <8 x i32> %broadcast.splat21, %66 - %68 = icmp sgt <8 x i32> %66, zeroinitializer - %69 = and <8 x i1> %67, %68 - %70 = extractelement <8 x i32> %66, i32 0 - %71 = add i32 %mul.i.i, %70 - %72 = sext i32 %71 to i64 - %73 = getelementptr inbounds float, float* %6, i64 %72 - %74 = bitcast float* %73 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %74, i32 4, <8 x i1> %69, <8 x float> undef), !tbaa !12, !alias.scope !165, !noalias !168 - %75 = fpext <8 x float> %wide.masked.load.1 to <8 x double> - %76 = getelementptr inbounds float, float* %9, i64 %72 - %77 = bitcast float* %76 to <8 x float>* - %wide.masked.load22.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %77, i32 4, <8 x i1> %69, <8 x float> undef), !tbaa !12, !alias.scope !171 - %78 = add i32 %71, -1 - %79 = sext i32 %78 to i64 - %80 = getelementptr inbounds float, float* %9, i64 %79 - %81 = bitcast float* %80 to <8 x float>* - %wide.masked.load23.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %81, i32 4, <8 x i1> %69, <8 x float> undef), !tbaa !12, !alias.scope !172 - %82 = fsub <8 x float> %wide.masked.load22.1, %wide.masked.load23.1 - %83 = fpext <8 x float> %82 to <8 x double> - %84 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %83, <8 x double> , <8 x double> %75) - %85 = fptrunc <8 x double> %84 to <8 x float> - %86 = bitcast float* %73 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %85, <8 x float>* %86, i32 4, <8 x i1> %69), !tbaa !12, !alias.scope !165, !noalias !168, !llvm.access.group !24 - %87 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %88 = or <8 x i32> %87, - %89 = icmp sgt <8 x i32> %broadcast.splat21, %88 - %90 = icmp sgt <8 x i32> %88, zeroinitializer - %91 = and <8 x i1> %89, %90 - %92 = extractelement <8 x i32> %88, i32 0 - %93 = add i32 %mul.i.i, %92 - %94 = sext i32 %93 to i64 - %95 = getelementptr inbounds float, float* %6, i64 %94 - %96 = bitcast float* %95 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %96, i32 4, <8 x i1> %91, <8 x float> undef), !tbaa !12, !alias.scope !165, !noalias !168 - %97 = fpext <8 x float> %wide.masked.load.2 to <8 x double> - %98 = getelementptr inbounds float, float* %9, i64 %94 - %99 = bitcast float* %98 to <8 x float>* - %wide.masked.load22.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %99, i32 4, <8 x i1> %91, <8 x float> undef), !tbaa !12, !alias.scope !171 - %100 = add i32 %93, -1 - %101 = sext i32 %100 to i64 - %102 = getelementptr inbounds float, float* %9, i64 %101 - %103 = bitcast float* %102 to <8 x float>* - %wide.masked.load23.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %103, i32 4, <8 x i1> %91, <8 x float> undef), !tbaa !12, !alias.scope !172 - %104 = fsub <8 x float> %wide.masked.load22.2, %wide.masked.load23.2 - %105 = fpext <8 x float> %104 to <8 x double> - %106 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %105, <8 x double> , <8 x double> %97) - %107 = fptrunc <8 x double> %106 to <8 x float> - %108 = bitcast float* %95 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %107, <8 x float>* %108, i32 4, <8 x i1> %91), !tbaa !12, !alias.scope !165, !noalias !168, !llvm.access.group !24 - %109 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %110 = or <8 x i32> %109, - %111 = icmp sgt <8 x i32> %broadcast.splat21, %110 - %112 = icmp sgt <8 x i32> %110, zeroinitializer - %113 = and <8 x i1> %111, %112 - %114 = extractelement <8 x i32> %110, i32 0 - %115 = add i32 %mul.i.i, %114 - %116 = sext i32 %115 to i64 - %117 = getelementptr inbounds float, float* %6, i64 %116 - %118 = bitcast float* %117 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %118, i32 4, <8 x i1> %113, <8 x float> undef), !tbaa !12, !alias.scope !165, !noalias !168 - %119 = fpext <8 x float> %wide.masked.load.3 to <8 x double> - %120 = getelementptr inbounds float, float* %9, i64 %116 - %121 = bitcast float* %120 to <8 x float>* - %wide.masked.load22.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %121, i32 4, <8 x i1> %113, <8 x float> undef), !tbaa !12, !alias.scope !171 - %122 = add i32 %115, -1 - %123 = sext i32 %122 to i64 - %124 = getelementptr inbounds float, float* %9, i64 %123 - %125 = bitcast float* %124 to <8 x float>* - %wide.masked.load23.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %125, i32 4, <8 x i1> %113, <8 x float> undef), !tbaa !12, !alias.scope !172 - %126 = fsub <8 x float> %wide.masked.load22.3, %wide.masked.load23.3 - %127 = fpext <8 x float> %126 to <8 x double> - %128 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %127, <8 x double> , <8 x double> %119) - %129 = fptrunc <8 x double> %128 to <8 x float> - %130 = bitcast float* %117 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %129, <8 x float>* %130, i32 4, <8 x i1> %113), !tbaa !12, !alias.scope !165, !noalias !168, !llvm.access.group !24 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us.1380, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.preheader ], [ %976, %if.end.i.i.us.1380 ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %17, %conv.i.i.us - %cmp7.i.i.us = icmp sgt i32 %conv.i.i.us, 0 - %or.cond.i.i.us = and i1 %cmp4.i.i.us, %cmp7.i.i.us - br i1 %or.cond.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add.i.i.us = add i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us - %131 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %conv9.i.i.us = fpext float %131 to double - %arrayidx13.i.i.us = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us - %132 = load float, float* %arrayidx13.i.i.us, align 4, !tbaa !12 - %add15.i.i.us = add i32 %add.i.i.us, -1 - %idxprom16.i.i.us = sext i32 %add15.i.i.us to i64 - %arrayidx17.i.i.us = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us - %133 = load float, float* %arrayidx17.i.i.us, align 4, !tbaa !12 - %sub18.i.i.us = fsub float %132, %133 - %conv19.i.i.us = fpext float %sub18.i.i.us to double - %134 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us, double -5.000000e-01, double %conv9.i.i.us) #5 - %conv21.i.i.us = fptrunc double %134 to float - store float %conv21.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %135 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1362 = add nuw nsw i64 %135, %mul.i.i.i - %conv.i.i.us.1363 = trunc i64 %add1.i.i.i.us.1362 to i32 - %cmp4.i.i.us.1364 = icmp sgt i32 %17, %conv.i.i.us.1363 - %cmp7.i.i.us.1365 = icmp sgt i32 %conv.i.i.us.1363, 0 - %or.cond.i.i.us.1366 = and i1 %cmp4.i.i.us.1364, %cmp7.i.i.us.1365 - br i1 %or.cond.i.i.us.1366, label %if.then.i.i.us.1379, label %if.end.i.i.us.1380 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us.1380 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i - %136 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.1 = or i32 %136, 1 - %cmp.i.i.1 = icmp sgt i32 %13, %conv2.i.i.1 - %mul.i.i.1 = mul nsw i32 %17, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck34, label %pregion_for_end.i.i.1 - -vector.scevcheck34: ; preds = %pregion_for_end.i.i - %137 = mul i32 %17, %conv2.i.i.1 - %138 = trunc i64 %2 to i32 - %139 = shl i32 %138, 5 - %140 = add i32 %137, %139 - %141 = icmp sgt i32 %140, 2147483616 - %142 = add i32 %137, %139 - %143 = add i32 %142, -1 - %144 = add i32 %142, 30 - %145 = icmp slt i32 %144, %143 - %146 = or i1 %141, %145 - br i1 %146, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.memcheck56 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.memcheck56, %vector.scevcheck34 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.memcheck56: ; preds = %vector.scevcheck34 - %147 = mul i32 %17, %conv2.i.i.1 - %148 = trunc i64 %2 to i32 - %149 = shl i32 %148, 5 - %150 = add i32 %147, %149 - %151 = sext i32 %150 to i64 - %scevgep36 = getelementptr float, float* %6, i64 %151 - %152 = add nsw i64 %151, 32 - %scevgep38 = getelementptr float, float* %6, i64 %152 - %153 = add i32 %147, %149 - %154 = add i32 %153, -1 - %155 = sext i32 %154 to i64 - %scevgep40 = getelementptr float, float* %9, i64 %155 - %156 = add nsw i64 %155, 32 - %scevgep42 = getelementptr float, float* %9, i64 %156 - %scevgep44 = getelementptr float, float* %9, i64 %151 - %scevgep46 = getelementptr float, float* %9, i64 %152 - %bound048 = icmp ult float* %scevgep36, %scevgep42 - %bound149 = icmp ult float* %scevgep40, %scevgep38 - %found.conflict50 = and i1 %bound048, %bound149 - %bound051 = icmp ult float* %scevgep36, %scevgep46 - %bound152 = icmp ult float* %scevgep44, %scevgep38 - %found.conflict53 = and i1 %bound051, %bound152 - %conflict.rdx54 = or i1 %found.conflict50, %found.conflict53 - br i1 %conflict.rdx54, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph57 - -vector.ph57: ; preds = %vector.memcheck56 - %broadcast.splatinsert64 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat65 = shufflevector <8 x i64> %broadcast.splatinsert64, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert66 = insertelement <8 x i32> undef, i32 %17, i32 0 - %broadcast.splat67 = shufflevector <8 x i32> %broadcast.splatinsert66, <8 x i32> undef, <8 x i32> zeroinitializer - %157 = trunc <8 x i64> %broadcast.splat65 to <8 x i32> - %158 = or <8 x i32> %157, - %159 = icmp sgt <8 x i32> %broadcast.splat67, %158 - %160 = icmp sgt <8 x i32> %158, zeroinitializer - %161 = and <8 x i1> %159, %160 - %162 = extractelement <8 x i32> %158, i32 0 - %163 = add i32 %mul.i.i.1, %162 - %164 = sext i32 %163 to i64 - %165 = getelementptr inbounds float, float* %6, i64 %164 - %166 = bitcast float* %165 to <8 x float>* - %wide.masked.load68 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %166, i32 4, <8 x i1> %161, <8 x float> undef), !tbaa !12, !alias.scope !173, !noalias !176 - %167 = fpext <8 x float> %wide.masked.load68 to <8 x double> - %168 = getelementptr inbounds float, float* %9, i64 %164 - %169 = bitcast float* %168 to <8 x float>* - %wide.masked.load69 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %169, i32 4, <8 x i1> %161, <8 x float> undef), !tbaa !12, !alias.scope !179 - %170 = add i32 %163, -1 - %171 = sext i32 %170 to i64 - %172 = getelementptr inbounds float, float* %9, i64 %171 - %173 = bitcast float* %172 to <8 x float>* - %wide.masked.load70 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %173, i32 4, <8 x i1> %161, <8 x float> undef), !tbaa !12, !alias.scope !180 - %174 = fsub <8 x float> %wide.masked.load69, %wide.masked.load70 - %175 = fpext <8 x float> %174 to <8 x double> - %176 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %175, <8 x double> , <8 x double> %167) - %177 = fptrunc <8 x double> %176 to <8 x float> - %178 = bitcast float* %165 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %177, <8 x float>* %178, i32 4, <8 x i1> %161), !tbaa !12, !alias.scope !173, !noalias !176, !llvm.access.group !24 - %179 = trunc <8 x i64> %broadcast.splat65 to <8 x i32> - %180 = or <8 x i32> %179, - %181 = icmp sgt <8 x i32> %broadcast.splat67, %180 - %182 = icmp sgt <8 x i32> %180, zeroinitializer - %183 = and <8 x i1> %181, %182 - %184 = extractelement <8 x i32> %180, i32 0 - %185 = add i32 %mul.i.i.1, %184 - %186 = sext i32 %185 to i64 - %187 = getelementptr inbounds float, float* %6, i64 %186 - %188 = bitcast float* %187 to <8 x float>* - %wide.masked.load68.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %188, i32 4, <8 x i1> %183, <8 x float> undef), !tbaa !12, !alias.scope !173, !noalias !176 - %189 = fpext <8 x float> %wide.masked.load68.1 to <8 x double> - %190 = getelementptr inbounds float, float* %9, i64 %186 - %191 = bitcast float* %190 to <8 x float>* - %wide.masked.load69.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %191, i32 4, <8 x i1> %183, <8 x float> undef), !tbaa !12, !alias.scope !179 - %192 = add i32 %185, -1 - %193 = sext i32 %192 to i64 - %194 = getelementptr inbounds float, float* %9, i64 %193 - %195 = bitcast float* %194 to <8 x float>* - %wide.masked.load70.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %195, i32 4, <8 x i1> %183, <8 x float> undef), !tbaa !12, !alias.scope !180 - %196 = fsub <8 x float> %wide.masked.load69.1, %wide.masked.load70.1 - %197 = fpext <8 x float> %196 to <8 x double> - %198 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %197, <8 x double> , <8 x double> %189) - %199 = fptrunc <8 x double> %198 to <8 x float> - %200 = bitcast float* %187 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %199, <8 x float>* %200, i32 4, <8 x i1> %183), !tbaa !12, !alias.scope !173, !noalias !176, !llvm.access.group !24 - %201 = trunc <8 x i64> %broadcast.splat65 to <8 x i32> - %202 = or <8 x i32> %201, - %203 = icmp sgt <8 x i32> %broadcast.splat67, %202 - %204 = icmp sgt <8 x i32> %202, zeroinitializer - %205 = and <8 x i1> %203, %204 - %206 = extractelement <8 x i32> %202, i32 0 - %207 = add i32 %mul.i.i.1, %206 - %208 = sext i32 %207 to i64 - %209 = getelementptr inbounds float, float* %6, i64 %208 - %210 = bitcast float* %209 to <8 x float>* - %wide.masked.load68.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %210, i32 4, <8 x i1> %205, <8 x float> undef), !tbaa !12, !alias.scope !173, !noalias !176 - %211 = fpext <8 x float> %wide.masked.load68.2 to <8 x double> - %212 = getelementptr inbounds float, float* %9, i64 %208 - %213 = bitcast float* %212 to <8 x float>* - %wide.masked.load69.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %213, i32 4, <8 x i1> %205, <8 x float> undef), !tbaa !12, !alias.scope !179 - %214 = add i32 %207, -1 - %215 = sext i32 %214 to i64 - %216 = getelementptr inbounds float, float* %9, i64 %215 - %217 = bitcast float* %216 to <8 x float>* - %wide.masked.load70.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %217, i32 4, <8 x i1> %205, <8 x float> undef), !tbaa !12, !alias.scope !180 - %218 = fsub <8 x float> %wide.masked.load69.2, %wide.masked.load70.2 - %219 = fpext <8 x float> %218 to <8 x double> - %220 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %219, <8 x double> , <8 x double> %211) - %221 = fptrunc <8 x double> %220 to <8 x float> - %222 = bitcast float* %209 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %221, <8 x float>* %222, i32 4, <8 x i1> %205), !tbaa !12, !alias.scope !173, !noalias !176, !llvm.access.group !24 - %223 = trunc <8 x i64> %broadcast.splat65 to <8 x i32> - %224 = or <8 x i32> %223, - %225 = icmp sgt <8 x i32> %broadcast.splat67, %224 - %226 = icmp sgt <8 x i32> %224, zeroinitializer - %227 = and <8 x i1> %225, %226 - %228 = extractelement <8 x i32> %224, i32 0 - %229 = add i32 %mul.i.i.1, %228 - %230 = sext i32 %229 to i64 - %231 = getelementptr inbounds float, float* %6, i64 %230 - %232 = bitcast float* %231 to <8 x float>* - %wide.masked.load68.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %232, i32 4, <8 x i1> %227, <8 x float> undef), !tbaa !12, !alias.scope !173, !noalias !176 - %233 = fpext <8 x float> %wide.masked.load68.3 to <8 x double> - %234 = getelementptr inbounds float, float* %9, i64 %230 - %235 = bitcast float* %234 to <8 x float>* - %wide.masked.load69.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %235, i32 4, <8 x i1> %227, <8 x float> undef), !tbaa !12, !alias.scope !179 - %236 = add i32 %229, -1 - %237 = sext i32 %236 to i64 - %238 = getelementptr inbounds float, float* %9, i64 %237 - %239 = bitcast float* %238 to <8 x float>* - %wide.masked.load70.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %239, i32 4, <8 x i1> %227, <8 x float> undef), !tbaa !12, !alias.scope !180 - %240 = fsub <8 x float> %wide.masked.load69.3, %wide.masked.load70.3 - %241 = fpext <8 x float> %240 to <8 x double> - %242 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %241, <8 x double> , <8 x double> %233) - %243 = fptrunc <8 x double> %242 to <8 x float> - %244 = bitcast float* %231 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %243, <8 x float>* %244, i32 4, <8 x i1> %227), !tbaa !12, !alias.scope !173, !noalias !176, !llvm.access.group !24 - br label %pregion_for_end.i.i.1 - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.i.i.us.1.1, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ], [ %971, %if.end.i.i.us.1.1 ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %17, %conv.i.i.us.1 - %cmp7.i.i.us.1 = icmp sgt i32 %conv.i.i.us.1, 0 - %or.cond.i.i.us.1 = and i1 %cmp4.i.i.us.1, %cmp7.i.i.us.1 - br i1 %or.cond.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %add.i.i.us.1 = add i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.1 - %245 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %conv9.i.i.us.1 = fpext float %245 to double - %arrayidx13.i.i.us.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.1 - %246 = load float, float* %arrayidx13.i.i.us.1, align 4, !tbaa !12 - %add15.i.i.us.1 = add i32 %add.i.i.us.1, -1 - %idxprom16.i.i.us.1 = sext i32 %add15.i.i.us.1 to i64 - %arrayidx17.i.i.us.1 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.1 - %247 = load float, float* %arrayidx17.i.i.us.1, align 4, !tbaa !12 - %sub18.i.i.us.1 = fsub float %246, %247 - %conv19.i.i.us.1 = fpext float %sub18.i.i.us.1 to double - %248 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.1, double -5.000000e-01, double %conv9.i.i.us.1) #5 - %conv21.i.i.us.1 = fptrunc double %248 to float - store float %conv21.i.i.us.1, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %249 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %249, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %17, %conv.i.i.us.1.1 - %cmp7.i.i.us.1.1 = icmp sgt i32 %conv.i.i.us.1.1, 0 - %or.cond.i.i.us.1.1 = and i1 %cmp4.i.i.us.1.1, %cmp7.i.i.us.1.1 - br i1 %or.cond.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.i.i.us.1.1 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph57, %pregion_for_end.i.i - %250 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.2 = or i32 %250, 2 - %cmp.i.i.2 = icmp sgt i32 %13, %conv2.i.i.2 - %mul.i.i.2 = mul nsw i32 %17, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck81, label %pregion_for_end.i.i.2 - -vector.scevcheck81: ; preds = %pregion_for_end.i.i.1 - %251 = mul i32 %17, %conv2.i.i.2 - %252 = trunc i64 %2 to i32 - %253 = shl i32 %252, 5 - %254 = add i32 %251, %253 - %255 = icmp sgt i32 %254, 2147483616 - %256 = add i32 %251, %253 - %257 = add i32 %256, -1 - %258 = add i32 %256, 30 - %259 = icmp slt i32 %258, %257 - %260 = or i1 %255, %259 - br i1 %260, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.memcheck103 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.memcheck103, %vector.scevcheck81 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.memcheck103: ; preds = %vector.scevcheck81 - %261 = mul i32 %17, %conv2.i.i.2 - %262 = trunc i64 %2 to i32 - %263 = shl i32 %262, 5 - %264 = add i32 %261, %263 - %265 = sext i32 %264 to i64 - %scevgep83 = getelementptr float, float* %6, i64 %265 - %266 = add nsw i64 %265, 32 - %scevgep85 = getelementptr float, float* %6, i64 %266 - %267 = add i32 %261, %263 - %268 = add i32 %267, -2 - %269 = sext i32 %268 to i64 - %270 = add nuw nsw i64 %269, 1 - %scevgep87 = getelementptr float, float* %9, i64 %270 - %271 = add nsw i64 %269, 33 - %scevgep89 = getelementptr float, float* %9, i64 %271 - %scevgep91 = getelementptr float, float* %9, i64 %265 - %scevgep93 = getelementptr float, float* %9, i64 %266 - %bound095 = icmp ult float* %scevgep83, %scevgep89 - %bound196 = icmp ult float* %scevgep87, %scevgep85 - %found.conflict97 = and i1 %bound095, %bound196 - %bound098 = icmp ult float* %scevgep83, %scevgep93 - %bound199 = icmp ult float* %scevgep91, %scevgep85 - %found.conflict100 = and i1 %bound098, %bound199 - %conflict.rdx101 = or i1 %found.conflict97, %found.conflict100 - br i1 %conflict.rdx101, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph104 - -vector.ph104: ; preds = %vector.memcheck103 - %broadcast.splatinsert111 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat112 = shufflevector <8 x i64> %broadcast.splatinsert111, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert113 = insertelement <8 x i32> undef, i32 %17, i32 0 - %broadcast.splat114 = shufflevector <8 x i32> %broadcast.splatinsert113, <8 x i32> undef, <8 x i32> zeroinitializer - %272 = trunc <8 x i64> %broadcast.splat112 to <8 x i32> - %273 = or <8 x i32> %272, - %274 = icmp sgt <8 x i32> %broadcast.splat114, %273 - %275 = icmp sgt <8 x i32> %273, zeroinitializer - %276 = and <8 x i1> %274, %275 - %277 = extractelement <8 x i32> %273, i32 0 - %278 = add i32 %mul.i.i.2, %277 - %279 = sext i32 %278 to i64 - %280 = getelementptr inbounds float, float* %6, i64 %279 - %281 = bitcast float* %280 to <8 x float>* - %wide.masked.load115 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %281, i32 4, <8 x i1> %276, <8 x float> undef), !tbaa !12, !alias.scope !181, !noalias !184 - %282 = fpext <8 x float> %wide.masked.load115 to <8 x double> - %283 = getelementptr inbounds float, float* %9, i64 %279 - %284 = bitcast float* %283 to <8 x float>* - %wide.masked.load116 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %284, i32 4, <8 x i1> %276, <8 x float> undef), !tbaa !12, !alias.scope !187 - %285 = add i32 %278, -1 - %286 = sext i32 %285 to i64 - %287 = getelementptr inbounds float, float* %9, i64 %286 - %288 = bitcast float* %287 to <8 x float>* - %wide.masked.load117 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %288, i32 4, <8 x i1> %276, <8 x float> undef), !tbaa !12, !alias.scope !188 - %289 = fsub <8 x float> %wide.masked.load116, %wide.masked.load117 - %290 = fpext <8 x float> %289 to <8 x double> - %291 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %290, <8 x double> , <8 x double> %282) - %292 = fptrunc <8 x double> %291 to <8 x float> - %293 = bitcast float* %280 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %292, <8 x float>* %293, i32 4, <8 x i1> %276), !tbaa !12, !alias.scope !181, !noalias !184, !llvm.access.group !24 - %294 = trunc <8 x i64> %broadcast.splat112 to <8 x i32> - %295 = or <8 x i32> %294, - %296 = icmp sgt <8 x i32> %broadcast.splat114, %295 - %297 = icmp sgt <8 x i32> %295, zeroinitializer - %298 = and <8 x i1> %296, %297 - %299 = extractelement <8 x i32> %295, i32 0 - %300 = add i32 %mul.i.i.2, %299 - %301 = sext i32 %300 to i64 - %302 = getelementptr inbounds float, float* %6, i64 %301 - %303 = bitcast float* %302 to <8 x float>* - %wide.masked.load115.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %303, i32 4, <8 x i1> %298, <8 x float> undef), !tbaa !12, !alias.scope !181, !noalias !184 - %304 = fpext <8 x float> %wide.masked.load115.1 to <8 x double> - %305 = getelementptr inbounds float, float* %9, i64 %301 - %306 = bitcast float* %305 to <8 x float>* - %wide.masked.load116.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %306, i32 4, <8 x i1> %298, <8 x float> undef), !tbaa !12, !alias.scope !187 - %307 = add i32 %300, -1 - %308 = sext i32 %307 to i64 - %309 = getelementptr inbounds float, float* %9, i64 %308 - %310 = bitcast float* %309 to <8 x float>* - %wide.masked.load117.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %310, i32 4, <8 x i1> %298, <8 x float> undef), !tbaa !12, !alias.scope !188 - %311 = fsub <8 x float> %wide.masked.load116.1, %wide.masked.load117.1 - %312 = fpext <8 x float> %311 to <8 x double> - %313 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %312, <8 x double> , <8 x double> %304) - %314 = fptrunc <8 x double> %313 to <8 x float> - %315 = bitcast float* %302 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %314, <8 x float>* %315, i32 4, <8 x i1> %298), !tbaa !12, !alias.scope !181, !noalias !184, !llvm.access.group !24 - %316 = trunc <8 x i64> %broadcast.splat112 to <8 x i32> - %317 = or <8 x i32> %316, - %318 = icmp sgt <8 x i32> %broadcast.splat114, %317 - %319 = icmp sgt <8 x i32> %317, zeroinitializer - %320 = and <8 x i1> %318, %319 - %321 = extractelement <8 x i32> %317, i32 0 - %322 = add i32 %mul.i.i.2, %321 - %323 = sext i32 %322 to i64 - %324 = getelementptr inbounds float, float* %6, i64 %323 - %325 = bitcast float* %324 to <8 x float>* - %wide.masked.load115.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %325, i32 4, <8 x i1> %320, <8 x float> undef), !tbaa !12, !alias.scope !181, !noalias !184 - %326 = fpext <8 x float> %wide.masked.load115.2 to <8 x double> - %327 = getelementptr inbounds float, float* %9, i64 %323 - %328 = bitcast float* %327 to <8 x float>* - %wide.masked.load116.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %328, i32 4, <8 x i1> %320, <8 x float> undef), !tbaa !12, !alias.scope !187 - %329 = add i32 %322, -1 - %330 = sext i32 %329 to i64 - %331 = getelementptr inbounds float, float* %9, i64 %330 - %332 = bitcast float* %331 to <8 x float>* - %wide.masked.load117.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %332, i32 4, <8 x i1> %320, <8 x float> undef), !tbaa !12, !alias.scope !188 - %333 = fsub <8 x float> %wide.masked.load116.2, %wide.masked.load117.2 - %334 = fpext <8 x float> %333 to <8 x double> - %335 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %334, <8 x double> , <8 x double> %326) - %336 = fptrunc <8 x double> %335 to <8 x float> - %337 = bitcast float* %324 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %336, <8 x float>* %337, i32 4, <8 x i1> %320), !tbaa !12, !alias.scope !181, !noalias !184, !llvm.access.group !24 - %338 = trunc <8 x i64> %broadcast.splat112 to <8 x i32> - %339 = or <8 x i32> %338, - %340 = icmp sgt <8 x i32> %broadcast.splat114, %339 - %341 = icmp sgt <8 x i32> %339, zeroinitializer - %342 = and <8 x i1> %340, %341 - %343 = extractelement <8 x i32> %339, i32 0 - %344 = add i32 %mul.i.i.2, %343 - %345 = sext i32 %344 to i64 - %346 = getelementptr inbounds float, float* %6, i64 %345 - %347 = bitcast float* %346 to <8 x float>* - %wide.masked.load115.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %347, i32 4, <8 x i1> %342, <8 x float> undef), !tbaa !12, !alias.scope !181, !noalias !184 - %348 = fpext <8 x float> %wide.masked.load115.3 to <8 x double> - %349 = getelementptr inbounds float, float* %9, i64 %345 - %350 = bitcast float* %349 to <8 x float>* - %wide.masked.load116.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %350, i32 4, <8 x i1> %342, <8 x float> undef), !tbaa !12, !alias.scope !187 - %351 = add i32 %344, -1 - %352 = sext i32 %351 to i64 - %353 = getelementptr inbounds float, float* %9, i64 %352 - %354 = bitcast float* %353 to <8 x float>* - %wide.masked.load117.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %354, i32 4, <8 x i1> %342, <8 x float> undef), !tbaa !12, !alias.scope !188 - %355 = fsub <8 x float> %wide.masked.load116.3, %wide.masked.load117.3 - %356 = fpext <8 x float> %355 to <8 x double> - %357 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %356, <8 x double> , <8 x double> %348) - %358 = fptrunc <8 x double> %357 to <8 x float> - %359 = bitcast float* %346 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %358, <8 x float>* %359, i32 4, <8 x i1> %342), !tbaa !12, !alias.scope !181, !noalias !184, !llvm.access.group !24 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.i.i.us.2.1, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ], [ %966, %if.end.i.i.us.2.1 ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %17, %conv.i.i.us.2 - %cmp7.i.i.us.2 = icmp sgt i32 %conv.i.i.us.2, 0 - %or.cond.i.i.us.2 = and i1 %cmp4.i.i.us.2, %cmp7.i.i.us.2 - br i1 %or.cond.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %add.i.i.us.2 = add i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.2 - %360 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %conv9.i.i.us.2 = fpext float %360 to double - %arrayidx13.i.i.us.2 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.2 - %361 = load float, float* %arrayidx13.i.i.us.2, align 4, !tbaa !12 - %add15.i.i.us.2 = add i32 %add.i.i.us.2, -1 - %idxprom16.i.i.us.2 = sext i32 %add15.i.i.us.2 to i64 - %arrayidx17.i.i.us.2 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.2 - %362 = load float, float* %arrayidx17.i.i.us.2, align 4, !tbaa !12 - %sub18.i.i.us.2 = fsub float %361, %362 - %conv19.i.i.us.2 = fpext float %sub18.i.i.us.2 to double - %363 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.2, double -5.000000e-01, double %conv9.i.i.us.2) #5 - %conv21.i.i.us.2 = fptrunc double %363 to float - store float %conv21.i.i.us.2, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %364 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %364, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %17, %conv.i.i.us.2.1 - %cmp7.i.i.us.2.1 = icmp sgt i32 %conv.i.i.us.2.1, 0 - %or.cond.i.i.us.2.1 = and i1 %cmp4.i.i.us.2.1, %cmp7.i.i.us.2.1 - br i1 %or.cond.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.i.i.us.2.1 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph104, %pregion_for_end.i.i.1 - %365 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.3 = or i32 %365, 3 - %cmp.i.i.3 = icmp sgt i32 %13, %conv2.i.i.3 - %mul.i.i.3 = mul nsw i32 %17, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck128, label %pregion_for_end.i.i.3 - -vector.scevcheck128: ; preds = %pregion_for_end.i.i.2 - %366 = mul i32 %17, %conv2.i.i.3 - %367 = trunc i64 %2 to i32 - %368 = shl i32 %367, 5 - %369 = add i32 %366, %368 - %370 = icmp sgt i32 %369, 2147483616 - %371 = add i32 %366, %368 - %372 = add i32 %371, -1 - %373 = add i32 %371, 30 - %374 = icmp slt i32 %373, %372 - %375 = or i1 %370, %374 - br i1 %375, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.memcheck150 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.memcheck150, %vector.scevcheck128 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.memcheck150: ; preds = %vector.scevcheck128 - %376 = mul i32 %17, %conv2.i.i.3 - %377 = trunc i64 %2 to i32 - %378 = shl i32 %377, 5 - %379 = add i32 %376, %378 - %380 = sext i32 %379 to i64 - %scevgep130 = getelementptr float, float* %6, i64 %380 - %381 = add nsw i64 %380, 32 - %scevgep132 = getelementptr float, float* %6, i64 %381 - %382 = add i32 %376, %378 - %383 = add i32 %382, -1 - %384 = sext i32 %383 to i64 - %scevgep134 = getelementptr float, float* %9, i64 %384 - %385 = add nsw i64 %384, 32 - %scevgep136 = getelementptr float, float* %9, i64 %385 - %scevgep138 = getelementptr float, float* %9, i64 %380 - %scevgep140 = getelementptr float, float* %9, i64 %381 - %bound0142 = icmp ult float* %scevgep130, %scevgep136 - %bound1143 = icmp ult float* %scevgep134, %scevgep132 - %found.conflict144 = and i1 %bound0142, %bound1143 - %bound0145 = icmp ult float* %scevgep130, %scevgep140 - %bound1146 = icmp ult float* %scevgep138, %scevgep132 - %found.conflict147 = and i1 %bound0145, %bound1146 - %conflict.rdx148 = or i1 %found.conflict144, %found.conflict147 - br i1 %conflict.rdx148, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph151 - -vector.ph151: ; preds = %vector.memcheck150 - %broadcast.splatinsert158 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat159 = shufflevector <8 x i64> %broadcast.splatinsert158, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert160 = insertelement <8 x i32> undef, i32 %17, i32 0 - %broadcast.splat161 = shufflevector <8 x i32> %broadcast.splatinsert160, <8 x i32> undef, <8 x i32> zeroinitializer - %386 = trunc <8 x i64> %broadcast.splat159 to <8 x i32> - %387 = or <8 x i32> %386, - %388 = icmp sgt <8 x i32> %broadcast.splat161, %387 - %389 = icmp sgt <8 x i32> %387, zeroinitializer - %390 = and <8 x i1> %388, %389 - %391 = extractelement <8 x i32> %387, i32 0 - %392 = add i32 %mul.i.i.3, %391 - %393 = sext i32 %392 to i64 - %394 = getelementptr inbounds float, float* %6, i64 %393 - %395 = bitcast float* %394 to <8 x float>* - %wide.masked.load162 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %395, i32 4, <8 x i1> %390, <8 x float> undef), !tbaa !12, !alias.scope !189, !noalias !192 - %396 = fpext <8 x float> %wide.masked.load162 to <8 x double> - %397 = getelementptr inbounds float, float* %9, i64 %393 - %398 = bitcast float* %397 to <8 x float>* - %wide.masked.load163 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %398, i32 4, <8 x i1> %390, <8 x float> undef), !tbaa !12, !alias.scope !195 - %399 = add i32 %392, -1 - %400 = sext i32 %399 to i64 - %401 = getelementptr inbounds float, float* %9, i64 %400 - %402 = bitcast float* %401 to <8 x float>* - %wide.masked.load164 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %402, i32 4, <8 x i1> %390, <8 x float> undef), !tbaa !12, !alias.scope !196 - %403 = fsub <8 x float> %wide.masked.load163, %wide.masked.load164 - %404 = fpext <8 x float> %403 to <8 x double> - %405 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %404, <8 x double> , <8 x double> %396) - %406 = fptrunc <8 x double> %405 to <8 x float> - %407 = bitcast float* %394 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %406, <8 x float>* %407, i32 4, <8 x i1> %390), !tbaa !12, !alias.scope !189, !noalias !192, !llvm.access.group !24 - %408 = trunc <8 x i64> %broadcast.splat159 to <8 x i32> - %409 = or <8 x i32> %408, - %410 = icmp sgt <8 x i32> %broadcast.splat161, %409 - %411 = icmp sgt <8 x i32> %409, zeroinitializer - %412 = and <8 x i1> %410, %411 - %413 = extractelement <8 x i32> %409, i32 0 - %414 = add i32 %mul.i.i.3, %413 - %415 = sext i32 %414 to i64 - %416 = getelementptr inbounds float, float* %6, i64 %415 - %417 = bitcast float* %416 to <8 x float>* - %wide.masked.load162.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %417, i32 4, <8 x i1> %412, <8 x float> undef), !tbaa !12, !alias.scope !189, !noalias !192 - %418 = fpext <8 x float> %wide.masked.load162.1 to <8 x double> - %419 = getelementptr inbounds float, float* %9, i64 %415 - %420 = bitcast float* %419 to <8 x float>* - %wide.masked.load163.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %420, i32 4, <8 x i1> %412, <8 x float> undef), !tbaa !12, !alias.scope !195 - %421 = add i32 %414, -1 - %422 = sext i32 %421 to i64 - %423 = getelementptr inbounds float, float* %9, i64 %422 - %424 = bitcast float* %423 to <8 x float>* - %wide.masked.load164.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %424, i32 4, <8 x i1> %412, <8 x float> undef), !tbaa !12, !alias.scope !196 - %425 = fsub <8 x float> %wide.masked.load163.1, %wide.masked.load164.1 - %426 = fpext <8 x float> %425 to <8 x double> - %427 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %426, <8 x double> , <8 x double> %418) - %428 = fptrunc <8 x double> %427 to <8 x float> - %429 = bitcast float* %416 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %428, <8 x float>* %429, i32 4, <8 x i1> %412), !tbaa !12, !alias.scope !189, !noalias !192, !llvm.access.group !24 - %430 = trunc <8 x i64> %broadcast.splat159 to <8 x i32> - %431 = or <8 x i32> %430, - %432 = icmp sgt <8 x i32> %broadcast.splat161, %431 - %433 = icmp sgt <8 x i32> %431, zeroinitializer - %434 = and <8 x i1> %432, %433 - %435 = extractelement <8 x i32> %431, i32 0 - %436 = add i32 %mul.i.i.3, %435 - %437 = sext i32 %436 to i64 - %438 = getelementptr inbounds float, float* %6, i64 %437 - %439 = bitcast float* %438 to <8 x float>* - %wide.masked.load162.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %439, i32 4, <8 x i1> %434, <8 x float> undef), !tbaa !12, !alias.scope !189, !noalias !192 - %440 = fpext <8 x float> %wide.masked.load162.2 to <8 x double> - %441 = getelementptr inbounds float, float* %9, i64 %437 - %442 = bitcast float* %441 to <8 x float>* - %wide.masked.load163.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %442, i32 4, <8 x i1> %434, <8 x float> undef), !tbaa !12, !alias.scope !195 - %443 = add i32 %436, -1 - %444 = sext i32 %443 to i64 - %445 = getelementptr inbounds float, float* %9, i64 %444 - %446 = bitcast float* %445 to <8 x float>* - %wide.masked.load164.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %446, i32 4, <8 x i1> %434, <8 x float> undef), !tbaa !12, !alias.scope !196 - %447 = fsub <8 x float> %wide.masked.load163.2, %wide.masked.load164.2 - %448 = fpext <8 x float> %447 to <8 x double> - %449 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %448, <8 x double> , <8 x double> %440) - %450 = fptrunc <8 x double> %449 to <8 x float> - %451 = bitcast float* %438 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %450, <8 x float>* %451, i32 4, <8 x i1> %434), !tbaa !12, !alias.scope !189, !noalias !192, !llvm.access.group !24 - %452 = trunc <8 x i64> %broadcast.splat159 to <8 x i32> - %453 = or <8 x i32> %452, - %454 = icmp sgt <8 x i32> %broadcast.splat161, %453 - %455 = icmp sgt <8 x i32> %453, zeroinitializer - %456 = and <8 x i1> %454, %455 - %457 = extractelement <8 x i32> %453, i32 0 - %458 = add i32 %mul.i.i.3, %457 - %459 = sext i32 %458 to i64 - %460 = getelementptr inbounds float, float* %6, i64 %459 - %461 = bitcast float* %460 to <8 x float>* - %wide.masked.load162.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %461, i32 4, <8 x i1> %456, <8 x float> undef), !tbaa !12, !alias.scope !189, !noalias !192 - %462 = fpext <8 x float> %wide.masked.load162.3 to <8 x double> - %463 = getelementptr inbounds float, float* %9, i64 %459 - %464 = bitcast float* %463 to <8 x float>* - %wide.masked.load163.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %464, i32 4, <8 x i1> %456, <8 x float> undef), !tbaa !12, !alias.scope !195 - %465 = add i32 %458, -1 - %466 = sext i32 %465 to i64 - %467 = getelementptr inbounds float, float* %9, i64 %466 - %468 = bitcast float* %467 to <8 x float>* - %wide.masked.load164.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %468, i32 4, <8 x i1> %456, <8 x float> undef), !tbaa !12, !alias.scope !196 - %469 = fsub <8 x float> %wide.masked.load163.3, %wide.masked.load164.3 - %470 = fpext <8 x float> %469 to <8 x double> - %471 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %470, <8 x double> , <8 x double> %462) - %472 = fptrunc <8 x double> %471 to <8 x float> - %473 = bitcast float* %460 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %472, <8 x float>* %473, i32 4, <8 x i1> %456), !tbaa !12, !alias.scope !189, !noalias !192, !llvm.access.group !24 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.i.i.us.3.1, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ], [ %961, %if.end.i.i.us.3.1 ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %17, %conv.i.i.us.3 - %cmp7.i.i.us.3 = icmp sgt i32 %conv.i.i.us.3, 0 - %or.cond.i.i.us.3 = and i1 %cmp4.i.i.us.3, %cmp7.i.i.us.3 - br i1 %or.cond.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %add.i.i.us.3 = add i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.3 - %474 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %conv9.i.i.us.3 = fpext float %474 to double - %arrayidx13.i.i.us.3 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.3 - %475 = load float, float* %arrayidx13.i.i.us.3, align 4, !tbaa !12 - %add15.i.i.us.3 = add i32 %add.i.i.us.3, -1 - %idxprom16.i.i.us.3 = sext i32 %add15.i.i.us.3 to i64 - %arrayidx17.i.i.us.3 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.3 - %476 = load float, float* %arrayidx17.i.i.us.3, align 4, !tbaa !12 - %sub18.i.i.us.3 = fsub float %475, %476 - %conv19.i.i.us.3 = fpext float %sub18.i.i.us.3 to double - %477 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.3, double -5.000000e-01, double %conv9.i.i.us.3) #5 - %conv21.i.i.us.3 = fptrunc double %477 to float - store float %conv21.i.i.us.3, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %478 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %478, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %17, %conv.i.i.us.3.1 - %cmp7.i.i.us.3.1 = icmp sgt i32 %conv.i.i.us.3.1, 0 - %or.cond.i.i.us.3.1 = and i1 %cmp4.i.i.us.3.1, %cmp7.i.i.us.3.1 - br i1 %or.cond.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.i.i.us.3.1 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph151, %pregion_for_end.i.i.2 - %479 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.4 = or i32 %479, 4 - %cmp.i.i.4 = icmp sgt i32 %13, %conv2.i.i.4 - %mul.i.i.4 = mul nsw i32 %17, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck175, label %pregion_for_end.i.i.4 - -vector.scevcheck175: ; preds = %pregion_for_end.i.i.3 - %480 = mul i32 %17, %conv2.i.i.4 - %481 = trunc i64 %2 to i32 - %482 = shl i32 %481, 5 - %483 = add i32 %480, %482 - %484 = icmp sgt i32 %483, 2147483616 - %485 = add i32 %480, %482 - %486 = add i32 %485, -1 - %487 = add i32 %485, 30 - %488 = icmp slt i32 %487, %486 - %489 = or i1 %484, %488 - br i1 %489, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.memcheck197 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.memcheck197, %vector.scevcheck175 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.memcheck197: ; preds = %vector.scevcheck175 - %490 = mul i32 %17, %conv2.i.i.4 - %491 = trunc i64 %2 to i32 - %492 = shl i32 %491, 5 - %493 = add i32 %490, %492 - %494 = sext i32 %493 to i64 - %scevgep177 = getelementptr float, float* %6, i64 %494 - %495 = add nsw i64 %494, 32 - %scevgep179 = getelementptr float, float* %6, i64 %495 - %496 = add i32 %490, %492 - %497 = add i32 %496, -4 - %498 = sext i32 %497 to i64 - %499 = add nuw nsw i64 %498, 3 - %scevgep181 = getelementptr float, float* %9, i64 %499 - %500 = add nsw i64 %498, 35 - %scevgep183 = getelementptr float, float* %9, i64 %500 - %scevgep185 = getelementptr float, float* %9, i64 %494 - %scevgep187 = getelementptr float, float* %9, i64 %495 - %bound0189 = icmp ult float* %scevgep177, %scevgep183 - %bound1190 = icmp ult float* %scevgep181, %scevgep179 - %found.conflict191 = and i1 %bound0189, %bound1190 - %bound0192 = icmp ult float* %scevgep177, %scevgep187 - %bound1193 = icmp ult float* %scevgep185, %scevgep179 - %found.conflict194 = and i1 %bound0192, %bound1193 - %conflict.rdx195 = or i1 %found.conflict191, %found.conflict194 - br i1 %conflict.rdx195, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph198 - -vector.ph198: ; preds = %vector.memcheck197 - %broadcast.splatinsert205 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat206 = shufflevector <8 x i64> %broadcast.splatinsert205, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert207 = insertelement <8 x i32> undef, i32 %17, i32 0 - %broadcast.splat208 = shufflevector <8 x i32> %broadcast.splatinsert207, <8 x i32> undef, <8 x i32> zeroinitializer - %501 = trunc <8 x i64> %broadcast.splat206 to <8 x i32> - %502 = or <8 x i32> %501, - %503 = icmp sgt <8 x i32> %broadcast.splat208, %502 - %504 = icmp sgt <8 x i32> %502, zeroinitializer - %505 = and <8 x i1> %503, %504 - %506 = extractelement <8 x i32> %502, i32 0 - %507 = add i32 %mul.i.i.4, %506 - %508 = sext i32 %507 to i64 - %509 = getelementptr inbounds float, float* %6, i64 %508 - %510 = bitcast float* %509 to <8 x float>* - %wide.masked.load209 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %510, i32 4, <8 x i1> %505, <8 x float> undef), !tbaa !12, !alias.scope !197, !noalias !200 - %511 = fpext <8 x float> %wide.masked.load209 to <8 x double> - %512 = getelementptr inbounds float, float* %9, i64 %508 - %513 = bitcast float* %512 to <8 x float>* - %wide.masked.load210 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %513, i32 4, <8 x i1> %505, <8 x float> undef), !tbaa !12, !alias.scope !203 - %514 = add i32 %507, -1 - %515 = sext i32 %514 to i64 - %516 = getelementptr inbounds float, float* %9, i64 %515 - %517 = bitcast float* %516 to <8 x float>* - %wide.masked.load211 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %517, i32 4, <8 x i1> %505, <8 x float> undef), !tbaa !12, !alias.scope !204 - %518 = fsub <8 x float> %wide.masked.load210, %wide.masked.load211 - %519 = fpext <8 x float> %518 to <8 x double> - %520 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %519, <8 x double> , <8 x double> %511) - %521 = fptrunc <8 x double> %520 to <8 x float> - %522 = bitcast float* %509 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %521, <8 x float>* %522, i32 4, <8 x i1> %505), !tbaa !12, !alias.scope !197, !noalias !200, !llvm.access.group !24 - %523 = trunc <8 x i64> %broadcast.splat206 to <8 x i32> - %524 = or <8 x i32> %523, - %525 = icmp sgt <8 x i32> %broadcast.splat208, %524 - %526 = icmp sgt <8 x i32> %524, zeroinitializer - %527 = and <8 x i1> %525, %526 - %528 = extractelement <8 x i32> %524, i32 0 - %529 = add i32 %mul.i.i.4, %528 - %530 = sext i32 %529 to i64 - %531 = getelementptr inbounds float, float* %6, i64 %530 - %532 = bitcast float* %531 to <8 x float>* - %wide.masked.load209.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %532, i32 4, <8 x i1> %527, <8 x float> undef), !tbaa !12, !alias.scope !197, !noalias !200 - %533 = fpext <8 x float> %wide.masked.load209.1 to <8 x double> - %534 = getelementptr inbounds float, float* %9, i64 %530 - %535 = bitcast float* %534 to <8 x float>* - %wide.masked.load210.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %535, i32 4, <8 x i1> %527, <8 x float> undef), !tbaa !12, !alias.scope !203 - %536 = add i32 %529, -1 - %537 = sext i32 %536 to i64 - %538 = getelementptr inbounds float, float* %9, i64 %537 - %539 = bitcast float* %538 to <8 x float>* - %wide.masked.load211.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %539, i32 4, <8 x i1> %527, <8 x float> undef), !tbaa !12, !alias.scope !204 - %540 = fsub <8 x float> %wide.masked.load210.1, %wide.masked.load211.1 - %541 = fpext <8 x float> %540 to <8 x double> - %542 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %541, <8 x double> , <8 x double> %533) - %543 = fptrunc <8 x double> %542 to <8 x float> - %544 = bitcast float* %531 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %543, <8 x float>* %544, i32 4, <8 x i1> %527), !tbaa !12, !alias.scope !197, !noalias !200, !llvm.access.group !24 - %545 = trunc <8 x i64> %broadcast.splat206 to <8 x i32> - %546 = or <8 x i32> %545, - %547 = icmp sgt <8 x i32> %broadcast.splat208, %546 - %548 = icmp sgt <8 x i32> %546, zeroinitializer - %549 = and <8 x i1> %547, %548 - %550 = extractelement <8 x i32> %546, i32 0 - %551 = add i32 %mul.i.i.4, %550 - %552 = sext i32 %551 to i64 - %553 = getelementptr inbounds float, float* %6, i64 %552 - %554 = bitcast float* %553 to <8 x float>* - %wide.masked.load209.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %554, i32 4, <8 x i1> %549, <8 x float> undef), !tbaa !12, !alias.scope !197, !noalias !200 - %555 = fpext <8 x float> %wide.masked.load209.2 to <8 x double> - %556 = getelementptr inbounds float, float* %9, i64 %552 - %557 = bitcast float* %556 to <8 x float>* - %wide.masked.load210.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %557, i32 4, <8 x i1> %549, <8 x float> undef), !tbaa !12, !alias.scope !203 - %558 = add i32 %551, -1 - %559 = sext i32 %558 to i64 - %560 = getelementptr inbounds float, float* %9, i64 %559 - %561 = bitcast float* %560 to <8 x float>* - %wide.masked.load211.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %561, i32 4, <8 x i1> %549, <8 x float> undef), !tbaa !12, !alias.scope !204 - %562 = fsub <8 x float> %wide.masked.load210.2, %wide.masked.load211.2 - %563 = fpext <8 x float> %562 to <8 x double> - %564 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %563, <8 x double> , <8 x double> %555) - %565 = fptrunc <8 x double> %564 to <8 x float> - %566 = bitcast float* %553 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %565, <8 x float>* %566, i32 4, <8 x i1> %549), !tbaa !12, !alias.scope !197, !noalias !200, !llvm.access.group !24 - %567 = trunc <8 x i64> %broadcast.splat206 to <8 x i32> - %568 = or <8 x i32> %567, - %569 = icmp sgt <8 x i32> %broadcast.splat208, %568 - %570 = icmp sgt <8 x i32> %568, zeroinitializer - %571 = and <8 x i1> %569, %570 - %572 = extractelement <8 x i32> %568, i32 0 - %573 = add i32 %mul.i.i.4, %572 - %574 = sext i32 %573 to i64 - %575 = getelementptr inbounds float, float* %6, i64 %574 - %576 = bitcast float* %575 to <8 x float>* - %wide.masked.load209.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %576, i32 4, <8 x i1> %571, <8 x float> undef), !tbaa !12, !alias.scope !197, !noalias !200 - %577 = fpext <8 x float> %wide.masked.load209.3 to <8 x double> - %578 = getelementptr inbounds float, float* %9, i64 %574 - %579 = bitcast float* %578 to <8 x float>* - %wide.masked.load210.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %579, i32 4, <8 x i1> %571, <8 x float> undef), !tbaa !12, !alias.scope !203 - %580 = add i32 %573, -1 - %581 = sext i32 %580 to i64 - %582 = getelementptr inbounds float, float* %9, i64 %581 - %583 = bitcast float* %582 to <8 x float>* - %wide.masked.load211.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %583, i32 4, <8 x i1> %571, <8 x float> undef), !tbaa !12, !alias.scope !204 - %584 = fsub <8 x float> %wide.masked.load210.3, %wide.masked.load211.3 - %585 = fpext <8 x float> %584 to <8 x double> - %586 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %585, <8 x double> , <8 x double> %577) - %587 = fptrunc <8 x double> %586 to <8 x float> - %588 = bitcast float* %575 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %587, <8 x float>* %588, i32 4, <8 x i1> %571), !tbaa !12, !alias.scope !197, !noalias !200, !llvm.access.group !24 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.i.i.us.4.1, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ], [ %956, %if.end.i.i.us.4.1 ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %17, %conv.i.i.us.4 - %cmp7.i.i.us.4 = icmp sgt i32 %conv.i.i.us.4, 0 - %or.cond.i.i.us.4 = and i1 %cmp4.i.i.us.4, %cmp7.i.i.us.4 - br i1 %or.cond.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %add.i.i.us.4 = add i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.4 - %589 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %conv9.i.i.us.4 = fpext float %589 to double - %arrayidx13.i.i.us.4 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.4 - %590 = load float, float* %arrayidx13.i.i.us.4, align 4, !tbaa !12 - %add15.i.i.us.4 = add i32 %add.i.i.us.4, -1 - %idxprom16.i.i.us.4 = sext i32 %add15.i.i.us.4 to i64 - %arrayidx17.i.i.us.4 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.4 - %591 = load float, float* %arrayidx17.i.i.us.4, align 4, !tbaa !12 - %sub18.i.i.us.4 = fsub float %590, %591 - %conv19.i.i.us.4 = fpext float %sub18.i.i.us.4 to double - %592 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.4, double -5.000000e-01, double %conv9.i.i.us.4) #5 - %conv21.i.i.us.4 = fptrunc double %592 to float - store float %conv21.i.i.us.4, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %593 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %593, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %17, %conv.i.i.us.4.1 - %cmp7.i.i.us.4.1 = icmp sgt i32 %conv.i.i.us.4.1, 0 - %or.cond.i.i.us.4.1 = and i1 %cmp4.i.i.us.4.1, %cmp7.i.i.us.4.1 - br i1 %or.cond.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.i.i.us.4.1 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph198, %pregion_for_end.i.i.3 - %594 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.5 = or i32 %594, 5 - %cmp.i.i.5 = icmp sgt i32 %13, %conv2.i.i.5 - %mul.i.i.5 = mul nsw i32 %17, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck222, label %pregion_for_end.i.i.5 - -vector.scevcheck222: ; preds = %pregion_for_end.i.i.4 - %595 = mul i32 %17, %conv2.i.i.5 - %596 = trunc i64 %2 to i32 - %597 = shl i32 %596, 5 - %598 = add i32 %595, %597 - %599 = icmp sgt i32 %598, 2147483616 - %600 = add i32 %595, %597 - %601 = add i32 %600, -1 - %602 = add i32 %600, 30 - %603 = icmp slt i32 %602, %601 - %604 = or i1 %599, %603 - br i1 %604, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.memcheck244 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.memcheck244, %vector.scevcheck222 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.memcheck244: ; preds = %vector.scevcheck222 - %605 = mul i32 %17, %conv2.i.i.5 - %606 = trunc i64 %2 to i32 - %607 = shl i32 %606, 5 - %608 = add i32 %605, %607 - %609 = sext i32 %608 to i64 - %scevgep224 = getelementptr float, float* %6, i64 %609 - %610 = add nsw i64 %609, 32 - %scevgep226 = getelementptr float, float* %6, i64 %610 - %611 = add i32 %605, %607 - %612 = add i32 %611, -1 - %613 = sext i32 %612 to i64 - %scevgep228 = getelementptr float, float* %9, i64 %613 - %614 = add nsw i64 %613, 32 - %scevgep230 = getelementptr float, float* %9, i64 %614 - %scevgep232 = getelementptr float, float* %9, i64 %609 - %scevgep234 = getelementptr float, float* %9, i64 %610 - %bound0236 = icmp ult float* %scevgep224, %scevgep230 - %bound1237 = icmp ult float* %scevgep228, %scevgep226 - %found.conflict238 = and i1 %bound0236, %bound1237 - %bound0239 = icmp ult float* %scevgep224, %scevgep234 - %bound1240 = icmp ult float* %scevgep232, %scevgep226 - %found.conflict241 = and i1 %bound0239, %bound1240 - %conflict.rdx242 = or i1 %found.conflict238, %found.conflict241 - br i1 %conflict.rdx242, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph245 - -vector.ph245: ; preds = %vector.memcheck244 - %broadcast.splatinsert252 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat253 = shufflevector <8 x i64> %broadcast.splatinsert252, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert254 = insertelement <8 x i32> undef, i32 %17, i32 0 - %broadcast.splat255 = shufflevector <8 x i32> %broadcast.splatinsert254, <8 x i32> undef, <8 x i32> zeroinitializer - %615 = trunc <8 x i64> %broadcast.splat253 to <8 x i32> - %616 = or <8 x i32> %615, - %617 = icmp sgt <8 x i32> %broadcast.splat255, %616 - %618 = icmp sgt <8 x i32> %616, zeroinitializer - %619 = and <8 x i1> %617, %618 - %620 = extractelement <8 x i32> %616, i32 0 - %621 = add i32 %mul.i.i.5, %620 - %622 = sext i32 %621 to i64 - %623 = getelementptr inbounds float, float* %6, i64 %622 - %624 = bitcast float* %623 to <8 x float>* - %wide.masked.load256 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %624, i32 4, <8 x i1> %619, <8 x float> undef), !tbaa !12, !alias.scope !205, !noalias !208 - %625 = fpext <8 x float> %wide.masked.load256 to <8 x double> - %626 = getelementptr inbounds float, float* %9, i64 %622 - %627 = bitcast float* %626 to <8 x float>* - %wide.masked.load257 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %627, i32 4, <8 x i1> %619, <8 x float> undef), !tbaa !12, !alias.scope !211 - %628 = add i32 %621, -1 - %629 = sext i32 %628 to i64 - %630 = getelementptr inbounds float, float* %9, i64 %629 - %631 = bitcast float* %630 to <8 x float>* - %wide.masked.load258 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %631, i32 4, <8 x i1> %619, <8 x float> undef), !tbaa !12, !alias.scope !212 - %632 = fsub <8 x float> %wide.masked.load257, %wide.masked.load258 - %633 = fpext <8 x float> %632 to <8 x double> - %634 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %633, <8 x double> , <8 x double> %625) - %635 = fptrunc <8 x double> %634 to <8 x float> - %636 = bitcast float* %623 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %635, <8 x float>* %636, i32 4, <8 x i1> %619), !tbaa !12, !alias.scope !205, !noalias !208, !llvm.access.group !24 - %637 = trunc <8 x i64> %broadcast.splat253 to <8 x i32> - %638 = or <8 x i32> %637, - %639 = icmp sgt <8 x i32> %broadcast.splat255, %638 - %640 = icmp sgt <8 x i32> %638, zeroinitializer - %641 = and <8 x i1> %639, %640 - %642 = extractelement <8 x i32> %638, i32 0 - %643 = add i32 %mul.i.i.5, %642 - %644 = sext i32 %643 to i64 - %645 = getelementptr inbounds float, float* %6, i64 %644 - %646 = bitcast float* %645 to <8 x float>* - %wide.masked.load256.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %646, i32 4, <8 x i1> %641, <8 x float> undef), !tbaa !12, !alias.scope !205, !noalias !208 - %647 = fpext <8 x float> %wide.masked.load256.1 to <8 x double> - %648 = getelementptr inbounds float, float* %9, i64 %644 - %649 = bitcast float* %648 to <8 x float>* - %wide.masked.load257.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %649, i32 4, <8 x i1> %641, <8 x float> undef), !tbaa !12, !alias.scope !211 - %650 = add i32 %643, -1 - %651 = sext i32 %650 to i64 - %652 = getelementptr inbounds float, float* %9, i64 %651 - %653 = bitcast float* %652 to <8 x float>* - %wide.masked.load258.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %653, i32 4, <8 x i1> %641, <8 x float> undef), !tbaa !12, !alias.scope !212 - %654 = fsub <8 x float> %wide.masked.load257.1, %wide.masked.load258.1 - %655 = fpext <8 x float> %654 to <8 x double> - %656 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %655, <8 x double> , <8 x double> %647) - %657 = fptrunc <8 x double> %656 to <8 x float> - %658 = bitcast float* %645 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %657, <8 x float>* %658, i32 4, <8 x i1> %641), !tbaa !12, !alias.scope !205, !noalias !208, !llvm.access.group !24 - %659 = trunc <8 x i64> %broadcast.splat253 to <8 x i32> - %660 = or <8 x i32> %659, - %661 = icmp sgt <8 x i32> %broadcast.splat255, %660 - %662 = icmp sgt <8 x i32> %660, zeroinitializer - %663 = and <8 x i1> %661, %662 - %664 = extractelement <8 x i32> %660, i32 0 - %665 = add i32 %mul.i.i.5, %664 - %666 = sext i32 %665 to i64 - %667 = getelementptr inbounds float, float* %6, i64 %666 - %668 = bitcast float* %667 to <8 x float>* - %wide.masked.load256.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %668, i32 4, <8 x i1> %663, <8 x float> undef), !tbaa !12, !alias.scope !205, !noalias !208 - %669 = fpext <8 x float> %wide.masked.load256.2 to <8 x double> - %670 = getelementptr inbounds float, float* %9, i64 %666 - %671 = bitcast float* %670 to <8 x float>* - %wide.masked.load257.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %671, i32 4, <8 x i1> %663, <8 x float> undef), !tbaa !12, !alias.scope !211 - %672 = add i32 %665, -1 - %673 = sext i32 %672 to i64 - %674 = getelementptr inbounds float, float* %9, i64 %673 - %675 = bitcast float* %674 to <8 x float>* - %wide.masked.load258.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %675, i32 4, <8 x i1> %663, <8 x float> undef), !tbaa !12, !alias.scope !212 - %676 = fsub <8 x float> %wide.masked.load257.2, %wide.masked.load258.2 - %677 = fpext <8 x float> %676 to <8 x double> - %678 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %677, <8 x double> , <8 x double> %669) - %679 = fptrunc <8 x double> %678 to <8 x float> - %680 = bitcast float* %667 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %679, <8 x float>* %680, i32 4, <8 x i1> %663), !tbaa !12, !alias.scope !205, !noalias !208, !llvm.access.group !24 - %681 = trunc <8 x i64> %broadcast.splat253 to <8 x i32> - %682 = or <8 x i32> %681, - %683 = icmp sgt <8 x i32> %broadcast.splat255, %682 - %684 = icmp sgt <8 x i32> %682, zeroinitializer - %685 = and <8 x i1> %683, %684 - %686 = extractelement <8 x i32> %682, i32 0 - %687 = add i32 %mul.i.i.5, %686 - %688 = sext i32 %687 to i64 - %689 = getelementptr inbounds float, float* %6, i64 %688 - %690 = bitcast float* %689 to <8 x float>* - %wide.masked.load256.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %690, i32 4, <8 x i1> %685, <8 x float> undef), !tbaa !12, !alias.scope !205, !noalias !208 - %691 = fpext <8 x float> %wide.masked.load256.3 to <8 x double> - %692 = getelementptr inbounds float, float* %9, i64 %688 - %693 = bitcast float* %692 to <8 x float>* - %wide.masked.load257.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %693, i32 4, <8 x i1> %685, <8 x float> undef), !tbaa !12, !alias.scope !211 - %694 = add i32 %687, -1 - %695 = sext i32 %694 to i64 - %696 = getelementptr inbounds float, float* %9, i64 %695 - %697 = bitcast float* %696 to <8 x float>* - %wide.masked.load258.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %697, i32 4, <8 x i1> %685, <8 x float> undef), !tbaa !12, !alias.scope !212 - %698 = fsub <8 x float> %wide.masked.load257.3, %wide.masked.load258.3 - %699 = fpext <8 x float> %698 to <8 x double> - %700 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %699, <8 x double> , <8 x double> %691) - %701 = fptrunc <8 x double> %700 to <8 x float> - %702 = bitcast float* %689 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %701, <8 x float>* %702, i32 4, <8 x i1> %685), !tbaa !12, !alias.scope !205, !noalias !208, !llvm.access.group !24 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.i.i.us.5.1, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ], [ %951, %if.end.i.i.us.5.1 ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %17, %conv.i.i.us.5 - %cmp7.i.i.us.5 = icmp sgt i32 %conv.i.i.us.5, 0 - %or.cond.i.i.us.5 = and i1 %cmp4.i.i.us.5, %cmp7.i.i.us.5 - br i1 %or.cond.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %add.i.i.us.5 = add i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.5 - %703 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %conv9.i.i.us.5 = fpext float %703 to double - %arrayidx13.i.i.us.5 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.5 - %704 = load float, float* %arrayidx13.i.i.us.5, align 4, !tbaa !12 - %add15.i.i.us.5 = add i32 %add.i.i.us.5, -1 - %idxprom16.i.i.us.5 = sext i32 %add15.i.i.us.5 to i64 - %arrayidx17.i.i.us.5 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.5 - %705 = load float, float* %arrayidx17.i.i.us.5, align 4, !tbaa !12 - %sub18.i.i.us.5 = fsub float %704, %705 - %conv19.i.i.us.5 = fpext float %sub18.i.i.us.5 to double - %706 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.5, double -5.000000e-01, double %conv9.i.i.us.5) #5 - %conv21.i.i.us.5 = fptrunc double %706 to float - store float %conv21.i.i.us.5, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %707 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %707, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %17, %conv.i.i.us.5.1 - %cmp7.i.i.us.5.1 = icmp sgt i32 %conv.i.i.us.5.1, 0 - %or.cond.i.i.us.5.1 = and i1 %cmp4.i.i.us.5.1, %cmp7.i.i.us.5.1 - br i1 %or.cond.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.i.i.us.5.1 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph245, %pregion_for_end.i.i.4 - %708 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.6 = or i32 %708, 6 - %cmp.i.i.6 = icmp sgt i32 %13, %conv2.i.i.6 - %mul.i.i.6 = mul nsw i32 %17, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck269, label %pregion_for_end.i.i.6 - -vector.scevcheck269: ; preds = %pregion_for_end.i.i.5 - %709 = mul i32 %17, %conv2.i.i.6 - %710 = trunc i64 %2 to i32 - %711 = shl i32 %710, 5 - %712 = add i32 %709, %711 - %713 = icmp sgt i32 %712, 2147483616 - %714 = add i32 %709, %711 - %715 = add i32 %714, -1 - %716 = add i32 %714, 30 - %717 = icmp slt i32 %716, %715 - %718 = or i1 %713, %717 - br i1 %718, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.memcheck291 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.memcheck291, %vector.scevcheck269 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.memcheck291: ; preds = %vector.scevcheck269 - %719 = mul i32 %17, %conv2.i.i.6 - %720 = trunc i64 %2 to i32 - %721 = shl i32 %720, 5 - %722 = add i32 %719, %721 - %723 = sext i32 %722 to i64 - %scevgep271 = getelementptr float, float* %6, i64 %723 - %724 = add nsw i64 %723, 32 - %scevgep273 = getelementptr float, float* %6, i64 %724 - %725 = add i32 %719, %721 - %726 = add i32 %725, -2 - %727 = sext i32 %726 to i64 - %728 = add nuw nsw i64 %727, 1 - %scevgep275 = getelementptr float, float* %9, i64 %728 - %729 = add nsw i64 %727, 33 - %scevgep277 = getelementptr float, float* %9, i64 %729 - %scevgep279 = getelementptr float, float* %9, i64 %723 - %scevgep281 = getelementptr float, float* %9, i64 %724 - %bound0283 = icmp ult float* %scevgep271, %scevgep277 - %bound1284 = icmp ult float* %scevgep275, %scevgep273 - %found.conflict285 = and i1 %bound0283, %bound1284 - %bound0286 = icmp ult float* %scevgep271, %scevgep281 - %bound1287 = icmp ult float* %scevgep279, %scevgep273 - %found.conflict288 = and i1 %bound0286, %bound1287 - %conflict.rdx289 = or i1 %found.conflict285, %found.conflict288 - br i1 %conflict.rdx289, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph292 - -vector.ph292: ; preds = %vector.memcheck291 - %broadcast.splatinsert299 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat300 = shufflevector <8 x i64> %broadcast.splatinsert299, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert301 = insertelement <8 x i32> undef, i32 %17, i32 0 - %broadcast.splat302 = shufflevector <8 x i32> %broadcast.splatinsert301, <8 x i32> undef, <8 x i32> zeroinitializer - %730 = trunc <8 x i64> %broadcast.splat300 to <8 x i32> - %731 = or <8 x i32> %730, - %732 = icmp sgt <8 x i32> %broadcast.splat302, %731 - %733 = icmp sgt <8 x i32> %731, zeroinitializer - %734 = and <8 x i1> %732, %733 - %735 = extractelement <8 x i32> %731, i32 0 - %736 = add i32 %mul.i.i.6, %735 - %737 = sext i32 %736 to i64 - %738 = getelementptr inbounds float, float* %6, i64 %737 - %739 = bitcast float* %738 to <8 x float>* - %wide.masked.load303 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %739, i32 4, <8 x i1> %734, <8 x float> undef), !tbaa !12, !alias.scope !213, !noalias !216 - %740 = fpext <8 x float> %wide.masked.load303 to <8 x double> - %741 = getelementptr inbounds float, float* %9, i64 %737 - %742 = bitcast float* %741 to <8 x float>* - %wide.masked.load304 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %742, i32 4, <8 x i1> %734, <8 x float> undef), !tbaa !12, !alias.scope !219 - %743 = add i32 %736, -1 - %744 = sext i32 %743 to i64 - %745 = getelementptr inbounds float, float* %9, i64 %744 - %746 = bitcast float* %745 to <8 x float>* - %wide.masked.load305 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %746, i32 4, <8 x i1> %734, <8 x float> undef), !tbaa !12, !alias.scope !220 - %747 = fsub <8 x float> %wide.masked.load304, %wide.masked.load305 - %748 = fpext <8 x float> %747 to <8 x double> - %749 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %748, <8 x double> , <8 x double> %740) - %750 = fptrunc <8 x double> %749 to <8 x float> - %751 = bitcast float* %738 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %750, <8 x float>* %751, i32 4, <8 x i1> %734), !tbaa !12, !alias.scope !213, !noalias !216, !llvm.access.group !24 - %752 = trunc <8 x i64> %broadcast.splat300 to <8 x i32> - %753 = or <8 x i32> %752, - %754 = icmp sgt <8 x i32> %broadcast.splat302, %753 - %755 = icmp sgt <8 x i32> %753, zeroinitializer - %756 = and <8 x i1> %754, %755 - %757 = extractelement <8 x i32> %753, i32 0 - %758 = add i32 %mul.i.i.6, %757 - %759 = sext i32 %758 to i64 - %760 = getelementptr inbounds float, float* %6, i64 %759 - %761 = bitcast float* %760 to <8 x float>* - %wide.masked.load303.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %761, i32 4, <8 x i1> %756, <8 x float> undef), !tbaa !12, !alias.scope !213, !noalias !216 - %762 = fpext <8 x float> %wide.masked.load303.1 to <8 x double> - %763 = getelementptr inbounds float, float* %9, i64 %759 - %764 = bitcast float* %763 to <8 x float>* - %wide.masked.load304.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %764, i32 4, <8 x i1> %756, <8 x float> undef), !tbaa !12, !alias.scope !219 - %765 = add i32 %758, -1 - %766 = sext i32 %765 to i64 - %767 = getelementptr inbounds float, float* %9, i64 %766 - %768 = bitcast float* %767 to <8 x float>* - %wide.masked.load305.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %768, i32 4, <8 x i1> %756, <8 x float> undef), !tbaa !12, !alias.scope !220 - %769 = fsub <8 x float> %wide.masked.load304.1, %wide.masked.load305.1 - %770 = fpext <8 x float> %769 to <8 x double> - %771 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %770, <8 x double> , <8 x double> %762) - %772 = fptrunc <8 x double> %771 to <8 x float> - %773 = bitcast float* %760 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %772, <8 x float>* %773, i32 4, <8 x i1> %756), !tbaa !12, !alias.scope !213, !noalias !216, !llvm.access.group !24 - %774 = trunc <8 x i64> %broadcast.splat300 to <8 x i32> - %775 = or <8 x i32> %774, - %776 = icmp sgt <8 x i32> %broadcast.splat302, %775 - %777 = icmp sgt <8 x i32> %775, zeroinitializer - %778 = and <8 x i1> %776, %777 - %779 = extractelement <8 x i32> %775, i32 0 - %780 = add i32 %mul.i.i.6, %779 - %781 = sext i32 %780 to i64 - %782 = getelementptr inbounds float, float* %6, i64 %781 - %783 = bitcast float* %782 to <8 x float>* - %wide.masked.load303.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %783, i32 4, <8 x i1> %778, <8 x float> undef), !tbaa !12, !alias.scope !213, !noalias !216 - %784 = fpext <8 x float> %wide.masked.load303.2 to <8 x double> - %785 = getelementptr inbounds float, float* %9, i64 %781 - %786 = bitcast float* %785 to <8 x float>* - %wide.masked.load304.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %786, i32 4, <8 x i1> %778, <8 x float> undef), !tbaa !12, !alias.scope !219 - %787 = add i32 %780, -1 - %788 = sext i32 %787 to i64 - %789 = getelementptr inbounds float, float* %9, i64 %788 - %790 = bitcast float* %789 to <8 x float>* - %wide.masked.load305.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %790, i32 4, <8 x i1> %778, <8 x float> undef), !tbaa !12, !alias.scope !220 - %791 = fsub <8 x float> %wide.masked.load304.2, %wide.masked.load305.2 - %792 = fpext <8 x float> %791 to <8 x double> - %793 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %792, <8 x double> , <8 x double> %784) - %794 = fptrunc <8 x double> %793 to <8 x float> - %795 = bitcast float* %782 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %794, <8 x float>* %795, i32 4, <8 x i1> %778), !tbaa !12, !alias.scope !213, !noalias !216, !llvm.access.group !24 - %796 = trunc <8 x i64> %broadcast.splat300 to <8 x i32> - %797 = or <8 x i32> %796, - %798 = icmp sgt <8 x i32> %broadcast.splat302, %797 - %799 = icmp sgt <8 x i32> %797, zeroinitializer - %800 = and <8 x i1> %798, %799 - %801 = extractelement <8 x i32> %797, i32 0 - %802 = add i32 %mul.i.i.6, %801 - %803 = sext i32 %802 to i64 - %804 = getelementptr inbounds float, float* %6, i64 %803 - %805 = bitcast float* %804 to <8 x float>* - %wide.masked.load303.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %805, i32 4, <8 x i1> %800, <8 x float> undef), !tbaa !12, !alias.scope !213, !noalias !216 - %806 = fpext <8 x float> %wide.masked.load303.3 to <8 x double> - %807 = getelementptr inbounds float, float* %9, i64 %803 - %808 = bitcast float* %807 to <8 x float>* - %wide.masked.load304.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %808, i32 4, <8 x i1> %800, <8 x float> undef), !tbaa !12, !alias.scope !219 - %809 = add i32 %802, -1 - %810 = sext i32 %809 to i64 - %811 = getelementptr inbounds float, float* %9, i64 %810 - %812 = bitcast float* %811 to <8 x float>* - %wide.masked.load305.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %812, i32 4, <8 x i1> %800, <8 x float> undef), !tbaa !12, !alias.scope !220 - %813 = fsub <8 x float> %wide.masked.load304.3, %wide.masked.load305.3 - %814 = fpext <8 x float> %813 to <8 x double> - %815 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %814, <8 x double> , <8 x double> %806) - %816 = fptrunc <8 x double> %815 to <8 x float> - %817 = bitcast float* %804 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %816, <8 x float>* %817, i32 4, <8 x i1> %800), !tbaa !12, !alias.scope !213, !noalias !216, !llvm.access.group !24 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.i.i.us.6.1, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ], [ %946, %if.end.i.i.us.6.1 ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %17, %conv.i.i.us.6 - %cmp7.i.i.us.6 = icmp sgt i32 %conv.i.i.us.6, 0 - %or.cond.i.i.us.6 = and i1 %cmp4.i.i.us.6, %cmp7.i.i.us.6 - br i1 %or.cond.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %add.i.i.us.6 = add i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.6 - %818 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %conv9.i.i.us.6 = fpext float %818 to double - %arrayidx13.i.i.us.6 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.6 - %819 = load float, float* %arrayidx13.i.i.us.6, align 4, !tbaa !12 - %add15.i.i.us.6 = add i32 %add.i.i.us.6, -1 - %idxprom16.i.i.us.6 = sext i32 %add15.i.i.us.6 to i64 - %arrayidx17.i.i.us.6 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.6 - %820 = load float, float* %arrayidx17.i.i.us.6, align 4, !tbaa !12 - %sub18.i.i.us.6 = fsub float %819, %820 - %conv19.i.i.us.6 = fpext float %sub18.i.i.us.6 to double - %821 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.6, double -5.000000e-01, double %conv9.i.i.us.6) #5 - %conv21.i.i.us.6 = fptrunc double %821 to float - store float %conv21.i.i.us.6, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %822 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %822, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %17, %conv.i.i.us.6.1 - %cmp7.i.i.us.6.1 = icmp sgt i32 %conv.i.i.us.6.1, 0 - %or.cond.i.i.us.6.1 = and i1 %cmp4.i.i.us.6.1, %cmp7.i.i.us.6.1 - br i1 %or.cond.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.i.i.us.6.1 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph292, %pregion_for_end.i.i.5 - %823 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.7 = or i32 %823, 7 - %cmp.i.i.7 = icmp sgt i32 %13, %conv2.i.i.7 - %mul.i.i.7 = mul nsw i32 %17, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck316, label %pregion_for_end.i.i.7 - -vector.scevcheck316: ; preds = %pregion_for_end.i.i.6 - %824 = mul i32 %17, %conv2.i.i.7 - %825 = trunc i64 %2 to i32 - %826 = shl i32 %825, 5 - %827 = add i32 %824, %826 - %828 = icmp sgt i32 %827, 2147483616 - %829 = add i32 %824, %826 - %830 = add i32 %829, -1 - %831 = add i32 %829, 30 - %832 = icmp slt i32 %831, %830 - %833 = or i1 %828, %832 - br i1 %833, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.memcheck338 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.memcheck338, %vector.scevcheck316 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.memcheck338: ; preds = %vector.scevcheck316 - %834 = mul i32 %17, %conv2.i.i.7 - %835 = trunc i64 %2 to i32 - %836 = shl i32 %835, 5 - %837 = add i32 %834, %836 - %838 = sext i32 %837 to i64 - %scevgep318 = getelementptr float, float* %6, i64 %838 - %839 = add nsw i64 %838, 32 - %scevgep320 = getelementptr float, float* %6, i64 %839 - %840 = add i32 %834, %836 - %841 = add i32 %840, -1 - %842 = sext i32 %841 to i64 - %scevgep322 = getelementptr float, float* %9, i64 %842 - %843 = add nsw i64 %842, 32 - %scevgep324 = getelementptr float, float* %9, i64 %843 - %scevgep326 = getelementptr float, float* %9, i64 %838 - %scevgep328 = getelementptr float, float* %9, i64 %839 - %bound0330 = icmp ult float* %scevgep318, %scevgep324 - %bound1331 = icmp ult float* %scevgep322, %scevgep320 - %found.conflict332 = and i1 %bound0330, %bound1331 - %bound0333 = icmp ult float* %scevgep318, %scevgep328 - %bound1334 = icmp ult float* %scevgep326, %scevgep320 - %found.conflict335 = and i1 %bound0333, %bound1334 - %conflict.rdx336 = or i1 %found.conflict332, %found.conflict335 - br i1 %conflict.rdx336, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph339 - -vector.ph339: ; preds = %vector.memcheck338 - %broadcast.splatinsert346 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat347 = shufflevector <8 x i64> %broadcast.splatinsert346, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert348 = insertelement <8 x i32> undef, i32 %17, i32 0 - %broadcast.splat349 = shufflevector <8 x i32> %broadcast.splatinsert348, <8 x i32> undef, <8 x i32> zeroinitializer - %844 = trunc <8 x i64> %broadcast.splat347 to <8 x i32> - %845 = or <8 x i32> %844, - %846 = icmp sgt <8 x i32> %broadcast.splat349, %845 - %847 = icmp sgt <8 x i32> %845, zeroinitializer - %848 = and <8 x i1> %846, %847 - %849 = extractelement <8 x i32> %845, i32 0 - %850 = add i32 %mul.i.i.7, %849 - %851 = sext i32 %850 to i64 - %852 = getelementptr inbounds float, float* %6, i64 %851 - %853 = bitcast float* %852 to <8 x float>* - %wide.masked.load350 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %853, i32 4, <8 x i1> %848, <8 x float> undef), !tbaa !12, !alias.scope !221, !noalias !224 - %854 = fpext <8 x float> %wide.masked.load350 to <8 x double> - %855 = getelementptr inbounds float, float* %9, i64 %851 - %856 = bitcast float* %855 to <8 x float>* - %wide.masked.load351 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %856, i32 4, <8 x i1> %848, <8 x float> undef), !tbaa !12, !alias.scope !227 - %857 = add i32 %850, -1 - %858 = sext i32 %857 to i64 - %859 = getelementptr inbounds float, float* %9, i64 %858 - %860 = bitcast float* %859 to <8 x float>* - %wide.masked.load352 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %860, i32 4, <8 x i1> %848, <8 x float> undef), !tbaa !12, !alias.scope !228 - %861 = fsub <8 x float> %wide.masked.load351, %wide.masked.load352 - %862 = fpext <8 x float> %861 to <8 x double> - %863 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %862, <8 x double> , <8 x double> %854) - %864 = fptrunc <8 x double> %863 to <8 x float> - %865 = bitcast float* %852 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %864, <8 x float>* %865, i32 4, <8 x i1> %848), !tbaa !12, !alias.scope !221, !noalias !224, !llvm.access.group !24 - %866 = trunc <8 x i64> %broadcast.splat347 to <8 x i32> - %867 = or <8 x i32> %866, - %868 = icmp sgt <8 x i32> %broadcast.splat349, %867 - %869 = icmp sgt <8 x i32> %867, zeroinitializer - %870 = and <8 x i1> %868, %869 - %871 = extractelement <8 x i32> %867, i32 0 - %872 = add i32 %mul.i.i.7, %871 - %873 = sext i32 %872 to i64 - %874 = getelementptr inbounds float, float* %6, i64 %873 - %875 = bitcast float* %874 to <8 x float>* - %wide.masked.load350.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %875, i32 4, <8 x i1> %870, <8 x float> undef), !tbaa !12, !alias.scope !221, !noalias !224 - %876 = fpext <8 x float> %wide.masked.load350.1 to <8 x double> - %877 = getelementptr inbounds float, float* %9, i64 %873 - %878 = bitcast float* %877 to <8 x float>* - %wide.masked.load351.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %878, i32 4, <8 x i1> %870, <8 x float> undef), !tbaa !12, !alias.scope !227 - %879 = add i32 %872, -1 - %880 = sext i32 %879 to i64 - %881 = getelementptr inbounds float, float* %9, i64 %880 - %882 = bitcast float* %881 to <8 x float>* - %wide.masked.load352.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %882, i32 4, <8 x i1> %870, <8 x float> undef), !tbaa !12, !alias.scope !228 - %883 = fsub <8 x float> %wide.masked.load351.1, %wide.masked.load352.1 - %884 = fpext <8 x float> %883 to <8 x double> - %885 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %884, <8 x double> , <8 x double> %876) - %886 = fptrunc <8 x double> %885 to <8 x float> - %887 = bitcast float* %874 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %886, <8 x float>* %887, i32 4, <8 x i1> %870), !tbaa !12, !alias.scope !221, !noalias !224, !llvm.access.group !24 - %888 = trunc <8 x i64> %broadcast.splat347 to <8 x i32> - %889 = or <8 x i32> %888, - %890 = icmp sgt <8 x i32> %broadcast.splat349, %889 - %891 = icmp sgt <8 x i32> %889, zeroinitializer - %892 = and <8 x i1> %890, %891 - %893 = extractelement <8 x i32> %889, i32 0 - %894 = add i32 %mul.i.i.7, %893 - %895 = sext i32 %894 to i64 - %896 = getelementptr inbounds float, float* %6, i64 %895 - %897 = bitcast float* %896 to <8 x float>* - %wide.masked.load350.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %897, i32 4, <8 x i1> %892, <8 x float> undef), !tbaa !12, !alias.scope !221, !noalias !224 - %898 = fpext <8 x float> %wide.masked.load350.2 to <8 x double> - %899 = getelementptr inbounds float, float* %9, i64 %895 - %900 = bitcast float* %899 to <8 x float>* - %wide.masked.load351.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %900, i32 4, <8 x i1> %892, <8 x float> undef), !tbaa !12, !alias.scope !227 - %901 = add i32 %894, -1 - %902 = sext i32 %901 to i64 - %903 = getelementptr inbounds float, float* %9, i64 %902 - %904 = bitcast float* %903 to <8 x float>* - %wide.masked.load352.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %904, i32 4, <8 x i1> %892, <8 x float> undef), !tbaa !12, !alias.scope !228 - %905 = fsub <8 x float> %wide.masked.load351.2, %wide.masked.load352.2 - %906 = fpext <8 x float> %905 to <8 x double> - %907 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %906, <8 x double> , <8 x double> %898) - %908 = fptrunc <8 x double> %907 to <8 x float> - %909 = bitcast float* %896 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %908, <8 x float>* %909, i32 4, <8 x i1> %892), !tbaa !12, !alias.scope !221, !noalias !224, !llvm.access.group !24 - %910 = trunc <8 x i64> %broadcast.splat347 to <8 x i32> - %911 = or <8 x i32> %910, - %912 = icmp sgt <8 x i32> %broadcast.splat349, %911 - %913 = icmp sgt <8 x i32> %911, zeroinitializer - %914 = and <8 x i1> %912, %913 - %915 = extractelement <8 x i32> %911, i32 0 - %916 = add i32 %mul.i.i.7, %915 - %917 = sext i32 %916 to i64 - %918 = getelementptr inbounds float, float* %6, i64 %917 - %919 = bitcast float* %918 to <8 x float>* - %wide.masked.load350.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %919, i32 4, <8 x i1> %914, <8 x float> undef), !tbaa !12, !alias.scope !221, !noalias !224 - %920 = fpext <8 x float> %wide.masked.load350.3 to <8 x double> - %921 = getelementptr inbounds float, float* %9, i64 %917 - %922 = bitcast float* %921 to <8 x float>* - %wide.masked.load351.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %922, i32 4, <8 x i1> %914, <8 x float> undef), !tbaa !12, !alias.scope !227 - %923 = add i32 %916, -1 - %924 = sext i32 %923 to i64 - %925 = getelementptr inbounds float, float* %9, i64 %924 - %926 = bitcast float* %925 to <8 x float>* - %wide.masked.load352.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %926, i32 4, <8 x i1> %914, <8 x float> undef), !tbaa !12, !alias.scope !228 - %927 = fsub <8 x float> %wide.masked.load351.3, %wide.masked.load352.3 - %928 = fpext <8 x float> %927 to <8 x double> - %929 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %928, <8 x double> , <8 x double> %920) - %930 = fptrunc <8 x double> %929 to <8 x float> - %931 = bitcast float* %918 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %930, <8 x float>* %931, i32 4, <8 x i1> %914), !tbaa !12, !alias.scope !221, !noalias !224, !llvm.access.group !24 - br label %pregion_for_end.i.i.7 - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.i.i.us.7.1, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ], [ %941, %if.end.i.i.us.7.1 ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %17, %conv.i.i.us.7 - %cmp7.i.i.us.7 = icmp sgt i32 %conv.i.i.us.7, 0 - %or.cond.i.i.us.7 = and i1 %cmp4.i.i.us.7, %cmp7.i.i.us.7 - br i1 %or.cond.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %add.i.i.us.7 = add i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.7 - %932 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %conv9.i.i.us.7 = fpext float %932 to double - %arrayidx13.i.i.us.7 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.7 - %933 = load float, float* %arrayidx13.i.i.us.7, align 4, !tbaa !12 - %add15.i.i.us.7 = add i32 %add.i.i.us.7, -1 - %idxprom16.i.i.us.7 = sext i32 %add15.i.i.us.7 to i64 - %arrayidx17.i.i.us.7 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.7 - %934 = load float, float* %arrayidx17.i.i.us.7, align 4, !tbaa !12 - %sub18.i.i.us.7 = fsub float %933, %934 - %conv19.i.i.us.7 = fpext float %sub18.i.i.us.7 to double - %935 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.7, double -5.000000e-01, double %conv9.i.i.us.7) #5 - %conv21.i.i.us.7 = fptrunc double %935 to float - store float %conv21.i.i.us.7, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %936 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %936, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %17, %conv.i.i.us.7.1 - %cmp7.i.i.us.7.1 = icmp sgt i32 %conv.i.i.us.7.1, 0 - %or.cond.i.i.us.7.1 = and i1 %cmp4.i.i.us.7.1, %cmp7.i.i.us.7.1 - br i1 %or.cond.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.i.i.us.7.1 - -pregion_for_end.i.i.7.loopexit: ; preds = %if.end.i.i.us.7.1 - br label %pregion_for_end.i.i.7 - -pregion_for_end.i.i.7: ; preds = %pregion_for_end.i.i.7.loopexit, %vector.ph339, %pregion_for_end.i.i.6 - ret void - -if.then.i.i.us.7.1: ; preds = %if.end.i.i.us.7 - %add.i.i.us.7.1 = add i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.7.1 - %937 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %conv9.i.i.us.7.1 = fpext float %937 to double - %arrayidx13.i.i.us.7.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.7.1 - %938 = load float, float* %arrayidx13.i.i.us.7.1, align 4, !tbaa !12 - %add15.i.i.us.7.1 = add i32 %add.i.i.us.7.1, -1 - %idxprom16.i.i.us.7.1 = sext i32 %add15.i.i.us.7.1 to i64 - %arrayidx17.i.i.us.7.1 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.7.1 - %939 = load float, float* %arrayidx17.i.i.us.7.1, align 4, !tbaa !12 - %sub18.i.i.us.7.1 = fsub float %938, %939 - %conv19.i.i.us.7.1 = fpext float %sub18.i.i.us.7.1 to double - %940 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.7.1, double -5.000000e-01, double %conv9.i.i.us.7.1) #5 - %conv21.i.i.us.7.1 = fptrunc double %940 to float - store float %conv21.i.i.us.7.1, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.7.1 - -if.end.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.i.i.us.7 - %941 = add nuw nsw i64 %_local_id_x.i.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %941, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.i.7.loopexit, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !229 - -if.then.i.i.us.6.1: ; preds = %if.end.i.i.us.6 - %add.i.i.us.6.1 = add i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.6.1 - %942 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %conv9.i.i.us.6.1 = fpext float %942 to double - %arrayidx13.i.i.us.6.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.6.1 - %943 = load float, float* %arrayidx13.i.i.us.6.1, align 4, !tbaa !12 - %add15.i.i.us.6.1 = add nsw i32 %add.i.i.us.6.1, -1 - %idxprom16.i.i.us.6.1 = sext i32 %add15.i.i.us.6.1 to i64 - %arrayidx17.i.i.us.6.1 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.6.1 - %944 = load float, float* %arrayidx17.i.i.us.6.1, align 4, !tbaa !12 - %sub18.i.i.us.6.1 = fsub float %943, %944 - %conv19.i.i.us.6.1 = fpext float %sub18.i.i.us.6.1 to double - %945 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.6.1, double -5.000000e-01, double %conv9.i.i.us.6.1) #5 - %conv21.i.i.us.6.1 = fptrunc double %945 to float - store float %conv21.i.i.us.6.1, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.6.1 - -if.end.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.i.i.us.6 - %946 = add nuw nsw i64 %_local_id_x.i.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %946, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !230 - -if.then.i.i.us.5.1: ; preds = %if.end.i.i.us.5 - %add.i.i.us.5.1 = add i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.5.1 - %947 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %conv9.i.i.us.5.1 = fpext float %947 to double - %arrayidx13.i.i.us.5.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.5.1 - %948 = load float, float* %arrayidx13.i.i.us.5.1, align 4, !tbaa !12 - %add15.i.i.us.5.1 = add i32 %add.i.i.us.5.1, -1 - %idxprom16.i.i.us.5.1 = sext i32 %add15.i.i.us.5.1 to i64 - %arrayidx17.i.i.us.5.1 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.5.1 - %949 = load float, float* %arrayidx17.i.i.us.5.1, align 4, !tbaa !12 - %sub18.i.i.us.5.1 = fsub float %948, %949 - %conv19.i.i.us.5.1 = fpext float %sub18.i.i.us.5.1 to double - %950 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.5.1, double -5.000000e-01, double %conv9.i.i.us.5.1) #5 - %conv21.i.i.us.5.1 = fptrunc double %950 to float - store float %conv21.i.i.us.5.1, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.5.1 - -if.end.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.i.i.us.5 - %951 = add nuw nsw i64 %_local_id_x.i.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %951, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !231 - -if.then.i.i.us.4.1: ; preds = %if.end.i.i.us.4 - %add.i.i.us.4.1 = add i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.4.1 - %952 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %conv9.i.i.us.4.1 = fpext float %952 to double - %arrayidx13.i.i.us.4.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.4.1 - %953 = load float, float* %arrayidx13.i.i.us.4.1, align 4, !tbaa !12 - %add15.i.i.us.4.1 = add nsw i32 %add.i.i.us.4.1, -1 - %idxprom16.i.i.us.4.1 = sext i32 %add15.i.i.us.4.1 to i64 - %arrayidx17.i.i.us.4.1 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.4.1 - %954 = load float, float* %arrayidx17.i.i.us.4.1, align 4, !tbaa !12 - %sub18.i.i.us.4.1 = fsub float %953, %954 - %conv19.i.i.us.4.1 = fpext float %sub18.i.i.us.4.1 to double - %955 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.4.1, double -5.000000e-01, double %conv9.i.i.us.4.1) #5 - %conv21.i.i.us.4.1 = fptrunc double %955 to float - store float %conv21.i.i.us.4.1, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.4.1 - -if.end.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.i.i.us.4 - %956 = add nuw nsw i64 %_local_id_x.i.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %956, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !232 - -if.then.i.i.us.3.1: ; preds = %if.end.i.i.us.3 - %add.i.i.us.3.1 = add i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.3.1 - %957 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %conv9.i.i.us.3.1 = fpext float %957 to double - %arrayidx13.i.i.us.3.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.3.1 - %958 = load float, float* %arrayidx13.i.i.us.3.1, align 4, !tbaa !12 - %add15.i.i.us.3.1 = add i32 %add.i.i.us.3.1, -1 - %idxprom16.i.i.us.3.1 = sext i32 %add15.i.i.us.3.1 to i64 - %arrayidx17.i.i.us.3.1 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.3.1 - %959 = load float, float* %arrayidx17.i.i.us.3.1, align 4, !tbaa !12 - %sub18.i.i.us.3.1 = fsub float %958, %959 - %conv19.i.i.us.3.1 = fpext float %sub18.i.i.us.3.1 to double - %960 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.3.1, double -5.000000e-01, double %conv9.i.i.us.3.1) #5 - %conv21.i.i.us.3.1 = fptrunc double %960 to float - store float %conv21.i.i.us.3.1, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.3.1 - -if.end.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.i.i.us.3 - %961 = add nuw nsw i64 %_local_id_x.i.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %961, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !233 - -if.then.i.i.us.2.1: ; preds = %if.end.i.i.us.2 - %add.i.i.us.2.1 = add i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.2.1 - %962 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %conv9.i.i.us.2.1 = fpext float %962 to double - %arrayidx13.i.i.us.2.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.2.1 - %963 = load float, float* %arrayidx13.i.i.us.2.1, align 4, !tbaa !12 - %add15.i.i.us.2.1 = add nsw i32 %add.i.i.us.2.1, -1 - %idxprom16.i.i.us.2.1 = sext i32 %add15.i.i.us.2.1 to i64 - %arrayidx17.i.i.us.2.1 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.2.1 - %964 = load float, float* %arrayidx17.i.i.us.2.1, align 4, !tbaa !12 - %sub18.i.i.us.2.1 = fsub float %963, %964 - %conv19.i.i.us.2.1 = fpext float %sub18.i.i.us.2.1 to double - %965 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.2.1, double -5.000000e-01, double %conv9.i.i.us.2.1) #5 - %conv21.i.i.us.2.1 = fptrunc double %965 to float - store float %conv21.i.i.us.2.1, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.2.1 - -if.end.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.i.i.us.2 - %966 = add nuw nsw i64 %_local_id_x.i.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %966, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !234 - -if.then.i.i.us.1.1: ; preds = %if.end.i.i.us.1 - %add.i.i.us.1.1 = add i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.1.1 - %967 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %conv9.i.i.us.1.1 = fpext float %967 to double - %arrayidx13.i.i.us.1.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.1.1 - %968 = load float, float* %arrayidx13.i.i.us.1.1, align 4, !tbaa !12 - %add15.i.i.us.1.1 = add i32 %add.i.i.us.1.1, -1 - %idxprom16.i.i.us.1.1 = sext i32 %add15.i.i.us.1.1 to i64 - %arrayidx17.i.i.us.1.1 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.1.1 - %969 = load float, float* %arrayidx17.i.i.us.1.1, align 4, !tbaa !12 - %sub18.i.i.us.1.1 = fsub float %968, %969 - %conv19.i.i.us.1.1 = fpext float %sub18.i.i.us.1.1 to double - %970 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.1.1, double -5.000000e-01, double %conv9.i.i.us.1.1) #5 - %conv21.i.i.us.1.1 = fptrunc double %970 to float - store float %conv21.i.i.us.1.1, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.1.1 - -if.end.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.i.i.us.1 - %971 = add nuw nsw i64 %_local_id_x.i.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %971, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !235 - -if.then.i.i.us.1379: ; preds = %if.end.i.i.us - %add.i.i.us.1368 = add i32 %mul.i.i, %conv.i.i.us.1363 - %idxprom.i.i.us.1369 = sext i32 %add.i.i.us.1368 to i64 - %arrayidx.i.i.us.1370 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.1369 - %972 = load float, float* %arrayidx.i.i.us.1370, align 4, !tbaa !12 - %conv9.i.i.us.1371 = fpext float %972 to double - %arrayidx13.i.i.us.1372 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.1369 - %973 = load float, float* %arrayidx13.i.i.us.1372, align 4, !tbaa !12 - %add15.i.i.us.1373 = add nsw i32 %add.i.i.us.1368, -1 - %idxprom16.i.i.us.1374 = sext i32 %add15.i.i.us.1373 to i64 - %arrayidx17.i.i.us.1375 = getelementptr inbounds float, float* %9, i64 %idxprom16.i.i.us.1374 - %974 = load float, float* %arrayidx17.i.i.us.1375, align 4, !tbaa !12 - %sub18.i.i.us.1376 = fsub float %973, %974 - %conv19.i.i.us.1377 = fpext float %sub18.i.i.us.1376 to double - %975 = tail call double @llvm.fmuladd.f64(double %conv19.i.i.us.1377, double -5.000000e-01, double %conv9.i.i.us.1371) #5 - %conv21.i.i.us.1378 = fptrunc double %975 to float - store float %conv21.i.i.us.1378, float* %arrayidx.i.i.us.1370, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.i.i.us.1380 - -if.end.i.i.us.1380: ; preds = %if.then.i.i.us.1379, %if.end.i.i.us - %976 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %976, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !236 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <8 x double> @llvm.fmuladd.v8f64(<8 x double>, <8 x double>, <8 x double>) #0 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #4 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nofree nounwind } -attributes #3 = { argmemonly nounwind readonly willreturn } -attributes #4 = { argmemonly nounwind willreturn } -attributes #5 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !""} -!10 = !{!"ex", !"ey", !"hz", !"nx", !"ny"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20, !21} -!20 = distinct !{!20, !18} -!21 = distinct !{!21, !18} -!22 = !{!21} -!23 = !{!20} -!24 = !{!25, !26} -!25 = distinct !{} -!26 = distinct !{} -!27 = !{!28} -!28 = distinct !{!28, !29} -!29 = distinct !{!29, !"LVerDomain"} -!30 = !{!31, !32} -!31 = distinct !{!31, !29} -!32 = distinct !{!32, !29} -!33 = !{!32} -!34 = !{!31} -!35 = !{!36} -!36 = distinct !{!36, !37} -!37 = distinct !{!37, !"LVerDomain"} -!38 = !{!39, !40} -!39 = distinct !{!39, !37} -!40 = distinct !{!40, !37} -!41 = !{!40} -!42 = !{!39} -!43 = !{!44} -!44 = distinct !{!44, !45} -!45 = distinct !{!45, !"LVerDomain"} -!46 = !{!47, !48} -!47 = distinct !{!47, !45} -!48 = distinct !{!48, !45} -!49 = !{!48} -!50 = !{!47} -!51 = !{!52} -!52 = distinct !{!52, !53} -!53 = distinct !{!53, !"LVerDomain"} -!54 = !{!55, !56} -!55 = distinct !{!55, !53} -!56 = distinct !{!56, !53} -!57 = !{!56} -!58 = !{!55} -!59 = !{!60} -!60 = distinct !{!60, !61} -!61 = distinct !{!61, !"LVerDomain"} -!62 = !{!63, !64} -!63 = distinct !{!63, !61} -!64 = distinct !{!64, !61} -!65 = !{!64} -!66 = !{!63} -!67 = !{!68} -!68 = distinct !{!68, !69} -!69 = distinct !{!69, !"LVerDomain"} -!70 = !{!71, !72} -!71 = distinct !{!71, !69} -!72 = distinct !{!72, !69} -!73 = !{!72} -!74 = !{!71} -!75 = !{!76} -!76 = distinct !{!76, !77} -!77 = distinct !{!77, !"LVerDomain"} -!78 = !{!79, !80} -!79 = distinct !{!79, !77} -!80 = distinct !{!80, !77} -!81 = !{!80} -!82 = !{!79} -!83 = distinct !{!83, !84, !85} -!84 = !{!"llvm.loop.parallel_accesses", !25} -!85 = !{!"llvm.loop.isvectorized", i32 1} -!86 = distinct !{!86, !84, !85} -!87 = distinct !{!87, !84, !85} -!88 = distinct !{!88, !84, !85} -!89 = distinct !{!89, !84, !85} -!90 = distinct !{!90, !84, !85} -!91 = distinct !{!91, !84, !85} -!92 = distinct !{!92, !84, !85} -!93 = !{!94} -!94 = distinct !{!94, !95} -!95 = distinct !{!95, !"LVerDomain"} -!96 = !{!97, !98} -!97 = distinct !{!97, !95} -!98 = distinct !{!98, !95} -!99 = !{!98} -!100 = !{!97} -!101 = !{!102} -!102 = distinct !{!102, !103} -!103 = distinct !{!103, !"LVerDomain"} -!104 = !{!105, !106} -!105 = distinct !{!105, !103} -!106 = distinct !{!106, !103} -!107 = !{!106} -!108 = !{!105} -!109 = !{!110} -!110 = distinct !{!110, !111} -!111 = distinct !{!111, !"LVerDomain"} -!112 = !{!113, !114} -!113 = distinct !{!113, !111} -!114 = distinct !{!114, !111} -!115 = !{!114} -!116 = !{!113} -!117 = !{!118} -!118 = distinct !{!118, !119} -!119 = distinct !{!119, !"LVerDomain"} -!120 = !{!121, !122} -!121 = distinct !{!121, !119} -!122 = distinct !{!122, !119} -!123 = !{!122} -!124 = !{!121} -!125 = !{!126} -!126 = distinct !{!126, !127} -!127 = distinct !{!127, !"LVerDomain"} -!128 = !{!129, !130} -!129 = distinct !{!129, !127} -!130 = distinct !{!130, !127} -!131 = !{!130} -!132 = !{!129} -!133 = !{!134} -!134 = distinct !{!134, !135} -!135 = distinct !{!135, !"LVerDomain"} -!136 = !{!137, !138} -!137 = distinct !{!137, !135} -!138 = distinct !{!138, !135} -!139 = !{!138} -!140 = !{!137} -!141 = !{!142} -!142 = distinct !{!142, !143} -!143 = distinct !{!143, !"LVerDomain"} -!144 = !{!145, !146} -!145 = distinct !{!145, !143} -!146 = distinct !{!146, !143} -!147 = !{!146} -!148 = !{!145} -!149 = !{!150} -!150 = distinct !{!150, !151} -!151 = distinct !{!151, !"LVerDomain"} -!152 = !{!153, !154} -!153 = distinct !{!153, !151} -!154 = distinct !{!154, !151} -!155 = !{!154} -!156 = !{!153} -!157 = distinct !{!157, !84, !85} -!158 = distinct !{!158, !84, !85} -!159 = distinct !{!159, !84, !85} -!160 = distinct !{!160, !84, !85} -!161 = distinct !{!161, !84, !85} -!162 = distinct !{!162, !84, !85} -!163 = distinct !{!163, !84, !85} -!164 = distinct !{!164, !84, !85} -!165 = !{!166} -!166 = distinct !{!166, !167} -!167 = distinct !{!167, !"LVerDomain"} -!168 = !{!169, !170} -!169 = distinct !{!169, !167} -!170 = distinct !{!170, !167} -!171 = !{!170} -!172 = !{!169} -!173 = !{!174} -!174 = distinct !{!174, !175} -!175 = distinct !{!175, !"LVerDomain"} -!176 = !{!177, !178} -!177 = distinct !{!177, !175} -!178 = distinct !{!178, !175} -!179 = !{!178} -!180 = !{!177} -!181 = !{!182} -!182 = distinct !{!182, !183} -!183 = distinct !{!183, !"LVerDomain"} -!184 = !{!185, !186} -!185 = distinct !{!185, !183} -!186 = distinct !{!186, !183} -!187 = !{!186} -!188 = !{!185} -!189 = !{!190} -!190 = distinct !{!190, !191} -!191 = distinct !{!191, !"LVerDomain"} -!192 = !{!193, !194} -!193 = distinct !{!193, !191} -!194 = distinct !{!194, !191} -!195 = !{!194} -!196 = !{!193} -!197 = !{!198} -!198 = distinct !{!198, !199} -!199 = distinct !{!199, !"LVerDomain"} -!200 = !{!201, !202} -!201 = distinct !{!201, !199} -!202 = distinct !{!202, !199} -!203 = !{!202} -!204 = !{!201} -!205 = !{!206} -!206 = distinct !{!206, !207} -!207 = distinct !{!207, !"LVerDomain"} -!208 = !{!209, !210} -!209 = distinct !{!209, !207} -!210 = distinct !{!210, !207} -!211 = !{!210} -!212 = !{!209} -!213 = !{!214} -!214 = distinct !{!214, !215} -!215 = distinct !{!215, !"LVerDomain"} -!216 = !{!217, !218} -!217 = distinct !{!217, !215} -!218 = distinct !{!218, !215} -!219 = !{!218} -!220 = !{!217} -!221 = !{!222} -!222 = distinct !{!222, !223} -!223 = distinct !{!223, !"LVerDomain"} -!224 = !{!225, !226} -!225 = distinct !{!225, !223} -!226 = distinct !{!226, !223} -!227 = !{!226} -!228 = !{!225} -!229 = distinct !{!229, !84, !85} -!230 = distinct !{!230, !84, !85} -!231 = distinct !{!231, !84, !85} -!232 = distinct !{!232, !84, !85} -!233 = distinct !{!233, !84, !85} -!234 = distinct !{!234, !84, !85} -!235 = distinct !{!235, !84, !85} -!236 = distinct !{!236, !84, !85} diff --git a/pocl_irs/fdtd-2d_kernel3.ll b/pocl_irs/fdtd-2d_kernel3.ll deleted file mode 100644 index f399855..0000000 --- a/pocl_irs/fdtd-2d_kernel3.ll +++ /dev/null @@ -1,1008 +0,0 @@ -; ModuleID = './CE/HMLMAPAJJBLPBKGGCCBDEJJLIPIDBFCCIIFBD/fdtd_kernel3/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare double @llvm.fmuladd.f64(double, double, double) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_fdtd_kernel3(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, i32 %4, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %5, i64 %6, i64 %7, i64 %8) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %6, 5 - %mul3.i.i = shl i64 %7, 3 - %sub.i = add nsw i32 %3, -1 - %sub4.i = add nsw i32 %4, -1 - %10 = trunc i64 %7 to i32 - %11 = mul i32 %10, %4 - %12 = shl i32 %11, 3 - %13 = trunc i64 %6 to i32 - %14 = shl i32 %13, 5 - %15 = add i32 %12, %14 - %16 = zext i32 %4 to i64 - %17 = or i32 %15, 1 - %18 = shl i32 %10, 3 - %19 = or i32 %18, 1 - %20 = mul i32 %19, %4 - %21 = add i32 %20, %14 - %22 = trunc i64 %7 to i32 - %23 = mul i32 %22, %4 - %24 = shl i32 %23, 3 - %25 = trunc i64 %6 to i32 - %26 = shl i32 %25, 5 - %27 = add i32 %24, %26 - %28 = zext i32 %4 to i64 - %scevgep10 = getelementptr float, float* %2, i64 32 - %scevgep15 = getelementptr float, float* %0, i64 32 - %29 = or i32 %27, 1 - %30 = zext i32 %29 to i64 - %scevgep20 = getelementptr float, float* %0, i64 32 - %scevgep25 = getelementptr float, float* %1, i64 32 - %31 = shl i32 %22, 3 - %32 = or i32 %31, 1 - %33 = mul i32 %32, %4 - %34 = add i32 %33, %26 - %scevgep30 = getelementptr float, float* %1, i64 32 - %bound0 = icmp ugt float* %scevgep15, %2 - %bound1 = icmp ugt float* %scevgep10, %0 - %found.conflict = and i1 %bound0, %bound1 - %bound036 = icmp ugt float* %scevgep25, %2 - %bound137 = icmp ugt float* %scevgep10, %1 - %found.conflict38 = and i1 %bound036, %bound137 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert44 = insertelement <8 x i32> undef, i32 %sub4.i, i32 0 - %broadcast.splat45 = shufflevector <8 x i32> %broadcast.splatinsert44, <8 x i32> undef, <8 x i32> zeroinitializer - %35 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %36 = or <8 x i32> %35, - %37 = icmp sgt <8 x i32> %broadcast.splat45, %36 - %38 = extractelement <8 x i32> %36, i32 0 - %39 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %40 = or <8 x i32> %39, - %41 = icmp sgt <8 x i32> %broadcast.splat45, %40 - %42 = extractelement <8 x i32> %40, i32 0 - %43 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %44 = or <8 x i32> %43, - %45 = icmp sgt <8 x i32> %broadcast.splat45, %44 - %46 = extractelement <8 x i32> %44, i32 0 - %47 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %48 = or <8 x i32> %47, - %49 = icmp sgt <8 x i32> %broadcast.splat45, %48 - %50 = extractelement <8 x i32> %48, i32 0 - br label %pregion_for_entry.pregion_for_init.i - -pregion_for_entry.pregion_for_init.i: ; preds = %pregion_for_end.i, %9 - %_local_id_y.0 = phi i64 [ 0, %9 ], [ %175, %pregion_for_end.i ] - %51 = mul i64 %_local_id_y.0, %28 - %52 = trunc i64 %51 to i32 - %53 = add i32 %27, %52 - %54 = sext i32 %53 to i64 - %scevgep = getelementptr float, float* %2, i64 %54 - %scevgep11 = getelementptr float, float* %scevgep10, i64 %54 - %55 = add i64 %51, %30 - %sext = shl i64 %55, 32 - %56 = ashr exact i64 %sext, 32 - %scevgep18 = getelementptr float, float* %0, i64 %56 - %scevgep21 = getelementptr float, float* %scevgep20, i64 %56 - %57 = trunc i64 %51 to i32 - %58 = add i32 %34, %57 - %59 = sext i32 %58 to i64 - %scevgep28 = getelementptr float, float* %1, i64 %59 - %scevgep31 = getelementptr float, float* %scevgep30, i64 %59 - %60 = mul i64 %_local_id_y.0, %16 - %add6.i.i = add nuw nsw i64 %_local_id_y.0, %mul3.i.i - %conv2.i = trunc i64 %add6.i.i to i32 - %cmp.i = icmp sgt i32 %sub.i, %conv2.i - %mul.i = mul nsw i32 %conv2.i, %4 - %add18.i = add nsw i32 %conv2.i, 1 - %mul19.i = mul nsw i32 %add18.i, %4 - br i1 %cmp.i, label %vector.scevcheck, label %pregion_for_end.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i - %61 = trunc i64 %60 to i32 - %62 = add i32 %21, %61 - %63 = trunc i64 %60 to i32 - %64 = add i32 %17, %63 - %65 = trunc i64 %60 to i32 - %66 = add i32 %15, %65 - %67 = icmp sgt i32 %66, 2147483616 - %68 = icmp sgt i32 %64, 2147483616 - %69 = or i1 %67, %68 - %70 = icmp sgt i32 %62, 2147483616 - %71 = or i1 %69, %70 - br i1 %71, label %pregion_for_entry.entry.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %bound033 = icmp ult float* %scevgep, %scevgep21 - %bound134 = icmp ult float* %scevgep18, %scevgep11 - %found.conflict35 = and i1 %bound033, %bound134 - %conflict.rdx = or i1 %found.conflict, %found.conflict35 - %conflict.rdx39 = or i1 %conflict.rdx, %found.conflict38 - %bound040 = icmp ult float* %scevgep, %scevgep31 - %bound141 = icmp ult float* %scevgep28, %scevgep11 - %found.conflict42 = and i1 %bound040, %bound141 - %conflict.rdx43 = or i1 %conflict.rdx39, %found.conflict42 - br i1 %conflict.rdx43, label %pregion_for_entry.entry.i.us.preheader, label %vector.body - -vector.body: ; preds = %vector.memcheck - %72 = add i32 %mul.i, %38 - %73 = sext i32 %72 to i64 - %74 = getelementptr inbounds float, float* %2, i64 %73 - %75 = bitcast float* %74 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %75, i32 4, <8 x i1> %37, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %76 = fpext <8 x float> %wide.masked.load to <8 x double> - %77 = add i32 %72, 1 - %78 = sext i32 %77 to i64 - %79 = getelementptr inbounds float, float* %0, i64 %78 - %80 = bitcast float* %79 to <8 x float>* - %wide.masked.load46 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %80, i32 4, <8 x i1> %37, <8 x float> undef), !tbaa !12, !alias.scope !24 - %81 = getelementptr inbounds float, float* %0, i64 %73 - %82 = bitcast float* %81 to <8 x float>* - %wide.masked.load47 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %82, i32 4, <8 x i1> %37, <8 x float> undef), !tbaa !12, !alias.scope !25 - %83 = fsub <8 x float> %wide.masked.load46, %wide.masked.load47 - %84 = add nsw i32 %mul19.i, %38 - %85 = sext i32 %84 to i64 - %86 = getelementptr inbounds float, float* %1, i64 %85 - %87 = bitcast float* %86 to <8 x float>* - %wide.masked.load48 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %87, i32 4, <8 x i1> %37, <8 x float> undef), !tbaa !12, !alias.scope !26 - %88 = fadd <8 x float> %83, %wide.masked.load48 - %89 = getelementptr inbounds float, float* %1, i64 %73 - %90 = bitcast float* %89 to <8 x float>* - %wide.masked.load49 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %90, i32 4, <8 x i1> %37, <8 x float> undef), !tbaa !12, !alias.scope !27 - %91 = fsub <8 x float> %88, %wide.masked.load49 - %92 = fpext <8 x float> %91 to <8 x double> - %93 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %92, <8 x double> , <8 x double> %76) - %94 = fptrunc <8 x double> %93 to <8 x float> - %95 = bitcast float* %74 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %94, <8 x float>* %95, i32 4, <8 x i1> %37), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !28 - %96 = add i32 %mul.i, %42 - %97 = sext i32 %96 to i64 - %98 = getelementptr inbounds float, float* %2, i64 %97 - %99 = bitcast float* %98 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %99, i32 4, <8 x i1> %41, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %100 = fpext <8 x float> %wide.masked.load.1 to <8 x double> - %101 = add i32 %96, 1 - %102 = sext i32 %101 to i64 - %103 = getelementptr inbounds float, float* %0, i64 %102 - %104 = bitcast float* %103 to <8 x float>* - %wide.masked.load46.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %104, i32 4, <8 x i1> %41, <8 x float> undef), !tbaa !12, !alias.scope !24 - %105 = getelementptr inbounds float, float* %0, i64 %97 - %106 = bitcast float* %105 to <8 x float>* - %wide.masked.load47.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %106, i32 4, <8 x i1> %41, <8 x float> undef), !tbaa !12, !alias.scope !25 - %107 = fsub <8 x float> %wide.masked.load46.1, %wide.masked.load47.1 - %108 = add nsw i32 %mul19.i, %42 - %109 = sext i32 %108 to i64 - %110 = getelementptr inbounds float, float* %1, i64 %109 - %111 = bitcast float* %110 to <8 x float>* - %wide.masked.load48.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %111, i32 4, <8 x i1> %41, <8 x float> undef), !tbaa !12, !alias.scope !26 - %112 = fadd <8 x float> %107, %wide.masked.load48.1 - %113 = getelementptr inbounds float, float* %1, i64 %97 - %114 = bitcast float* %113 to <8 x float>* - %wide.masked.load49.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %114, i32 4, <8 x i1> %41, <8 x float> undef), !tbaa !12, !alias.scope !27 - %115 = fsub <8 x float> %112, %wide.masked.load49.1 - %116 = fpext <8 x float> %115 to <8 x double> - %117 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %116, <8 x double> , <8 x double> %100) - %118 = fptrunc <8 x double> %117 to <8 x float> - %119 = bitcast float* %98 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %118, <8 x float>* %119, i32 4, <8 x i1> %41), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !28 - %120 = add i32 %mul.i, %46 - %121 = sext i32 %120 to i64 - %122 = getelementptr inbounds float, float* %2, i64 %121 - %123 = bitcast float* %122 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %123, i32 4, <8 x i1> %45, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %124 = fpext <8 x float> %wide.masked.load.2 to <8 x double> - %125 = add i32 %120, 1 - %126 = sext i32 %125 to i64 - %127 = getelementptr inbounds float, float* %0, i64 %126 - %128 = bitcast float* %127 to <8 x float>* - %wide.masked.load46.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %128, i32 4, <8 x i1> %45, <8 x float> undef), !tbaa !12, !alias.scope !24 - %129 = getelementptr inbounds float, float* %0, i64 %121 - %130 = bitcast float* %129 to <8 x float>* - %wide.masked.load47.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %130, i32 4, <8 x i1> %45, <8 x float> undef), !tbaa !12, !alias.scope !25 - %131 = fsub <8 x float> %wide.masked.load46.2, %wide.masked.load47.2 - %132 = add nsw i32 %mul19.i, %46 - %133 = sext i32 %132 to i64 - %134 = getelementptr inbounds float, float* %1, i64 %133 - %135 = bitcast float* %134 to <8 x float>* - %wide.masked.load48.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %135, i32 4, <8 x i1> %45, <8 x float> undef), !tbaa !12, !alias.scope !26 - %136 = fadd <8 x float> %131, %wide.masked.load48.2 - %137 = getelementptr inbounds float, float* %1, i64 %121 - %138 = bitcast float* %137 to <8 x float>* - %wide.masked.load49.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %138, i32 4, <8 x i1> %45, <8 x float> undef), !tbaa !12, !alias.scope !27 - %139 = fsub <8 x float> %136, %wide.masked.load49.2 - %140 = fpext <8 x float> %139 to <8 x double> - %141 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %140, <8 x double> , <8 x double> %124) - %142 = fptrunc <8 x double> %141 to <8 x float> - %143 = bitcast float* %122 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %142, <8 x float>* %143, i32 4, <8 x i1> %45), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !28 - %144 = add i32 %mul.i, %50 - %145 = sext i32 %144 to i64 - %146 = getelementptr inbounds float, float* %2, i64 %145 - %147 = bitcast float* %146 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %147, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %148 = fpext <8 x float> %wide.masked.load.3 to <8 x double> - %149 = add i32 %144, 1 - %150 = sext i32 %149 to i64 - %151 = getelementptr inbounds float, float* %0, i64 %150 - %152 = bitcast float* %151 to <8 x float>* - %wide.masked.load46.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %152, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !24 - %153 = getelementptr inbounds float, float* %0, i64 %145 - %154 = bitcast float* %153 to <8 x float>* - %wide.masked.load47.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %154, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !25 - %155 = fsub <8 x float> %wide.masked.load46.3, %wide.masked.load47.3 - %156 = add nsw i32 %mul19.i, %50 - %157 = sext i32 %156 to i64 - %158 = getelementptr inbounds float, float* %1, i64 %157 - %159 = bitcast float* %158 to <8 x float>* - %wide.masked.load48.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %159, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !26 - %160 = fadd <8 x float> %155, %wide.masked.load48.3 - %161 = getelementptr inbounds float, float* %1, i64 %145 - %162 = bitcast float* %161 to <8 x float>* - %wide.masked.load49.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %162, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !27 - %163 = fsub <8 x float> %160, %wide.masked.load49.3 - %164 = fpext <8 x float> %163 to <8 x double> - %165 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %164, <8 x double> , <8 x double> %148) - %166 = fptrunc <8 x double> %165 to <8 x float> - %167 = bitcast float* %146 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %166, <8 x float>* %167, i32 4, <8 x i1> %49), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !28 - br label %pregion_for_end.i - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %174, %if.end.r_exit.i.us ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp5.i.us = icmp sgt i32 %sub4.i, %conv.i.us - br i1 %cmp5.i.us, label %if.then.i.us, label %if.end.r_exit.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %add.i.us = add i32 %mul.i, %conv.i.us - %idxprom.i.us = sext i32 %add.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us - %168 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %conv7.i.us = fpext float %168 to double - %add10.i.us = add i32 %add.i.us, 1 - %idxprom11.i.us = sext i32 %add10.i.us to i64 - %arrayidx12.i.us = getelementptr inbounds float, float* %0, i64 %idxprom11.i.us - %169 = load float, float* %arrayidx12.i.us, align 4, !tbaa !12 - %arrayidx16.i.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us - %170 = load float, float* %arrayidx16.i.us, align 4, !tbaa !12 - %sub17.i.us = fsub float %169, %170 - %add20.i.us = add nsw i32 %mul19.i, %conv.i.us - %idxprom21.i.us = sext i32 %add20.i.us to i64 - %arrayidx22.i.us = getelementptr inbounds float, float* %1, i64 %idxprom21.i.us - %171 = load float, float* %arrayidx22.i.us, align 4, !tbaa !12 - %add23.i.us = fadd float %sub17.i.us, %171 - %arrayidx27.i.us = getelementptr inbounds float, float* %1, i64 %idxprom.i.us - %172 = load float, float* %arrayidx27.i.us, align 4, !tbaa !12 - %sub28.i.us = fsub float %add23.i.us, %172 - %conv29.i.us = fpext float %sub28.i.us to double - %173 = tail call double @llvm.fmuladd.f64(double %conv29.i.us, double 0xBFE6666666666666, double %conv7.i.us) #5 - %conv31.i.us = fptrunc double %173 to float - store float %conv31.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !28 - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us - %174 = add nuw nsw i64 %_local_id_x.0.us, 1 - %exitcond.not = icmp eq i64 %174, 32 - br i1 %exitcond.not, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !31 - -pregion_for_end.i.loopexit: ; preds = %if.end.r_exit.i.us - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %vector.body, %pregion_for_entry.pregion_for_init.i - %175 = add nuw nsw i64 %_local_id_y.0, 1 - %exitcond2.not = icmp eq i64 %175, 8 - br i1 %exitcond2.not, label %fdtd_kernel3.exit, label %pregion_for_entry.pregion_for_init.i, !llvm.loop !34 - -fdtd_kernel3.exit: ; preds = %pregion_for_end.i - ret void -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_fdtd_kernel3_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %sub.i.i = add nsw i32 %20, -1 - %sub4.i.i = add nsw i32 %24, -1 - %25 = trunc i64 %3 to i32 - %26 = mul i32 %24, %25 - %27 = shl i32 %26, 3 - %28 = trunc i64 %2 to i32 - %29 = shl i32 %28, 5 - %30 = add i32 %27, %29 - %31 = zext i32 %24 to i64 - %32 = or i32 %30, 1 - %33 = shl i32 %25, 3 - %34 = or i32 %33, 1 - %35 = mul i32 %24, %34 - %36 = add i32 %35, %29 - %37 = trunc i64 %3 to i32 - %38 = mul i32 %24, %37 - %39 = shl i32 %38, 3 - %40 = trunc i64 %2 to i32 - %41 = shl i32 %40, 5 - %42 = add i32 %39, %41 - %43 = zext i32 %24 to i64 - %scevgep10 = getelementptr float, float* %16, i64 32 - %scevgep15 = getelementptr float, float* %8, i64 32 - %44 = or i32 %42, 1 - %45 = zext i32 %44 to i64 - %scevgep20 = getelementptr float, float* %8, i64 32 - %scevgep25 = getelementptr float, float* %12, i64 32 - %46 = shl i32 %37, 3 - %47 = or i32 %46, 1 - %48 = mul i32 %24, %47 - %49 = add i32 %48, %41 - %scevgep30 = getelementptr float, float* %12, i64 32 - %bound0 = icmp ult float* %16, %scevgep15 - %bound1 = icmp ult float* %8, %scevgep10 - %found.conflict = and i1 %bound0, %bound1 - %bound036 = icmp ult float* %16, %scevgep25 - %bound137 = icmp ult float* %12, %scevgep10 - %found.conflict38 = and i1 %bound036, %bound137 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert44 = insertelement <8 x i32> undef, i32 %sub4.i.i, i32 0 - %broadcast.splat45 = shufflevector <8 x i32> %broadcast.splatinsert44, <8 x i32> undef, <8 x i32> zeroinitializer - %50 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %51 = or <8 x i32> %50, - %52 = icmp sgt <8 x i32> %broadcast.splat45, %51 - %53 = extractelement <8 x i32> %51, i32 0 - %54 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %55 = or <8 x i32> %54, - %56 = icmp sgt <8 x i32> %broadcast.splat45, %55 - %57 = extractelement <8 x i32> %55, i32 0 - %58 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %59 = or <8 x i32> %58, - %60 = icmp sgt <8 x i32> %broadcast.splat45, %59 - %61 = extractelement <8 x i32> %59, i32 0 - %62 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %63 = or <8 x i32> %62, - %64 = icmp sgt <8 x i32> %broadcast.splat45, %63 - %65 = extractelement <8 x i32> %63, i32 0 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %190, %pregion_for_end.i.i ] - %66 = mul i64 %_local_id_y.i.0, %43 - %67 = trunc i64 %66 to i32 - %68 = add i32 %42, %67 - %69 = sext i32 %68 to i64 - %scevgep = getelementptr float, float* %16, i64 %69 - %scevgep11 = getelementptr float, float* %scevgep10, i64 %69 - %70 = add i64 %66, %45 - %sext = shl i64 %70, 32 - %71 = ashr exact i64 %sext, 32 - %scevgep18 = getelementptr float, float* %8, i64 %71 - %scevgep21 = getelementptr float, float* %scevgep20, i64 %71 - %72 = trunc i64 %66 to i32 - %73 = add i32 %49, %72 - %74 = sext i32 %73 to i64 - %scevgep28 = getelementptr float, float* %12, i64 %74 - %scevgep31 = getelementptr float, float* %scevgep30, i64 %74 - %75 = mul i64 %_local_id_y.i.0, %31 - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %sub.i.i, %conv2.i.i - %mul.i.i = mul nsw i32 %24, %conv2.i.i - %add18.i.i = add nsw i32 %conv2.i.i, 1 - %mul19.i.i = mul nsw i32 %add18.i.i, %24 - br i1 %cmp.i.i, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %76 = trunc i64 %75 to i32 - %77 = add i32 %36, %76 - %78 = trunc i64 %75 to i32 - %79 = add i32 %32, %78 - %80 = trunc i64 %75 to i32 - %81 = add i32 %30, %80 - %82 = icmp sgt i32 %81, 2147483616 - %83 = icmp sgt i32 %79, 2147483616 - %84 = or i1 %82, %83 - %85 = icmp sgt i32 %77, 2147483616 - %86 = or i1 %84, %85 - br i1 %86, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %bound033 = icmp ult float* %scevgep, %scevgep21 - %bound134 = icmp ult float* %scevgep18, %scevgep11 - %found.conflict35 = and i1 %bound033, %bound134 - %conflict.rdx = or i1 %found.conflict, %found.conflict35 - %conflict.rdx39 = or i1 %conflict.rdx, %found.conflict38 - %bound040 = icmp ult float* %scevgep, %scevgep31 - %bound141 = icmp ult float* %scevgep28, %scevgep11 - %found.conflict42 = and i1 %bound040, %bound141 - %conflict.rdx43 = or i1 %conflict.rdx39, %found.conflict42 - br i1 %conflict.rdx43, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.body - -vector.body: ; preds = %vector.memcheck - %87 = add i32 %mul.i.i, %53 - %88 = sext i32 %87 to i64 - %89 = getelementptr inbounds float, float* %16, i64 %88 - %90 = bitcast float* %89 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %90, i32 4, <8 x i1> %52, <8 x float> undef), !tbaa !12, !alias.scope !36, !noalias !39 - %91 = fpext <8 x float> %wide.masked.load to <8 x double> - %92 = add i32 %87, 1 - %93 = sext i32 %92 to i64 - %94 = getelementptr inbounds float, float* %8, i64 %93 - %95 = bitcast float* %94 to <8 x float>* - %wide.masked.load46 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %95, i32 4, <8 x i1> %52, <8 x float> undef), !tbaa !12, !alias.scope !44 - %96 = getelementptr inbounds float, float* %8, i64 %88 - %97 = bitcast float* %96 to <8 x float>* - %wide.masked.load47 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %97, i32 4, <8 x i1> %52, <8 x float> undef), !tbaa !12, !alias.scope !45 - %98 = fsub <8 x float> %wide.masked.load46, %wide.masked.load47 - %99 = add nsw i32 %mul19.i.i, %53 - %100 = sext i32 %99 to i64 - %101 = getelementptr inbounds float, float* %12, i64 %100 - %102 = bitcast float* %101 to <8 x float>* - %wide.masked.load48 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %102, i32 4, <8 x i1> %52, <8 x float> undef), !tbaa !12, !alias.scope !46 - %103 = fadd <8 x float> %98, %wide.masked.load48 - %104 = getelementptr inbounds float, float* %12, i64 %88 - %105 = bitcast float* %104 to <8 x float>* - %wide.masked.load49 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %105, i32 4, <8 x i1> %52, <8 x float> undef), !tbaa !12, !alias.scope !47 - %106 = fsub <8 x float> %103, %wide.masked.load49 - %107 = fpext <8 x float> %106 to <8 x double> - %108 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %107, <8 x double> , <8 x double> %91) - %109 = fptrunc <8 x double> %108 to <8 x float> - %110 = bitcast float* %89 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %109, <8 x float>* %110, i32 4, <8 x i1> %52), !tbaa !12, !alias.scope !36, !noalias !39, !llvm.access.group !28 - %111 = add i32 %mul.i.i, %57 - %112 = sext i32 %111 to i64 - %113 = getelementptr inbounds float, float* %16, i64 %112 - %114 = bitcast float* %113 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %114, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12, !alias.scope !36, !noalias !39 - %115 = fpext <8 x float> %wide.masked.load.1 to <8 x double> - %116 = add i32 %111, 1 - %117 = sext i32 %116 to i64 - %118 = getelementptr inbounds float, float* %8, i64 %117 - %119 = bitcast float* %118 to <8 x float>* - %wide.masked.load46.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %119, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12, !alias.scope !44 - %120 = getelementptr inbounds float, float* %8, i64 %112 - %121 = bitcast float* %120 to <8 x float>* - %wide.masked.load47.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %121, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12, !alias.scope !45 - %122 = fsub <8 x float> %wide.masked.load46.1, %wide.masked.load47.1 - %123 = add nsw i32 %mul19.i.i, %57 - %124 = sext i32 %123 to i64 - %125 = getelementptr inbounds float, float* %12, i64 %124 - %126 = bitcast float* %125 to <8 x float>* - %wide.masked.load48.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %126, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12, !alias.scope !46 - %127 = fadd <8 x float> %122, %wide.masked.load48.1 - %128 = getelementptr inbounds float, float* %12, i64 %112 - %129 = bitcast float* %128 to <8 x float>* - %wide.masked.load49.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %129, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12, !alias.scope !47 - %130 = fsub <8 x float> %127, %wide.masked.load49.1 - %131 = fpext <8 x float> %130 to <8 x double> - %132 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %131, <8 x double> , <8 x double> %115) - %133 = fptrunc <8 x double> %132 to <8 x float> - %134 = bitcast float* %113 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %133, <8 x float>* %134, i32 4, <8 x i1> %56), !tbaa !12, !alias.scope !36, !noalias !39, !llvm.access.group !28 - %135 = add i32 %mul.i.i, %61 - %136 = sext i32 %135 to i64 - %137 = getelementptr inbounds float, float* %16, i64 %136 - %138 = bitcast float* %137 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %138, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !36, !noalias !39 - %139 = fpext <8 x float> %wide.masked.load.2 to <8 x double> - %140 = add i32 %135, 1 - %141 = sext i32 %140 to i64 - %142 = getelementptr inbounds float, float* %8, i64 %141 - %143 = bitcast float* %142 to <8 x float>* - %wide.masked.load46.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %143, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !44 - %144 = getelementptr inbounds float, float* %8, i64 %136 - %145 = bitcast float* %144 to <8 x float>* - %wide.masked.load47.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %145, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !45 - %146 = fsub <8 x float> %wide.masked.load46.2, %wide.masked.load47.2 - %147 = add nsw i32 %mul19.i.i, %61 - %148 = sext i32 %147 to i64 - %149 = getelementptr inbounds float, float* %12, i64 %148 - %150 = bitcast float* %149 to <8 x float>* - %wide.masked.load48.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %150, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !46 - %151 = fadd <8 x float> %146, %wide.masked.load48.2 - %152 = getelementptr inbounds float, float* %12, i64 %136 - %153 = bitcast float* %152 to <8 x float>* - %wide.masked.load49.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %153, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !47 - %154 = fsub <8 x float> %151, %wide.masked.load49.2 - %155 = fpext <8 x float> %154 to <8 x double> - %156 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %155, <8 x double> , <8 x double> %139) - %157 = fptrunc <8 x double> %156 to <8 x float> - %158 = bitcast float* %137 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %157, <8 x float>* %158, i32 4, <8 x i1> %60), !tbaa !12, !alias.scope !36, !noalias !39, !llvm.access.group !28 - %159 = add i32 %mul.i.i, %65 - %160 = sext i32 %159 to i64 - %161 = getelementptr inbounds float, float* %16, i64 %160 - %162 = bitcast float* %161 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %162, i32 4, <8 x i1> %64, <8 x float> undef), !tbaa !12, !alias.scope !36, !noalias !39 - %163 = fpext <8 x float> %wide.masked.load.3 to <8 x double> - %164 = add i32 %159, 1 - %165 = sext i32 %164 to i64 - %166 = getelementptr inbounds float, float* %8, i64 %165 - %167 = bitcast float* %166 to <8 x float>* - %wide.masked.load46.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %167, i32 4, <8 x i1> %64, <8 x float> undef), !tbaa !12, !alias.scope !44 - %168 = getelementptr inbounds float, float* %8, i64 %160 - %169 = bitcast float* %168 to <8 x float>* - %wide.masked.load47.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %169, i32 4, <8 x i1> %64, <8 x float> undef), !tbaa !12, !alias.scope !45 - %170 = fsub <8 x float> %wide.masked.load46.3, %wide.masked.load47.3 - %171 = add nsw i32 %mul19.i.i, %65 - %172 = sext i32 %171 to i64 - %173 = getelementptr inbounds float, float* %12, i64 %172 - %174 = bitcast float* %173 to <8 x float>* - %wide.masked.load48.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %174, i32 4, <8 x i1> %64, <8 x float> undef), !tbaa !12, !alias.scope !46 - %175 = fadd <8 x float> %170, %wide.masked.load48.3 - %176 = getelementptr inbounds float, float* %12, i64 %160 - %177 = bitcast float* %176 to <8 x float>* - %wide.masked.load49.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %177, i32 4, <8 x i1> %64, <8 x float> undef), !tbaa !12, !alias.scope !47 - %178 = fsub <8 x float> %175, %wide.masked.load49.3 - %179 = fpext <8 x float> %178 to <8 x double> - %180 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %179, <8 x double> , <8 x double> %163) - %181 = fptrunc <8 x double> %180 to <8 x float> - %182 = bitcast float* %161 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %181, <8 x float>* %182, i32 4, <8 x i1> %64), !tbaa !12, !alias.scope !36, !noalias !39, !llvm.access.group !28 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %189, %if.end.r_exit.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp5.i.i.us = icmp sgt i32 %sub4.i.i, %conv.i.i.us - br i1 %cmp5.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add.i.i.us = add i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us - %183 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %conv7.i.i.us = fpext float %183 to double - %add10.i.i.us = add i32 %add.i.i.us, 1 - %idxprom11.i.i.us = sext i32 %add10.i.i.us to i64 - %arrayidx12.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom11.i.i.us - %184 = load float, float* %arrayidx12.i.i.us, align 4, !tbaa !12 - %arrayidx16.i.i.us = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us - %185 = load float, float* %arrayidx16.i.i.us, align 4, !tbaa !12 - %sub17.i.i.us = fsub float %184, %185 - %add20.i.i.us = add nsw i32 %mul19.i.i, %conv.i.i.us - %idxprom21.i.i.us = sext i32 %add20.i.i.us to i64 - %arrayidx22.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom21.i.i.us - %186 = load float, float* %arrayidx22.i.i.us, align 4, !tbaa !12 - %add23.i.i.us = fadd float %sub17.i.i.us, %186 - %arrayidx27.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us - %187 = load float, float* %arrayidx27.i.i.us, align 4, !tbaa !12 - %sub28.i.i.us = fsub float %add23.i.i.us, %187 - %conv29.i.i.us = fpext float %sub28.i.i.us to double - %188 = tail call double @llvm.fmuladd.f64(double %conv29.i.i.us, double 0xBFE6666666666666, double %conv7.i.i.us) #5 - %conv31.i.i.us = fptrunc double %188 to float - store float %conv31.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !28 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %189 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %189, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !48 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.r_exit.i.i.us - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.body, %pregion_for_entry.pregion_for_init.i.i - %190 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond2.not = icmp eq i64 %190, 8 - br i1 %exitcond2.not, label %_pocl_kernel_fdtd_kernel3.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !34 - -_pocl_kernel_fdtd_kernel3.exit: ; preds = %pregion_for_end.i.i - ret void -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_fdtd_kernel3_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %sub.i.i = add nsw i32 %17, -1 - %sub4.i.i = add nsw i32 %21, -1 - %22 = trunc i64 %3 to i32 - %23 = mul i32 %21, %22 - %24 = shl i32 %23, 3 - %25 = trunc i64 %2 to i32 - %26 = shl i32 %25, 5 - %27 = add i32 %24, %26 - %28 = zext i32 %21 to i64 - %29 = or i32 %27, 1 - %30 = shl i32 %22, 3 - %31 = or i32 %30, 1 - %32 = mul i32 %21, %31 - %33 = add i32 %32, %26 - %34 = trunc i64 %3 to i32 - %35 = mul i32 %21, %34 - %36 = shl i32 %35, 3 - %37 = trunc i64 %2 to i32 - %38 = shl i32 %37, 5 - %39 = add i32 %36, %38 - %40 = zext i32 %21 to i64 - %scevgep10 = getelementptr float, float* %13, i64 32 - %scevgep15 = getelementptr float, float* %7, i64 32 - %41 = or i32 %39, 1 - %42 = zext i32 %41 to i64 - %scevgep20 = getelementptr float, float* %7, i64 32 - %scevgep25 = getelementptr float, float* %10, i64 32 - %43 = shl i32 %34, 3 - %44 = or i32 %43, 1 - %45 = mul i32 %21, %44 - %46 = add i32 %45, %38 - %scevgep30 = getelementptr float, float* %10, i64 32 - %bound0 = icmp ult float* %13, %scevgep15 - %bound1 = icmp ult float* %7, %scevgep10 - %found.conflict = and i1 %bound0, %bound1 - %bound036 = icmp ult float* %13, %scevgep25 - %bound137 = icmp ult float* %10, %scevgep10 - %found.conflict38 = and i1 %bound036, %bound137 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert44 = insertelement <8 x i32> undef, i32 %sub4.i.i, i32 0 - %broadcast.splat45 = shufflevector <8 x i32> %broadcast.splatinsert44, <8 x i32> undef, <8 x i32> zeroinitializer - %47 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %48 = or <8 x i32> %47, - %49 = icmp sgt <8 x i32> %broadcast.splat45, %48 - %50 = extractelement <8 x i32> %48, i32 0 - %51 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %52 = or <8 x i32> %51, - %53 = icmp sgt <8 x i32> %broadcast.splat45, %52 - %54 = extractelement <8 x i32> %52, i32 0 - %55 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %56 = or <8 x i32> %55, - %57 = icmp sgt <8 x i32> %broadcast.splat45, %56 - %58 = extractelement <8 x i32> %56, i32 0 - %59 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %60 = or <8 x i32> %59, - %61 = icmp sgt <8 x i32> %broadcast.splat45, %60 - %62 = extractelement <8 x i32> %60, i32 0 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %187, %pregion_for_end.i.i ] - %63 = mul i64 %_local_id_y.i.0, %40 - %64 = trunc i64 %63 to i32 - %65 = add i32 %39, %64 - %66 = sext i32 %65 to i64 - %scevgep = getelementptr float, float* %13, i64 %66 - %scevgep11 = getelementptr float, float* %scevgep10, i64 %66 - %67 = add i64 %63, %42 - %sext = shl i64 %67, 32 - %68 = ashr exact i64 %sext, 32 - %scevgep18 = getelementptr float, float* %7, i64 %68 - %scevgep21 = getelementptr float, float* %scevgep20, i64 %68 - %69 = trunc i64 %63 to i32 - %70 = add i32 %46, %69 - %71 = sext i32 %70 to i64 - %scevgep28 = getelementptr float, float* %10, i64 %71 - %scevgep31 = getelementptr float, float* %scevgep30, i64 %71 - %72 = mul i64 %_local_id_y.i.0, %28 - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %sub.i.i, %conv2.i.i - %mul.i.i = mul nsw i32 %21, %conv2.i.i - %add18.i.i = add nsw i32 %conv2.i.i, 1 - %mul19.i.i = mul nsw i32 %add18.i.i, %21 - br i1 %cmp.i.i, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %73 = trunc i64 %72 to i32 - %74 = add i32 %33, %73 - %75 = trunc i64 %72 to i32 - %76 = add i32 %29, %75 - %77 = trunc i64 %72 to i32 - %78 = add i32 %27, %77 - %79 = icmp sgt i32 %78, 2147483616 - %80 = icmp sgt i32 %76, 2147483616 - %81 = or i1 %79, %80 - %82 = icmp sgt i32 %74, 2147483616 - %83 = or i1 %81, %82 - br i1 %83, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %bound033 = icmp ult float* %scevgep, %scevgep21 - %bound134 = icmp ult float* %scevgep18, %scevgep11 - %found.conflict35 = and i1 %bound033, %bound134 - %conflict.rdx = or i1 %found.conflict, %found.conflict35 - %conflict.rdx39 = or i1 %conflict.rdx, %found.conflict38 - %bound040 = icmp ult float* %scevgep, %scevgep31 - %bound141 = icmp ult float* %scevgep28, %scevgep11 - %found.conflict42 = and i1 %bound040, %bound141 - %conflict.rdx43 = or i1 %conflict.rdx39, %found.conflict42 - br i1 %conflict.rdx43, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.body - -vector.body: ; preds = %vector.memcheck - %84 = add i32 %mul.i.i, %50 - %85 = sext i32 %84 to i64 - %86 = getelementptr inbounds float, float* %13, i64 %85 - %87 = bitcast float* %86 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %87, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !49, !noalias !52 - %88 = fpext <8 x float> %wide.masked.load to <8 x double> - %89 = add i32 %84, 1 - %90 = sext i32 %89 to i64 - %91 = getelementptr inbounds float, float* %7, i64 %90 - %92 = bitcast float* %91 to <8 x float>* - %wide.masked.load46 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %92, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !57 - %93 = getelementptr inbounds float, float* %7, i64 %85 - %94 = bitcast float* %93 to <8 x float>* - %wide.masked.load47 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %94, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !58 - %95 = fsub <8 x float> %wide.masked.load46, %wide.masked.load47 - %96 = add nsw i32 %mul19.i.i, %50 - %97 = sext i32 %96 to i64 - %98 = getelementptr inbounds float, float* %10, i64 %97 - %99 = bitcast float* %98 to <8 x float>* - %wide.masked.load48 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %99, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !59 - %100 = fadd <8 x float> %95, %wide.masked.load48 - %101 = getelementptr inbounds float, float* %10, i64 %85 - %102 = bitcast float* %101 to <8 x float>* - %wide.masked.load49 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %102, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !60 - %103 = fsub <8 x float> %100, %wide.masked.load49 - %104 = fpext <8 x float> %103 to <8 x double> - %105 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %104, <8 x double> , <8 x double> %88) - %106 = fptrunc <8 x double> %105 to <8 x float> - %107 = bitcast float* %86 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %106, <8 x float>* %107, i32 4, <8 x i1> %49), !tbaa !12, !alias.scope !49, !noalias !52, !llvm.access.group !28 - %108 = add i32 %mul.i.i, %54 - %109 = sext i32 %108 to i64 - %110 = getelementptr inbounds float, float* %13, i64 %109 - %111 = bitcast float* %110 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %111, i32 4, <8 x i1> %53, <8 x float> undef), !tbaa !12, !alias.scope !49, !noalias !52 - %112 = fpext <8 x float> %wide.masked.load.1 to <8 x double> - %113 = add i32 %108, 1 - %114 = sext i32 %113 to i64 - %115 = getelementptr inbounds float, float* %7, i64 %114 - %116 = bitcast float* %115 to <8 x float>* - %wide.masked.load46.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %116, i32 4, <8 x i1> %53, <8 x float> undef), !tbaa !12, !alias.scope !57 - %117 = getelementptr inbounds float, float* %7, i64 %109 - %118 = bitcast float* %117 to <8 x float>* - %wide.masked.load47.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %118, i32 4, <8 x i1> %53, <8 x float> undef), !tbaa !12, !alias.scope !58 - %119 = fsub <8 x float> %wide.masked.load46.1, %wide.masked.load47.1 - %120 = add nsw i32 %mul19.i.i, %54 - %121 = sext i32 %120 to i64 - %122 = getelementptr inbounds float, float* %10, i64 %121 - %123 = bitcast float* %122 to <8 x float>* - %wide.masked.load48.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %123, i32 4, <8 x i1> %53, <8 x float> undef), !tbaa !12, !alias.scope !59 - %124 = fadd <8 x float> %119, %wide.masked.load48.1 - %125 = getelementptr inbounds float, float* %10, i64 %109 - %126 = bitcast float* %125 to <8 x float>* - %wide.masked.load49.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %126, i32 4, <8 x i1> %53, <8 x float> undef), !tbaa !12, !alias.scope !60 - %127 = fsub <8 x float> %124, %wide.masked.load49.1 - %128 = fpext <8 x float> %127 to <8 x double> - %129 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %128, <8 x double> , <8 x double> %112) - %130 = fptrunc <8 x double> %129 to <8 x float> - %131 = bitcast float* %110 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %130, <8 x float>* %131, i32 4, <8 x i1> %53), !tbaa !12, !alias.scope !49, !noalias !52, !llvm.access.group !28 - %132 = add i32 %mul.i.i, %58 - %133 = sext i32 %132 to i64 - %134 = getelementptr inbounds float, float* %13, i64 %133 - %135 = bitcast float* %134 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %135, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12, !alias.scope !49, !noalias !52 - %136 = fpext <8 x float> %wide.masked.load.2 to <8 x double> - %137 = add i32 %132, 1 - %138 = sext i32 %137 to i64 - %139 = getelementptr inbounds float, float* %7, i64 %138 - %140 = bitcast float* %139 to <8 x float>* - %wide.masked.load46.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %140, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12, !alias.scope !57 - %141 = getelementptr inbounds float, float* %7, i64 %133 - %142 = bitcast float* %141 to <8 x float>* - %wide.masked.load47.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %142, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12, !alias.scope !58 - %143 = fsub <8 x float> %wide.masked.load46.2, %wide.masked.load47.2 - %144 = add nsw i32 %mul19.i.i, %58 - %145 = sext i32 %144 to i64 - %146 = getelementptr inbounds float, float* %10, i64 %145 - %147 = bitcast float* %146 to <8 x float>* - %wide.masked.load48.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %147, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12, !alias.scope !59 - %148 = fadd <8 x float> %143, %wide.masked.load48.2 - %149 = getelementptr inbounds float, float* %10, i64 %133 - %150 = bitcast float* %149 to <8 x float>* - %wide.masked.load49.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %150, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12, !alias.scope !60 - %151 = fsub <8 x float> %148, %wide.masked.load49.2 - %152 = fpext <8 x float> %151 to <8 x double> - %153 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %152, <8 x double> , <8 x double> %136) - %154 = fptrunc <8 x double> %153 to <8 x float> - %155 = bitcast float* %134 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %154, <8 x float>* %155, i32 4, <8 x i1> %57), !tbaa !12, !alias.scope !49, !noalias !52, !llvm.access.group !28 - %156 = add i32 %mul.i.i, %62 - %157 = sext i32 %156 to i64 - %158 = getelementptr inbounds float, float* %13, i64 %157 - %159 = bitcast float* %158 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %159, i32 4, <8 x i1> %61, <8 x float> undef), !tbaa !12, !alias.scope !49, !noalias !52 - %160 = fpext <8 x float> %wide.masked.load.3 to <8 x double> - %161 = add i32 %156, 1 - %162 = sext i32 %161 to i64 - %163 = getelementptr inbounds float, float* %7, i64 %162 - %164 = bitcast float* %163 to <8 x float>* - %wide.masked.load46.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %164, i32 4, <8 x i1> %61, <8 x float> undef), !tbaa !12, !alias.scope !57 - %165 = getelementptr inbounds float, float* %7, i64 %157 - %166 = bitcast float* %165 to <8 x float>* - %wide.masked.load47.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %166, i32 4, <8 x i1> %61, <8 x float> undef), !tbaa !12, !alias.scope !58 - %167 = fsub <8 x float> %wide.masked.load46.3, %wide.masked.load47.3 - %168 = add nsw i32 %mul19.i.i, %62 - %169 = sext i32 %168 to i64 - %170 = getelementptr inbounds float, float* %10, i64 %169 - %171 = bitcast float* %170 to <8 x float>* - %wide.masked.load48.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %171, i32 4, <8 x i1> %61, <8 x float> undef), !tbaa !12, !alias.scope !59 - %172 = fadd <8 x float> %167, %wide.masked.load48.3 - %173 = getelementptr inbounds float, float* %10, i64 %157 - %174 = bitcast float* %173 to <8 x float>* - %wide.masked.load49.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %174, i32 4, <8 x i1> %61, <8 x float> undef), !tbaa !12, !alias.scope !60 - %175 = fsub <8 x float> %172, %wide.masked.load49.3 - %176 = fpext <8 x float> %175 to <8 x double> - %177 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %176, <8 x double> , <8 x double> %160) - %178 = fptrunc <8 x double> %177 to <8 x float> - %179 = bitcast float* %158 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %178, <8 x float>* %179, i32 4, <8 x i1> %61), !tbaa !12, !alias.scope !49, !noalias !52, !llvm.access.group !28 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %186, %if.end.r_exit.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp5.i.i.us = icmp sgt i32 %sub4.i.i, %conv.i.i.us - br i1 %cmp5.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add.i.i.us = add i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us - %180 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %conv7.i.i.us = fpext float %180 to double - %add10.i.i.us = add i32 %add.i.i.us, 1 - %idxprom11.i.i.us = sext i32 %add10.i.i.us to i64 - %arrayidx12.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom11.i.i.us - %181 = load float, float* %arrayidx12.i.i.us, align 4, !tbaa !12 - %arrayidx16.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us - %182 = load float, float* %arrayidx16.i.i.us, align 4, !tbaa !12 - %sub17.i.i.us = fsub float %181, %182 - %add20.i.i.us = add nsw i32 %mul19.i.i, %conv.i.i.us - %idxprom21.i.i.us = sext i32 %add20.i.i.us to i64 - %arrayidx22.i.i.us = getelementptr inbounds float, float* %10, i64 %idxprom21.i.i.us - %183 = load float, float* %arrayidx22.i.i.us, align 4, !tbaa !12 - %add23.i.i.us = fadd float %sub17.i.i.us, %183 - %arrayidx27.i.i.us = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us - %184 = load float, float* %arrayidx27.i.i.us, align 4, !tbaa !12 - %sub28.i.i.us = fsub float %add23.i.i.us, %184 - %conv29.i.i.us = fpext float %sub28.i.i.us to double - %185 = tail call double @llvm.fmuladd.f64(double %conv29.i.i.us, double 0xBFE6666666666666, double %conv7.i.i.us) #5 - %conv31.i.i.us = fptrunc double %185 to float - store float %conv31.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !28 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %186 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %186, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !61 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.r_exit.i.i.us - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.body, %pregion_for_entry.pregion_for_init.i.i - %187 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond2.not = icmp eq i64 %187, 8 - br i1 %exitcond2.not, label %_pocl_kernel_fdtd_kernel3.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !34 - -_pocl_kernel_fdtd_kernel3.exit: ; preds = %pregion_for_end.i.i - ret void -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <8 x double> @llvm.fmuladd.v8f64(<8 x double>, <8 x double>, <8 x double>) #0 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #4 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nofree nounwind } -attributes #3 = { argmemonly nounwind readonly willreturn } -attributes #4 = { argmemonly nounwind willreturn } -attributes #5 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !""} -!10 = !{!"ex", !"ey", !"hz", !"nx", !"ny"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20, !21, !22, !23} -!20 = distinct !{!20, !18} -!21 = distinct !{!21, !18} -!22 = distinct !{!22, !18} -!23 = distinct !{!23, !18} -!24 = !{!21} -!25 = !{!20} -!26 = !{!23} -!27 = !{!22} -!28 = !{!29, !30} -!29 = distinct !{} -!30 = distinct !{} -!31 = distinct !{!31, !32, !33} -!32 = !{!"llvm.loop.parallel_accesses", !29} -!33 = !{!"llvm.loop.isvectorized", i32 1} -!34 = distinct !{!34, !35} -!35 = !{!"llvm.loop.parallel_accesses", !30} -!36 = !{!37} -!37 = distinct !{!37, !38} -!38 = distinct !{!38, !"LVerDomain"} -!39 = !{!40, !41, !42, !43} -!40 = distinct !{!40, !38} -!41 = distinct !{!41, !38} -!42 = distinct !{!42, !38} -!43 = distinct !{!43, !38} -!44 = !{!41} -!45 = !{!40} -!46 = !{!43} -!47 = !{!42} -!48 = distinct !{!48, !32, !33} -!49 = !{!50} -!50 = distinct !{!50, !51} -!51 = distinct !{!51, !"LVerDomain"} -!52 = !{!53, !54, !55, !56} -!53 = distinct !{!53, !51} -!54 = distinct !{!54, !51} -!55 = distinct !{!55, !51} -!56 = distinct !{!56, !51} -!57 = !{!54} -!58 = !{!53} -!59 = !{!56} -!60 = !{!55} -!61 = distinct !{!61, !32, !33} diff --git a/pocl_irs/gemm.ll b/pocl_irs/gemm.ll deleted file mode 100644 index f0e2c33..0000000 --- a/pocl_irs/gemm.ll +++ /dev/null @@ -1,5082 +0,0 @@ -; ModuleID = './AC/EBHGDELLFPNDIBOOBLJPPAIJPAOBBJILCECBH/gemm/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_gemm(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, float %3, float %4, i32 %5, i32 %6, i32 %7, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %8, i64 %9, i64 %10, i64 %11) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %9, 5 - %mul3.i.i = shl i64 %10, 3 - %cmp740.i = icmp sgt i32 %7, 0 - %13 = sext i32 %6 to i64 - %wide.trip.count.i = zext i32 %7 to i64 - %conv2.i.us = trunc i64 %mul3.i.i to i32 - %cmp.i.us = icmp slt i32 %conv2.i.us, %5 - %mul.i.us = mul nsw i32 %conv2.i.us, %6 - br i1 %cmp740.i, label %pregion_for_entry.pregion_for_init.i.us, label %pregion_for_entry.pregion_for_init.i.preheader - -pregion_for_entry.pregion_for_init.i.preheader: ; preds = %12 - br i1 %cmp.i.us, label %vector.scevcheck, label %pregion_for_end.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.preheader - %14 = trunc i64 %10 to i32 - %15 = mul i32 %14, %6 - %16 = shl i32 %15, 3 - %17 = trunc i64 %9 to i32 - %18 = shl i32 %17, 5 - %19 = add i32 %16, %18 - %20 = icmp sgt i32 %19, 2147483616 - br i1 %20, label %pregion_for_entry.entry.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.us.preheader: ; preds = %vector.scevcheck - br label %pregion_for_entry.entry.i.us - -vector.ph: ; preds = %vector.scevcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert38 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat39 = shufflevector <8 x i32> %broadcast.splatinsert38, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert40 = insertelement <8 x float> undef, float %4, i32 0 - %broadcast.splat41 = shufflevector <8 x float> %broadcast.splatinsert40, <8 x float> undef, <8 x i32> zeroinitializer - %21 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %22 = or <8 x i32> %21, - %23 = icmp sgt <8 x i32> %broadcast.splat39, %22 - %24 = extractelement <8 x i32> %22, i32 0 - %25 = add nsw i32 %mul.i.us, %24 - %26 = sext i32 %25 to i64 - %27 = getelementptr inbounds float, float* %2, i64 %26 - %28 = bitcast float* %27 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %28, i32 4, <8 x i1> %23, <8 x float> undef), !tbaa !12 - %29 = fmul <8 x float> %wide.masked.load, %broadcast.splat41 - %30 = bitcast float* %27 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %29, <8 x float>* %30, i32 4, <8 x i1> %23), !tbaa !12, !llvm.access.group !16 - %31 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %32 = or <8 x i32> %31, - %33 = icmp sgt <8 x i32> %broadcast.splat39, %32 - %34 = extractelement <8 x i32> %32, i32 0 - %35 = add nsw i32 %mul.i.us, %34 - %36 = sext i32 %35 to i64 - %37 = getelementptr inbounds float, float* %2, i64 %36 - %38 = bitcast float* %37 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %38, i32 4, <8 x i1> %33, <8 x float> undef), !tbaa !12 - %39 = fmul <8 x float> %wide.masked.load.1, %broadcast.splat41 - %40 = bitcast float* %37 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %39, <8 x float>* %40, i32 4, <8 x i1> %33), !tbaa !12, !llvm.access.group !16 - %41 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %42 = or <8 x i32> %41, - %43 = icmp sgt <8 x i32> %broadcast.splat39, %42 - %44 = extractelement <8 x i32> %42, i32 0 - %45 = add nsw i32 %mul.i.us, %44 - %46 = sext i32 %45 to i64 - %47 = getelementptr inbounds float, float* %2, i64 %46 - %48 = bitcast float* %47 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %48, i32 4, <8 x i1> %43, <8 x float> undef), !tbaa !12 - %49 = fmul <8 x float> %wide.masked.load.2, %broadcast.splat41 - %50 = bitcast float* %47 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %49, <8 x float>* %50, i32 4, <8 x i1> %43), !tbaa !12, !llvm.access.group !16 - %51 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %52 = or <8 x i32> %51, - %53 = icmp sgt <8 x i32> %broadcast.splat39, %52 - %54 = extractelement <8 x i32> %52, i32 0 - %55 = add nsw i32 %mul.i.us, %54 - %56 = sext i32 %55 to i64 - %57 = getelementptr inbounds float, float* %2, i64 %56 - %58 = bitcast float* %57 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %58, i32 4, <8 x i1> %53, <8 x float> undef), !tbaa !12 - %59 = fmul <8 x float> %wide.masked.load.3, %broadcast.splat41 - %60 = bitcast float* %57 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %59, <8 x float>* %60, i32 4, <8 x i1> %53), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i - -pregion_for_entry.pregion_for_init.i.us: ; preds = %12 - %mul9.i.us = mul nsw i32 %conv2.i.us, %7 - %61 = sext i32 %mul9.i.us to i64 - br i1 %cmp.i.us, label %pregion_for_entry.entry.i.us.us.preheader, label %pregion_for_end.i.us - -pregion_for_entry.entry.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.us - br label %pregion_for_entry.entry.i.us.us - -pregion_for_end.i.us.loopexit: ; preds = %if.end.i.us.us - br label %pregion_for_end.i.us - -pregion_for_end.i.us: ; preds = %pregion_for_end.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.us - %62 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.1 = or i32 %62, 1 - %cmp.i.us.1 = icmp slt i32 %conv2.i.us.1, %5 - %mul.i.us.1 = mul nsw i32 %conv2.i.us.1, %6 - %mul9.i.us.1 = mul nsw i32 %conv2.i.us.1, %7 - %63 = sext i32 %mul9.i.us.1 to i64 - br i1 %cmp.i.us.1, label %pregion_for_entry.entry.i.us.us.1.preheader, label %pregion_for_end.i.us.1 - -pregion_for_entry.entry.i.us.us.1.preheader: ; preds = %pregion_for_end.i.us - br label %pregion_for_entry.entry.i.us.us.1 - -pregion_for_entry.entry.i.us.us: ; preds = %if.end.i.us.us, %pregion_for_entry.entry.i.us.us.preheader - %_local_id_x.0.us.us = phi i64 [ %66, %if.end.i.us.us ], [ 0, %pregion_for_entry.entry.i.us.us.preheader ] - %add1.i.i.us.us = add nuw nsw i64 %_local_id_x.0.us.us, %mul.i.i - %conv.i.us.us = trunc i64 %add1.i.i.us.us to i32 - %cmp4.i.us.us = icmp slt i32 %conv.i.us.us, %6 - br i1 %cmp4.i.us.us, label %if.then.i.us.us, label %if.end.i.us.us - -if.then.i.us.us: ; preds = %pregion_for_entry.entry.i.us.us - %add.i.us.us = add nsw i32 %mul.i.us, %conv.i.us.us - %idxprom.i.us.us = sext i32 %add.i.us.us to i64 - %arrayidx.i.us.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us - %64 = load float, float* %arrayidx.i.us.us, align 4, !tbaa !12 - %mul6.i.us.us = fmul float %64, %4 - store float %mul6.i.us.us, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us = shl i64 %add1.i.i.us.us, 32 - %65 = ashr exact i64 %sext.i.us.us, 32 - br label %for.body.i.us.us - -if.end.i.us.us.loopexit: ; preds = %for.body.i.us.us - br label %if.end.i.us.us - -if.end.i.us.us: ; preds = %if.end.i.us.us.loopexit, %pregion_for_entry.entry.i.us.us - %66 = add nuw nsw i64 %_local_id_x.0.us.us, 1 - %exitcond.not = icmp eq i64 %66, 32 - br i1 %exitcond.not, label %pregion_for_end.i.us.loopexit, label %pregion_for_entry.entry.i.us.us, !llvm.loop !19 - -for.body.i.us.us: ; preds = %for.body.i.us.us, %if.then.i.us.us - %indvars.iv.next.i3.us.us = phi i64 [ %indvars.iv.next.i.us.us, %for.body.i.us.us ], [ 0, %if.then.i.us.us ] - %67 = phi float [ %73, %for.body.i.us.us ], [ %mul6.i.us.us, %if.then.i.us.us ] - %68 = add nsw i64 %indvars.iv.next.i3.us.us, %61 - %arrayidx12.i.us.us = getelementptr inbounds float, float* %0, i64 %68 - %69 = load float, float* %arrayidx12.i.us.us, align 4, !tbaa !12 - %mul13.i.us.us = fmul float %69, %3 - %70 = mul nsw i64 %indvars.iv.next.i3.us.us, %13 - %71 = add nsw i64 %70, %65 - %arrayidx17.i.us.us = getelementptr inbounds float, float* %1, i64 %71 - %72 = load float, float* %arrayidx17.i.us.us, align 4, !tbaa !12 - %73 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us, float %72, float %67) #2 - store float %73, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us = add nuw nsw i64 %indvars.iv.next.i3.us.us, 1 - %exitcond.not.i.us.us = icmp eq i64 %indvars.iv.next.i.us.us, %wide.trip.count.i - br i1 %exitcond.not.i.us.us, label %if.end.i.us.us.loopexit, label %for.body.i.us.us, !llvm.loop !21 - -pregion_for_entry.entry.i.us: ; preds = %if.end.i.us.3237, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %541, %if.end.i.us.3237 ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %6 - br i1 %cmp4.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %add.i.us = add nsw i32 %mul.i.us, %conv.i.us - %idxprom.i.us = sext i32 %add.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us - %74 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %mul6.i.us = fmul float %74, %4 - store float %mul6.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us - -if.end.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us - %75 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1206 = add nuw nsw i64 %75, %mul.i.i - %conv.i.us.1207 = trunc i64 %add1.i.i.us.1206 to i32 - %cmp4.i.us.1208 = icmp slt i32 %conv.i.us.1207, %6 - br i1 %cmp4.i.us.1208, label %if.then.i.us.1214, label %if.end.i.us.1215 - -pregion_for_end.i.loopexit: ; preds = %if.end.i.us.3237 - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.preheader - %76 = trunc i64 %mul3.i.i to i32 - %conv2.i.1 = or i32 %76, 1 - %cmp.i.1 = icmp slt i32 %conv2.i.1, %5 - %mul.i.1 = mul nsw i32 %conv2.i.1, %6 - br i1 %cmp.i.1, label %vector.scevcheck49, label %pregion_for_end.i.1 - -vector.scevcheck49: ; preds = %pregion_for_end.i - %77 = mul i32 %conv2.i.1, %6 - %78 = trunc i64 %9 to i32 - %79 = shl i32 %78, 5 - %80 = add i32 %77, %79 - %81 = icmp sgt i32 %80, 2147483616 - br i1 %81, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.ph50 - -pregion_for_entry.entry.i.us.1.preheader: ; preds = %vector.scevcheck49 - br label %pregion_for_entry.entry.i.us.1 - -vector.ph50: ; preds = %vector.scevcheck49 - %broadcast.splatinsert57 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat58 = shufflevector <8 x i64> %broadcast.splatinsert57, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert59 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat60 = shufflevector <8 x i32> %broadcast.splatinsert59, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert62 = insertelement <8 x float> undef, float %4, i32 0 - %broadcast.splat63 = shufflevector <8 x float> %broadcast.splatinsert62, <8 x float> undef, <8 x i32> zeroinitializer - %82 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %83 = or <8 x i32> %82, - %84 = icmp sgt <8 x i32> %broadcast.splat60, %83 - %85 = extractelement <8 x i32> %83, i32 0 - %86 = add nsw i32 %mul.i.1, %85 - %87 = sext i32 %86 to i64 - %88 = getelementptr inbounds float, float* %2, i64 %87 - %89 = bitcast float* %88 to <8 x float>* - %wide.masked.load61 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %89, i32 4, <8 x i1> %84, <8 x float> undef), !tbaa !12 - %90 = fmul <8 x float> %wide.masked.load61, %broadcast.splat63 - %91 = bitcast float* %88 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %90, <8 x float>* %91, i32 4, <8 x i1> %84), !tbaa !12, !llvm.access.group !16 - %92 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %93 = or <8 x i32> %92, - %94 = icmp sgt <8 x i32> %broadcast.splat60, %93 - %95 = extractelement <8 x i32> %93, i32 0 - %96 = add nsw i32 %mul.i.1, %95 - %97 = sext i32 %96 to i64 - %98 = getelementptr inbounds float, float* %2, i64 %97 - %99 = bitcast float* %98 to <8 x float>* - %wide.masked.load61.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %99, i32 4, <8 x i1> %94, <8 x float> undef), !tbaa !12 - %100 = fmul <8 x float> %wide.masked.load61.1, %broadcast.splat63 - %101 = bitcast float* %98 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %100, <8 x float>* %101, i32 4, <8 x i1> %94), !tbaa !12, !llvm.access.group !16 - %102 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %103 = or <8 x i32> %102, - %104 = icmp sgt <8 x i32> %broadcast.splat60, %103 - %105 = extractelement <8 x i32> %103, i32 0 - %106 = add nsw i32 %mul.i.1, %105 - %107 = sext i32 %106 to i64 - %108 = getelementptr inbounds float, float* %2, i64 %107 - %109 = bitcast float* %108 to <8 x float>* - %wide.masked.load61.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %109, i32 4, <8 x i1> %104, <8 x float> undef), !tbaa !12 - %110 = fmul <8 x float> %wide.masked.load61.2, %broadcast.splat63 - %111 = bitcast float* %108 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %110, <8 x float>* %111, i32 4, <8 x i1> %104), !tbaa !12, !llvm.access.group !16 - %112 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %113 = or <8 x i32> %112, - %114 = icmp sgt <8 x i32> %broadcast.splat60, %113 - %115 = extractelement <8 x i32> %113, i32 0 - %116 = add nsw i32 %mul.i.1, %115 - %117 = sext i32 %116 to i64 - %118 = getelementptr inbounds float, float* %2, i64 %117 - %119 = bitcast float* %118 to <8 x float>* - %wide.masked.load61.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %119, i32 4, <8 x i1> %114, <8 x float> undef), !tbaa !12 - %120 = fmul <8 x float> %wide.masked.load61.3, %broadcast.splat63 - %121 = bitcast float* %118 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %120, <8 x float>* %121, i32 4, <8 x i1> %114), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.1 - -gemm.exit.loopexit: ; preds = %if.end.i.us.us.7 - br label %gemm.exit - -gemm.exit.loopexit238: ; preds = %if.end.i.us.7.3 - br label %gemm.exit - -gemm.exit: ; preds = %pregion_for_end.i.us.6, %vector.ph182, %pregion_for_end.i.6, %gemm.exit.loopexit238, %gemm.exit.loopexit - ret void - -pregion_for_entry.entry.i.us.1: ; preds = %if.end.i.us.1.3, %pregion_for_entry.entry.i.us.1.preheader - %_local_id_x.0.us.1 = phi i64 [ %535, %if.end.i.us.1.3 ], [ 0, %pregion_for_entry.entry.i.us.1.preheader ] - %add1.i.i.us.1 = add nuw nsw i64 %_local_id_x.0.us.1, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp4.i.us.1 = icmp slt i32 %conv.i.us.1, %6 - br i1 %cmp4.i.us.1, label %if.then.i.us.1, label %if.end.i.us.1 - -if.then.i.us.1: ; preds = %pregion_for_entry.entry.i.us.1 - %add.i.us.1 = add nsw i32 %mul.i.1, %conv.i.us.1 - %idxprom.i.us.1 = sext i32 %add.i.us.1 to i64 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1 - %122 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %mul6.i.us.1 = fmul float %122, %4 - store float %mul6.i.us.1, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1 - -if.end.i.us.1: ; preds = %if.then.i.us.1, %pregion_for_entry.entry.i.us.1 - %123 = or i64 %_local_id_x.0.us.1, 1 - %add1.i.i.us.1.1 = add nuw nsw i64 %123, %mul.i.i - %conv.i.us.1.1 = trunc i64 %add1.i.i.us.1.1 to i32 - %cmp4.i.us.1.1 = icmp slt i32 %conv.i.us.1.1, %6 - br i1 %cmp4.i.us.1.1, label %if.then.i.us.1.1, label %if.end.i.us.1.1 - -pregion_for_end.i.1.loopexit: ; preds = %if.end.i.us.1.3 - br label %pregion_for_end.i.1 - -pregion_for_end.i.1: ; preds = %pregion_for_end.i.1.loopexit, %vector.ph50, %pregion_for_end.i - %124 = trunc i64 %mul3.i.i to i32 - %conv2.i.2 = or i32 %124, 2 - %cmp.i.2 = icmp slt i32 %conv2.i.2, %5 - %mul.i.2 = mul nsw i32 %conv2.i.2, %6 - br i1 %cmp.i.2, label %vector.scevcheck71, label %pregion_for_end.i.2 - -vector.scevcheck71: ; preds = %pregion_for_end.i.1 - %125 = mul i32 %conv2.i.2, %6 - %126 = trunc i64 %9 to i32 - %127 = shl i32 %126, 5 - %128 = add i32 %125, %127 - %129 = icmp sgt i32 %128, 2147483616 - br i1 %129, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.ph72 - -pregion_for_entry.entry.i.us.2.preheader: ; preds = %vector.scevcheck71 - br label %pregion_for_entry.entry.i.us.2 - -vector.ph72: ; preds = %vector.scevcheck71 - %broadcast.splatinsert79 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat80 = shufflevector <8 x i64> %broadcast.splatinsert79, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert81 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat82 = shufflevector <8 x i32> %broadcast.splatinsert81, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert84 = insertelement <8 x float> undef, float %4, i32 0 - %broadcast.splat85 = shufflevector <8 x float> %broadcast.splatinsert84, <8 x float> undef, <8 x i32> zeroinitializer - %130 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %131 = or <8 x i32> %130, - %132 = icmp sgt <8 x i32> %broadcast.splat82, %131 - %133 = extractelement <8 x i32> %131, i32 0 - %134 = add nsw i32 %mul.i.2, %133 - %135 = sext i32 %134 to i64 - %136 = getelementptr inbounds float, float* %2, i64 %135 - %137 = bitcast float* %136 to <8 x float>* - %wide.masked.load83 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %137, i32 4, <8 x i1> %132, <8 x float> undef), !tbaa !12 - %138 = fmul <8 x float> %wide.masked.load83, %broadcast.splat85 - %139 = bitcast float* %136 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %138, <8 x float>* %139, i32 4, <8 x i1> %132), !tbaa !12, !llvm.access.group !16 - %140 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %141 = or <8 x i32> %140, - %142 = icmp sgt <8 x i32> %broadcast.splat82, %141 - %143 = extractelement <8 x i32> %141, i32 0 - %144 = add nsw i32 %mul.i.2, %143 - %145 = sext i32 %144 to i64 - %146 = getelementptr inbounds float, float* %2, i64 %145 - %147 = bitcast float* %146 to <8 x float>* - %wide.masked.load83.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %147, i32 4, <8 x i1> %142, <8 x float> undef), !tbaa !12 - %148 = fmul <8 x float> %wide.masked.load83.1, %broadcast.splat85 - %149 = bitcast float* %146 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %148, <8 x float>* %149, i32 4, <8 x i1> %142), !tbaa !12, !llvm.access.group !16 - %150 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %151 = or <8 x i32> %150, - %152 = icmp sgt <8 x i32> %broadcast.splat82, %151 - %153 = extractelement <8 x i32> %151, i32 0 - %154 = add nsw i32 %mul.i.2, %153 - %155 = sext i32 %154 to i64 - %156 = getelementptr inbounds float, float* %2, i64 %155 - %157 = bitcast float* %156 to <8 x float>* - %wide.masked.load83.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %157, i32 4, <8 x i1> %152, <8 x float> undef), !tbaa !12 - %158 = fmul <8 x float> %wide.masked.load83.2, %broadcast.splat85 - %159 = bitcast float* %156 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %158, <8 x float>* %159, i32 4, <8 x i1> %152), !tbaa !12, !llvm.access.group !16 - %160 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %161 = or <8 x i32> %160, - %162 = icmp sgt <8 x i32> %broadcast.splat82, %161 - %163 = extractelement <8 x i32> %161, i32 0 - %164 = add nsw i32 %mul.i.2, %163 - %165 = sext i32 %164 to i64 - %166 = getelementptr inbounds float, float* %2, i64 %165 - %167 = bitcast float* %166 to <8 x float>* - %wide.masked.load83.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %167, i32 4, <8 x i1> %162, <8 x float> undef), !tbaa !12 - %168 = fmul <8 x float> %wide.masked.load83.3, %broadcast.splat85 - %169 = bitcast float* %166 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %168, <8 x float>* %169, i32 4, <8 x i1> %162), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.2 - -pregion_for_entry.entry.i.us.2: ; preds = %if.end.i.us.2.3, %pregion_for_entry.entry.i.us.2.preheader - %_local_id_x.0.us.2 = phi i64 [ %529, %if.end.i.us.2.3 ], [ 0, %pregion_for_entry.entry.i.us.2.preheader ] - %add1.i.i.us.2 = add nuw nsw i64 %_local_id_x.0.us.2, %mul.i.i - %conv.i.us.2 = trunc i64 %add1.i.i.us.2 to i32 - %cmp4.i.us.2 = icmp slt i32 %conv.i.us.2, %6 - br i1 %cmp4.i.us.2, label %if.then.i.us.2, label %if.end.i.us.2 - -if.then.i.us.2: ; preds = %pregion_for_entry.entry.i.us.2 - %add.i.us.2 = add nsw i32 %mul.i.2, %conv.i.us.2 - %idxprom.i.us.2 = sext i32 %add.i.us.2 to i64 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2 - %170 = load float, float* %arrayidx.i.us.2, align 4, !tbaa !12 - %mul6.i.us.2 = fmul float %170, %4 - store float %mul6.i.us.2, float* %arrayidx.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2 - -if.end.i.us.2: ; preds = %if.then.i.us.2, %pregion_for_entry.entry.i.us.2 - %171 = or i64 %_local_id_x.0.us.2, 1 - %add1.i.i.us.2.1 = add nuw nsw i64 %171, %mul.i.i - %conv.i.us.2.1 = trunc i64 %add1.i.i.us.2.1 to i32 - %cmp4.i.us.2.1 = icmp slt i32 %conv.i.us.2.1, %6 - br i1 %cmp4.i.us.2.1, label %if.then.i.us.2.1, label %if.end.i.us.2.1 - -pregion_for_end.i.2.loopexit: ; preds = %if.end.i.us.2.3 - br label %pregion_for_end.i.2 - -pregion_for_end.i.2: ; preds = %pregion_for_end.i.2.loopexit, %vector.ph72, %pregion_for_end.i.1 - %172 = trunc i64 %mul3.i.i to i32 - %conv2.i.3 = or i32 %172, 3 - %cmp.i.3 = icmp slt i32 %conv2.i.3, %5 - %mul.i.3 = mul nsw i32 %conv2.i.3, %6 - br i1 %cmp.i.3, label %vector.scevcheck93, label %pregion_for_end.i.3 - -vector.scevcheck93: ; preds = %pregion_for_end.i.2 - %173 = mul i32 %conv2.i.3, %6 - %174 = trunc i64 %9 to i32 - %175 = shl i32 %174, 5 - %176 = add i32 %173, %175 - %177 = icmp sgt i32 %176, 2147483616 - br i1 %177, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.ph94 - -pregion_for_entry.entry.i.us.3.preheader: ; preds = %vector.scevcheck93 - br label %pregion_for_entry.entry.i.us.3 - -vector.ph94: ; preds = %vector.scevcheck93 - %broadcast.splatinsert101 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat102 = shufflevector <8 x i64> %broadcast.splatinsert101, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert103 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat104 = shufflevector <8 x i32> %broadcast.splatinsert103, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert106 = insertelement <8 x float> undef, float %4, i32 0 - %broadcast.splat107 = shufflevector <8 x float> %broadcast.splatinsert106, <8 x float> undef, <8 x i32> zeroinitializer - %178 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %179 = or <8 x i32> %178, - %180 = icmp sgt <8 x i32> %broadcast.splat104, %179 - %181 = extractelement <8 x i32> %179, i32 0 - %182 = add nsw i32 %mul.i.3, %181 - %183 = sext i32 %182 to i64 - %184 = getelementptr inbounds float, float* %2, i64 %183 - %185 = bitcast float* %184 to <8 x float>* - %wide.masked.load105 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %185, i32 4, <8 x i1> %180, <8 x float> undef), !tbaa !12 - %186 = fmul <8 x float> %wide.masked.load105, %broadcast.splat107 - %187 = bitcast float* %184 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %186, <8 x float>* %187, i32 4, <8 x i1> %180), !tbaa !12, !llvm.access.group !16 - %188 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %189 = or <8 x i32> %188, - %190 = icmp sgt <8 x i32> %broadcast.splat104, %189 - %191 = extractelement <8 x i32> %189, i32 0 - %192 = add nsw i32 %mul.i.3, %191 - %193 = sext i32 %192 to i64 - %194 = getelementptr inbounds float, float* %2, i64 %193 - %195 = bitcast float* %194 to <8 x float>* - %wide.masked.load105.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %195, i32 4, <8 x i1> %190, <8 x float> undef), !tbaa !12 - %196 = fmul <8 x float> %wide.masked.load105.1, %broadcast.splat107 - %197 = bitcast float* %194 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %196, <8 x float>* %197, i32 4, <8 x i1> %190), !tbaa !12, !llvm.access.group !16 - %198 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %199 = or <8 x i32> %198, - %200 = icmp sgt <8 x i32> %broadcast.splat104, %199 - %201 = extractelement <8 x i32> %199, i32 0 - %202 = add nsw i32 %mul.i.3, %201 - %203 = sext i32 %202 to i64 - %204 = getelementptr inbounds float, float* %2, i64 %203 - %205 = bitcast float* %204 to <8 x float>* - %wide.masked.load105.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %205, i32 4, <8 x i1> %200, <8 x float> undef), !tbaa !12 - %206 = fmul <8 x float> %wide.masked.load105.2, %broadcast.splat107 - %207 = bitcast float* %204 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %206, <8 x float>* %207, i32 4, <8 x i1> %200), !tbaa !12, !llvm.access.group !16 - %208 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %209 = or <8 x i32> %208, - %210 = icmp sgt <8 x i32> %broadcast.splat104, %209 - %211 = extractelement <8 x i32> %209, i32 0 - %212 = add nsw i32 %mul.i.3, %211 - %213 = sext i32 %212 to i64 - %214 = getelementptr inbounds float, float* %2, i64 %213 - %215 = bitcast float* %214 to <8 x float>* - %wide.masked.load105.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %215, i32 4, <8 x i1> %210, <8 x float> undef), !tbaa !12 - %216 = fmul <8 x float> %wide.masked.load105.3, %broadcast.splat107 - %217 = bitcast float* %214 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %216, <8 x float>* %217, i32 4, <8 x i1> %210), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.3 - -pregion_for_entry.entry.i.us.3: ; preds = %if.end.i.us.3.3, %pregion_for_entry.entry.i.us.3.preheader - %_local_id_x.0.us.3 = phi i64 [ %523, %if.end.i.us.3.3 ], [ 0, %pregion_for_entry.entry.i.us.3.preheader ] - %add1.i.i.us.3 = add nuw nsw i64 %_local_id_x.0.us.3, %mul.i.i - %conv.i.us.3 = trunc i64 %add1.i.i.us.3 to i32 - %cmp4.i.us.3 = icmp slt i32 %conv.i.us.3, %6 - br i1 %cmp4.i.us.3, label %if.then.i.us.3, label %if.end.i.us.3 - -if.then.i.us.3: ; preds = %pregion_for_entry.entry.i.us.3 - %add.i.us.3 = add nsw i32 %mul.i.3, %conv.i.us.3 - %idxprom.i.us.3 = sext i32 %add.i.us.3 to i64 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3 - %218 = load float, float* %arrayidx.i.us.3, align 4, !tbaa !12 - %mul6.i.us.3 = fmul float %218, %4 - store float %mul6.i.us.3, float* %arrayidx.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3 - -if.end.i.us.3: ; preds = %if.then.i.us.3, %pregion_for_entry.entry.i.us.3 - %219 = or i64 %_local_id_x.0.us.3, 1 - %add1.i.i.us.3.1 = add nuw nsw i64 %219, %mul.i.i - %conv.i.us.3.1 = trunc i64 %add1.i.i.us.3.1 to i32 - %cmp4.i.us.3.1 = icmp slt i32 %conv.i.us.3.1, %6 - br i1 %cmp4.i.us.3.1, label %if.then.i.us.3.1, label %if.end.i.us.3.1 - -pregion_for_end.i.3.loopexit: ; preds = %if.end.i.us.3.3 - br label %pregion_for_end.i.3 - -pregion_for_end.i.3: ; preds = %pregion_for_end.i.3.loopexit, %vector.ph94, %pregion_for_end.i.2 - %220 = trunc i64 %mul3.i.i to i32 - %conv2.i.4 = or i32 %220, 4 - %cmp.i.4 = icmp slt i32 %conv2.i.4, %5 - %mul.i.4 = mul nsw i32 %conv2.i.4, %6 - br i1 %cmp.i.4, label %vector.scevcheck115, label %pregion_for_end.i.4 - -vector.scevcheck115: ; preds = %pregion_for_end.i.3 - %221 = mul i32 %conv2.i.4, %6 - %222 = trunc i64 %9 to i32 - %223 = shl i32 %222, 5 - %224 = add i32 %221, %223 - %225 = icmp sgt i32 %224, 2147483616 - br i1 %225, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.ph116 - -pregion_for_entry.entry.i.us.4.preheader: ; preds = %vector.scevcheck115 - br label %pregion_for_entry.entry.i.us.4 - -vector.ph116: ; preds = %vector.scevcheck115 - %broadcast.splatinsert123 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat124 = shufflevector <8 x i64> %broadcast.splatinsert123, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert125 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat126 = shufflevector <8 x i32> %broadcast.splatinsert125, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert128 = insertelement <8 x float> undef, float %4, i32 0 - %broadcast.splat129 = shufflevector <8 x float> %broadcast.splatinsert128, <8 x float> undef, <8 x i32> zeroinitializer - %226 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %227 = or <8 x i32> %226, - %228 = icmp sgt <8 x i32> %broadcast.splat126, %227 - %229 = extractelement <8 x i32> %227, i32 0 - %230 = add nsw i32 %mul.i.4, %229 - %231 = sext i32 %230 to i64 - %232 = getelementptr inbounds float, float* %2, i64 %231 - %233 = bitcast float* %232 to <8 x float>* - %wide.masked.load127 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %233, i32 4, <8 x i1> %228, <8 x float> undef), !tbaa !12 - %234 = fmul <8 x float> %wide.masked.load127, %broadcast.splat129 - %235 = bitcast float* %232 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %234, <8 x float>* %235, i32 4, <8 x i1> %228), !tbaa !12, !llvm.access.group !16 - %236 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %237 = or <8 x i32> %236, - %238 = icmp sgt <8 x i32> %broadcast.splat126, %237 - %239 = extractelement <8 x i32> %237, i32 0 - %240 = add nsw i32 %mul.i.4, %239 - %241 = sext i32 %240 to i64 - %242 = getelementptr inbounds float, float* %2, i64 %241 - %243 = bitcast float* %242 to <8 x float>* - %wide.masked.load127.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %243, i32 4, <8 x i1> %238, <8 x float> undef), !tbaa !12 - %244 = fmul <8 x float> %wide.masked.load127.1, %broadcast.splat129 - %245 = bitcast float* %242 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %244, <8 x float>* %245, i32 4, <8 x i1> %238), !tbaa !12, !llvm.access.group !16 - %246 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %247 = or <8 x i32> %246, - %248 = icmp sgt <8 x i32> %broadcast.splat126, %247 - %249 = extractelement <8 x i32> %247, i32 0 - %250 = add nsw i32 %mul.i.4, %249 - %251 = sext i32 %250 to i64 - %252 = getelementptr inbounds float, float* %2, i64 %251 - %253 = bitcast float* %252 to <8 x float>* - %wide.masked.load127.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %253, i32 4, <8 x i1> %248, <8 x float> undef), !tbaa !12 - %254 = fmul <8 x float> %wide.masked.load127.2, %broadcast.splat129 - %255 = bitcast float* %252 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %254, <8 x float>* %255, i32 4, <8 x i1> %248), !tbaa !12, !llvm.access.group !16 - %256 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %257 = or <8 x i32> %256, - %258 = icmp sgt <8 x i32> %broadcast.splat126, %257 - %259 = extractelement <8 x i32> %257, i32 0 - %260 = add nsw i32 %mul.i.4, %259 - %261 = sext i32 %260 to i64 - %262 = getelementptr inbounds float, float* %2, i64 %261 - %263 = bitcast float* %262 to <8 x float>* - %wide.masked.load127.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %263, i32 4, <8 x i1> %258, <8 x float> undef), !tbaa !12 - %264 = fmul <8 x float> %wide.masked.load127.3, %broadcast.splat129 - %265 = bitcast float* %262 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %264, <8 x float>* %265, i32 4, <8 x i1> %258), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.4 - -pregion_for_entry.entry.i.us.4: ; preds = %if.end.i.us.4.3, %pregion_for_entry.entry.i.us.4.preheader - %_local_id_x.0.us.4 = phi i64 [ %517, %if.end.i.us.4.3 ], [ 0, %pregion_for_entry.entry.i.us.4.preheader ] - %add1.i.i.us.4 = add nuw nsw i64 %_local_id_x.0.us.4, %mul.i.i - %conv.i.us.4 = trunc i64 %add1.i.i.us.4 to i32 - %cmp4.i.us.4 = icmp slt i32 %conv.i.us.4, %6 - br i1 %cmp4.i.us.4, label %if.then.i.us.4, label %if.end.i.us.4 - -if.then.i.us.4: ; preds = %pregion_for_entry.entry.i.us.4 - %add.i.us.4 = add nsw i32 %mul.i.4, %conv.i.us.4 - %idxprom.i.us.4 = sext i32 %add.i.us.4 to i64 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4 - %266 = load float, float* %arrayidx.i.us.4, align 4, !tbaa !12 - %mul6.i.us.4 = fmul float %266, %4 - store float %mul6.i.us.4, float* %arrayidx.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4 - -if.end.i.us.4: ; preds = %if.then.i.us.4, %pregion_for_entry.entry.i.us.4 - %267 = or i64 %_local_id_x.0.us.4, 1 - %add1.i.i.us.4.1 = add nuw nsw i64 %267, %mul.i.i - %conv.i.us.4.1 = trunc i64 %add1.i.i.us.4.1 to i32 - %cmp4.i.us.4.1 = icmp slt i32 %conv.i.us.4.1, %6 - br i1 %cmp4.i.us.4.1, label %if.then.i.us.4.1, label %if.end.i.us.4.1 - -pregion_for_end.i.4.loopexit: ; preds = %if.end.i.us.4.3 - br label %pregion_for_end.i.4 - -pregion_for_end.i.4: ; preds = %pregion_for_end.i.4.loopexit, %vector.ph116, %pregion_for_end.i.3 - %268 = trunc i64 %mul3.i.i to i32 - %conv2.i.5 = or i32 %268, 5 - %cmp.i.5 = icmp slt i32 %conv2.i.5, %5 - %mul.i.5 = mul nsw i32 %conv2.i.5, %6 - br i1 %cmp.i.5, label %vector.scevcheck137, label %pregion_for_end.i.5 - -vector.scevcheck137: ; preds = %pregion_for_end.i.4 - %269 = mul i32 %conv2.i.5, %6 - %270 = trunc i64 %9 to i32 - %271 = shl i32 %270, 5 - %272 = add i32 %269, %271 - %273 = icmp sgt i32 %272, 2147483616 - br i1 %273, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.ph138 - -pregion_for_entry.entry.i.us.5.preheader: ; preds = %vector.scevcheck137 - br label %pregion_for_entry.entry.i.us.5 - -vector.ph138: ; preds = %vector.scevcheck137 - %broadcast.splatinsert145 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat146 = shufflevector <8 x i64> %broadcast.splatinsert145, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert147 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat148 = shufflevector <8 x i32> %broadcast.splatinsert147, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert150 = insertelement <8 x float> undef, float %4, i32 0 - %broadcast.splat151 = shufflevector <8 x float> %broadcast.splatinsert150, <8 x float> undef, <8 x i32> zeroinitializer - %274 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %275 = or <8 x i32> %274, - %276 = icmp sgt <8 x i32> %broadcast.splat148, %275 - %277 = extractelement <8 x i32> %275, i32 0 - %278 = add nsw i32 %mul.i.5, %277 - %279 = sext i32 %278 to i64 - %280 = getelementptr inbounds float, float* %2, i64 %279 - %281 = bitcast float* %280 to <8 x float>* - %wide.masked.load149 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %281, i32 4, <8 x i1> %276, <8 x float> undef), !tbaa !12 - %282 = fmul <8 x float> %wide.masked.load149, %broadcast.splat151 - %283 = bitcast float* %280 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %282, <8 x float>* %283, i32 4, <8 x i1> %276), !tbaa !12, !llvm.access.group !16 - %284 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %285 = or <8 x i32> %284, - %286 = icmp sgt <8 x i32> %broadcast.splat148, %285 - %287 = extractelement <8 x i32> %285, i32 0 - %288 = add nsw i32 %mul.i.5, %287 - %289 = sext i32 %288 to i64 - %290 = getelementptr inbounds float, float* %2, i64 %289 - %291 = bitcast float* %290 to <8 x float>* - %wide.masked.load149.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %291, i32 4, <8 x i1> %286, <8 x float> undef), !tbaa !12 - %292 = fmul <8 x float> %wide.masked.load149.1, %broadcast.splat151 - %293 = bitcast float* %290 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %292, <8 x float>* %293, i32 4, <8 x i1> %286), !tbaa !12, !llvm.access.group !16 - %294 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %295 = or <8 x i32> %294, - %296 = icmp sgt <8 x i32> %broadcast.splat148, %295 - %297 = extractelement <8 x i32> %295, i32 0 - %298 = add nsw i32 %mul.i.5, %297 - %299 = sext i32 %298 to i64 - %300 = getelementptr inbounds float, float* %2, i64 %299 - %301 = bitcast float* %300 to <8 x float>* - %wide.masked.load149.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %301, i32 4, <8 x i1> %296, <8 x float> undef), !tbaa !12 - %302 = fmul <8 x float> %wide.masked.load149.2, %broadcast.splat151 - %303 = bitcast float* %300 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %302, <8 x float>* %303, i32 4, <8 x i1> %296), !tbaa !12, !llvm.access.group !16 - %304 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %305 = or <8 x i32> %304, - %306 = icmp sgt <8 x i32> %broadcast.splat148, %305 - %307 = extractelement <8 x i32> %305, i32 0 - %308 = add nsw i32 %mul.i.5, %307 - %309 = sext i32 %308 to i64 - %310 = getelementptr inbounds float, float* %2, i64 %309 - %311 = bitcast float* %310 to <8 x float>* - %wide.masked.load149.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %311, i32 4, <8 x i1> %306, <8 x float> undef), !tbaa !12 - %312 = fmul <8 x float> %wide.masked.load149.3, %broadcast.splat151 - %313 = bitcast float* %310 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %312, <8 x float>* %313, i32 4, <8 x i1> %306), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.5 - -pregion_for_entry.entry.i.us.5: ; preds = %if.end.i.us.5.3, %pregion_for_entry.entry.i.us.5.preheader - %_local_id_x.0.us.5 = phi i64 [ %511, %if.end.i.us.5.3 ], [ 0, %pregion_for_entry.entry.i.us.5.preheader ] - %add1.i.i.us.5 = add nuw nsw i64 %_local_id_x.0.us.5, %mul.i.i - %conv.i.us.5 = trunc i64 %add1.i.i.us.5 to i32 - %cmp4.i.us.5 = icmp slt i32 %conv.i.us.5, %6 - br i1 %cmp4.i.us.5, label %if.then.i.us.5, label %if.end.i.us.5 - -if.then.i.us.5: ; preds = %pregion_for_entry.entry.i.us.5 - %add.i.us.5 = add nsw i32 %mul.i.5, %conv.i.us.5 - %idxprom.i.us.5 = sext i32 %add.i.us.5 to i64 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5 - %314 = load float, float* %arrayidx.i.us.5, align 4, !tbaa !12 - %mul6.i.us.5 = fmul float %314, %4 - store float %mul6.i.us.5, float* %arrayidx.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5 - -if.end.i.us.5: ; preds = %if.then.i.us.5, %pregion_for_entry.entry.i.us.5 - %315 = or i64 %_local_id_x.0.us.5, 1 - %add1.i.i.us.5.1 = add nuw nsw i64 %315, %mul.i.i - %conv.i.us.5.1 = trunc i64 %add1.i.i.us.5.1 to i32 - %cmp4.i.us.5.1 = icmp slt i32 %conv.i.us.5.1, %6 - br i1 %cmp4.i.us.5.1, label %if.then.i.us.5.1, label %if.end.i.us.5.1 - -pregion_for_end.i.5.loopexit: ; preds = %if.end.i.us.5.3 - br label %pregion_for_end.i.5 - -pregion_for_end.i.5: ; preds = %pregion_for_end.i.5.loopexit, %vector.ph138, %pregion_for_end.i.4 - %316 = trunc i64 %mul3.i.i to i32 - %conv2.i.6 = or i32 %316, 6 - %cmp.i.6 = icmp slt i32 %conv2.i.6, %5 - %mul.i.6 = mul nsw i32 %conv2.i.6, %6 - br i1 %cmp.i.6, label %vector.scevcheck159, label %pregion_for_end.i.6 - -vector.scevcheck159: ; preds = %pregion_for_end.i.5 - %317 = mul i32 %conv2.i.6, %6 - %318 = trunc i64 %9 to i32 - %319 = shl i32 %318, 5 - %320 = add i32 %317, %319 - %321 = icmp sgt i32 %320, 2147483616 - br i1 %321, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.ph160 - -pregion_for_entry.entry.i.us.6.preheader: ; preds = %vector.scevcheck159 - br label %pregion_for_entry.entry.i.us.6 - -vector.ph160: ; preds = %vector.scevcheck159 - %broadcast.splatinsert167 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat168 = shufflevector <8 x i64> %broadcast.splatinsert167, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert169 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat170 = shufflevector <8 x i32> %broadcast.splatinsert169, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert172 = insertelement <8 x float> undef, float %4, i32 0 - %broadcast.splat173 = shufflevector <8 x float> %broadcast.splatinsert172, <8 x float> undef, <8 x i32> zeroinitializer - %322 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %323 = or <8 x i32> %322, - %324 = icmp sgt <8 x i32> %broadcast.splat170, %323 - %325 = extractelement <8 x i32> %323, i32 0 - %326 = add nsw i32 %mul.i.6, %325 - %327 = sext i32 %326 to i64 - %328 = getelementptr inbounds float, float* %2, i64 %327 - %329 = bitcast float* %328 to <8 x float>* - %wide.masked.load171 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %329, i32 4, <8 x i1> %324, <8 x float> undef), !tbaa !12 - %330 = fmul <8 x float> %wide.masked.load171, %broadcast.splat173 - %331 = bitcast float* %328 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %330, <8 x float>* %331, i32 4, <8 x i1> %324), !tbaa !12, !llvm.access.group !16 - %332 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %333 = or <8 x i32> %332, - %334 = icmp sgt <8 x i32> %broadcast.splat170, %333 - %335 = extractelement <8 x i32> %333, i32 0 - %336 = add nsw i32 %mul.i.6, %335 - %337 = sext i32 %336 to i64 - %338 = getelementptr inbounds float, float* %2, i64 %337 - %339 = bitcast float* %338 to <8 x float>* - %wide.masked.load171.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %339, i32 4, <8 x i1> %334, <8 x float> undef), !tbaa !12 - %340 = fmul <8 x float> %wide.masked.load171.1, %broadcast.splat173 - %341 = bitcast float* %338 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %340, <8 x float>* %341, i32 4, <8 x i1> %334), !tbaa !12, !llvm.access.group !16 - %342 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %343 = or <8 x i32> %342, - %344 = icmp sgt <8 x i32> %broadcast.splat170, %343 - %345 = extractelement <8 x i32> %343, i32 0 - %346 = add nsw i32 %mul.i.6, %345 - %347 = sext i32 %346 to i64 - %348 = getelementptr inbounds float, float* %2, i64 %347 - %349 = bitcast float* %348 to <8 x float>* - %wide.masked.load171.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %349, i32 4, <8 x i1> %344, <8 x float> undef), !tbaa !12 - %350 = fmul <8 x float> %wide.masked.load171.2, %broadcast.splat173 - %351 = bitcast float* %348 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %350, <8 x float>* %351, i32 4, <8 x i1> %344), !tbaa !12, !llvm.access.group !16 - %352 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %353 = or <8 x i32> %352, - %354 = icmp sgt <8 x i32> %broadcast.splat170, %353 - %355 = extractelement <8 x i32> %353, i32 0 - %356 = add nsw i32 %mul.i.6, %355 - %357 = sext i32 %356 to i64 - %358 = getelementptr inbounds float, float* %2, i64 %357 - %359 = bitcast float* %358 to <8 x float>* - %wide.masked.load171.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %359, i32 4, <8 x i1> %354, <8 x float> undef), !tbaa !12 - %360 = fmul <8 x float> %wide.masked.load171.3, %broadcast.splat173 - %361 = bitcast float* %358 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %360, <8 x float>* %361, i32 4, <8 x i1> %354), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.6 - -pregion_for_entry.entry.i.us.6: ; preds = %if.end.i.us.6.3, %pregion_for_entry.entry.i.us.6.preheader - %_local_id_x.0.us.6 = phi i64 [ %505, %if.end.i.us.6.3 ], [ 0, %pregion_for_entry.entry.i.us.6.preheader ] - %add1.i.i.us.6 = add nuw nsw i64 %_local_id_x.0.us.6, %mul.i.i - %conv.i.us.6 = trunc i64 %add1.i.i.us.6 to i32 - %cmp4.i.us.6 = icmp slt i32 %conv.i.us.6, %6 - br i1 %cmp4.i.us.6, label %if.then.i.us.6, label %if.end.i.us.6 - -if.then.i.us.6: ; preds = %pregion_for_entry.entry.i.us.6 - %add.i.us.6 = add nsw i32 %mul.i.6, %conv.i.us.6 - %idxprom.i.us.6 = sext i32 %add.i.us.6 to i64 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6 - %362 = load float, float* %arrayidx.i.us.6, align 4, !tbaa !12 - %mul6.i.us.6 = fmul float %362, %4 - store float %mul6.i.us.6, float* %arrayidx.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6 - -if.end.i.us.6: ; preds = %if.then.i.us.6, %pregion_for_entry.entry.i.us.6 - %363 = or i64 %_local_id_x.0.us.6, 1 - %add1.i.i.us.6.1 = add nuw nsw i64 %363, %mul.i.i - %conv.i.us.6.1 = trunc i64 %add1.i.i.us.6.1 to i32 - %cmp4.i.us.6.1 = icmp slt i32 %conv.i.us.6.1, %6 - br i1 %cmp4.i.us.6.1, label %if.then.i.us.6.1, label %if.end.i.us.6.1 - -pregion_for_end.i.6.loopexit: ; preds = %if.end.i.us.6.3 - br label %pregion_for_end.i.6 - -pregion_for_end.i.6: ; preds = %pregion_for_end.i.6.loopexit, %vector.ph160, %pregion_for_end.i.5 - %364 = trunc i64 %mul3.i.i to i32 - %conv2.i.7 = or i32 %364, 7 - %cmp.i.7 = icmp slt i32 %conv2.i.7, %5 - %mul.i.7 = mul nsw i32 %conv2.i.7, %6 - br i1 %cmp.i.7, label %vector.scevcheck181, label %gemm.exit - -vector.scevcheck181: ; preds = %pregion_for_end.i.6 - %365 = mul i32 %conv2.i.7, %6 - %366 = trunc i64 %9 to i32 - %367 = shl i32 %366, 5 - %368 = add i32 %365, %367 - %369 = icmp sgt i32 %368, 2147483616 - br i1 %369, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.ph182 - -pregion_for_entry.entry.i.us.7.preheader: ; preds = %vector.scevcheck181 - br label %pregion_for_entry.entry.i.us.7 - -vector.ph182: ; preds = %vector.scevcheck181 - %broadcast.splatinsert189 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat190 = shufflevector <8 x i64> %broadcast.splatinsert189, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert191 = insertelement <8 x i32> undef, i32 %6, i32 0 - %broadcast.splat192 = shufflevector <8 x i32> %broadcast.splatinsert191, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert194 = insertelement <8 x float> undef, float %4, i32 0 - %broadcast.splat195 = shufflevector <8 x float> %broadcast.splatinsert194, <8 x float> undef, <8 x i32> zeroinitializer - %370 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %371 = or <8 x i32> %370, - %372 = icmp sgt <8 x i32> %broadcast.splat192, %371 - %373 = extractelement <8 x i32> %371, i32 0 - %374 = add nsw i32 %mul.i.7, %373 - %375 = sext i32 %374 to i64 - %376 = getelementptr inbounds float, float* %2, i64 %375 - %377 = bitcast float* %376 to <8 x float>* - %wide.masked.load193 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %377, i32 4, <8 x i1> %372, <8 x float> undef), !tbaa !12 - %378 = fmul <8 x float> %wide.masked.load193, %broadcast.splat195 - %379 = bitcast float* %376 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %378, <8 x float>* %379, i32 4, <8 x i1> %372), !tbaa !12, !llvm.access.group !16 - %380 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %381 = or <8 x i32> %380, - %382 = icmp sgt <8 x i32> %broadcast.splat192, %381 - %383 = extractelement <8 x i32> %381, i32 0 - %384 = add nsw i32 %mul.i.7, %383 - %385 = sext i32 %384 to i64 - %386 = getelementptr inbounds float, float* %2, i64 %385 - %387 = bitcast float* %386 to <8 x float>* - %wide.masked.load193.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %387, i32 4, <8 x i1> %382, <8 x float> undef), !tbaa !12 - %388 = fmul <8 x float> %wide.masked.load193.1, %broadcast.splat195 - %389 = bitcast float* %386 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %388, <8 x float>* %389, i32 4, <8 x i1> %382), !tbaa !12, !llvm.access.group !16 - %390 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %391 = or <8 x i32> %390, - %392 = icmp sgt <8 x i32> %broadcast.splat192, %391 - %393 = extractelement <8 x i32> %391, i32 0 - %394 = add nsw i32 %mul.i.7, %393 - %395 = sext i32 %394 to i64 - %396 = getelementptr inbounds float, float* %2, i64 %395 - %397 = bitcast float* %396 to <8 x float>* - %wide.masked.load193.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %397, i32 4, <8 x i1> %392, <8 x float> undef), !tbaa !12 - %398 = fmul <8 x float> %wide.masked.load193.2, %broadcast.splat195 - %399 = bitcast float* %396 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %398, <8 x float>* %399, i32 4, <8 x i1> %392), !tbaa !12, !llvm.access.group !16 - %400 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %401 = or <8 x i32> %400, - %402 = icmp sgt <8 x i32> %broadcast.splat192, %401 - %403 = extractelement <8 x i32> %401, i32 0 - %404 = add nsw i32 %mul.i.7, %403 - %405 = sext i32 %404 to i64 - %406 = getelementptr inbounds float, float* %2, i64 %405 - %407 = bitcast float* %406 to <8 x float>* - %wide.masked.load193.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %407, i32 4, <8 x i1> %402, <8 x float> undef), !tbaa !12 - %408 = fmul <8 x float> %wide.masked.load193.3, %broadcast.splat195 - %409 = bitcast float* %406 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %408, <8 x float>* %409, i32 4, <8 x i1> %402), !tbaa !12, !llvm.access.group !16 - br label %gemm.exit - -pregion_for_entry.entry.i.us.7: ; preds = %if.end.i.us.7.3, %pregion_for_entry.entry.i.us.7.preheader - %_local_id_x.0.us.7 = phi i64 [ %499, %if.end.i.us.7.3 ], [ 0, %pregion_for_entry.entry.i.us.7.preheader ] - %add1.i.i.us.7 = add nuw nsw i64 %_local_id_x.0.us.7, %mul.i.i - %conv.i.us.7 = trunc i64 %add1.i.i.us.7 to i32 - %cmp4.i.us.7 = icmp slt i32 %conv.i.us.7, %6 - br i1 %cmp4.i.us.7, label %if.then.i.us.7, label %if.end.i.us.7 - -if.then.i.us.7: ; preds = %pregion_for_entry.entry.i.us.7 - %add.i.us.7 = add nsw i32 %mul.i.7, %conv.i.us.7 - %idxprom.i.us.7 = sext i32 %add.i.us.7 to i64 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7 - %410 = load float, float* %arrayidx.i.us.7, align 4, !tbaa !12 - %mul6.i.us.7 = fmul float %410, %4 - store float %mul6.i.us.7, float* %arrayidx.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7 - -if.end.i.us.7: ; preds = %if.then.i.us.7, %pregion_for_entry.entry.i.us.7 - %411 = or i64 %_local_id_x.0.us.7, 1 - %add1.i.i.us.7.1 = add nuw nsw i64 %411, %mul.i.i - %conv.i.us.7.1 = trunc i64 %add1.i.i.us.7.1 to i32 - %cmp4.i.us.7.1 = icmp slt i32 %conv.i.us.7.1, %6 - br i1 %cmp4.i.us.7.1, label %if.then.i.us.7.1, label %if.end.i.us.7.1 - -pregion_for_entry.entry.i.us.us.1: ; preds = %if.end.i.us.us.1, %pregion_for_entry.entry.i.us.us.1.preheader - %_local_id_x.0.us.us.1 = phi i64 [ %421, %if.end.i.us.us.1 ], [ 0, %pregion_for_entry.entry.i.us.us.1.preheader ] - %add1.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.0.us.us.1, %mul.i.i - %conv.i.us.us.1 = trunc i64 %add1.i.i.us.us.1 to i32 - %cmp4.i.us.us.1 = icmp slt i32 %conv.i.us.us.1, %6 - br i1 %cmp4.i.us.us.1, label %if.then.i.us.us.1, label %if.end.i.us.us.1 - -if.then.i.us.us.1: ; preds = %pregion_for_entry.entry.i.us.us.1 - %add.i.us.us.1 = add nsw i32 %mul.i.us.1, %conv.i.us.us.1 - %idxprom.i.us.us.1 = sext i32 %add.i.us.us.1 to i64 - %arrayidx.i.us.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.1 - %412 = load float, float* %arrayidx.i.us.us.1, align 4, !tbaa !12 - %mul6.i.us.us.1 = fmul float %412, %4 - store float %mul6.i.us.us.1, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.1 = shl i64 %add1.i.i.us.us.1, 32 - %413 = ashr exact i64 %sext.i.us.us.1, 32 - br label %for.body.i.us.us.1 - -for.body.i.us.us.1: ; preds = %for.body.i.us.us.1, %if.then.i.us.us.1 - %indvars.iv.next.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.us.us.1, %for.body.i.us.us.1 ], [ 0, %if.then.i.us.us.1 ] - %414 = phi float [ %420, %for.body.i.us.us.1 ], [ %mul6.i.us.us.1, %if.then.i.us.us.1 ] - %415 = add nsw i64 %indvars.iv.next.i3.us.us.1, %63 - %arrayidx12.i.us.us.1 = getelementptr inbounds float, float* %0, i64 %415 - %416 = load float, float* %arrayidx12.i.us.us.1, align 4, !tbaa !12 - %mul13.i.us.us.1 = fmul float %416, %3 - %417 = mul nsw i64 %indvars.iv.next.i3.us.us.1, %13 - %418 = add nsw i64 %417, %413 - %arrayidx17.i.us.us.1 = getelementptr inbounds float, float* %1, i64 %418 - %419 = load float, float* %arrayidx17.i.us.us.1, align 4, !tbaa !12 - %420 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.1, float %419, float %414) #2 - store float %420, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1, 1 - %exitcond.not.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.us.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.1, label %if.end.i.us.us.1.loopexit, label %for.body.i.us.us.1, !llvm.loop !21 - -if.end.i.us.us.1.loopexit: ; preds = %for.body.i.us.us.1 - br label %if.end.i.us.us.1 - -if.end.i.us.us.1: ; preds = %if.end.i.us.us.1.loopexit, %pregion_for_entry.entry.i.us.us.1 - %421 = add nuw nsw i64 %_local_id_x.0.us.us.1, 1 - %exitcond.not.1 = icmp eq i64 %421, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.us.1.loopexit, label %pregion_for_entry.entry.i.us.us.1, !llvm.loop !19 - -pregion_for_end.i.us.1.loopexit: ; preds = %if.end.i.us.us.1 - br label %pregion_for_end.i.us.1 - -pregion_for_end.i.us.1: ; preds = %pregion_for_end.i.us.1.loopexit, %pregion_for_end.i.us - %422 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.2 = or i32 %422, 2 - %cmp.i.us.2 = icmp slt i32 %conv2.i.us.2, %5 - %mul.i.us.2 = mul nsw i32 %conv2.i.us.2, %6 - %mul9.i.us.2 = mul nsw i32 %conv2.i.us.2, %7 - %423 = sext i32 %mul9.i.us.2 to i64 - br i1 %cmp.i.us.2, label %pregion_for_entry.entry.i.us.us.2.preheader, label %pregion_for_end.i.us.2 - -pregion_for_entry.entry.i.us.us.2.preheader: ; preds = %pregion_for_end.i.us.1 - br label %pregion_for_entry.entry.i.us.us.2 - -pregion_for_entry.entry.i.us.us.2: ; preds = %if.end.i.us.us.2, %pregion_for_entry.entry.i.us.us.2.preheader - %_local_id_x.0.us.us.2 = phi i64 [ %433, %if.end.i.us.us.2 ], [ 0, %pregion_for_entry.entry.i.us.us.2.preheader ] - %add1.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.0.us.us.2, %mul.i.i - %conv.i.us.us.2 = trunc i64 %add1.i.i.us.us.2 to i32 - %cmp4.i.us.us.2 = icmp slt i32 %conv.i.us.us.2, %6 - br i1 %cmp4.i.us.us.2, label %if.then.i.us.us.2, label %if.end.i.us.us.2 - -if.then.i.us.us.2: ; preds = %pregion_for_entry.entry.i.us.us.2 - %add.i.us.us.2 = add nsw i32 %mul.i.us.2, %conv.i.us.us.2 - %idxprom.i.us.us.2 = sext i32 %add.i.us.us.2 to i64 - %arrayidx.i.us.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.2 - %424 = load float, float* %arrayidx.i.us.us.2, align 4, !tbaa !12 - %mul6.i.us.us.2 = fmul float %424, %4 - store float %mul6.i.us.us.2, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.2 = shl i64 %add1.i.i.us.us.2, 32 - %425 = ashr exact i64 %sext.i.us.us.2, 32 - br label %for.body.i.us.us.2 - -for.body.i.us.us.2: ; preds = %for.body.i.us.us.2, %if.then.i.us.us.2 - %indvars.iv.next.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.us.us.2, %for.body.i.us.us.2 ], [ 0, %if.then.i.us.us.2 ] - %426 = phi float [ %432, %for.body.i.us.us.2 ], [ %mul6.i.us.us.2, %if.then.i.us.us.2 ] - %427 = add nsw i64 %indvars.iv.next.i3.us.us.2, %423 - %arrayidx12.i.us.us.2 = getelementptr inbounds float, float* %0, i64 %427 - %428 = load float, float* %arrayidx12.i.us.us.2, align 4, !tbaa !12 - %mul13.i.us.us.2 = fmul float %428, %3 - %429 = mul nsw i64 %indvars.iv.next.i3.us.us.2, %13 - %430 = add nsw i64 %429, %425 - %arrayidx17.i.us.us.2 = getelementptr inbounds float, float* %1, i64 %430 - %431 = load float, float* %arrayidx17.i.us.us.2, align 4, !tbaa !12 - %432 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.2, float %431, float %426) #2 - store float %432, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2, 1 - %exitcond.not.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.us.us.2, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.2, label %if.end.i.us.us.2.loopexit, label %for.body.i.us.us.2, !llvm.loop !21 - -if.end.i.us.us.2.loopexit: ; preds = %for.body.i.us.us.2 - br label %if.end.i.us.us.2 - -if.end.i.us.us.2: ; preds = %if.end.i.us.us.2.loopexit, %pregion_for_entry.entry.i.us.us.2 - %433 = add nuw nsw i64 %_local_id_x.0.us.us.2, 1 - %exitcond.not.2 = icmp eq i64 %433, 32 - br i1 %exitcond.not.2, label %pregion_for_end.i.us.2.loopexit, label %pregion_for_entry.entry.i.us.us.2, !llvm.loop !19 - -pregion_for_end.i.us.2.loopexit: ; preds = %if.end.i.us.us.2 - br label %pregion_for_end.i.us.2 - -pregion_for_end.i.us.2: ; preds = %pregion_for_end.i.us.2.loopexit, %pregion_for_end.i.us.1 - %434 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.3 = or i32 %434, 3 - %cmp.i.us.3 = icmp slt i32 %conv2.i.us.3, %5 - %mul.i.us.3 = mul nsw i32 %conv2.i.us.3, %6 - %mul9.i.us.3 = mul nsw i32 %conv2.i.us.3, %7 - %435 = sext i32 %mul9.i.us.3 to i64 - br i1 %cmp.i.us.3, label %pregion_for_entry.entry.i.us.us.3.preheader, label %pregion_for_end.i.us.3 - -pregion_for_entry.entry.i.us.us.3.preheader: ; preds = %pregion_for_end.i.us.2 - br label %pregion_for_entry.entry.i.us.us.3 - -pregion_for_entry.entry.i.us.us.3: ; preds = %if.end.i.us.us.3, %pregion_for_entry.entry.i.us.us.3.preheader - %_local_id_x.0.us.us.3 = phi i64 [ %445, %if.end.i.us.us.3 ], [ 0, %pregion_for_entry.entry.i.us.us.3.preheader ] - %add1.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.0.us.us.3, %mul.i.i - %conv.i.us.us.3 = trunc i64 %add1.i.i.us.us.3 to i32 - %cmp4.i.us.us.3 = icmp slt i32 %conv.i.us.us.3, %6 - br i1 %cmp4.i.us.us.3, label %if.then.i.us.us.3, label %if.end.i.us.us.3 - -if.then.i.us.us.3: ; preds = %pregion_for_entry.entry.i.us.us.3 - %add.i.us.us.3 = add nsw i32 %mul.i.us.3, %conv.i.us.us.3 - %idxprom.i.us.us.3 = sext i32 %add.i.us.us.3 to i64 - %arrayidx.i.us.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.3 - %436 = load float, float* %arrayidx.i.us.us.3, align 4, !tbaa !12 - %mul6.i.us.us.3 = fmul float %436, %4 - store float %mul6.i.us.us.3, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.3 = shl i64 %add1.i.i.us.us.3, 32 - %437 = ashr exact i64 %sext.i.us.us.3, 32 - br label %for.body.i.us.us.3 - -for.body.i.us.us.3: ; preds = %for.body.i.us.us.3, %if.then.i.us.us.3 - %indvars.iv.next.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.us.us.3, %for.body.i.us.us.3 ], [ 0, %if.then.i.us.us.3 ] - %438 = phi float [ %444, %for.body.i.us.us.3 ], [ %mul6.i.us.us.3, %if.then.i.us.us.3 ] - %439 = add nsw i64 %indvars.iv.next.i3.us.us.3, %435 - %arrayidx12.i.us.us.3 = getelementptr inbounds float, float* %0, i64 %439 - %440 = load float, float* %arrayidx12.i.us.us.3, align 4, !tbaa !12 - %mul13.i.us.us.3 = fmul float %440, %3 - %441 = mul nsw i64 %indvars.iv.next.i3.us.us.3, %13 - %442 = add nsw i64 %441, %437 - %arrayidx17.i.us.us.3 = getelementptr inbounds float, float* %1, i64 %442 - %443 = load float, float* %arrayidx17.i.us.us.3, align 4, !tbaa !12 - %444 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.3, float %443, float %438) #2 - store float %444, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3, 1 - %exitcond.not.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.us.us.3, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.3, label %if.end.i.us.us.3.loopexit, label %for.body.i.us.us.3, !llvm.loop !21 - -if.end.i.us.us.3.loopexit: ; preds = %for.body.i.us.us.3 - br label %if.end.i.us.us.3 - -if.end.i.us.us.3: ; preds = %if.end.i.us.us.3.loopexit, %pregion_for_entry.entry.i.us.us.3 - %445 = add nuw nsw i64 %_local_id_x.0.us.us.3, 1 - %exitcond.not.3 = icmp eq i64 %445, 32 - br i1 %exitcond.not.3, label %pregion_for_end.i.us.3.loopexit, label %pregion_for_entry.entry.i.us.us.3, !llvm.loop !19 - -pregion_for_end.i.us.3.loopexit: ; preds = %if.end.i.us.us.3 - br label %pregion_for_end.i.us.3 - -pregion_for_end.i.us.3: ; preds = %pregion_for_end.i.us.3.loopexit, %pregion_for_end.i.us.2 - %446 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.4 = or i32 %446, 4 - %cmp.i.us.4 = icmp slt i32 %conv2.i.us.4, %5 - %mul.i.us.4 = mul nsw i32 %conv2.i.us.4, %6 - %mul9.i.us.4 = mul nsw i32 %conv2.i.us.4, %7 - %447 = sext i32 %mul9.i.us.4 to i64 - br i1 %cmp.i.us.4, label %pregion_for_entry.entry.i.us.us.4.preheader, label %pregion_for_end.i.us.4 - -pregion_for_entry.entry.i.us.us.4.preheader: ; preds = %pregion_for_end.i.us.3 - br label %pregion_for_entry.entry.i.us.us.4 - -pregion_for_entry.entry.i.us.us.4: ; preds = %if.end.i.us.us.4, %pregion_for_entry.entry.i.us.us.4.preheader - %_local_id_x.0.us.us.4 = phi i64 [ %457, %if.end.i.us.us.4 ], [ 0, %pregion_for_entry.entry.i.us.us.4.preheader ] - %add1.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.0.us.us.4, %mul.i.i - %conv.i.us.us.4 = trunc i64 %add1.i.i.us.us.4 to i32 - %cmp4.i.us.us.4 = icmp slt i32 %conv.i.us.us.4, %6 - br i1 %cmp4.i.us.us.4, label %if.then.i.us.us.4, label %if.end.i.us.us.4 - -if.then.i.us.us.4: ; preds = %pregion_for_entry.entry.i.us.us.4 - %add.i.us.us.4 = add nsw i32 %mul.i.us.4, %conv.i.us.us.4 - %idxprom.i.us.us.4 = sext i32 %add.i.us.us.4 to i64 - %arrayidx.i.us.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.4 - %448 = load float, float* %arrayidx.i.us.us.4, align 4, !tbaa !12 - %mul6.i.us.us.4 = fmul float %448, %4 - store float %mul6.i.us.us.4, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.4 = shl i64 %add1.i.i.us.us.4, 32 - %449 = ashr exact i64 %sext.i.us.us.4, 32 - br label %for.body.i.us.us.4 - -for.body.i.us.us.4: ; preds = %for.body.i.us.us.4, %if.then.i.us.us.4 - %indvars.iv.next.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.us.us.4, %for.body.i.us.us.4 ], [ 0, %if.then.i.us.us.4 ] - %450 = phi float [ %456, %for.body.i.us.us.4 ], [ %mul6.i.us.us.4, %if.then.i.us.us.4 ] - %451 = add nsw i64 %indvars.iv.next.i3.us.us.4, %447 - %arrayidx12.i.us.us.4 = getelementptr inbounds float, float* %0, i64 %451 - %452 = load float, float* %arrayidx12.i.us.us.4, align 4, !tbaa !12 - %mul13.i.us.us.4 = fmul float %452, %3 - %453 = mul nsw i64 %indvars.iv.next.i3.us.us.4, %13 - %454 = add nsw i64 %453, %449 - %arrayidx17.i.us.us.4 = getelementptr inbounds float, float* %1, i64 %454 - %455 = load float, float* %arrayidx17.i.us.us.4, align 4, !tbaa !12 - %456 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.4, float %455, float %450) #2 - store float %456, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4, 1 - %exitcond.not.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.us.us.4, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.4, label %if.end.i.us.us.4.loopexit, label %for.body.i.us.us.4, !llvm.loop !21 - -if.end.i.us.us.4.loopexit: ; preds = %for.body.i.us.us.4 - br label %if.end.i.us.us.4 - -if.end.i.us.us.4: ; preds = %if.end.i.us.us.4.loopexit, %pregion_for_entry.entry.i.us.us.4 - %457 = add nuw nsw i64 %_local_id_x.0.us.us.4, 1 - %exitcond.not.4 = icmp eq i64 %457, 32 - br i1 %exitcond.not.4, label %pregion_for_end.i.us.4.loopexit, label %pregion_for_entry.entry.i.us.us.4, !llvm.loop !19 - -pregion_for_end.i.us.4.loopexit: ; preds = %if.end.i.us.us.4 - br label %pregion_for_end.i.us.4 - -pregion_for_end.i.us.4: ; preds = %pregion_for_end.i.us.4.loopexit, %pregion_for_end.i.us.3 - %458 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.5 = or i32 %458, 5 - %cmp.i.us.5 = icmp slt i32 %conv2.i.us.5, %5 - %mul.i.us.5 = mul nsw i32 %conv2.i.us.5, %6 - %mul9.i.us.5 = mul nsw i32 %conv2.i.us.5, %7 - %459 = sext i32 %mul9.i.us.5 to i64 - br i1 %cmp.i.us.5, label %pregion_for_entry.entry.i.us.us.5.preheader, label %pregion_for_end.i.us.5 - -pregion_for_entry.entry.i.us.us.5.preheader: ; preds = %pregion_for_end.i.us.4 - br label %pregion_for_entry.entry.i.us.us.5 - -pregion_for_entry.entry.i.us.us.5: ; preds = %if.end.i.us.us.5, %pregion_for_entry.entry.i.us.us.5.preheader - %_local_id_x.0.us.us.5 = phi i64 [ %469, %if.end.i.us.us.5 ], [ 0, %pregion_for_entry.entry.i.us.us.5.preheader ] - %add1.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.0.us.us.5, %mul.i.i - %conv.i.us.us.5 = trunc i64 %add1.i.i.us.us.5 to i32 - %cmp4.i.us.us.5 = icmp slt i32 %conv.i.us.us.5, %6 - br i1 %cmp4.i.us.us.5, label %if.then.i.us.us.5, label %if.end.i.us.us.5 - -if.then.i.us.us.5: ; preds = %pregion_for_entry.entry.i.us.us.5 - %add.i.us.us.5 = add nsw i32 %mul.i.us.5, %conv.i.us.us.5 - %idxprom.i.us.us.5 = sext i32 %add.i.us.us.5 to i64 - %arrayidx.i.us.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.5 - %460 = load float, float* %arrayidx.i.us.us.5, align 4, !tbaa !12 - %mul6.i.us.us.5 = fmul float %460, %4 - store float %mul6.i.us.us.5, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.5 = shl i64 %add1.i.i.us.us.5, 32 - %461 = ashr exact i64 %sext.i.us.us.5, 32 - br label %for.body.i.us.us.5 - -for.body.i.us.us.5: ; preds = %for.body.i.us.us.5, %if.then.i.us.us.5 - %indvars.iv.next.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.us.us.5, %for.body.i.us.us.5 ], [ 0, %if.then.i.us.us.5 ] - %462 = phi float [ %468, %for.body.i.us.us.5 ], [ %mul6.i.us.us.5, %if.then.i.us.us.5 ] - %463 = add nsw i64 %indvars.iv.next.i3.us.us.5, %459 - %arrayidx12.i.us.us.5 = getelementptr inbounds float, float* %0, i64 %463 - %464 = load float, float* %arrayidx12.i.us.us.5, align 4, !tbaa !12 - %mul13.i.us.us.5 = fmul float %464, %3 - %465 = mul nsw i64 %indvars.iv.next.i3.us.us.5, %13 - %466 = add nsw i64 %465, %461 - %arrayidx17.i.us.us.5 = getelementptr inbounds float, float* %1, i64 %466 - %467 = load float, float* %arrayidx17.i.us.us.5, align 4, !tbaa !12 - %468 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.5, float %467, float %462) #2 - store float %468, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5, 1 - %exitcond.not.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.us.us.5, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.5, label %if.end.i.us.us.5.loopexit, label %for.body.i.us.us.5, !llvm.loop !21 - -if.end.i.us.us.5.loopexit: ; preds = %for.body.i.us.us.5 - br label %if.end.i.us.us.5 - -if.end.i.us.us.5: ; preds = %if.end.i.us.us.5.loopexit, %pregion_for_entry.entry.i.us.us.5 - %469 = add nuw nsw i64 %_local_id_x.0.us.us.5, 1 - %exitcond.not.5 = icmp eq i64 %469, 32 - br i1 %exitcond.not.5, label %pregion_for_end.i.us.5.loopexit, label %pregion_for_entry.entry.i.us.us.5, !llvm.loop !19 - -pregion_for_end.i.us.5.loopexit: ; preds = %if.end.i.us.us.5 - br label %pregion_for_end.i.us.5 - -pregion_for_end.i.us.5: ; preds = %pregion_for_end.i.us.5.loopexit, %pregion_for_end.i.us.4 - %470 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.6 = or i32 %470, 6 - %cmp.i.us.6 = icmp slt i32 %conv2.i.us.6, %5 - %mul.i.us.6 = mul nsw i32 %conv2.i.us.6, %6 - %mul9.i.us.6 = mul nsw i32 %conv2.i.us.6, %7 - %471 = sext i32 %mul9.i.us.6 to i64 - br i1 %cmp.i.us.6, label %pregion_for_entry.entry.i.us.us.6.preheader, label %pregion_for_end.i.us.6 - -pregion_for_entry.entry.i.us.us.6.preheader: ; preds = %pregion_for_end.i.us.5 - br label %pregion_for_entry.entry.i.us.us.6 - -pregion_for_entry.entry.i.us.us.6: ; preds = %if.end.i.us.us.6, %pregion_for_entry.entry.i.us.us.6.preheader - %_local_id_x.0.us.us.6 = phi i64 [ %481, %if.end.i.us.us.6 ], [ 0, %pregion_for_entry.entry.i.us.us.6.preheader ] - %add1.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.0.us.us.6, %mul.i.i - %conv.i.us.us.6 = trunc i64 %add1.i.i.us.us.6 to i32 - %cmp4.i.us.us.6 = icmp slt i32 %conv.i.us.us.6, %6 - br i1 %cmp4.i.us.us.6, label %if.then.i.us.us.6, label %if.end.i.us.us.6 - -if.then.i.us.us.6: ; preds = %pregion_for_entry.entry.i.us.us.6 - %add.i.us.us.6 = add nsw i32 %mul.i.us.6, %conv.i.us.us.6 - %idxprom.i.us.us.6 = sext i32 %add.i.us.us.6 to i64 - %arrayidx.i.us.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.6 - %472 = load float, float* %arrayidx.i.us.us.6, align 4, !tbaa !12 - %mul6.i.us.us.6 = fmul float %472, %4 - store float %mul6.i.us.us.6, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.6 = shl i64 %add1.i.i.us.us.6, 32 - %473 = ashr exact i64 %sext.i.us.us.6, 32 - br label %for.body.i.us.us.6 - -for.body.i.us.us.6: ; preds = %for.body.i.us.us.6, %if.then.i.us.us.6 - %indvars.iv.next.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.us.us.6, %for.body.i.us.us.6 ], [ 0, %if.then.i.us.us.6 ] - %474 = phi float [ %480, %for.body.i.us.us.6 ], [ %mul6.i.us.us.6, %if.then.i.us.us.6 ] - %475 = add nsw i64 %indvars.iv.next.i3.us.us.6, %471 - %arrayidx12.i.us.us.6 = getelementptr inbounds float, float* %0, i64 %475 - %476 = load float, float* %arrayidx12.i.us.us.6, align 4, !tbaa !12 - %mul13.i.us.us.6 = fmul float %476, %3 - %477 = mul nsw i64 %indvars.iv.next.i3.us.us.6, %13 - %478 = add nsw i64 %477, %473 - %arrayidx17.i.us.us.6 = getelementptr inbounds float, float* %1, i64 %478 - %479 = load float, float* %arrayidx17.i.us.us.6, align 4, !tbaa !12 - %480 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.6, float %479, float %474) #2 - store float %480, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6, 1 - %exitcond.not.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.us.us.6, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.6, label %if.end.i.us.us.6.loopexit, label %for.body.i.us.us.6, !llvm.loop !21 - -if.end.i.us.us.6.loopexit: ; preds = %for.body.i.us.us.6 - br label %if.end.i.us.us.6 - -if.end.i.us.us.6: ; preds = %if.end.i.us.us.6.loopexit, %pregion_for_entry.entry.i.us.us.6 - %481 = add nuw nsw i64 %_local_id_x.0.us.us.6, 1 - %exitcond.not.6 = icmp eq i64 %481, 32 - br i1 %exitcond.not.6, label %pregion_for_end.i.us.6.loopexit, label %pregion_for_entry.entry.i.us.us.6, !llvm.loop !19 - -pregion_for_end.i.us.6.loopexit: ; preds = %if.end.i.us.us.6 - br label %pregion_for_end.i.us.6 - -pregion_for_end.i.us.6: ; preds = %pregion_for_end.i.us.6.loopexit, %pregion_for_end.i.us.5 - %482 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.7 = or i32 %482, 7 - %cmp.i.us.7 = icmp slt i32 %conv2.i.us.7, %5 - %mul.i.us.7 = mul nsw i32 %conv2.i.us.7, %6 - %mul9.i.us.7 = mul nsw i32 %conv2.i.us.7, %7 - %483 = sext i32 %mul9.i.us.7 to i64 - br i1 %cmp.i.us.7, label %pregion_for_entry.entry.i.us.us.7.preheader, label %gemm.exit - -pregion_for_entry.entry.i.us.us.7.preheader: ; preds = %pregion_for_end.i.us.6 - br label %pregion_for_entry.entry.i.us.us.7 - -pregion_for_entry.entry.i.us.us.7: ; preds = %if.end.i.us.us.7, %pregion_for_entry.entry.i.us.us.7.preheader - %_local_id_x.0.us.us.7 = phi i64 [ %493, %if.end.i.us.us.7 ], [ 0, %pregion_for_entry.entry.i.us.us.7.preheader ] - %add1.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.0.us.us.7, %mul.i.i - %conv.i.us.us.7 = trunc i64 %add1.i.i.us.us.7 to i32 - %cmp4.i.us.us.7 = icmp slt i32 %conv.i.us.us.7, %6 - br i1 %cmp4.i.us.us.7, label %if.then.i.us.us.7, label %if.end.i.us.us.7 - -if.then.i.us.us.7: ; preds = %pregion_for_entry.entry.i.us.us.7 - %add.i.us.us.7 = add nsw i32 %mul.i.us.7, %conv.i.us.us.7 - %idxprom.i.us.us.7 = sext i32 %add.i.us.us.7 to i64 - %arrayidx.i.us.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.us.7 - %484 = load float, float* %arrayidx.i.us.us.7, align 4, !tbaa !12 - %mul6.i.us.us.7 = fmul float %484, %4 - store float %mul6.i.us.us.7, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.us.us.7 = shl i64 %add1.i.i.us.us.7, 32 - %485 = ashr exact i64 %sext.i.us.us.7, 32 - br label %for.body.i.us.us.7 - -for.body.i.us.us.7: ; preds = %for.body.i.us.us.7, %if.then.i.us.us.7 - %indvars.iv.next.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.us.us.7, %for.body.i.us.us.7 ], [ 0, %if.then.i.us.us.7 ] - %486 = phi float [ %492, %for.body.i.us.us.7 ], [ %mul6.i.us.us.7, %if.then.i.us.us.7 ] - %487 = add nsw i64 %indvars.iv.next.i3.us.us.7, %483 - %arrayidx12.i.us.us.7 = getelementptr inbounds float, float* %0, i64 %487 - %488 = load float, float* %arrayidx12.i.us.us.7, align 4, !tbaa !12 - %mul13.i.us.us.7 = fmul float %488, %3 - %489 = mul nsw i64 %indvars.iv.next.i3.us.us.7, %13 - %490 = add nsw i64 %489, %485 - %arrayidx17.i.us.us.7 = getelementptr inbounds float, float* %1, i64 %490 - %491 = load float, float* %arrayidx17.i.us.us.7, align 4, !tbaa !12 - %492 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.7, float %491, float %486) #2 - store float %492, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7, 1 - %exitcond.not.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.us.us.7, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.7, label %if.end.i.us.us.7.loopexit, label %for.body.i.us.us.7, !llvm.loop !21 - -if.end.i.us.us.7.loopexit: ; preds = %for.body.i.us.us.7 - br label %if.end.i.us.us.7 - -if.end.i.us.us.7: ; preds = %if.end.i.us.us.7.loopexit, %pregion_for_entry.entry.i.us.us.7 - %493 = add nuw nsw i64 %_local_id_x.0.us.us.7, 1 - %exitcond.not.7 = icmp eq i64 %493, 32 - br i1 %exitcond.not.7, label %gemm.exit.loopexit, label %pregion_for_entry.entry.i.us.us.7, !llvm.loop !19 - -if.then.i.us.7.1: ; preds = %if.end.i.us.7 - %add.i.us.7.1 = add nsw i32 %mul.i.7, %conv.i.us.7.1 - %idxprom.i.us.7.1 = sext i32 %add.i.us.7.1 to i64 - %arrayidx.i.us.7.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7.1 - %494 = load float, float* %arrayidx.i.us.7.1, align 4, !tbaa !12 - %mul6.i.us.7.1 = fmul float %494, %4 - store float %mul6.i.us.7.1, float* %arrayidx.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7.1 - -if.end.i.us.7.1: ; preds = %if.then.i.us.7.1, %if.end.i.us.7 - %495 = or i64 %_local_id_x.0.us.7, 2 - %add1.i.i.us.7.2 = add nuw nsw i64 %495, %mul.i.i - %conv.i.us.7.2 = trunc i64 %add1.i.i.us.7.2 to i32 - %cmp4.i.us.7.2 = icmp slt i32 %conv.i.us.7.2, %6 - br i1 %cmp4.i.us.7.2, label %if.then.i.us.7.2, label %if.end.i.us.7.2 - -if.then.i.us.7.2: ; preds = %if.end.i.us.7.1 - %add.i.us.7.2 = add nsw i32 %mul.i.7, %conv.i.us.7.2 - %idxprom.i.us.7.2 = sext i32 %add.i.us.7.2 to i64 - %arrayidx.i.us.7.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7.2 - %496 = load float, float* %arrayidx.i.us.7.2, align 4, !tbaa !12 - %mul6.i.us.7.2 = fmul float %496, %4 - store float %mul6.i.us.7.2, float* %arrayidx.i.us.7.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7.2 - -if.end.i.us.7.2: ; preds = %if.then.i.us.7.2, %if.end.i.us.7.1 - %497 = or i64 %_local_id_x.0.us.7, 3 - %add1.i.i.us.7.3 = add nuw nsw i64 %497, %mul.i.i - %conv.i.us.7.3 = trunc i64 %add1.i.i.us.7.3 to i32 - %cmp4.i.us.7.3 = icmp slt i32 %conv.i.us.7.3, %6 - br i1 %cmp4.i.us.7.3, label %if.then.i.us.7.3, label %if.end.i.us.7.3 - -if.then.i.us.7.3: ; preds = %if.end.i.us.7.2 - %add.i.us.7.3 = add nsw i32 %mul.i.7, %conv.i.us.7.3 - %idxprom.i.us.7.3 = sext i32 %add.i.us.7.3 to i64 - %arrayidx.i.us.7.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.7.3 - %498 = load float, float* %arrayidx.i.us.7.3, align 4, !tbaa !12 - %mul6.i.us.7.3 = fmul float %498, %4 - store float %mul6.i.us.7.3, float* %arrayidx.i.us.7.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7.3 - -if.end.i.us.7.3: ; preds = %if.then.i.us.7.3, %if.end.i.us.7.2 - %499 = add nuw nsw i64 %_local_id_x.0.us.7, 4 - %exitcond34.7.not.3 = icmp eq i64 %499, 32 - br i1 %exitcond34.7.not.3, label %gemm.exit.loopexit238, label %pregion_for_entry.entry.i.us.7, !llvm.loop !23 - -if.then.i.us.6.1: ; preds = %if.end.i.us.6 - %add.i.us.6.1 = add nsw i32 %mul.i.6, %conv.i.us.6.1 - %idxprom.i.us.6.1 = sext i32 %add.i.us.6.1 to i64 - %arrayidx.i.us.6.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6.1 - %500 = load float, float* %arrayidx.i.us.6.1, align 4, !tbaa !12 - %mul6.i.us.6.1 = fmul float %500, %4 - store float %mul6.i.us.6.1, float* %arrayidx.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6.1 - -if.end.i.us.6.1: ; preds = %if.then.i.us.6.1, %if.end.i.us.6 - %501 = or i64 %_local_id_x.0.us.6, 2 - %add1.i.i.us.6.2 = add nuw nsw i64 %501, %mul.i.i - %conv.i.us.6.2 = trunc i64 %add1.i.i.us.6.2 to i32 - %cmp4.i.us.6.2 = icmp slt i32 %conv.i.us.6.2, %6 - br i1 %cmp4.i.us.6.2, label %if.then.i.us.6.2, label %if.end.i.us.6.2 - -if.then.i.us.6.2: ; preds = %if.end.i.us.6.1 - %add.i.us.6.2 = add nsw i32 %mul.i.6, %conv.i.us.6.2 - %idxprom.i.us.6.2 = sext i32 %add.i.us.6.2 to i64 - %arrayidx.i.us.6.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6.2 - %502 = load float, float* %arrayidx.i.us.6.2, align 4, !tbaa !12 - %mul6.i.us.6.2 = fmul float %502, %4 - store float %mul6.i.us.6.2, float* %arrayidx.i.us.6.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6.2 - -if.end.i.us.6.2: ; preds = %if.then.i.us.6.2, %if.end.i.us.6.1 - %503 = or i64 %_local_id_x.0.us.6, 3 - %add1.i.i.us.6.3 = add nuw nsw i64 %503, %mul.i.i - %conv.i.us.6.3 = trunc i64 %add1.i.i.us.6.3 to i32 - %cmp4.i.us.6.3 = icmp slt i32 %conv.i.us.6.3, %6 - br i1 %cmp4.i.us.6.3, label %if.then.i.us.6.3, label %if.end.i.us.6.3 - -if.then.i.us.6.3: ; preds = %if.end.i.us.6.2 - %add.i.us.6.3 = add nsw i32 %mul.i.6, %conv.i.us.6.3 - %idxprom.i.us.6.3 = sext i32 %add.i.us.6.3 to i64 - %arrayidx.i.us.6.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.6.3 - %504 = load float, float* %arrayidx.i.us.6.3, align 4, !tbaa !12 - %mul6.i.us.6.3 = fmul float %504, %4 - store float %mul6.i.us.6.3, float* %arrayidx.i.us.6.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6.3 - -if.end.i.us.6.3: ; preds = %if.then.i.us.6.3, %if.end.i.us.6.2 - %505 = add nuw nsw i64 %_local_id_x.0.us.6, 4 - %exitcond34.6.not.3 = icmp eq i64 %505, 32 - br i1 %exitcond34.6.not.3, label %pregion_for_end.i.6.loopexit, label %pregion_for_entry.entry.i.us.6, !llvm.loop !25 - -if.then.i.us.5.1: ; preds = %if.end.i.us.5 - %add.i.us.5.1 = add nsw i32 %mul.i.5, %conv.i.us.5.1 - %idxprom.i.us.5.1 = sext i32 %add.i.us.5.1 to i64 - %arrayidx.i.us.5.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5.1 - %506 = load float, float* %arrayidx.i.us.5.1, align 4, !tbaa !12 - %mul6.i.us.5.1 = fmul float %506, %4 - store float %mul6.i.us.5.1, float* %arrayidx.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5.1 - -if.end.i.us.5.1: ; preds = %if.then.i.us.5.1, %if.end.i.us.5 - %507 = or i64 %_local_id_x.0.us.5, 2 - %add1.i.i.us.5.2 = add nuw nsw i64 %507, %mul.i.i - %conv.i.us.5.2 = trunc i64 %add1.i.i.us.5.2 to i32 - %cmp4.i.us.5.2 = icmp slt i32 %conv.i.us.5.2, %6 - br i1 %cmp4.i.us.5.2, label %if.then.i.us.5.2, label %if.end.i.us.5.2 - -if.then.i.us.5.2: ; preds = %if.end.i.us.5.1 - %add.i.us.5.2 = add nsw i32 %mul.i.5, %conv.i.us.5.2 - %idxprom.i.us.5.2 = sext i32 %add.i.us.5.2 to i64 - %arrayidx.i.us.5.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5.2 - %508 = load float, float* %arrayidx.i.us.5.2, align 4, !tbaa !12 - %mul6.i.us.5.2 = fmul float %508, %4 - store float %mul6.i.us.5.2, float* %arrayidx.i.us.5.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5.2 - -if.end.i.us.5.2: ; preds = %if.then.i.us.5.2, %if.end.i.us.5.1 - %509 = or i64 %_local_id_x.0.us.5, 3 - %add1.i.i.us.5.3 = add nuw nsw i64 %509, %mul.i.i - %conv.i.us.5.3 = trunc i64 %add1.i.i.us.5.3 to i32 - %cmp4.i.us.5.3 = icmp slt i32 %conv.i.us.5.3, %6 - br i1 %cmp4.i.us.5.3, label %if.then.i.us.5.3, label %if.end.i.us.5.3 - -if.then.i.us.5.3: ; preds = %if.end.i.us.5.2 - %add.i.us.5.3 = add nsw i32 %mul.i.5, %conv.i.us.5.3 - %idxprom.i.us.5.3 = sext i32 %add.i.us.5.3 to i64 - %arrayidx.i.us.5.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.5.3 - %510 = load float, float* %arrayidx.i.us.5.3, align 4, !tbaa !12 - %mul6.i.us.5.3 = fmul float %510, %4 - store float %mul6.i.us.5.3, float* %arrayidx.i.us.5.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5.3 - -if.end.i.us.5.3: ; preds = %if.then.i.us.5.3, %if.end.i.us.5.2 - %511 = add nuw nsw i64 %_local_id_x.0.us.5, 4 - %exitcond34.5.not.3 = icmp eq i64 %511, 32 - br i1 %exitcond34.5.not.3, label %pregion_for_end.i.5.loopexit, label %pregion_for_entry.entry.i.us.5, !llvm.loop !26 - -if.then.i.us.4.1: ; preds = %if.end.i.us.4 - %add.i.us.4.1 = add nsw i32 %mul.i.4, %conv.i.us.4.1 - %idxprom.i.us.4.1 = sext i32 %add.i.us.4.1 to i64 - %arrayidx.i.us.4.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4.1 - %512 = load float, float* %arrayidx.i.us.4.1, align 4, !tbaa !12 - %mul6.i.us.4.1 = fmul float %512, %4 - store float %mul6.i.us.4.1, float* %arrayidx.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4.1 - -if.end.i.us.4.1: ; preds = %if.then.i.us.4.1, %if.end.i.us.4 - %513 = or i64 %_local_id_x.0.us.4, 2 - %add1.i.i.us.4.2 = add nuw nsw i64 %513, %mul.i.i - %conv.i.us.4.2 = trunc i64 %add1.i.i.us.4.2 to i32 - %cmp4.i.us.4.2 = icmp slt i32 %conv.i.us.4.2, %6 - br i1 %cmp4.i.us.4.2, label %if.then.i.us.4.2, label %if.end.i.us.4.2 - -if.then.i.us.4.2: ; preds = %if.end.i.us.4.1 - %add.i.us.4.2 = add nsw i32 %mul.i.4, %conv.i.us.4.2 - %idxprom.i.us.4.2 = sext i32 %add.i.us.4.2 to i64 - %arrayidx.i.us.4.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4.2 - %514 = load float, float* %arrayidx.i.us.4.2, align 4, !tbaa !12 - %mul6.i.us.4.2 = fmul float %514, %4 - store float %mul6.i.us.4.2, float* %arrayidx.i.us.4.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4.2 - -if.end.i.us.4.2: ; preds = %if.then.i.us.4.2, %if.end.i.us.4.1 - %515 = or i64 %_local_id_x.0.us.4, 3 - %add1.i.i.us.4.3 = add nuw nsw i64 %515, %mul.i.i - %conv.i.us.4.3 = trunc i64 %add1.i.i.us.4.3 to i32 - %cmp4.i.us.4.3 = icmp slt i32 %conv.i.us.4.3, %6 - br i1 %cmp4.i.us.4.3, label %if.then.i.us.4.3, label %if.end.i.us.4.3 - -if.then.i.us.4.3: ; preds = %if.end.i.us.4.2 - %add.i.us.4.3 = add nsw i32 %mul.i.4, %conv.i.us.4.3 - %idxprom.i.us.4.3 = sext i32 %add.i.us.4.3 to i64 - %arrayidx.i.us.4.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.4.3 - %516 = load float, float* %arrayidx.i.us.4.3, align 4, !tbaa !12 - %mul6.i.us.4.3 = fmul float %516, %4 - store float %mul6.i.us.4.3, float* %arrayidx.i.us.4.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4.3 - -if.end.i.us.4.3: ; preds = %if.then.i.us.4.3, %if.end.i.us.4.2 - %517 = add nuw nsw i64 %_local_id_x.0.us.4, 4 - %exitcond34.4.not.3 = icmp eq i64 %517, 32 - br i1 %exitcond34.4.not.3, label %pregion_for_end.i.4.loopexit, label %pregion_for_entry.entry.i.us.4, !llvm.loop !27 - -if.then.i.us.3.1: ; preds = %if.end.i.us.3 - %add.i.us.3.1 = add nsw i32 %mul.i.3, %conv.i.us.3.1 - %idxprom.i.us.3.1 = sext i32 %add.i.us.3.1 to i64 - %arrayidx.i.us.3.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3.1 - %518 = load float, float* %arrayidx.i.us.3.1, align 4, !tbaa !12 - %mul6.i.us.3.1 = fmul float %518, %4 - store float %mul6.i.us.3.1, float* %arrayidx.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3.1 - -if.end.i.us.3.1: ; preds = %if.then.i.us.3.1, %if.end.i.us.3 - %519 = or i64 %_local_id_x.0.us.3, 2 - %add1.i.i.us.3.2 = add nuw nsw i64 %519, %mul.i.i - %conv.i.us.3.2 = trunc i64 %add1.i.i.us.3.2 to i32 - %cmp4.i.us.3.2 = icmp slt i32 %conv.i.us.3.2, %6 - br i1 %cmp4.i.us.3.2, label %if.then.i.us.3.2, label %if.end.i.us.3.2 - -if.then.i.us.3.2: ; preds = %if.end.i.us.3.1 - %add.i.us.3.2 = add nsw i32 %mul.i.3, %conv.i.us.3.2 - %idxprom.i.us.3.2 = sext i32 %add.i.us.3.2 to i64 - %arrayidx.i.us.3.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3.2 - %520 = load float, float* %arrayidx.i.us.3.2, align 4, !tbaa !12 - %mul6.i.us.3.2 = fmul float %520, %4 - store float %mul6.i.us.3.2, float* %arrayidx.i.us.3.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3.2 - -if.end.i.us.3.2: ; preds = %if.then.i.us.3.2, %if.end.i.us.3.1 - %521 = or i64 %_local_id_x.0.us.3, 3 - %add1.i.i.us.3.3 = add nuw nsw i64 %521, %mul.i.i - %conv.i.us.3.3 = trunc i64 %add1.i.i.us.3.3 to i32 - %cmp4.i.us.3.3 = icmp slt i32 %conv.i.us.3.3, %6 - br i1 %cmp4.i.us.3.3, label %if.then.i.us.3.3, label %if.end.i.us.3.3 - -if.then.i.us.3.3: ; preds = %if.end.i.us.3.2 - %add.i.us.3.3 = add nsw i32 %mul.i.3, %conv.i.us.3.3 - %idxprom.i.us.3.3 = sext i32 %add.i.us.3.3 to i64 - %arrayidx.i.us.3.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3.3 - %522 = load float, float* %arrayidx.i.us.3.3, align 4, !tbaa !12 - %mul6.i.us.3.3 = fmul float %522, %4 - store float %mul6.i.us.3.3, float* %arrayidx.i.us.3.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3.3 - -if.end.i.us.3.3: ; preds = %if.then.i.us.3.3, %if.end.i.us.3.2 - %523 = add nuw nsw i64 %_local_id_x.0.us.3, 4 - %exitcond34.3.not.3 = icmp eq i64 %523, 32 - br i1 %exitcond34.3.not.3, label %pregion_for_end.i.3.loopexit, label %pregion_for_entry.entry.i.us.3, !llvm.loop !28 - -if.then.i.us.2.1: ; preds = %if.end.i.us.2 - %add.i.us.2.1 = add nsw i32 %mul.i.2, %conv.i.us.2.1 - %idxprom.i.us.2.1 = sext i32 %add.i.us.2.1 to i64 - %arrayidx.i.us.2.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2.1 - %524 = load float, float* %arrayidx.i.us.2.1, align 4, !tbaa !12 - %mul6.i.us.2.1 = fmul float %524, %4 - store float %mul6.i.us.2.1, float* %arrayidx.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2.1 - -if.end.i.us.2.1: ; preds = %if.then.i.us.2.1, %if.end.i.us.2 - %525 = or i64 %_local_id_x.0.us.2, 2 - %add1.i.i.us.2.2 = add nuw nsw i64 %525, %mul.i.i - %conv.i.us.2.2 = trunc i64 %add1.i.i.us.2.2 to i32 - %cmp4.i.us.2.2 = icmp slt i32 %conv.i.us.2.2, %6 - br i1 %cmp4.i.us.2.2, label %if.then.i.us.2.2, label %if.end.i.us.2.2 - -if.then.i.us.2.2: ; preds = %if.end.i.us.2.1 - %add.i.us.2.2 = add nsw i32 %mul.i.2, %conv.i.us.2.2 - %idxprom.i.us.2.2 = sext i32 %add.i.us.2.2 to i64 - %arrayidx.i.us.2.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2.2 - %526 = load float, float* %arrayidx.i.us.2.2, align 4, !tbaa !12 - %mul6.i.us.2.2 = fmul float %526, %4 - store float %mul6.i.us.2.2, float* %arrayidx.i.us.2.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2.2 - -if.end.i.us.2.2: ; preds = %if.then.i.us.2.2, %if.end.i.us.2.1 - %527 = or i64 %_local_id_x.0.us.2, 3 - %add1.i.i.us.2.3 = add nuw nsw i64 %527, %mul.i.i - %conv.i.us.2.3 = trunc i64 %add1.i.i.us.2.3 to i32 - %cmp4.i.us.2.3 = icmp slt i32 %conv.i.us.2.3, %6 - br i1 %cmp4.i.us.2.3, label %if.then.i.us.2.3, label %if.end.i.us.2.3 - -if.then.i.us.2.3: ; preds = %if.end.i.us.2.2 - %add.i.us.2.3 = add nsw i32 %mul.i.2, %conv.i.us.2.3 - %idxprom.i.us.2.3 = sext i32 %add.i.us.2.3 to i64 - %arrayidx.i.us.2.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2.3 - %528 = load float, float* %arrayidx.i.us.2.3, align 4, !tbaa !12 - %mul6.i.us.2.3 = fmul float %528, %4 - store float %mul6.i.us.2.3, float* %arrayidx.i.us.2.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2.3 - -if.end.i.us.2.3: ; preds = %if.then.i.us.2.3, %if.end.i.us.2.2 - %529 = add nuw nsw i64 %_local_id_x.0.us.2, 4 - %exitcond34.2.not.3 = icmp eq i64 %529, 32 - br i1 %exitcond34.2.not.3, label %pregion_for_end.i.2.loopexit, label %pregion_for_entry.entry.i.us.2, !llvm.loop !29 - -if.then.i.us.1.1: ; preds = %if.end.i.us.1 - %add.i.us.1.1 = add nsw i32 %mul.i.1, %conv.i.us.1.1 - %idxprom.i.us.1.1 = sext i32 %add.i.us.1.1 to i64 - %arrayidx.i.us.1.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1.1 - %530 = load float, float* %arrayidx.i.us.1.1, align 4, !tbaa !12 - %mul6.i.us.1.1 = fmul float %530, %4 - store float %mul6.i.us.1.1, float* %arrayidx.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1.1 - -if.end.i.us.1.1: ; preds = %if.then.i.us.1.1, %if.end.i.us.1 - %531 = or i64 %_local_id_x.0.us.1, 2 - %add1.i.i.us.1.2 = add nuw nsw i64 %531, %mul.i.i - %conv.i.us.1.2 = trunc i64 %add1.i.i.us.1.2 to i32 - %cmp4.i.us.1.2 = icmp slt i32 %conv.i.us.1.2, %6 - br i1 %cmp4.i.us.1.2, label %if.then.i.us.1.2, label %if.end.i.us.1.2 - -if.then.i.us.1.2: ; preds = %if.end.i.us.1.1 - %add.i.us.1.2 = add nsw i32 %mul.i.1, %conv.i.us.1.2 - %idxprom.i.us.1.2 = sext i32 %add.i.us.1.2 to i64 - %arrayidx.i.us.1.2 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1.2 - %532 = load float, float* %arrayidx.i.us.1.2, align 4, !tbaa !12 - %mul6.i.us.1.2 = fmul float %532, %4 - store float %mul6.i.us.1.2, float* %arrayidx.i.us.1.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1.2 - -if.end.i.us.1.2: ; preds = %if.then.i.us.1.2, %if.end.i.us.1.1 - %533 = or i64 %_local_id_x.0.us.1, 3 - %add1.i.i.us.1.3 = add nuw nsw i64 %533, %mul.i.i - %conv.i.us.1.3 = trunc i64 %add1.i.i.us.1.3 to i32 - %cmp4.i.us.1.3 = icmp slt i32 %conv.i.us.1.3, %6 - br i1 %cmp4.i.us.1.3, label %if.then.i.us.1.3, label %if.end.i.us.1.3 - -if.then.i.us.1.3: ; preds = %if.end.i.us.1.2 - %add.i.us.1.3 = add nsw i32 %mul.i.1, %conv.i.us.1.3 - %idxprom.i.us.1.3 = sext i32 %add.i.us.1.3 to i64 - %arrayidx.i.us.1.3 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1.3 - %534 = load float, float* %arrayidx.i.us.1.3, align 4, !tbaa !12 - %mul6.i.us.1.3 = fmul float %534, %4 - store float %mul6.i.us.1.3, float* %arrayidx.i.us.1.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1.3 - -if.end.i.us.1.3: ; preds = %if.then.i.us.1.3, %if.end.i.us.1.2 - %535 = add nuw nsw i64 %_local_id_x.0.us.1, 4 - %exitcond34.1.not.3 = icmp eq i64 %535, 32 - br i1 %exitcond34.1.not.3, label %pregion_for_end.i.1.loopexit, label %pregion_for_entry.entry.i.us.1, !llvm.loop !30 - -if.then.i.us.1214: ; preds = %if.end.i.us - %add.i.us.1210 = add nsw i32 %mul.i.us, %conv.i.us.1207 - %idxprom.i.us.1211 = sext i32 %add.i.us.1210 to i64 - %arrayidx.i.us.1212 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.1211 - %536 = load float, float* %arrayidx.i.us.1212, align 4, !tbaa !12 - %mul6.i.us.1213 = fmul float %536, %4 - store float %mul6.i.us.1213, float* %arrayidx.i.us.1212, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1215 - -if.end.i.us.1215: ; preds = %if.then.i.us.1214, %if.end.i.us - %537 = or i64 %_local_id_x.0.us, 2 - %add1.i.i.us.2217 = add nuw nsw i64 %537, %mul.i.i - %conv.i.us.2218 = trunc i64 %add1.i.i.us.2217 to i32 - %cmp4.i.us.2219 = icmp slt i32 %conv.i.us.2218, %6 - br i1 %cmp4.i.us.2219, label %if.then.i.us.2225, label %if.end.i.us.2226 - -if.then.i.us.2225: ; preds = %if.end.i.us.1215 - %add.i.us.2221 = add nsw i32 %mul.i.us, %conv.i.us.2218 - %idxprom.i.us.2222 = sext i32 %add.i.us.2221 to i64 - %arrayidx.i.us.2223 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.2222 - %538 = load float, float* %arrayidx.i.us.2223, align 4, !tbaa !12 - %mul6.i.us.2224 = fmul float %538, %4 - store float %mul6.i.us.2224, float* %arrayidx.i.us.2223, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2226 - -if.end.i.us.2226: ; preds = %if.then.i.us.2225, %if.end.i.us.1215 - %539 = or i64 %_local_id_x.0.us, 3 - %add1.i.i.us.3228 = add nuw nsw i64 %539, %mul.i.i - %conv.i.us.3229 = trunc i64 %add1.i.i.us.3228 to i32 - %cmp4.i.us.3230 = icmp slt i32 %conv.i.us.3229, %6 - br i1 %cmp4.i.us.3230, label %if.then.i.us.3236, label %if.end.i.us.3237 - -if.then.i.us.3236: ; preds = %if.end.i.us.2226 - %add.i.us.3232 = add nsw i32 %mul.i.us, %conv.i.us.3229 - %idxprom.i.us.3233 = sext i32 %add.i.us.3232 to i64 - %arrayidx.i.us.3234 = getelementptr inbounds float, float* %2, i64 %idxprom.i.us.3233 - %540 = load float, float* %arrayidx.i.us.3234, align 4, !tbaa !12 - %mul6.i.us.3235 = fmul float %540, %4 - store float %mul6.i.us.3235, float* %arrayidx.i.us.3234, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3237 - -if.end.i.us.3237: ; preds = %if.then.i.us.3236, %if.end.i.us.2226 - %541 = add nuw nsw i64 %_local_id_x.0.us, 4 - %exitcond34.not.3 = icmp eq i64 %541, 32 - br i1 %exitcond34.not.3, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !31 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_gemm_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to float** - %19 = load float*, float** %18, align 8 - %20 = load float, float* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to float** - %23 = load float*, float** %22, align 8 - %24 = load float, float* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 5 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %29 = getelementptr i8*, i8** %0, i64 6 - %30 = bitcast i8** %29 to i32** - %31 = load i32*, i32** %30, align 8 - %32 = load i32, i32* %31, align 4 - %33 = getelementptr i8*, i8** %0, i64 7 - %34 = bitcast i8** %33 to i32** - %35 = load i32*, i32** %34, align 8 - %36 = load i32, i32* %35, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp740.i.i = icmp sgt i32 %36, 0 - %37 = sext i32 %32 to i64 - %wide.trip.count.i.i = zext i32 %36 to i64 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %28, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %32, %conv2.i.i.us - br i1 %cmp740.i.i, label %pregion_for_entry.pregion_for_init.i.i.us, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - br i1 %cmp.i.i.us, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i.preheader - %38 = trunc i64 %3 to i32 - %39 = mul i32 %32, %38 - %40 = shl i32 %39, 3 - %41 = trunc i64 %2 to i32 - %42 = shl i32 %41, 5 - %43 = add i32 %40, %42 - %44 = icmp sgt i32 %43, 2147483616 - br i1 %44, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.ph: ; preds = %vector.scevcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert38 = insertelement <8 x i32> undef, i32 %32, i32 0 - %broadcast.splat39 = shufflevector <8 x i32> %broadcast.splatinsert38, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert40 = insertelement <8 x float> undef, float %24, i32 0 - %broadcast.splat41 = shufflevector <8 x float> %broadcast.splatinsert40, <8 x float> undef, <8 x i32> zeroinitializer - %45 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %46 = or <8 x i32> %45, - %47 = icmp sgt <8 x i32> %broadcast.splat39, %46 - %48 = extractelement <8 x i32> %46, i32 0 - %49 = add nsw i32 %mul.i.i.us, %48 - %50 = sext i32 %49 to i64 - %51 = getelementptr inbounds float, float* %16, i64 %50 - %52 = bitcast float* %51 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %52, i32 4, <8 x i1> %47, <8 x float> undef), !tbaa !12 - %53 = fmul <8 x float> %broadcast.splat41, %wide.masked.load - %54 = bitcast float* %51 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %53, <8 x float>* %54, i32 4, <8 x i1> %47), !tbaa !12, !llvm.access.group !16 - %55 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %56 = or <8 x i32> %55, - %57 = icmp sgt <8 x i32> %broadcast.splat39, %56 - %58 = extractelement <8 x i32> %56, i32 0 - %59 = add nsw i32 %mul.i.i.us, %58 - %60 = sext i32 %59 to i64 - %61 = getelementptr inbounds float, float* %16, i64 %60 - %62 = bitcast float* %61 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %62, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12 - %63 = fmul <8 x float> %broadcast.splat41, %wide.masked.load.1 - %64 = bitcast float* %61 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %63, <8 x float>* %64, i32 4, <8 x i1> %57), !tbaa !12, !llvm.access.group !16 - %65 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %66 = or <8 x i32> %65, - %67 = icmp sgt <8 x i32> %broadcast.splat39, %66 - %68 = extractelement <8 x i32> %66, i32 0 - %69 = add nsw i32 %mul.i.i.us, %68 - %70 = sext i32 %69 to i64 - %71 = getelementptr inbounds float, float* %16, i64 %70 - %72 = bitcast float* %71 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %72, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12 - %73 = fmul <8 x float> %broadcast.splat41, %wide.masked.load.2 - %74 = bitcast float* %71 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %73, <8 x float>* %74, i32 4, <8 x i1> %67), !tbaa !12, !llvm.access.group !16 - %75 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %76 = or <8 x i32> %75, - %77 = icmp sgt <8 x i32> %broadcast.splat39, %76 - %78 = extractelement <8 x i32> %76, i32 0 - %79 = add nsw i32 %mul.i.i.us, %78 - %80 = sext i32 %79 to i64 - %81 = getelementptr inbounds float, float* %16, i64 %80 - %82 = bitcast float* %81 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %82, i32 4, <8 x i1> %77, <8 x float> undef), !tbaa !12 - %83 = fmul <8 x float> %broadcast.splat41, %wide.masked.load.3 - %84 = bitcast float* %81 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %83, <8 x float>* %84, i32 4, <8 x i1> %77), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -pregion_for_entry.pregion_for_init.i.i.us: ; preds = %5 - %mul9.i.i.us = mul nsw i32 %36, %conv2.i.i.us - %85 = sext i32 %mul9.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us - %86 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %86, 1 - %cmp.i.i.us.1 = icmp sgt i32 %28, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %32, %conv2.i.i.us.1 - %mul9.i.i.us.1 = mul nsw i32 %36, %conv2.i.i.us.1 - %87 = sext i32 %mul9.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %90, %if.end.i.i.us.us ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %32, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us - %88 = load float, float* %arrayidx.i.i.us.us, align 4, !tbaa !12 - %mul6.i.i.us.us = fmul float %24, %88 - store float %mul6.i.i.us.us, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %89 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %90 = add nuw nsw i64 %_local_id_x.i.0.us.us, 1 - %exitcond.not = icmp eq i64 %90, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !19 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %91 = phi float [ %97, %for.body.i.i.us.us ], [ %mul6.i.i.us.us, %if.then.i.i.us.us ] - %92 = add nsw i64 %indvars.iv.next.i.i3.us.us, %85 - %arrayidx12.i.i.us.us = getelementptr inbounds float, float* %8, i64 %92 - %93 = load float, float* %arrayidx12.i.i.us.us, align 4, !tbaa !12 - %mul13.i.i.us.us = fmul float %20, %93 - %94 = mul nsw i64 %indvars.iv.next.i.i3.us.us, %37 - %95 = add nsw i64 %94, %89 - %arrayidx17.i.i.us.us = getelementptr inbounds float, float* %12, i64 %95 - %96 = load float, float* %arrayidx17.i.i.us.us, align 4, !tbaa !12 - %97 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us, float %96, float %91) #2 - store float %97, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !21 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us.3237, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %565, %if.end.i.i.us.3237 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %32, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add.i.i.us = add nsw i32 %mul.i.i.us, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us - %98 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %mul6.i.i.us = fmul float %24, %98 - store float %mul6.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %99 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1206 = add nuw nsw i64 %99, %mul.i.i.i - %conv.i.i.us.1207 = trunc i64 %add1.i.i.i.us.1206 to i32 - %cmp4.i.i.us.1208 = icmp sgt i32 %32, %conv.i.i.us.1207 - br i1 %cmp4.i.i.us.1208, label %if.then.i.i.us.1214, label %if.end.i.i.us.1215 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us.3237 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i.preheader - %100 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.1 = or i32 %100, 1 - %cmp.i.i.1 = icmp sgt i32 %28, %conv2.i.i.1 - %mul.i.i.1 = mul nsw i32 %32, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck49, label %pregion_for_end.i.i.1 - -vector.scevcheck49: ; preds = %pregion_for_end.i.i - %101 = mul i32 %32, %conv2.i.i.1 - %102 = trunc i64 %2 to i32 - %103 = shl i32 %102, 5 - %104 = add i32 %101, %103 - %105 = icmp sgt i32 %104, 2147483616 - br i1 %105, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph50 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.scevcheck49 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.ph50: ; preds = %vector.scevcheck49 - %broadcast.splatinsert57 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat58 = shufflevector <8 x i64> %broadcast.splatinsert57, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert59 = insertelement <8 x i32> undef, i32 %32, i32 0 - %broadcast.splat60 = shufflevector <8 x i32> %broadcast.splatinsert59, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert62 = insertelement <8 x float> undef, float %24, i32 0 - %broadcast.splat63 = shufflevector <8 x float> %broadcast.splatinsert62, <8 x float> undef, <8 x i32> zeroinitializer - %106 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %107 = or <8 x i32> %106, - %108 = icmp sgt <8 x i32> %broadcast.splat60, %107 - %109 = extractelement <8 x i32> %107, i32 0 - %110 = add nsw i32 %mul.i.i.1, %109 - %111 = sext i32 %110 to i64 - %112 = getelementptr inbounds float, float* %16, i64 %111 - %113 = bitcast float* %112 to <8 x float>* - %wide.masked.load61 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %113, i32 4, <8 x i1> %108, <8 x float> undef), !tbaa !12 - %114 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61 - %115 = bitcast float* %112 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %114, <8 x float>* %115, i32 4, <8 x i1> %108), !tbaa !12, !llvm.access.group !16 - %116 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %117 = or <8 x i32> %116, - %118 = icmp sgt <8 x i32> %broadcast.splat60, %117 - %119 = extractelement <8 x i32> %117, i32 0 - %120 = add nsw i32 %mul.i.i.1, %119 - %121 = sext i32 %120 to i64 - %122 = getelementptr inbounds float, float* %16, i64 %121 - %123 = bitcast float* %122 to <8 x float>* - %wide.masked.load61.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %123, i32 4, <8 x i1> %118, <8 x float> undef), !tbaa !12 - %124 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61.1 - %125 = bitcast float* %122 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %124, <8 x float>* %125, i32 4, <8 x i1> %118), !tbaa !12, !llvm.access.group !16 - %126 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %127 = or <8 x i32> %126, - %128 = icmp sgt <8 x i32> %broadcast.splat60, %127 - %129 = extractelement <8 x i32> %127, i32 0 - %130 = add nsw i32 %mul.i.i.1, %129 - %131 = sext i32 %130 to i64 - %132 = getelementptr inbounds float, float* %16, i64 %131 - %133 = bitcast float* %132 to <8 x float>* - %wide.masked.load61.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %133, i32 4, <8 x i1> %128, <8 x float> undef), !tbaa !12 - %134 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61.2 - %135 = bitcast float* %132 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %134, <8 x float>* %135, i32 4, <8 x i1> %128), !tbaa !12, !llvm.access.group !16 - %136 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %137 = or <8 x i32> %136, - %138 = icmp sgt <8 x i32> %broadcast.splat60, %137 - %139 = extractelement <8 x i32> %137, i32 0 - %140 = add nsw i32 %mul.i.i.1, %139 - %141 = sext i32 %140 to i64 - %142 = getelementptr inbounds float, float* %16, i64 %141 - %143 = bitcast float* %142 to <8 x float>* - %wide.masked.load61.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %143, i32 4, <8 x i1> %138, <8 x float> undef), !tbaa !12 - %144 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61.3 - %145 = bitcast float* %142 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %144, <8 x float>* %145, i32 4, <8 x i1> %138), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.1 - -_pocl_kernel_gemm.exit.loopexit: ; preds = %if.end.i.i.us.us.7 - br label %_pocl_kernel_gemm.exit - -_pocl_kernel_gemm.exit.loopexit238: ; preds = %if.end.i.i.us.7.3 - br label %_pocl_kernel_gemm.exit - -_pocl_kernel_gemm.exit: ; preds = %pregion_for_end.i.i.us.6, %vector.ph182, %pregion_for_end.i.i.6, %_pocl_kernel_gemm.exit.loopexit238, %_pocl_kernel_gemm.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.i.i.us.1.3, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ %559, %if.end.i.i.us.1.3 ], [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %32, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %add.i.i.us.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1 - %146 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %mul6.i.i.us.1 = fmul float %24, %146 - store float %mul6.i.i.us.1, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %147 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %147, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %32, %conv.i.i.us.1.1 - br i1 %cmp4.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.i.i.us.1.3 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph50, %pregion_for_end.i.i - %148 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.2 = or i32 %148, 2 - %cmp.i.i.2 = icmp sgt i32 %28, %conv2.i.i.2 - %mul.i.i.2 = mul nsw i32 %32, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck71, label %pregion_for_end.i.i.2 - -vector.scevcheck71: ; preds = %pregion_for_end.i.i.1 - %149 = mul i32 %32, %conv2.i.i.2 - %150 = trunc i64 %2 to i32 - %151 = shl i32 %150, 5 - %152 = add i32 %149, %151 - %153 = icmp sgt i32 %152, 2147483616 - br i1 %153, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph72 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.scevcheck71 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.ph72: ; preds = %vector.scevcheck71 - %broadcast.splatinsert79 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat80 = shufflevector <8 x i64> %broadcast.splatinsert79, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert81 = insertelement <8 x i32> undef, i32 %32, i32 0 - %broadcast.splat82 = shufflevector <8 x i32> %broadcast.splatinsert81, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert84 = insertelement <8 x float> undef, float %24, i32 0 - %broadcast.splat85 = shufflevector <8 x float> %broadcast.splatinsert84, <8 x float> undef, <8 x i32> zeroinitializer - %154 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %155 = or <8 x i32> %154, - %156 = icmp sgt <8 x i32> %broadcast.splat82, %155 - %157 = extractelement <8 x i32> %155, i32 0 - %158 = add nsw i32 %mul.i.i.2, %157 - %159 = sext i32 %158 to i64 - %160 = getelementptr inbounds float, float* %16, i64 %159 - %161 = bitcast float* %160 to <8 x float>* - %wide.masked.load83 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %161, i32 4, <8 x i1> %156, <8 x float> undef), !tbaa !12 - %162 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83 - %163 = bitcast float* %160 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %162, <8 x float>* %163, i32 4, <8 x i1> %156), !tbaa !12, !llvm.access.group !16 - %164 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %165 = or <8 x i32> %164, - %166 = icmp sgt <8 x i32> %broadcast.splat82, %165 - %167 = extractelement <8 x i32> %165, i32 0 - %168 = add nsw i32 %mul.i.i.2, %167 - %169 = sext i32 %168 to i64 - %170 = getelementptr inbounds float, float* %16, i64 %169 - %171 = bitcast float* %170 to <8 x float>* - %wide.masked.load83.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %171, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12 - %172 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83.1 - %173 = bitcast float* %170 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %172, <8 x float>* %173, i32 4, <8 x i1> %166), !tbaa !12, !llvm.access.group !16 - %174 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %175 = or <8 x i32> %174, - %176 = icmp sgt <8 x i32> %broadcast.splat82, %175 - %177 = extractelement <8 x i32> %175, i32 0 - %178 = add nsw i32 %mul.i.i.2, %177 - %179 = sext i32 %178 to i64 - %180 = getelementptr inbounds float, float* %16, i64 %179 - %181 = bitcast float* %180 to <8 x float>* - %wide.masked.load83.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %181, i32 4, <8 x i1> %176, <8 x float> undef), !tbaa !12 - %182 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83.2 - %183 = bitcast float* %180 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %182, <8 x float>* %183, i32 4, <8 x i1> %176), !tbaa !12, !llvm.access.group !16 - %184 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %185 = or <8 x i32> %184, - %186 = icmp sgt <8 x i32> %broadcast.splat82, %185 - %187 = extractelement <8 x i32> %185, i32 0 - %188 = add nsw i32 %mul.i.i.2, %187 - %189 = sext i32 %188 to i64 - %190 = getelementptr inbounds float, float* %16, i64 %189 - %191 = bitcast float* %190 to <8 x float>* - %wide.masked.load83.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %191, i32 4, <8 x i1> %186, <8 x float> undef), !tbaa !12 - %192 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83.3 - %193 = bitcast float* %190 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %192, <8 x float>* %193, i32 4, <8 x i1> %186), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.i.i.us.2.3, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ %553, %if.end.i.i.us.2.3 ], [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %32, %conv.i.i.us.2 - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %add.i.i.us.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2 - %194 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %mul6.i.i.us.2 = fmul float %24, %194 - store float %mul6.i.i.us.2, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %195 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %195, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %32, %conv.i.i.us.2.1 - br i1 %cmp4.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.i.i.us.2.3 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph72, %pregion_for_end.i.i.1 - %196 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.3 = or i32 %196, 3 - %cmp.i.i.3 = icmp sgt i32 %28, %conv2.i.i.3 - %mul.i.i.3 = mul nsw i32 %32, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck93, label %pregion_for_end.i.i.3 - -vector.scevcheck93: ; preds = %pregion_for_end.i.i.2 - %197 = mul i32 %32, %conv2.i.i.3 - %198 = trunc i64 %2 to i32 - %199 = shl i32 %198, 5 - %200 = add i32 %197, %199 - %201 = icmp sgt i32 %200, 2147483616 - br i1 %201, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph94 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.scevcheck93 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.ph94: ; preds = %vector.scevcheck93 - %broadcast.splatinsert101 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat102 = shufflevector <8 x i64> %broadcast.splatinsert101, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert103 = insertelement <8 x i32> undef, i32 %32, i32 0 - %broadcast.splat104 = shufflevector <8 x i32> %broadcast.splatinsert103, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert106 = insertelement <8 x float> undef, float %24, i32 0 - %broadcast.splat107 = shufflevector <8 x float> %broadcast.splatinsert106, <8 x float> undef, <8 x i32> zeroinitializer - %202 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %203 = or <8 x i32> %202, - %204 = icmp sgt <8 x i32> %broadcast.splat104, %203 - %205 = extractelement <8 x i32> %203, i32 0 - %206 = add nsw i32 %mul.i.i.3, %205 - %207 = sext i32 %206 to i64 - %208 = getelementptr inbounds float, float* %16, i64 %207 - %209 = bitcast float* %208 to <8 x float>* - %wide.masked.load105 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %209, i32 4, <8 x i1> %204, <8 x float> undef), !tbaa !12 - %210 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105 - %211 = bitcast float* %208 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %210, <8 x float>* %211, i32 4, <8 x i1> %204), !tbaa !12, !llvm.access.group !16 - %212 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %213 = or <8 x i32> %212, - %214 = icmp sgt <8 x i32> %broadcast.splat104, %213 - %215 = extractelement <8 x i32> %213, i32 0 - %216 = add nsw i32 %mul.i.i.3, %215 - %217 = sext i32 %216 to i64 - %218 = getelementptr inbounds float, float* %16, i64 %217 - %219 = bitcast float* %218 to <8 x float>* - %wide.masked.load105.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %219, i32 4, <8 x i1> %214, <8 x float> undef), !tbaa !12 - %220 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105.1 - %221 = bitcast float* %218 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %220, <8 x float>* %221, i32 4, <8 x i1> %214), !tbaa !12, !llvm.access.group !16 - %222 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %223 = or <8 x i32> %222, - %224 = icmp sgt <8 x i32> %broadcast.splat104, %223 - %225 = extractelement <8 x i32> %223, i32 0 - %226 = add nsw i32 %mul.i.i.3, %225 - %227 = sext i32 %226 to i64 - %228 = getelementptr inbounds float, float* %16, i64 %227 - %229 = bitcast float* %228 to <8 x float>* - %wide.masked.load105.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %229, i32 4, <8 x i1> %224, <8 x float> undef), !tbaa !12 - %230 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105.2 - %231 = bitcast float* %228 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %230, <8 x float>* %231, i32 4, <8 x i1> %224), !tbaa !12, !llvm.access.group !16 - %232 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %233 = or <8 x i32> %232, - %234 = icmp sgt <8 x i32> %broadcast.splat104, %233 - %235 = extractelement <8 x i32> %233, i32 0 - %236 = add nsw i32 %mul.i.i.3, %235 - %237 = sext i32 %236 to i64 - %238 = getelementptr inbounds float, float* %16, i64 %237 - %239 = bitcast float* %238 to <8 x float>* - %wide.masked.load105.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %239, i32 4, <8 x i1> %234, <8 x float> undef), !tbaa !12 - %240 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105.3 - %241 = bitcast float* %238 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %240, <8 x float>* %241, i32 4, <8 x i1> %234), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.i.i.us.3.3, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ %547, %if.end.i.i.us.3.3 ], [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %32, %conv.i.i.us.3 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %add.i.i.us.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3 - %242 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %mul6.i.i.us.3 = fmul float %24, %242 - store float %mul6.i.i.us.3, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %243 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %243, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %32, %conv.i.i.us.3.1 - br i1 %cmp4.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.i.i.us.3.3 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph94, %pregion_for_end.i.i.2 - %244 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.4 = or i32 %244, 4 - %cmp.i.i.4 = icmp sgt i32 %28, %conv2.i.i.4 - %mul.i.i.4 = mul nsw i32 %32, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck115, label %pregion_for_end.i.i.4 - -vector.scevcheck115: ; preds = %pregion_for_end.i.i.3 - %245 = mul i32 %32, %conv2.i.i.4 - %246 = trunc i64 %2 to i32 - %247 = shl i32 %246, 5 - %248 = add i32 %245, %247 - %249 = icmp sgt i32 %248, 2147483616 - br i1 %249, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph116 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.scevcheck115 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.ph116: ; preds = %vector.scevcheck115 - %broadcast.splatinsert123 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat124 = shufflevector <8 x i64> %broadcast.splatinsert123, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert125 = insertelement <8 x i32> undef, i32 %32, i32 0 - %broadcast.splat126 = shufflevector <8 x i32> %broadcast.splatinsert125, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert128 = insertelement <8 x float> undef, float %24, i32 0 - %broadcast.splat129 = shufflevector <8 x float> %broadcast.splatinsert128, <8 x float> undef, <8 x i32> zeroinitializer - %250 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %251 = or <8 x i32> %250, - %252 = icmp sgt <8 x i32> %broadcast.splat126, %251 - %253 = extractelement <8 x i32> %251, i32 0 - %254 = add nsw i32 %mul.i.i.4, %253 - %255 = sext i32 %254 to i64 - %256 = getelementptr inbounds float, float* %16, i64 %255 - %257 = bitcast float* %256 to <8 x float>* - %wide.masked.load127 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %257, i32 4, <8 x i1> %252, <8 x float> undef), !tbaa !12 - %258 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127 - %259 = bitcast float* %256 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %258, <8 x float>* %259, i32 4, <8 x i1> %252), !tbaa !12, !llvm.access.group !16 - %260 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %261 = or <8 x i32> %260, - %262 = icmp sgt <8 x i32> %broadcast.splat126, %261 - %263 = extractelement <8 x i32> %261, i32 0 - %264 = add nsw i32 %mul.i.i.4, %263 - %265 = sext i32 %264 to i64 - %266 = getelementptr inbounds float, float* %16, i64 %265 - %267 = bitcast float* %266 to <8 x float>* - %wide.masked.load127.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %267, i32 4, <8 x i1> %262, <8 x float> undef), !tbaa !12 - %268 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127.1 - %269 = bitcast float* %266 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %268, <8 x float>* %269, i32 4, <8 x i1> %262), !tbaa !12, !llvm.access.group !16 - %270 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %271 = or <8 x i32> %270, - %272 = icmp sgt <8 x i32> %broadcast.splat126, %271 - %273 = extractelement <8 x i32> %271, i32 0 - %274 = add nsw i32 %mul.i.i.4, %273 - %275 = sext i32 %274 to i64 - %276 = getelementptr inbounds float, float* %16, i64 %275 - %277 = bitcast float* %276 to <8 x float>* - %wide.masked.load127.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %277, i32 4, <8 x i1> %272, <8 x float> undef), !tbaa !12 - %278 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127.2 - %279 = bitcast float* %276 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %278, <8 x float>* %279, i32 4, <8 x i1> %272), !tbaa !12, !llvm.access.group !16 - %280 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %281 = or <8 x i32> %280, - %282 = icmp sgt <8 x i32> %broadcast.splat126, %281 - %283 = extractelement <8 x i32> %281, i32 0 - %284 = add nsw i32 %mul.i.i.4, %283 - %285 = sext i32 %284 to i64 - %286 = getelementptr inbounds float, float* %16, i64 %285 - %287 = bitcast float* %286 to <8 x float>* - %wide.masked.load127.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %287, i32 4, <8 x i1> %282, <8 x float> undef), !tbaa !12 - %288 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127.3 - %289 = bitcast float* %286 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %288, <8 x float>* %289, i32 4, <8 x i1> %282), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.i.i.us.4.3, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ %541, %if.end.i.i.us.4.3 ], [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %32, %conv.i.i.us.4 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %add.i.i.us.4 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.4 - %290 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %mul6.i.i.us.4 = fmul float %24, %290 - store float %mul6.i.i.us.4, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %291 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %291, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %32, %conv.i.i.us.4.1 - br i1 %cmp4.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.i.i.us.4.3 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph116, %pregion_for_end.i.i.3 - %292 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.5 = or i32 %292, 5 - %cmp.i.i.5 = icmp sgt i32 %28, %conv2.i.i.5 - %mul.i.i.5 = mul nsw i32 %32, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck137, label %pregion_for_end.i.i.5 - -vector.scevcheck137: ; preds = %pregion_for_end.i.i.4 - %293 = mul i32 %32, %conv2.i.i.5 - %294 = trunc i64 %2 to i32 - %295 = shl i32 %294, 5 - %296 = add i32 %293, %295 - %297 = icmp sgt i32 %296, 2147483616 - br i1 %297, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph138 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.scevcheck137 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.ph138: ; preds = %vector.scevcheck137 - %broadcast.splatinsert145 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat146 = shufflevector <8 x i64> %broadcast.splatinsert145, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert147 = insertelement <8 x i32> undef, i32 %32, i32 0 - %broadcast.splat148 = shufflevector <8 x i32> %broadcast.splatinsert147, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert150 = insertelement <8 x float> undef, float %24, i32 0 - %broadcast.splat151 = shufflevector <8 x float> %broadcast.splatinsert150, <8 x float> undef, <8 x i32> zeroinitializer - %298 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %299 = or <8 x i32> %298, - %300 = icmp sgt <8 x i32> %broadcast.splat148, %299 - %301 = extractelement <8 x i32> %299, i32 0 - %302 = add nsw i32 %mul.i.i.5, %301 - %303 = sext i32 %302 to i64 - %304 = getelementptr inbounds float, float* %16, i64 %303 - %305 = bitcast float* %304 to <8 x float>* - %wide.masked.load149 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %305, i32 4, <8 x i1> %300, <8 x float> undef), !tbaa !12 - %306 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149 - %307 = bitcast float* %304 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %306, <8 x float>* %307, i32 4, <8 x i1> %300), !tbaa !12, !llvm.access.group !16 - %308 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %309 = or <8 x i32> %308, - %310 = icmp sgt <8 x i32> %broadcast.splat148, %309 - %311 = extractelement <8 x i32> %309, i32 0 - %312 = add nsw i32 %mul.i.i.5, %311 - %313 = sext i32 %312 to i64 - %314 = getelementptr inbounds float, float* %16, i64 %313 - %315 = bitcast float* %314 to <8 x float>* - %wide.masked.load149.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %315, i32 4, <8 x i1> %310, <8 x float> undef), !tbaa !12 - %316 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149.1 - %317 = bitcast float* %314 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %316, <8 x float>* %317, i32 4, <8 x i1> %310), !tbaa !12, !llvm.access.group !16 - %318 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %319 = or <8 x i32> %318, - %320 = icmp sgt <8 x i32> %broadcast.splat148, %319 - %321 = extractelement <8 x i32> %319, i32 0 - %322 = add nsw i32 %mul.i.i.5, %321 - %323 = sext i32 %322 to i64 - %324 = getelementptr inbounds float, float* %16, i64 %323 - %325 = bitcast float* %324 to <8 x float>* - %wide.masked.load149.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %325, i32 4, <8 x i1> %320, <8 x float> undef), !tbaa !12 - %326 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149.2 - %327 = bitcast float* %324 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %326, <8 x float>* %327, i32 4, <8 x i1> %320), !tbaa !12, !llvm.access.group !16 - %328 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %329 = or <8 x i32> %328, - %330 = icmp sgt <8 x i32> %broadcast.splat148, %329 - %331 = extractelement <8 x i32> %329, i32 0 - %332 = add nsw i32 %mul.i.i.5, %331 - %333 = sext i32 %332 to i64 - %334 = getelementptr inbounds float, float* %16, i64 %333 - %335 = bitcast float* %334 to <8 x float>* - %wide.masked.load149.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %335, i32 4, <8 x i1> %330, <8 x float> undef), !tbaa !12 - %336 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149.3 - %337 = bitcast float* %334 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %336, <8 x float>* %337, i32 4, <8 x i1> %330), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.i.i.us.5.3, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ %535, %if.end.i.i.us.5.3 ], [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %32, %conv.i.i.us.5 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %add.i.i.us.5 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.5 - %338 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %mul6.i.i.us.5 = fmul float %24, %338 - store float %mul6.i.i.us.5, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %339 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %339, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %32, %conv.i.i.us.5.1 - br i1 %cmp4.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.i.i.us.5.3 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph138, %pregion_for_end.i.i.4 - %340 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.6 = or i32 %340, 6 - %cmp.i.i.6 = icmp sgt i32 %28, %conv2.i.i.6 - %mul.i.i.6 = mul nsw i32 %32, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck159, label %pregion_for_end.i.i.6 - -vector.scevcheck159: ; preds = %pregion_for_end.i.i.5 - %341 = mul i32 %32, %conv2.i.i.6 - %342 = trunc i64 %2 to i32 - %343 = shl i32 %342, 5 - %344 = add i32 %341, %343 - %345 = icmp sgt i32 %344, 2147483616 - br i1 %345, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph160 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.scevcheck159 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.ph160: ; preds = %vector.scevcheck159 - %broadcast.splatinsert167 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat168 = shufflevector <8 x i64> %broadcast.splatinsert167, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert169 = insertelement <8 x i32> undef, i32 %32, i32 0 - %broadcast.splat170 = shufflevector <8 x i32> %broadcast.splatinsert169, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert172 = insertelement <8 x float> undef, float %24, i32 0 - %broadcast.splat173 = shufflevector <8 x float> %broadcast.splatinsert172, <8 x float> undef, <8 x i32> zeroinitializer - %346 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %347 = or <8 x i32> %346, - %348 = icmp sgt <8 x i32> %broadcast.splat170, %347 - %349 = extractelement <8 x i32> %347, i32 0 - %350 = add nsw i32 %mul.i.i.6, %349 - %351 = sext i32 %350 to i64 - %352 = getelementptr inbounds float, float* %16, i64 %351 - %353 = bitcast float* %352 to <8 x float>* - %wide.masked.load171 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %353, i32 4, <8 x i1> %348, <8 x float> undef), !tbaa !12 - %354 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171 - %355 = bitcast float* %352 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %354, <8 x float>* %355, i32 4, <8 x i1> %348), !tbaa !12, !llvm.access.group !16 - %356 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %357 = or <8 x i32> %356, - %358 = icmp sgt <8 x i32> %broadcast.splat170, %357 - %359 = extractelement <8 x i32> %357, i32 0 - %360 = add nsw i32 %mul.i.i.6, %359 - %361 = sext i32 %360 to i64 - %362 = getelementptr inbounds float, float* %16, i64 %361 - %363 = bitcast float* %362 to <8 x float>* - %wide.masked.load171.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %363, i32 4, <8 x i1> %358, <8 x float> undef), !tbaa !12 - %364 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171.1 - %365 = bitcast float* %362 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %364, <8 x float>* %365, i32 4, <8 x i1> %358), !tbaa !12, !llvm.access.group !16 - %366 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %367 = or <8 x i32> %366, - %368 = icmp sgt <8 x i32> %broadcast.splat170, %367 - %369 = extractelement <8 x i32> %367, i32 0 - %370 = add nsw i32 %mul.i.i.6, %369 - %371 = sext i32 %370 to i64 - %372 = getelementptr inbounds float, float* %16, i64 %371 - %373 = bitcast float* %372 to <8 x float>* - %wide.masked.load171.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %373, i32 4, <8 x i1> %368, <8 x float> undef), !tbaa !12 - %374 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171.2 - %375 = bitcast float* %372 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %374, <8 x float>* %375, i32 4, <8 x i1> %368), !tbaa !12, !llvm.access.group !16 - %376 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %377 = or <8 x i32> %376, - %378 = icmp sgt <8 x i32> %broadcast.splat170, %377 - %379 = extractelement <8 x i32> %377, i32 0 - %380 = add nsw i32 %mul.i.i.6, %379 - %381 = sext i32 %380 to i64 - %382 = getelementptr inbounds float, float* %16, i64 %381 - %383 = bitcast float* %382 to <8 x float>* - %wide.masked.load171.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %383, i32 4, <8 x i1> %378, <8 x float> undef), !tbaa !12 - %384 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171.3 - %385 = bitcast float* %382 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %384, <8 x float>* %385, i32 4, <8 x i1> %378), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.i.i.us.6.3, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ %529, %if.end.i.i.us.6.3 ], [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %32, %conv.i.i.us.6 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %add.i.i.us.6 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.6 - %386 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %mul6.i.i.us.6 = fmul float %24, %386 - store float %mul6.i.i.us.6, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %387 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %387, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %32, %conv.i.i.us.6.1 - br i1 %cmp4.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.i.i.us.6.3 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph160, %pregion_for_end.i.i.5 - %388 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.7 = or i32 %388, 7 - %cmp.i.i.7 = icmp sgt i32 %28, %conv2.i.i.7 - %mul.i.i.7 = mul nsw i32 %32, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck181, label %_pocl_kernel_gemm.exit - -vector.scevcheck181: ; preds = %pregion_for_end.i.i.6 - %389 = mul i32 %32, %conv2.i.i.7 - %390 = trunc i64 %2 to i32 - %391 = shl i32 %390, 5 - %392 = add i32 %389, %391 - %393 = icmp sgt i32 %392, 2147483616 - br i1 %393, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph182 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.scevcheck181 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.ph182: ; preds = %vector.scevcheck181 - %broadcast.splatinsert189 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat190 = shufflevector <8 x i64> %broadcast.splatinsert189, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert191 = insertelement <8 x i32> undef, i32 %32, i32 0 - %broadcast.splat192 = shufflevector <8 x i32> %broadcast.splatinsert191, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert194 = insertelement <8 x float> undef, float %24, i32 0 - %broadcast.splat195 = shufflevector <8 x float> %broadcast.splatinsert194, <8 x float> undef, <8 x i32> zeroinitializer - %394 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %395 = or <8 x i32> %394, - %396 = icmp sgt <8 x i32> %broadcast.splat192, %395 - %397 = extractelement <8 x i32> %395, i32 0 - %398 = add nsw i32 %mul.i.i.7, %397 - %399 = sext i32 %398 to i64 - %400 = getelementptr inbounds float, float* %16, i64 %399 - %401 = bitcast float* %400 to <8 x float>* - %wide.masked.load193 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %401, i32 4, <8 x i1> %396, <8 x float> undef), !tbaa !12 - %402 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193 - %403 = bitcast float* %400 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %402, <8 x float>* %403, i32 4, <8 x i1> %396), !tbaa !12, !llvm.access.group !16 - %404 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %405 = or <8 x i32> %404, - %406 = icmp sgt <8 x i32> %broadcast.splat192, %405 - %407 = extractelement <8 x i32> %405, i32 0 - %408 = add nsw i32 %mul.i.i.7, %407 - %409 = sext i32 %408 to i64 - %410 = getelementptr inbounds float, float* %16, i64 %409 - %411 = bitcast float* %410 to <8 x float>* - %wide.masked.load193.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %411, i32 4, <8 x i1> %406, <8 x float> undef), !tbaa !12 - %412 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193.1 - %413 = bitcast float* %410 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %412, <8 x float>* %413, i32 4, <8 x i1> %406), !tbaa !12, !llvm.access.group !16 - %414 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %415 = or <8 x i32> %414, - %416 = icmp sgt <8 x i32> %broadcast.splat192, %415 - %417 = extractelement <8 x i32> %415, i32 0 - %418 = add nsw i32 %mul.i.i.7, %417 - %419 = sext i32 %418 to i64 - %420 = getelementptr inbounds float, float* %16, i64 %419 - %421 = bitcast float* %420 to <8 x float>* - %wide.masked.load193.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %421, i32 4, <8 x i1> %416, <8 x float> undef), !tbaa !12 - %422 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193.2 - %423 = bitcast float* %420 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %422, <8 x float>* %423, i32 4, <8 x i1> %416), !tbaa !12, !llvm.access.group !16 - %424 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %425 = or <8 x i32> %424, - %426 = icmp sgt <8 x i32> %broadcast.splat192, %425 - %427 = extractelement <8 x i32> %425, i32 0 - %428 = add nsw i32 %mul.i.i.7, %427 - %429 = sext i32 %428 to i64 - %430 = getelementptr inbounds float, float* %16, i64 %429 - %431 = bitcast float* %430 to <8 x float>* - %wide.masked.load193.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %431, i32 4, <8 x i1> %426, <8 x float> undef), !tbaa !12 - %432 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193.3 - %433 = bitcast float* %430 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %432, <8 x float>* %433, i32 4, <8 x i1> %426), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_gemm.exit - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.i.i.us.7.3, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ %523, %if.end.i.i.us.7.3 ], [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %32, %conv.i.i.us.7 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %add.i.i.us.7 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.7 - %434 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %mul6.i.i.us.7 = fmul float %24, %434 - store float %mul6.i.i.us.7, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %435 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %435, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %32, %conv.i.i.us.7.1 - br i1 %cmp4.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.i.i.us.7.1 - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %445, %if.end.i.i.us.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %32, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.1 - %436 = load float, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12 - %mul6.i.i.us.us.1 = fmul float %24, %436 - store float %mul6.i.i.us.us.1, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %437 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %438 = phi float [ %444, %for.body.i.i.us.us.1 ], [ %mul6.i.i.us.us.1, %if.then.i.i.us.us.1 ] - %439 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %87 - %arrayidx12.i.i.us.us.1 = getelementptr inbounds float, float* %8, i64 %439 - %440 = load float, float* %arrayidx12.i.i.us.us.1, align 4, !tbaa !12 - %mul13.i.i.us.us.1 = fmul float %20, %440 - %441 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1, %37 - %442 = add nsw i64 %441, %437 - %arrayidx17.i.i.us.us.1 = getelementptr inbounds float, float* %12, i64 %442 - %443 = load float, float* %arrayidx17.i.i.us.us.1, align 4, !tbaa !12 - %444 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.1, float %443, float %438) #2 - store float %444, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !21 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %445 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 1 - %exitcond.not.1 = icmp eq i64 %445, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !19 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %446 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %446, 2 - %cmp.i.i.us.2 = icmp sgt i32 %28, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %32, %conv2.i.i.us.2 - %mul9.i.i.us.2 = mul nsw i32 %36, %conv2.i.i.us.2 - %447 = sext i32 %mul9.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %457, %if.end.i.i.us.us.2 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %32, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.2 - %448 = load float, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12 - %mul6.i.i.us.us.2 = fmul float %24, %448 - store float %mul6.i.i.us.us.2, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %449 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %450 = phi float [ %456, %for.body.i.i.us.us.2 ], [ %mul6.i.i.us.us.2, %if.then.i.i.us.us.2 ] - %451 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %447 - %arrayidx12.i.i.us.us.2 = getelementptr inbounds float, float* %8, i64 %451 - %452 = load float, float* %arrayidx12.i.i.us.us.2, align 4, !tbaa !12 - %mul13.i.i.us.us.2 = fmul float %20, %452 - %453 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2, %37 - %454 = add nsw i64 %453, %449 - %arrayidx17.i.i.us.us.2 = getelementptr inbounds float, float* %12, i64 %454 - %455 = load float, float* %arrayidx17.i.i.us.us.2, align 4, !tbaa !12 - %456 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.2, float %455, float %450) #2 - store float %456, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !21 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %457 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 1 - %exitcond.not.2 = icmp eq i64 %457, 32 - br i1 %exitcond.not.2, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !19 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %458 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %458, 3 - %cmp.i.i.us.3 = icmp sgt i32 %28, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %32, %conv2.i.i.us.3 - %mul9.i.i.us.3 = mul nsw i32 %36, %conv2.i.i.us.3 - %459 = sext i32 %mul9.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %469, %if.end.i.i.us.us.3 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %32, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.3 - %460 = load float, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12 - %mul6.i.i.us.us.3 = fmul float %24, %460 - store float %mul6.i.i.us.us.3, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %461 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %462 = phi float [ %468, %for.body.i.i.us.us.3 ], [ %mul6.i.i.us.us.3, %if.then.i.i.us.us.3 ] - %463 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %459 - %arrayidx12.i.i.us.us.3 = getelementptr inbounds float, float* %8, i64 %463 - %464 = load float, float* %arrayidx12.i.i.us.us.3, align 4, !tbaa !12 - %mul13.i.i.us.us.3 = fmul float %20, %464 - %465 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3, %37 - %466 = add nsw i64 %465, %461 - %arrayidx17.i.i.us.us.3 = getelementptr inbounds float, float* %12, i64 %466 - %467 = load float, float* %arrayidx17.i.i.us.us.3, align 4, !tbaa !12 - %468 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.3, float %467, float %462) #2 - store float %468, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !21 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %469 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 1 - %exitcond.not.3 = icmp eq i64 %469, 32 - br i1 %exitcond.not.3, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !19 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %470 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %470, 4 - %cmp.i.i.us.4 = icmp sgt i32 %28, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %32, %conv2.i.i.us.4 - %mul9.i.i.us.4 = mul nsw i32 %36, %conv2.i.i.us.4 - %471 = sext i32 %mul9.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %481, %if.end.i.i.us.us.4 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %32, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.4 - %472 = load float, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12 - %mul6.i.i.us.us.4 = fmul float %24, %472 - store float %mul6.i.i.us.us.4, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %473 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %474 = phi float [ %480, %for.body.i.i.us.us.4 ], [ %mul6.i.i.us.us.4, %if.then.i.i.us.us.4 ] - %475 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %471 - %arrayidx12.i.i.us.us.4 = getelementptr inbounds float, float* %8, i64 %475 - %476 = load float, float* %arrayidx12.i.i.us.us.4, align 4, !tbaa !12 - %mul13.i.i.us.us.4 = fmul float %20, %476 - %477 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4, %37 - %478 = add nsw i64 %477, %473 - %arrayidx17.i.i.us.us.4 = getelementptr inbounds float, float* %12, i64 %478 - %479 = load float, float* %arrayidx17.i.i.us.us.4, align 4, !tbaa !12 - %480 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.4, float %479, float %474) #2 - store float %480, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !21 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %481 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 1 - %exitcond.not.4 = icmp eq i64 %481, 32 - br i1 %exitcond.not.4, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !19 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %482 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %482, 5 - %cmp.i.i.us.5 = icmp sgt i32 %28, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %32, %conv2.i.i.us.5 - %mul9.i.i.us.5 = mul nsw i32 %36, %conv2.i.i.us.5 - %483 = sext i32 %mul9.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %493, %if.end.i.i.us.us.5 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %32, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.5 - %484 = load float, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12 - %mul6.i.i.us.us.5 = fmul float %24, %484 - store float %mul6.i.i.us.us.5, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %485 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %486 = phi float [ %492, %for.body.i.i.us.us.5 ], [ %mul6.i.i.us.us.5, %if.then.i.i.us.us.5 ] - %487 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %483 - %arrayidx12.i.i.us.us.5 = getelementptr inbounds float, float* %8, i64 %487 - %488 = load float, float* %arrayidx12.i.i.us.us.5, align 4, !tbaa !12 - %mul13.i.i.us.us.5 = fmul float %20, %488 - %489 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5, %37 - %490 = add nsw i64 %489, %485 - %arrayidx17.i.i.us.us.5 = getelementptr inbounds float, float* %12, i64 %490 - %491 = load float, float* %arrayidx17.i.i.us.us.5, align 4, !tbaa !12 - %492 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.5, float %491, float %486) #2 - store float %492, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !21 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %493 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 1 - %exitcond.not.5 = icmp eq i64 %493, 32 - br i1 %exitcond.not.5, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !19 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %494 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %494, 6 - %cmp.i.i.us.6 = icmp sgt i32 %28, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %32, %conv2.i.i.us.6 - %mul9.i.i.us.6 = mul nsw i32 %36, %conv2.i.i.us.6 - %495 = sext i32 %mul9.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %505, %if.end.i.i.us.us.6 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %32, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.6 - %496 = load float, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12 - %mul6.i.i.us.us.6 = fmul float %24, %496 - store float %mul6.i.i.us.us.6, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %497 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %498 = phi float [ %504, %for.body.i.i.us.us.6 ], [ %mul6.i.i.us.us.6, %if.then.i.i.us.us.6 ] - %499 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %495 - %arrayidx12.i.i.us.us.6 = getelementptr inbounds float, float* %8, i64 %499 - %500 = load float, float* %arrayidx12.i.i.us.us.6, align 4, !tbaa !12 - %mul13.i.i.us.us.6 = fmul float %20, %500 - %501 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6, %37 - %502 = add nsw i64 %501, %497 - %arrayidx17.i.i.us.us.6 = getelementptr inbounds float, float* %12, i64 %502 - %503 = load float, float* %arrayidx17.i.i.us.us.6, align 4, !tbaa !12 - %504 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.6, float %503, float %498) #2 - store float %504, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !21 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %505 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 1 - %exitcond.not.6 = icmp eq i64 %505, 32 - br i1 %exitcond.not.6, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !19 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %506 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %506, 7 - %cmp.i.i.us.7 = icmp sgt i32 %28, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %32, %conv2.i.i.us.7 - %mul9.i.i.us.7 = mul nsw i32 %36, %conv2.i.i.us.7 - %507 = sext i32 %mul9.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_gemm.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %517, %if.end.i.i.us.us.7 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %32, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.us.7 - %508 = load float, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12 - %mul6.i.i.us.us.7 = fmul float %24, %508 - store float %mul6.i.i.us.us.7, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %509 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %510 = phi float [ %516, %for.body.i.i.us.us.7 ], [ %mul6.i.i.us.us.7, %if.then.i.i.us.us.7 ] - %511 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %507 - %arrayidx12.i.i.us.us.7 = getelementptr inbounds float, float* %8, i64 %511 - %512 = load float, float* %arrayidx12.i.i.us.us.7, align 4, !tbaa !12 - %mul13.i.i.us.us.7 = fmul float %20, %512 - %513 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7, %37 - %514 = add nsw i64 %513, %509 - %arrayidx17.i.i.us.us.7 = getelementptr inbounds float, float* %12, i64 %514 - %515 = load float, float* %arrayidx17.i.i.us.us.7, align 4, !tbaa !12 - %516 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.7, float %515, float %510) #2 - store float %516, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !21 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %517 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 1 - %exitcond.not.7 = icmp eq i64 %517, 32 - br i1 %exitcond.not.7, label %_pocl_kernel_gemm.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !19 - -if.then.i.i.us.7.1: ; preds = %if.end.i.i.us.7 - %add.i.i.us.7.1 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.7.1 - %518 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %mul6.i.i.us.7.1 = fmul float %24, %518 - store float %mul6.i.i.us.7.1, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.1 - -if.end.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.i.i.us.7 - %519 = or i64 %_local_id_x.i.0.us.7, 2 - %add1.i.i.i.us.7.2 = add nuw nsw i64 %519, %mul.i.i.i - %conv.i.i.us.7.2 = trunc i64 %add1.i.i.i.us.7.2 to i32 - %cmp4.i.i.us.7.2 = icmp sgt i32 %32, %conv.i.i.us.7.2 - br i1 %cmp4.i.i.us.7.2, label %if.then.i.i.us.7.2, label %if.end.i.i.us.7.2 - -if.then.i.i.us.7.2: ; preds = %if.end.i.i.us.7.1 - %add.i.i.us.7.2 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.2 - %idxprom.i.i.us.7.2 = sext i32 %add.i.i.us.7.2 to i64 - %arrayidx.i.i.us.7.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.7.2 - %520 = load float, float* %arrayidx.i.i.us.7.2, align 4, !tbaa !12 - %mul6.i.i.us.7.2 = fmul float %24, %520 - store float %mul6.i.i.us.7.2, float* %arrayidx.i.i.us.7.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.2 - -if.end.i.i.us.7.2: ; preds = %if.then.i.i.us.7.2, %if.end.i.i.us.7.1 - %521 = or i64 %_local_id_x.i.0.us.7, 3 - %add1.i.i.i.us.7.3 = add nuw nsw i64 %521, %mul.i.i.i - %conv.i.i.us.7.3 = trunc i64 %add1.i.i.i.us.7.3 to i32 - %cmp4.i.i.us.7.3 = icmp sgt i32 %32, %conv.i.i.us.7.3 - br i1 %cmp4.i.i.us.7.3, label %if.then.i.i.us.7.3, label %if.end.i.i.us.7.3 - -if.then.i.i.us.7.3: ; preds = %if.end.i.i.us.7.2 - %add.i.i.us.7.3 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.3 - %idxprom.i.i.us.7.3 = sext i32 %add.i.i.us.7.3 to i64 - %arrayidx.i.i.us.7.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.7.3 - %522 = load float, float* %arrayidx.i.i.us.7.3, align 4, !tbaa !12 - %mul6.i.i.us.7.3 = fmul float %24, %522 - store float %mul6.i.i.us.7.3, float* %arrayidx.i.i.us.7.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.3 - -if.end.i.i.us.7.3: ; preds = %if.then.i.i.us.7.3, %if.end.i.i.us.7.2 - %523 = add nuw nsw i64 %_local_id_x.i.0.us.7, 4 - %exitcond34.7.not.3 = icmp eq i64 %523, 32 - br i1 %exitcond34.7.not.3, label %_pocl_kernel_gemm.exit.loopexit238, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !32 - -if.then.i.i.us.6.1: ; preds = %if.end.i.i.us.6 - %add.i.i.us.6.1 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.6.1 - %524 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %mul6.i.i.us.6.1 = fmul float %24, %524 - store float %mul6.i.i.us.6.1, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.1 - -if.end.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.i.i.us.6 - %525 = or i64 %_local_id_x.i.0.us.6, 2 - %add1.i.i.i.us.6.2 = add nuw nsw i64 %525, %mul.i.i.i - %conv.i.i.us.6.2 = trunc i64 %add1.i.i.i.us.6.2 to i32 - %cmp4.i.i.us.6.2 = icmp sgt i32 %32, %conv.i.i.us.6.2 - br i1 %cmp4.i.i.us.6.2, label %if.then.i.i.us.6.2, label %if.end.i.i.us.6.2 - -if.then.i.i.us.6.2: ; preds = %if.end.i.i.us.6.1 - %add.i.i.us.6.2 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.2 - %idxprom.i.i.us.6.2 = sext i32 %add.i.i.us.6.2 to i64 - %arrayidx.i.i.us.6.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.6.2 - %526 = load float, float* %arrayidx.i.i.us.6.2, align 4, !tbaa !12 - %mul6.i.i.us.6.2 = fmul float %24, %526 - store float %mul6.i.i.us.6.2, float* %arrayidx.i.i.us.6.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.2 - -if.end.i.i.us.6.2: ; preds = %if.then.i.i.us.6.2, %if.end.i.i.us.6.1 - %527 = or i64 %_local_id_x.i.0.us.6, 3 - %add1.i.i.i.us.6.3 = add nuw nsw i64 %527, %mul.i.i.i - %conv.i.i.us.6.3 = trunc i64 %add1.i.i.i.us.6.3 to i32 - %cmp4.i.i.us.6.3 = icmp sgt i32 %32, %conv.i.i.us.6.3 - br i1 %cmp4.i.i.us.6.3, label %if.then.i.i.us.6.3, label %if.end.i.i.us.6.3 - -if.then.i.i.us.6.3: ; preds = %if.end.i.i.us.6.2 - %add.i.i.us.6.3 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.3 - %idxprom.i.i.us.6.3 = sext i32 %add.i.i.us.6.3 to i64 - %arrayidx.i.i.us.6.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.6.3 - %528 = load float, float* %arrayidx.i.i.us.6.3, align 4, !tbaa !12 - %mul6.i.i.us.6.3 = fmul float %24, %528 - store float %mul6.i.i.us.6.3, float* %arrayidx.i.i.us.6.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.3 - -if.end.i.i.us.6.3: ; preds = %if.then.i.i.us.6.3, %if.end.i.i.us.6.2 - %529 = add nuw nsw i64 %_local_id_x.i.0.us.6, 4 - %exitcond34.6.not.3 = icmp eq i64 %529, 32 - br i1 %exitcond34.6.not.3, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !33 - -if.then.i.i.us.5.1: ; preds = %if.end.i.i.us.5 - %add.i.i.us.5.1 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.5.1 - %530 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %mul6.i.i.us.5.1 = fmul float %24, %530 - store float %mul6.i.i.us.5.1, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.1 - -if.end.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.i.i.us.5 - %531 = or i64 %_local_id_x.i.0.us.5, 2 - %add1.i.i.i.us.5.2 = add nuw nsw i64 %531, %mul.i.i.i - %conv.i.i.us.5.2 = trunc i64 %add1.i.i.i.us.5.2 to i32 - %cmp4.i.i.us.5.2 = icmp sgt i32 %32, %conv.i.i.us.5.2 - br i1 %cmp4.i.i.us.5.2, label %if.then.i.i.us.5.2, label %if.end.i.i.us.5.2 - -if.then.i.i.us.5.2: ; preds = %if.end.i.i.us.5.1 - %add.i.i.us.5.2 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.2 - %idxprom.i.i.us.5.2 = sext i32 %add.i.i.us.5.2 to i64 - %arrayidx.i.i.us.5.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.5.2 - %532 = load float, float* %arrayidx.i.i.us.5.2, align 4, !tbaa !12 - %mul6.i.i.us.5.2 = fmul float %24, %532 - store float %mul6.i.i.us.5.2, float* %arrayidx.i.i.us.5.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.2 - -if.end.i.i.us.5.2: ; preds = %if.then.i.i.us.5.2, %if.end.i.i.us.5.1 - %533 = or i64 %_local_id_x.i.0.us.5, 3 - %add1.i.i.i.us.5.3 = add nuw nsw i64 %533, %mul.i.i.i - %conv.i.i.us.5.3 = trunc i64 %add1.i.i.i.us.5.3 to i32 - %cmp4.i.i.us.5.3 = icmp sgt i32 %32, %conv.i.i.us.5.3 - br i1 %cmp4.i.i.us.5.3, label %if.then.i.i.us.5.3, label %if.end.i.i.us.5.3 - -if.then.i.i.us.5.3: ; preds = %if.end.i.i.us.5.2 - %add.i.i.us.5.3 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.3 - %idxprom.i.i.us.5.3 = sext i32 %add.i.i.us.5.3 to i64 - %arrayidx.i.i.us.5.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.5.3 - %534 = load float, float* %arrayidx.i.i.us.5.3, align 4, !tbaa !12 - %mul6.i.i.us.5.3 = fmul float %24, %534 - store float %mul6.i.i.us.5.3, float* %arrayidx.i.i.us.5.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.3 - -if.end.i.i.us.5.3: ; preds = %if.then.i.i.us.5.3, %if.end.i.i.us.5.2 - %535 = add nuw nsw i64 %_local_id_x.i.0.us.5, 4 - %exitcond34.5.not.3 = icmp eq i64 %535, 32 - br i1 %exitcond34.5.not.3, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !34 - -if.then.i.i.us.4.1: ; preds = %if.end.i.i.us.4 - %add.i.i.us.4.1 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.4.1 - %536 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %mul6.i.i.us.4.1 = fmul float %24, %536 - store float %mul6.i.i.us.4.1, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.1 - -if.end.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.i.i.us.4 - %537 = or i64 %_local_id_x.i.0.us.4, 2 - %add1.i.i.i.us.4.2 = add nuw nsw i64 %537, %mul.i.i.i - %conv.i.i.us.4.2 = trunc i64 %add1.i.i.i.us.4.2 to i32 - %cmp4.i.i.us.4.2 = icmp sgt i32 %32, %conv.i.i.us.4.2 - br i1 %cmp4.i.i.us.4.2, label %if.then.i.i.us.4.2, label %if.end.i.i.us.4.2 - -if.then.i.i.us.4.2: ; preds = %if.end.i.i.us.4.1 - %add.i.i.us.4.2 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.2 - %idxprom.i.i.us.4.2 = sext i32 %add.i.i.us.4.2 to i64 - %arrayidx.i.i.us.4.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.4.2 - %538 = load float, float* %arrayidx.i.i.us.4.2, align 4, !tbaa !12 - %mul6.i.i.us.4.2 = fmul float %24, %538 - store float %mul6.i.i.us.4.2, float* %arrayidx.i.i.us.4.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.2 - -if.end.i.i.us.4.2: ; preds = %if.then.i.i.us.4.2, %if.end.i.i.us.4.1 - %539 = or i64 %_local_id_x.i.0.us.4, 3 - %add1.i.i.i.us.4.3 = add nuw nsw i64 %539, %mul.i.i.i - %conv.i.i.us.4.3 = trunc i64 %add1.i.i.i.us.4.3 to i32 - %cmp4.i.i.us.4.3 = icmp sgt i32 %32, %conv.i.i.us.4.3 - br i1 %cmp4.i.i.us.4.3, label %if.then.i.i.us.4.3, label %if.end.i.i.us.4.3 - -if.then.i.i.us.4.3: ; preds = %if.end.i.i.us.4.2 - %add.i.i.us.4.3 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.3 - %idxprom.i.i.us.4.3 = sext i32 %add.i.i.us.4.3 to i64 - %arrayidx.i.i.us.4.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.4.3 - %540 = load float, float* %arrayidx.i.i.us.4.3, align 4, !tbaa !12 - %mul6.i.i.us.4.3 = fmul float %24, %540 - store float %mul6.i.i.us.4.3, float* %arrayidx.i.i.us.4.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.3 - -if.end.i.i.us.4.3: ; preds = %if.then.i.i.us.4.3, %if.end.i.i.us.4.2 - %541 = add nuw nsw i64 %_local_id_x.i.0.us.4, 4 - %exitcond34.4.not.3 = icmp eq i64 %541, 32 - br i1 %exitcond34.4.not.3, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !35 - -if.then.i.i.us.3.1: ; preds = %if.end.i.i.us.3 - %add.i.i.us.3.1 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3.1 - %542 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %mul6.i.i.us.3.1 = fmul float %24, %542 - store float %mul6.i.i.us.3.1, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.1 - -if.end.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.i.i.us.3 - %543 = or i64 %_local_id_x.i.0.us.3, 2 - %add1.i.i.i.us.3.2 = add nuw nsw i64 %543, %mul.i.i.i - %conv.i.i.us.3.2 = trunc i64 %add1.i.i.i.us.3.2 to i32 - %cmp4.i.i.us.3.2 = icmp sgt i32 %32, %conv.i.i.us.3.2 - br i1 %cmp4.i.i.us.3.2, label %if.then.i.i.us.3.2, label %if.end.i.i.us.3.2 - -if.then.i.i.us.3.2: ; preds = %if.end.i.i.us.3.1 - %add.i.i.us.3.2 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.2 - %idxprom.i.i.us.3.2 = sext i32 %add.i.i.us.3.2 to i64 - %arrayidx.i.i.us.3.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3.2 - %544 = load float, float* %arrayidx.i.i.us.3.2, align 4, !tbaa !12 - %mul6.i.i.us.3.2 = fmul float %24, %544 - store float %mul6.i.i.us.3.2, float* %arrayidx.i.i.us.3.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.2 - -if.end.i.i.us.3.2: ; preds = %if.then.i.i.us.3.2, %if.end.i.i.us.3.1 - %545 = or i64 %_local_id_x.i.0.us.3, 3 - %add1.i.i.i.us.3.3 = add nuw nsw i64 %545, %mul.i.i.i - %conv.i.i.us.3.3 = trunc i64 %add1.i.i.i.us.3.3 to i32 - %cmp4.i.i.us.3.3 = icmp sgt i32 %32, %conv.i.i.us.3.3 - br i1 %cmp4.i.i.us.3.3, label %if.then.i.i.us.3.3, label %if.end.i.i.us.3.3 - -if.then.i.i.us.3.3: ; preds = %if.end.i.i.us.3.2 - %add.i.i.us.3.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.3 - %idxprom.i.i.us.3.3 = sext i32 %add.i.i.us.3.3 to i64 - %arrayidx.i.i.us.3.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3.3 - %546 = load float, float* %arrayidx.i.i.us.3.3, align 4, !tbaa !12 - %mul6.i.i.us.3.3 = fmul float %24, %546 - store float %mul6.i.i.us.3.3, float* %arrayidx.i.i.us.3.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.3 - -if.end.i.i.us.3.3: ; preds = %if.then.i.i.us.3.3, %if.end.i.i.us.3.2 - %547 = add nuw nsw i64 %_local_id_x.i.0.us.3, 4 - %exitcond34.3.not.3 = icmp eq i64 %547, 32 - br i1 %exitcond34.3.not.3, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !36 - -if.then.i.i.us.2.1: ; preds = %if.end.i.i.us.2 - %add.i.i.us.2.1 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2.1 - %548 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %mul6.i.i.us.2.1 = fmul float %24, %548 - store float %mul6.i.i.us.2.1, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.1 - -if.end.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.i.i.us.2 - %549 = or i64 %_local_id_x.i.0.us.2, 2 - %add1.i.i.i.us.2.2 = add nuw nsw i64 %549, %mul.i.i.i - %conv.i.i.us.2.2 = trunc i64 %add1.i.i.i.us.2.2 to i32 - %cmp4.i.i.us.2.2 = icmp sgt i32 %32, %conv.i.i.us.2.2 - br i1 %cmp4.i.i.us.2.2, label %if.then.i.i.us.2.2, label %if.end.i.i.us.2.2 - -if.then.i.i.us.2.2: ; preds = %if.end.i.i.us.2.1 - %add.i.i.us.2.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.2 - %idxprom.i.i.us.2.2 = sext i32 %add.i.i.us.2.2 to i64 - %arrayidx.i.i.us.2.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2.2 - %550 = load float, float* %arrayidx.i.i.us.2.2, align 4, !tbaa !12 - %mul6.i.i.us.2.2 = fmul float %24, %550 - store float %mul6.i.i.us.2.2, float* %arrayidx.i.i.us.2.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.2 - -if.end.i.i.us.2.2: ; preds = %if.then.i.i.us.2.2, %if.end.i.i.us.2.1 - %551 = or i64 %_local_id_x.i.0.us.2, 3 - %add1.i.i.i.us.2.3 = add nuw nsw i64 %551, %mul.i.i.i - %conv.i.i.us.2.3 = trunc i64 %add1.i.i.i.us.2.3 to i32 - %cmp4.i.i.us.2.3 = icmp sgt i32 %32, %conv.i.i.us.2.3 - br i1 %cmp4.i.i.us.2.3, label %if.then.i.i.us.2.3, label %if.end.i.i.us.2.3 - -if.then.i.i.us.2.3: ; preds = %if.end.i.i.us.2.2 - %add.i.i.us.2.3 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.3 - %idxprom.i.i.us.2.3 = sext i32 %add.i.i.us.2.3 to i64 - %arrayidx.i.i.us.2.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2.3 - %552 = load float, float* %arrayidx.i.i.us.2.3, align 4, !tbaa !12 - %mul6.i.i.us.2.3 = fmul float %24, %552 - store float %mul6.i.i.us.2.3, float* %arrayidx.i.i.us.2.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.3 - -if.end.i.i.us.2.3: ; preds = %if.then.i.i.us.2.3, %if.end.i.i.us.2.2 - %553 = add nuw nsw i64 %_local_id_x.i.0.us.2, 4 - %exitcond34.2.not.3 = icmp eq i64 %553, 32 - br i1 %exitcond34.2.not.3, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !37 - -if.then.i.i.us.1.1: ; preds = %if.end.i.i.us.1 - %add.i.i.us.1.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1.1 - %554 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %mul6.i.i.us.1.1 = fmul float %24, %554 - store float %mul6.i.i.us.1.1, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.1 - -if.end.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.i.i.us.1 - %555 = or i64 %_local_id_x.i.0.us.1, 2 - %add1.i.i.i.us.1.2 = add nuw nsw i64 %555, %mul.i.i.i - %conv.i.i.us.1.2 = trunc i64 %add1.i.i.i.us.1.2 to i32 - %cmp4.i.i.us.1.2 = icmp sgt i32 %32, %conv.i.i.us.1.2 - br i1 %cmp4.i.i.us.1.2, label %if.then.i.i.us.1.2, label %if.end.i.i.us.1.2 - -if.then.i.i.us.1.2: ; preds = %if.end.i.i.us.1.1 - %add.i.i.us.1.2 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.2 - %idxprom.i.i.us.1.2 = sext i32 %add.i.i.us.1.2 to i64 - %arrayidx.i.i.us.1.2 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1.2 - %556 = load float, float* %arrayidx.i.i.us.1.2, align 4, !tbaa !12 - %mul6.i.i.us.1.2 = fmul float %24, %556 - store float %mul6.i.i.us.1.2, float* %arrayidx.i.i.us.1.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.2 - -if.end.i.i.us.1.2: ; preds = %if.then.i.i.us.1.2, %if.end.i.i.us.1.1 - %557 = or i64 %_local_id_x.i.0.us.1, 3 - %add1.i.i.i.us.1.3 = add nuw nsw i64 %557, %mul.i.i.i - %conv.i.i.us.1.3 = trunc i64 %add1.i.i.i.us.1.3 to i32 - %cmp4.i.i.us.1.3 = icmp sgt i32 %32, %conv.i.i.us.1.3 - br i1 %cmp4.i.i.us.1.3, label %if.then.i.i.us.1.3, label %if.end.i.i.us.1.3 - -if.then.i.i.us.1.3: ; preds = %if.end.i.i.us.1.2 - %add.i.i.us.1.3 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.3 - %idxprom.i.i.us.1.3 = sext i32 %add.i.i.us.1.3 to i64 - %arrayidx.i.i.us.1.3 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1.3 - %558 = load float, float* %arrayidx.i.i.us.1.3, align 4, !tbaa !12 - %mul6.i.i.us.1.3 = fmul float %24, %558 - store float %mul6.i.i.us.1.3, float* %arrayidx.i.i.us.1.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.3 - -if.end.i.i.us.1.3: ; preds = %if.then.i.i.us.1.3, %if.end.i.i.us.1.2 - %559 = add nuw nsw i64 %_local_id_x.i.0.us.1, 4 - %exitcond34.1.not.3 = icmp eq i64 %559, 32 - br i1 %exitcond34.1.not.3, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !38 - -if.then.i.i.us.1214: ; preds = %if.end.i.i.us - %add.i.i.us.1210 = add nsw i32 %mul.i.i.us, %conv.i.i.us.1207 - %idxprom.i.i.us.1211 = sext i32 %add.i.i.us.1210 to i64 - %arrayidx.i.i.us.1212 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.1211 - %560 = load float, float* %arrayidx.i.i.us.1212, align 4, !tbaa !12 - %mul6.i.i.us.1213 = fmul float %24, %560 - store float %mul6.i.i.us.1213, float* %arrayidx.i.i.us.1212, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1215 - -if.end.i.i.us.1215: ; preds = %if.then.i.i.us.1214, %if.end.i.i.us - %561 = or i64 %_local_id_x.i.0.us, 2 - %add1.i.i.i.us.2217 = add nuw nsw i64 %561, %mul.i.i.i - %conv.i.i.us.2218 = trunc i64 %add1.i.i.i.us.2217 to i32 - %cmp4.i.i.us.2219 = icmp sgt i32 %32, %conv.i.i.us.2218 - br i1 %cmp4.i.i.us.2219, label %if.then.i.i.us.2225, label %if.end.i.i.us.2226 - -if.then.i.i.us.2225: ; preds = %if.end.i.i.us.1215 - %add.i.i.us.2221 = add nsw i32 %mul.i.i.us, %conv.i.i.us.2218 - %idxprom.i.i.us.2222 = sext i32 %add.i.i.us.2221 to i64 - %arrayidx.i.i.us.2223 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.2222 - %562 = load float, float* %arrayidx.i.i.us.2223, align 4, !tbaa !12 - %mul6.i.i.us.2224 = fmul float %24, %562 - store float %mul6.i.i.us.2224, float* %arrayidx.i.i.us.2223, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2226 - -if.end.i.i.us.2226: ; preds = %if.then.i.i.us.2225, %if.end.i.i.us.1215 - %563 = or i64 %_local_id_x.i.0.us, 3 - %add1.i.i.i.us.3228 = add nuw nsw i64 %563, %mul.i.i.i - %conv.i.i.us.3229 = trunc i64 %add1.i.i.i.us.3228 to i32 - %cmp4.i.i.us.3230 = icmp sgt i32 %32, %conv.i.i.us.3229 - br i1 %cmp4.i.i.us.3230, label %if.then.i.i.us.3236, label %if.end.i.i.us.3237 - -if.then.i.i.us.3236: ; preds = %if.end.i.i.us.2226 - %add.i.i.us.3232 = add nsw i32 %mul.i.i.us, %conv.i.i.us.3229 - %idxprom.i.i.us.3233 = sext i32 %add.i.i.us.3232 to i64 - %arrayidx.i.i.us.3234 = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us.3233 - %564 = load float, float* %arrayidx.i.i.us.3234, align 4, !tbaa !12 - %mul6.i.i.us.3235 = fmul float %24, %564 - store float %mul6.i.i.us.3235, float* %arrayidx.i.i.us.3234, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3237 - -if.end.i.i.us.3237: ; preds = %if.then.i.i.us.3236, %if.end.i.i.us.2226 - %565 = add nuw nsw i64 %_local_id_x.i.0.us, 4 - %exitcond34.not.3 = icmp eq i64 %565, 32 - br i1 %exitcond34.not.3, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !39 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_gemm_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to float** - %16 = load float*, float** %15, align 8 - %17 = load float, float* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to float** - %20 = load float*, float** %19, align 8 - %21 = load float, float* %20, align 4 - %22 = getelementptr i8*, i8** %0, i64 5 - %23 = bitcast i8** %22 to i32** - %24 = load i32*, i32** %23, align 8 - %25 = load i32, i32* %24, align 4 - %26 = getelementptr i8*, i8** %0, i64 6 - %27 = bitcast i8** %26 to i32** - %28 = load i32*, i32** %27, align 8 - %29 = load i32, i32* %28, align 4 - %30 = getelementptr i8*, i8** %0, i64 7 - %31 = bitcast i8** %30 to i32** - %32 = load i32*, i32** %31, align 8 - %33 = load i32, i32* %32, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp740.i.i = icmp sgt i32 %33, 0 - %34 = sext i32 %29 to i64 - %wide.trip.count.i.i = zext i32 %33 to i64 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %25, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %29, %conv2.i.i.us - br i1 %cmp740.i.i, label %pregion_for_entry.pregion_for_init.i.i.us, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - br i1 %cmp.i.i.us, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i.preheader - %35 = trunc i64 %3 to i32 - %36 = mul i32 %29, %35 - %37 = shl i32 %36, 3 - %38 = trunc i64 %2 to i32 - %39 = shl i32 %38, 5 - %40 = add i32 %37, %39 - %41 = icmp sgt i32 %40, 2147483616 - br i1 %41, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.ph: ; preds = %vector.scevcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert38 = insertelement <8 x i32> undef, i32 %29, i32 0 - %broadcast.splat39 = shufflevector <8 x i32> %broadcast.splatinsert38, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert40 = insertelement <8 x float> undef, float %21, i32 0 - %broadcast.splat41 = shufflevector <8 x float> %broadcast.splatinsert40, <8 x float> undef, <8 x i32> zeroinitializer - %42 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %43 = or <8 x i32> %42, - %44 = icmp sgt <8 x i32> %broadcast.splat39, %43 - %45 = extractelement <8 x i32> %43, i32 0 - %46 = add nsw i32 %mul.i.i.us, %45 - %47 = sext i32 %46 to i64 - %48 = getelementptr inbounds float, float* %13, i64 %47 - %49 = bitcast float* %48 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %49, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12 - %50 = fmul <8 x float> %broadcast.splat41, %wide.masked.load - %51 = bitcast float* %48 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %50, <8 x float>* %51, i32 4, <8 x i1> %44), !tbaa !12, !llvm.access.group !16 - %52 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %53 = or <8 x i32> %52, - %54 = icmp sgt <8 x i32> %broadcast.splat39, %53 - %55 = extractelement <8 x i32> %53, i32 0 - %56 = add nsw i32 %mul.i.i.us, %55 - %57 = sext i32 %56 to i64 - %58 = getelementptr inbounds float, float* %13, i64 %57 - %59 = bitcast float* %58 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %59, i32 4, <8 x i1> %54, <8 x float> undef), !tbaa !12 - %60 = fmul <8 x float> %broadcast.splat41, %wide.masked.load.1 - %61 = bitcast float* %58 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %60, <8 x float>* %61, i32 4, <8 x i1> %54), !tbaa !12, !llvm.access.group !16 - %62 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %63 = or <8 x i32> %62, - %64 = icmp sgt <8 x i32> %broadcast.splat39, %63 - %65 = extractelement <8 x i32> %63, i32 0 - %66 = add nsw i32 %mul.i.i.us, %65 - %67 = sext i32 %66 to i64 - %68 = getelementptr inbounds float, float* %13, i64 %67 - %69 = bitcast float* %68 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %69, i32 4, <8 x i1> %64, <8 x float> undef), !tbaa !12 - %70 = fmul <8 x float> %broadcast.splat41, %wide.masked.load.2 - %71 = bitcast float* %68 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %70, <8 x float>* %71, i32 4, <8 x i1> %64), !tbaa !12, !llvm.access.group !16 - %72 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %73 = or <8 x i32> %72, - %74 = icmp sgt <8 x i32> %broadcast.splat39, %73 - %75 = extractelement <8 x i32> %73, i32 0 - %76 = add nsw i32 %mul.i.i.us, %75 - %77 = sext i32 %76 to i64 - %78 = getelementptr inbounds float, float* %13, i64 %77 - %79 = bitcast float* %78 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %79, i32 4, <8 x i1> %74, <8 x float> undef), !tbaa !12 - %80 = fmul <8 x float> %broadcast.splat41, %wide.masked.load.3 - %81 = bitcast float* %78 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %80, <8 x float>* %81, i32 4, <8 x i1> %74), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -pregion_for_entry.pregion_for_init.i.i.us: ; preds = %5 - %mul9.i.i.us = mul nsw i32 %33, %conv2.i.i.us - %82 = sext i32 %mul9.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us - %83 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %83, 1 - %cmp.i.i.us.1 = icmp sgt i32 %25, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %29, %conv2.i.i.us.1 - %mul9.i.i.us.1 = mul nsw i32 %33, %conv2.i.i.us.1 - %84 = sext i32 %mul9.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %87, %if.end.i.i.us.us ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %29, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us - %85 = load float, float* %arrayidx.i.i.us.us, align 4, !tbaa !12 - %mul6.i.i.us.us = fmul float %21, %85 - store float %mul6.i.i.us.us, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us = shl i64 %add1.i.i.i.us.us, 32 - %86 = ashr exact i64 %sext.i.i.us.us, 32 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %87 = add nuw nsw i64 %_local_id_x.i.0.us.us, 1 - %exitcond.not = icmp eq i64 %87, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !19 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %88 = phi float [ %94, %for.body.i.i.us.us ], [ %mul6.i.i.us.us, %if.then.i.i.us.us ] - %89 = add nsw i64 %indvars.iv.next.i.i3.us.us, %82 - %arrayidx12.i.i.us.us = getelementptr inbounds float, float* %7, i64 %89 - %90 = load float, float* %arrayidx12.i.i.us.us, align 4, !tbaa !12 - %mul13.i.i.us.us = fmul float %17, %90 - %91 = mul nsw i64 %indvars.iv.next.i.i3.us.us, %34 - %92 = add nsw i64 %91, %86 - %arrayidx17.i.i.us.us = getelementptr inbounds float, float* %10, i64 %92 - %93 = load float, float* %arrayidx17.i.i.us.us, align 4, !tbaa !12 - %94 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us, float %93, float %88) #2 - store float %94, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !21 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us.3237, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %562, %if.end.i.i.us.3237 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %29, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add.i.i.us = add nsw i32 %mul.i.i.us, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us - %95 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %mul6.i.i.us = fmul float %21, %95 - store float %mul6.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %96 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1206 = add nuw nsw i64 %96, %mul.i.i.i - %conv.i.i.us.1207 = trunc i64 %add1.i.i.i.us.1206 to i32 - %cmp4.i.i.us.1208 = icmp sgt i32 %29, %conv.i.i.us.1207 - br i1 %cmp4.i.i.us.1208, label %if.then.i.i.us.1214, label %if.end.i.i.us.1215 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us.3237 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i.preheader - %97 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.1 = or i32 %97, 1 - %cmp.i.i.1 = icmp sgt i32 %25, %conv2.i.i.1 - %mul.i.i.1 = mul nsw i32 %29, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck49, label %pregion_for_end.i.i.1 - -vector.scevcheck49: ; preds = %pregion_for_end.i.i - %98 = mul i32 %29, %conv2.i.i.1 - %99 = trunc i64 %2 to i32 - %100 = shl i32 %99, 5 - %101 = add i32 %98, %100 - %102 = icmp sgt i32 %101, 2147483616 - br i1 %102, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph50 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.scevcheck49 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.ph50: ; preds = %vector.scevcheck49 - %broadcast.splatinsert57 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat58 = shufflevector <8 x i64> %broadcast.splatinsert57, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert59 = insertelement <8 x i32> undef, i32 %29, i32 0 - %broadcast.splat60 = shufflevector <8 x i32> %broadcast.splatinsert59, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert62 = insertelement <8 x float> undef, float %21, i32 0 - %broadcast.splat63 = shufflevector <8 x float> %broadcast.splatinsert62, <8 x float> undef, <8 x i32> zeroinitializer - %103 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %104 = or <8 x i32> %103, - %105 = icmp sgt <8 x i32> %broadcast.splat60, %104 - %106 = extractelement <8 x i32> %104, i32 0 - %107 = add nsw i32 %mul.i.i.1, %106 - %108 = sext i32 %107 to i64 - %109 = getelementptr inbounds float, float* %13, i64 %108 - %110 = bitcast float* %109 to <8 x float>* - %wide.masked.load61 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %110, i32 4, <8 x i1> %105, <8 x float> undef), !tbaa !12 - %111 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61 - %112 = bitcast float* %109 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %111, <8 x float>* %112, i32 4, <8 x i1> %105), !tbaa !12, !llvm.access.group !16 - %113 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %114 = or <8 x i32> %113, - %115 = icmp sgt <8 x i32> %broadcast.splat60, %114 - %116 = extractelement <8 x i32> %114, i32 0 - %117 = add nsw i32 %mul.i.i.1, %116 - %118 = sext i32 %117 to i64 - %119 = getelementptr inbounds float, float* %13, i64 %118 - %120 = bitcast float* %119 to <8 x float>* - %wide.masked.load61.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %120, i32 4, <8 x i1> %115, <8 x float> undef), !tbaa !12 - %121 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61.1 - %122 = bitcast float* %119 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %121, <8 x float>* %122, i32 4, <8 x i1> %115), !tbaa !12, !llvm.access.group !16 - %123 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %124 = or <8 x i32> %123, - %125 = icmp sgt <8 x i32> %broadcast.splat60, %124 - %126 = extractelement <8 x i32> %124, i32 0 - %127 = add nsw i32 %mul.i.i.1, %126 - %128 = sext i32 %127 to i64 - %129 = getelementptr inbounds float, float* %13, i64 %128 - %130 = bitcast float* %129 to <8 x float>* - %wide.masked.load61.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %130, i32 4, <8 x i1> %125, <8 x float> undef), !tbaa !12 - %131 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61.2 - %132 = bitcast float* %129 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %131, <8 x float>* %132, i32 4, <8 x i1> %125), !tbaa !12, !llvm.access.group !16 - %133 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %134 = or <8 x i32> %133, - %135 = icmp sgt <8 x i32> %broadcast.splat60, %134 - %136 = extractelement <8 x i32> %134, i32 0 - %137 = add nsw i32 %mul.i.i.1, %136 - %138 = sext i32 %137 to i64 - %139 = getelementptr inbounds float, float* %13, i64 %138 - %140 = bitcast float* %139 to <8 x float>* - %wide.masked.load61.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %140, i32 4, <8 x i1> %135, <8 x float> undef), !tbaa !12 - %141 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61.3 - %142 = bitcast float* %139 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %141, <8 x float>* %142, i32 4, <8 x i1> %135), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.1 - -_pocl_kernel_gemm.exit.loopexit: ; preds = %if.end.i.i.us.us.7 - br label %_pocl_kernel_gemm.exit - -_pocl_kernel_gemm.exit.loopexit238: ; preds = %if.end.i.i.us.7.3 - br label %_pocl_kernel_gemm.exit - -_pocl_kernel_gemm.exit: ; preds = %pregion_for_end.i.i.us.6, %vector.ph182, %pregion_for_end.i.i.6, %_pocl_kernel_gemm.exit.loopexit238, %_pocl_kernel_gemm.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.i.i.us.1.3, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ %556, %if.end.i.i.us.1.3 ], [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %29, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %add.i.i.us.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1 - %143 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %mul6.i.i.us.1 = fmul float %21, %143 - store float %mul6.i.i.us.1, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %144 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %144, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %29, %conv.i.i.us.1.1 - br i1 %cmp4.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.i.i.us.1.3 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph50, %pregion_for_end.i.i - %145 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.2 = or i32 %145, 2 - %cmp.i.i.2 = icmp sgt i32 %25, %conv2.i.i.2 - %mul.i.i.2 = mul nsw i32 %29, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck71, label %pregion_for_end.i.i.2 - -vector.scevcheck71: ; preds = %pregion_for_end.i.i.1 - %146 = mul i32 %29, %conv2.i.i.2 - %147 = trunc i64 %2 to i32 - %148 = shl i32 %147, 5 - %149 = add i32 %146, %148 - %150 = icmp sgt i32 %149, 2147483616 - br i1 %150, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph72 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.scevcheck71 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.ph72: ; preds = %vector.scevcheck71 - %broadcast.splatinsert79 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat80 = shufflevector <8 x i64> %broadcast.splatinsert79, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert81 = insertelement <8 x i32> undef, i32 %29, i32 0 - %broadcast.splat82 = shufflevector <8 x i32> %broadcast.splatinsert81, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert84 = insertelement <8 x float> undef, float %21, i32 0 - %broadcast.splat85 = shufflevector <8 x float> %broadcast.splatinsert84, <8 x float> undef, <8 x i32> zeroinitializer - %151 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %152 = or <8 x i32> %151, - %153 = icmp sgt <8 x i32> %broadcast.splat82, %152 - %154 = extractelement <8 x i32> %152, i32 0 - %155 = add nsw i32 %mul.i.i.2, %154 - %156 = sext i32 %155 to i64 - %157 = getelementptr inbounds float, float* %13, i64 %156 - %158 = bitcast float* %157 to <8 x float>* - %wide.masked.load83 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %158, i32 4, <8 x i1> %153, <8 x float> undef), !tbaa !12 - %159 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83 - %160 = bitcast float* %157 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %159, <8 x float>* %160, i32 4, <8 x i1> %153), !tbaa !12, !llvm.access.group !16 - %161 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %162 = or <8 x i32> %161, - %163 = icmp sgt <8 x i32> %broadcast.splat82, %162 - %164 = extractelement <8 x i32> %162, i32 0 - %165 = add nsw i32 %mul.i.i.2, %164 - %166 = sext i32 %165 to i64 - %167 = getelementptr inbounds float, float* %13, i64 %166 - %168 = bitcast float* %167 to <8 x float>* - %wide.masked.load83.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %168, i32 4, <8 x i1> %163, <8 x float> undef), !tbaa !12 - %169 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83.1 - %170 = bitcast float* %167 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %169, <8 x float>* %170, i32 4, <8 x i1> %163), !tbaa !12, !llvm.access.group !16 - %171 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %172 = or <8 x i32> %171, - %173 = icmp sgt <8 x i32> %broadcast.splat82, %172 - %174 = extractelement <8 x i32> %172, i32 0 - %175 = add nsw i32 %mul.i.i.2, %174 - %176 = sext i32 %175 to i64 - %177 = getelementptr inbounds float, float* %13, i64 %176 - %178 = bitcast float* %177 to <8 x float>* - %wide.masked.load83.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %178, i32 4, <8 x i1> %173, <8 x float> undef), !tbaa !12 - %179 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83.2 - %180 = bitcast float* %177 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %179, <8 x float>* %180, i32 4, <8 x i1> %173), !tbaa !12, !llvm.access.group !16 - %181 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %182 = or <8 x i32> %181, - %183 = icmp sgt <8 x i32> %broadcast.splat82, %182 - %184 = extractelement <8 x i32> %182, i32 0 - %185 = add nsw i32 %mul.i.i.2, %184 - %186 = sext i32 %185 to i64 - %187 = getelementptr inbounds float, float* %13, i64 %186 - %188 = bitcast float* %187 to <8 x float>* - %wide.masked.load83.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %188, i32 4, <8 x i1> %183, <8 x float> undef), !tbaa !12 - %189 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83.3 - %190 = bitcast float* %187 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %189, <8 x float>* %190, i32 4, <8 x i1> %183), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.i.i.us.2.3, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ %550, %if.end.i.i.us.2.3 ], [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %29, %conv.i.i.us.2 - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %add.i.i.us.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2 - %191 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %mul6.i.i.us.2 = fmul float %21, %191 - store float %mul6.i.i.us.2, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %192 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %192, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %29, %conv.i.i.us.2.1 - br i1 %cmp4.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.i.i.us.2.3 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph72, %pregion_for_end.i.i.1 - %193 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.3 = or i32 %193, 3 - %cmp.i.i.3 = icmp sgt i32 %25, %conv2.i.i.3 - %mul.i.i.3 = mul nsw i32 %29, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck93, label %pregion_for_end.i.i.3 - -vector.scevcheck93: ; preds = %pregion_for_end.i.i.2 - %194 = mul i32 %29, %conv2.i.i.3 - %195 = trunc i64 %2 to i32 - %196 = shl i32 %195, 5 - %197 = add i32 %194, %196 - %198 = icmp sgt i32 %197, 2147483616 - br i1 %198, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph94 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.scevcheck93 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.ph94: ; preds = %vector.scevcheck93 - %broadcast.splatinsert101 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat102 = shufflevector <8 x i64> %broadcast.splatinsert101, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert103 = insertelement <8 x i32> undef, i32 %29, i32 0 - %broadcast.splat104 = shufflevector <8 x i32> %broadcast.splatinsert103, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert106 = insertelement <8 x float> undef, float %21, i32 0 - %broadcast.splat107 = shufflevector <8 x float> %broadcast.splatinsert106, <8 x float> undef, <8 x i32> zeroinitializer - %199 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %200 = or <8 x i32> %199, - %201 = icmp sgt <8 x i32> %broadcast.splat104, %200 - %202 = extractelement <8 x i32> %200, i32 0 - %203 = add nsw i32 %mul.i.i.3, %202 - %204 = sext i32 %203 to i64 - %205 = getelementptr inbounds float, float* %13, i64 %204 - %206 = bitcast float* %205 to <8 x float>* - %wide.masked.load105 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %206, i32 4, <8 x i1> %201, <8 x float> undef), !tbaa !12 - %207 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105 - %208 = bitcast float* %205 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %207, <8 x float>* %208, i32 4, <8 x i1> %201), !tbaa !12, !llvm.access.group !16 - %209 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %210 = or <8 x i32> %209, - %211 = icmp sgt <8 x i32> %broadcast.splat104, %210 - %212 = extractelement <8 x i32> %210, i32 0 - %213 = add nsw i32 %mul.i.i.3, %212 - %214 = sext i32 %213 to i64 - %215 = getelementptr inbounds float, float* %13, i64 %214 - %216 = bitcast float* %215 to <8 x float>* - %wide.masked.load105.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %216, i32 4, <8 x i1> %211, <8 x float> undef), !tbaa !12 - %217 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105.1 - %218 = bitcast float* %215 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %217, <8 x float>* %218, i32 4, <8 x i1> %211), !tbaa !12, !llvm.access.group !16 - %219 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %220 = or <8 x i32> %219, - %221 = icmp sgt <8 x i32> %broadcast.splat104, %220 - %222 = extractelement <8 x i32> %220, i32 0 - %223 = add nsw i32 %mul.i.i.3, %222 - %224 = sext i32 %223 to i64 - %225 = getelementptr inbounds float, float* %13, i64 %224 - %226 = bitcast float* %225 to <8 x float>* - %wide.masked.load105.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %226, i32 4, <8 x i1> %221, <8 x float> undef), !tbaa !12 - %227 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105.2 - %228 = bitcast float* %225 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %227, <8 x float>* %228, i32 4, <8 x i1> %221), !tbaa !12, !llvm.access.group !16 - %229 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %230 = or <8 x i32> %229, - %231 = icmp sgt <8 x i32> %broadcast.splat104, %230 - %232 = extractelement <8 x i32> %230, i32 0 - %233 = add nsw i32 %mul.i.i.3, %232 - %234 = sext i32 %233 to i64 - %235 = getelementptr inbounds float, float* %13, i64 %234 - %236 = bitcast float* %235 to <8 x float>* - %wide.masked.load105.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %236, i32 4, <8 x i1> %231, <8 x float> undef), !tbaa !12 - %237 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105.3 - %238 = bitcast float* %235 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %237, <8 x float>* %238, i32 4, <8 x i1> %231), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.i.i.us.3.3, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ %544, %if.end.i.i.us.3.3 ], [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %29, %conv.i.i.us.3 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %add.i.i.us.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3 - %239 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %mul6.i.i.us.3 = fmul float %21, %239 - store float %mul6.i.i.us.3, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %240 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %240, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %29, %conv.i.i.us.3.1 - br i1 %cmp4.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.i.i.us.3.3 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph94, %pregion_for_end.i.i.2 - %241 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.4 = or i32 %241, 4 - %cmp.i.i.4 = icmp sgt i32 %25, %conv2.i.i.4 - %mul.i.i.4 = mul nsw i32 %29, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck115, label %pregion_for_end.i.i.4 - -vector.scevcheck115: ; preds = %pregion_for_end.i.i.3 - %242 = mul i32 %29, %conv2.i.i.4 - %243 = trunc i64 %2 to i32 - %244 = shl i32 %243, 5 - %245 = add i32 %242, %244 - %246 = icmp sgt i32 %245, 2147483616 - br i1 %246, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph116 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.scevcheck115 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.ph116: ; preds = %vector.scevcheck115 - %broadcast.splatinsert123 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat124 = shufflevector <8 x i64> %broadcast.splatinsert123, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert125 = insertelement <8 x i32> undef, i32 %29, i32 0 - %broadcast.splat126 = shufflevector <8 x i32> %broadcast.splatinsert125, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert128 = insertelement <8 x float> undef, float %21, i32 0 - %broadcast.splat129 = shufflevector <8 x float> %broadcast.splatinsert128, <8 x float> undef, <8 x i32> zeroinitializer - %247 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %248 = or <8 x i32> %247, - %249 = icmp sgt <8 x i32> %broadcast.splat126, %248 - %250 = extractelement <8 x i32> %248, i32 0 - %251 = add nsw i32 %mul.i.i.4, %250 - %252 = sext i32 %251 to i64 - %253 = getelementptr inbounds float, float* %13, i64 %252 - %254 = bitcast float* %253 to <8 x float>* - %wide.masked.load127 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %254, i32 4, <8 x i1> %249, <8 x float> undef), !tbaa !12 - %255 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127 - %256 = bitcast float* %253 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %255, <8 x float>* %256, i32 4, <8 x i1> %249), !tbaa !12, !llvm.access.group !16 - %257 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %258 = or <8 x i32> %257, - %259 = icmp sgt <8 x i32> %broadcast.splat126, %258 - %260 = extractelement <8 x i32> %258, i32 0 - %261 = add nsw i32 %mul.i.i.4, %260 - %262 = sext i32 %261 to i64 - %263 = getelementptr inbounds float, float* %13, i64 %262 - %264 = bitcast float* %263 to <8 x float>* - %wide.masked.load127.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %264, i32 4, <8 x i1> %259, <8 x float> undef), !tbaa !12 - %265 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127.1 - %266 = bitcast float* %263 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %265, <8 x float>* %266, i32 4, <8 x i1> %259), !tbaa !12, !llvm.access.group !16 - %267 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %268 = or <8 x i32> %267, - %269 = icmp sgt <8 x i32> %broadcast.splat126, %268 - %270 = extractelement <8 x i32> %268, i32 0 - %271 = add nsw i32 %mul.i.i.4, %270 - %272 = sext i32 %271 to i64 - %273 = getelementptr inbounds float, float* %13, i64 %272 - %274 = bitcast float* %273 to <8 x float>* - %wide.masked.load127.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %274, i32 4, <8 x i1> %269, <8 x float> undef), !tbaa !12 - %275 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127.2 - %276 = bitcast float* %273 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %275, <8 x float>* %276, i32 4, <8 x i1> %269), !tbaa !12, !llvm.access.group !16 - %277 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %278 = or <8 x i32> %277, - %279 = icmp sgt <8 x i32> %broadcast.splat126, %278 - %280 = extractelement <8 x i32> %278, i32 0 - %281 = add nsw i32 %mul.i.i.4, %280 - %282 = sext i32 %281 to i64 - %283 = getelementptr inbounds float, float* %13, i64 %282 - %284 = bitcast float* %283 to <8 x float>* - %wide.masked.load127.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %284, i32 4, <8 x i1> %279, <8 x float> undef), !tbaa !12 - %285 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127.3 - %286 = bitcast float* %283 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %285, <8 x float>* %286, i32 4, <8 x i1> %279), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.i.i.us.4.3, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ %538, %if.end.i.i.us.4.3 ], [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %29, %conv.i.i.us.4 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %add.i.i.us.4 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.4 - %287 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %mul6.i.i.us.4 = fmul float %21, %287 - store float %mul6.i.i.us.4, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %288 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %288, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %29, %conv.i.i.us.4.1 - br i1 %cmp4.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.i.i.us.4.3 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph116, %pregion_for_end.i.i.3 - %289 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.5 = or i32 %289, 5 - %cmp.i.i.5 = icmp sgt i32 %25, %conv2.i.i.5 - %mul.i.i.5 = mul nsw i32 %29, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck137, label %pregion_for_end.i.i.5 - -vector.scevcheck137: ; preds = %pregion_for_end.i.i.4 - %290 = mul i32 %29, %conv2.i.i.5 - %291 = trunc i64 %2 to i32 - %292 = shl i32 %291, 5 - %293 = add i32 %290, %292 - %294 = icmp sgt i32 %293, 2147483616 - br i1 %294, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph138 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.scevcheck137 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.ph138: ; preds = %vector.scevcheck137 - %broadcast.splatinsert145 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat146 = shufflevector <8 x i64> %broadcast.splatinsert145, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert147 = insertelement <8 x i32> undef, i32 %29, i32 0 - %broadcast.splat148 = shufflevector <8 x i32> %broadcast.splatinsert147, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert150 = insertelement <8 x float> undef, float %21, i32 0 - %broadcast.splat151 = shufflevector <8 x float> %broadcast.splatinsert150, <8 x float> undef, <8 x i32> zeroinitializer - %295 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %296 = or <8 x i32> %295, - %297 = icmp sgt <8 x i32> %broadcast.splat148, %296 - %298 = extractelement <8 x i32> %296, i32 0 - %299 = add nsw i32 %mul.i.i.5, %298 - %300 = sext i32 %299 to i64 - %301 = getelementptr inbounds float, float* %13, i64 %300 - %302 = bitcast float* %301 to <8 x float>* - %wide.masked.load149 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %302, i32 4, <8 x i1> %297, <8 x float> undef), !tbaa !12 - %303 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149 - %304 = bitcast float* %301 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %303, <8 x float>* %304, i32 4, <8 x i1> %297), !tbaa !12, !llvm.access.group !16 - %305 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %306 = or <8 x i32> %305, - %307 = icmp sgt <8 x i32> %broadcast.splat148, %306 - %308 = extractelement <8 x i32> %306, i32 0 - %309 = add nsw i32 %mul.i.i.5, %308 - %310 = sext i32 %309 to i64 - %311 = getelementptr inbounds float, float* %13, i64 %310 - %312 = bitcast float* %311 to <8 x float>* - %wide.masked.load149.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %312, i32 4, <8 x i1> %307, <8 x float> undef), !tbaa !12 - %313 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149.1 - %314 = bitcast float* %311 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %313, <8 x float>* %314, i32 4, <8 x i1> %307), !tbaa !12, !llvm.access.group !16 - %315 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %316 = or <8 x i32> %315, - %317 = icmp sgt <8 x i32> %broadcast.splat148, %316 - %318 = extractelement <8 x i32> %316, i32 0 - %319 = add nsw i32 %mul.i.i.5, %318 - %320 = sext i32 %319 to i64 - %321 = getelementptr inbounds float, float* %13, i64 %320 - %322 = bitcast float* %321 to <8 x float>* - %wide.masked.load149.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %322, i32 4, <8 x i1> %317, <8 x float> undef), !tbaa !12 - %323 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149.2 - %324 = bitcast float* %321 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %323, <8 x float>* %324, i32 4, <8 x i1> %317), !tbaa !12, !llvm.access.group !16 - %325 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %326 = or <8 x i32> %325, - %327 = icmp sgt <8 x i32> %broadcast.splat148, %326 - %328 = extractelement <8 x i32> %326, i32 0 - %329 = add nsw i32 %mul.i.i.5, %328 - %330 = sext i32 %329 to i64 - %331 = getelementptr inbounds float, float* %13, i64 %330 - %332 = bitcast float* %331 to <8 x float>* - %wide.masked.load149.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %332, i32 4, <8 x i1> %327, <8 x float> undef), !tbaa !12 - %333 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149.3 - %334 = bitcast float* %331 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %333, <8 x float>* %334, i32 4, <8 x i1> %327), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.i.i.us.5.3, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ %532, %if.end.i.i.us.5.3 ], [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %29, %conv.i.i.us.5 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %add.i.i.us.5 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.5 - %335 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %mul6.i.i.us.5 = fmul float %21, %335 - store float %mul6.i.i.us.5, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %336 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %336, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %29, %conv.i.i.us.5.1 - br i1 %cmp4.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.i.i.us.5.3 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph138, %pregion_for_end.i.i.4 - %337 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.6 = or i32 %337, 6 - %cmp.i.i.6 = icmp sgt i32 %25, %conv2.i.i.6 - %mul.i.i.6 = mul nsw i32 %29, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck159, label %pregion_for_end.i.i.6 - -vector.scevcheck159: ; preds = %pregion_for_end.i.i.5 - %338 = mul i32 %29, %conv2.i.i.6 - %339 = trunc i64 %2 to i32 - %340 = shl i32 %339, 5 - %341 = add i32 %338, %340 - %342 = icmp sgt i32 %341, 2147483616 - br i1 %342, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph160 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.scevcheck159 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.ph160: ; preds = %vector.scevcheck159 - %broadcast.splatinsert167 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat168 = shufflevector <8 x i64> %broadcast.splatinsert167, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert169 = insertelement <8 x i32> undef, i32 %29, i32 0 - %broadcast.splat170 = shufflevector <8 x i32> %broadcast.splatinsert169, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert172 = insertelement <8 x float> undef, float %21, i32 0 - %broadcast.splat173 = shufflevector <8 x float> %broadcast.splatinsert172, <8 x float> undef, <8 x i32> zeroinitializer - %343 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %344 = or <8 x i32> %343, - %345 = icmp sgt <8 x i32> %broadcast.splat170, %344 - %346 = extractelement <8 x i32> %344, i32 0 - %347 = add nsw i32 %mul.i.i.6, %346 - %348 = sext i32 %347 to i64 - %349 = getelementptr inbounds float, float* %13, i64 %348 - %350 = bitcast float* %349 to <8 x float>* - %wide.masked.load171 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %350, i32 4, <8 x i1> %345, <8 x float> undef), !tbaa !12 - %351 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171 - %352 = bitcast float* %349 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %351, <8 x float>* %352, i32 4, <8 x i1> %345), !tbaa !12, !llvm.access.group !16 - %353 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %354 = or <8 x i32> %353, - %355 = icmp sgt <8 x i32> %broadcast.splat170, %354 - %356 = extractelement <8 x i32> %354, i32 0 - %357 = add nsw i32 %mul.i.i.6, %356 - %358 = sext i32 %357 to i64 - %359 = getelementptr inbounds float, float* %13, i64 %358 - %360 = bitcast float* %359 to <8 x float>* - %wide.masked.load171.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %360, i32 4, <8 x i1> %355, <8 x float> undef), !tbaa !12 - %361 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171.1 - %362 = bitcast float* %359 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %361, <8 x float>* %362, i32 4, <8 x i1> %355), !tbaa !12, !llvm.access.group !16 - %363 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %364 = or <8 x i32> %363, - %365 = icmp sgt <8 x i32> %broadcast.splat170, %364 - %366 = extractelement <8 x i32> %364, i32 0 - %367 = add nsw i32 %mul.i.i.6, %366 - %368 = sext i32 %367 to i64 - %369 = getelementptr inbounds float, float* %13, i64 %368 - %370 = bitcast float* %369 to <8 x float>* - %wide.masked.load171.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %370, i32 4, <8 x i1> %365, <8 x float> undef), !tbaa !12 - %371 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171.2 - %372 = bitcast float* %369 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %371, <8 x float>* %372, i32 4, <8 x i1> %365), !tbaa !12, !llvm.access.group !16 - %373 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %374 = or <8 x i32> %373, - %375 = icmp sgt <8 x i32> %broadcast.splat170, %374 - %376 = extractelement <8 x i32> %374, i32 0 - %377 = add nsw i32 %mul.i.i.6, %376 - %378 = sext i32 %377 to i64 - %379 = getelementptr inbounds float, float* %13, i64 %378 - %380 = bitcast float* %379 to <8 x float>* - %wide.masked.load171.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %380, i32 4, <8 x i1> %375, <8 x float> undef), !tbaa !12 - %381 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171.3 - %382 = bitcast float* %379 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %381, <8 x float>* %382, i32 4, <8 x i1> %375), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.i.i.us.6.3, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ %526, %if.end.i.i.us.6.3 ], [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %29, %conv.i.i.us.6 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %add.i.i.us.6 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.6 - %383 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %mul6.i.i.us.6 = fmul float %21, %383 - store float %mul6.i.i.us.6, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %384 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %384, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %29, %conv.i.i.us.6.1 - br i1 %cmp4.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.i.i.us.6.3 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph160, %pregion_for_end.i.i.5 - %385 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.7 = or i32 %385, 7 - %cmp.i.i.7 = icmp sgt i32 %25, %conv2.i.i.7 - %mul.i.i.7 = mul nsw i32 %29, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck181, label %_pocl_kernel_gemm.exit - -vector.scevcheck181: ; preds = %pregion_for_end.i.i.6 - %386 = mul i32 %29, %conv2.i.i.7 - %387 = trunc i64 %2 to i32 - %388 = shl i32 %387, 5 - %389 = add i32 %386, %388 - %390 = icmp sgt i32 %389, 2147483616 - br i1 %390, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph182 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.scevcheck181 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.ph182: ; preds = %vector.scevcheck181 - %broadcast.splatinsert189 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat190 = shufflevector <8 x i64> %broadcast.splatinsert189, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert191 = insertelement <8 x i32> undef, i32 %29, i32 0 - %broadcast.splat192 = shufflevector <8 x i32> %broadcast.splatinsert191, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert194 = insertelement <8 x float> undef, float %21, i32 0 - %broadcast.splat195 = shufflevector <8 x float> %broadcast.splatinsert194, <8 x float> undef, <8 x i32> zeroinitializer - %391 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %392 = or <8 x i32> %391, - %393 = icmp sgt <8 x i32> %broadcast.splat192, %392 - %394 = extractelement <8 x i32> %392, i32 0 - %395 = add nsw i32 %mul.i.i.7, %394 - %396 = sext i32 %395 to i64 - %397 = getelementptr inbounds float, float* %13, i64 %396 - %398 = bitcast float* %397 to <8 x float>* - %wide.masked.load193 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %398, i32 4, <8 x i1> %393, <8 x float> undef), !tbaa !12 - %399 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193 - %400 = bitcast float* %397 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %399, <8 x float>* %400, i32 4, <8 x i1> %393), !tbaa !12, !llvm.access.group !16 - %401 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %402 = or <8 x i32> %401, - %403 = icmp sgt <8 x i32> %broadcast.splat192, %402 - %404 = extractelement <8 x i32> %402, i32 0 - %405 = add nsw i32 %mul.i.i.7, %404 - %406 = sext i32 %405 to i64 - %407 = getelementptr inbounds float, float* %13, i64 %406 - %408 = bitcast float* %407 to <8 x float>* - %wide.masked.load193.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %408, i32 4, <8 x i1> %403, <8 x float> undef), !tbaa !12 - %409 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193.1 - %410 = bitcast float* %407 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %409, <8 x float>* %410, i32 4, <8 x i1> %403), !tbaa !12, !llvm.access.group !16 - %411 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %412 = or <8 x i32> %411, - %413 = icmp sgt <8 x i32> %broadcast.splat192, %412 - %414 = extractelement <8 x i32> %412, i32 0 - %415 = add nsw i32 %mul.i.i.7, %414 - %416 = sext i32 %415 to i64 - %417 = getelementptr inbounds float, float* %13, i64 %416 - %418 = bitcast float* %417 to <8 x float>* - %wide.masked.load193.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %418, i32 4, <8 x i1> %413, <8 x float> undef), !tbaa !12 - %419 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193.2 - %420 = bitcast float* %417 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %419, <8 x float>* %420, i32 4, <8 x i1> %413), !tbaa !12, !llvm.access.group !16 - %421 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %422 = or <8 x i32> %421, - %423 = icmp sgt <8 x i32> %broadcast.splat192, %422 - %424 = extractelement <8 x i32> %422, i32 0 - %425 = add nsw i32 %mul.i.i.7, %424 - %426 = sext i32 %425 to i64 - %427 = getelementptr inbounds float, float* %13, i64 %426 - %428 = bitcast float* %427 to <8 x float>* - %wide.masked.load193.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %428, i32 4, <8 x i1> %423, <8 x float> undef), !tbaa !12 - %429 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193.3 - %430 = bitcast float* %427 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %429, <8 x float>* %430, i32 4, <8 x i1> %423), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_gemm.exit - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.i.i.us.7.3, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ %520, %if.end.i.i.us.7.3 ], [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %29, %conv.i.i.us.7 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %add.i.i.us.7 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.7 - %431 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %mul6.i.i.us.7 = fmul float %21, %431 - store float %mul6.i.i.us.7, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %432 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %432, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %29, %conv.i.i.us.7.1 - br i1 %cmp4.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.i.i.us.7.1 - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %442, %if.end.i.i.us.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %29, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.1 - %433 = load float, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12 - %mul6.i.i.us.us.1 = fmul float %21, %433 - store float %mul6.i.i.us.us.1, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.1 = shl i64 %add1.i.i.i.us.us.1, 32 - %434 = ashr exact i64 %sext.i.i.us.us.1, 32 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %435 = phi float [ %441, %for.body.i.i.us.us.1 ], [ %mul6.i.i.us.us.1, %if.then.i.i.us.us.1 ] - %436 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %84 - %arrayidx12.i.i.us.us.1 = getelementptr inbounds float, float* %7, i64 %436 - %437 = load float, float* %arrayidx12.i.i.us.us.1, align 4, !tbaa !12 - %mul13.i.i.us.us.1 = fmul float %17, %437 - %438 = mul nsw i64 %indvars.iv.next.i.i3.us.us.1, %34 - %439 = add nsw i64 %438, %434 - %arrayidx17.i.i.us.us.1 = getelementptr inbounds float, float* %10, i64 %439 - %440 = load float, float* %arrayidx17.i.i.us.us.1, align 4, !tbaa !12 - %441 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.1, float %440, float %435) #2 - store float %441, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !21 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %442 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 1 - %exitcond.not.1 = icmp eq i64 %442, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !19 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %443 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %443, 2 - %cmp.i.i.us.2 = icmp sgt i32 %25, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %29, %conv2.i.i.us.2 - %mul9.i.i.us.2 = mul nsw i32 %33, %conv2.i.i.us.2 - %444 = sext i32 %mul9.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %454, %if.end.i.i.us.us.2 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %29, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.2 - %445 = load float, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12 - %mul6.i.i.us.us.2 = fmul float %21, %445 - store float %mul6.i.i.us.us.2, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.2 = shl i64 %add1.i.i.i.us.us.2, 32 - %446 = ashr exact i64 %sext.i.i.us.us.2, 32 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %447 = phi float [ %453, %for.body.i.i.us.us.2 ], [ %mul6.i.i.us.us.2, %if.then.i.i.us.us.2 ] - %448 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %444 - %arrayidx12.i.i.us.us.2 = getelementptr inbounds float, float* %7, i64 %448 - %449 = load float, float* %arrayidx12.i.i.us.us.2, align 4, !tbaa !12 - %mul13.i.i.us.us.2 = fmul float %17, %449 - %450 = mul nsw i64 %indvars.iv.next.i.i3.us.us.2, %34 - %451 = add nsw i64 %450, %446 - %arrayidx17.i.i.us.us.2 = getelementptr inbounds float, float* %10, i64 %451 - %452 = load float, float* %arrayidx17.i.i.us.us.2, align 4, !tbaa !12 - %453 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.2, float %452, float %447) #2 - store float %453, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !21 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %454 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 1 - %exitcond.not.2 = icmp eq i64 %454, 32 - br i1 %exitcond.not.2, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !19 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %455 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %455, 3 - %cmp.i.i.us.3 = icmp sgt i32 %25, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %29, %conv2.i.i.us.3 - %mul9.i.i.us.3 = mul nsw i32 %33, %conv2.i.i.us.3 - %456 = sext i32 %mul9.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %466, %if.end.i.i.us.us.3 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %29, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.3 - %457 = load float, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12 - %mul6.i.i.us.us.3 = fmul float %21, %457 - store float %mul6.i.i.us.us.3, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.3 = shl i64 %add1.i.i.i.us.us.3, 32 - %458 = ashr exact i64 %sext.i.i.us.us.3, 32 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %459 = phi float [ %465, %for.body.i.i.us.us.3 ], [ %mul6.i.i.us.us.3, %if.then.i.i.us.us.3 ] - %460 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %456 - %arrayidx12.i.i.us.us.3 = getelementptr inbounds float, float* %7, i64 %460 - %461 = load float, float* %arrayidx12.i.i.us.us.3, align 4, !tbaa !12 - %mul13.i.i.us.us.3 = fmul float %17, %461 - %462 = mul nsw i64 %indvars.iv.next.i.i3.us.us.3, %34 - %463 = add nsw i64 %462, %458 - %arrayidx17.i.i.us.us.3 = getelementptr inbounds float, float* %10, i64 %463 - %464 = load float, float* %arrayidx17.i.i.us.us.3, align 4, !tbaa !12 - %465 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.3, float %464, float %459) #2 - store float %465, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !21 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %466 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 1 - %exitcond.not.3 = icmp eq i64 %466, 32 - br i1 %exitcond.not.3, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !19 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %467 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %467, 4 - %cmp.i.i.us.4 = icmp sgt i32 %25, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %29, %conv2.i.i.us.4 - %mul9.i.i.us.4 = mul nsw i32 %33, %conv2.i.i.us.4 - %468 = sext i32 %mul9.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %478, %if.end.i.i.us.us.4 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %29, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.4 - %469 = load float, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12 - %mul6.i.i.us.us.4 = fmul float %21, %469 - store float %mul6.i.i.us.us.4, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.4 = shl i64 %add1.i.i.i.us.us.4, 32 - %470 = ashr exact i64 %sext.i.i.us.us.4, 32 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %471 = phi float [ %477, %for.body.i.i.us.us.4 ], [ %mul6.i.i.us.us.4, %if.then.i.i.us.us.4 ] - %472 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %468 - %arrayidx12.i.i.us.us.4 = getelementptr inbounds float, float* %7, i64 %472 - %473 = load float, float* %arrayidx12.i.i.us.us.4, align 4, !tbaa !12 - %mul13.i.i.us.us.4 = fmul float %17, %473 - %474 = mul nsw i64 %indvars.iv.next.i.i3.us.us.4, %34 - %475 = add nsw i64 %474, %470 - %arrayidx17.i.i.us.us.4 = getelementptr inbounds float, float* %10, i64 %475 - %476 = load float, float* %arrayidx17.i.i.us.us.4, align 4, !tbaa !12 - %477 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.4, float %476, float %471) #2 - store float %477, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !21 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %478 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 1 - %exitcond.not.4 = icmp eq i64 %478, 32 - br i1 %exitcond.not.4, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !19 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %479 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %479, 5 - %cmp.i.i.us.5 = icmp sgt i32 %25, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %29, %conv2.i.i.us.5 - %mul9.i.i.us.5 = mul nsw i32 %33, %conv2.i.i.us.5 - %480 = sext i32 %mul9.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %490, %if.end.i.i.us.us.5 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %29, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.5 - %481 = load float, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12 - %mul6.i.i.us.us.5 = fmul float %21, %481 - store float %mul6.i.i.us.us.5, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.5 = shl i64 %add1.i.i.i.us.us.5, 32 - %482 = ashr exact i64 %sext.i.i.us.us.5, 32 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %483 = phi float [ %489, %for.body.i.i.us.us.5 ], [ %mul6.i.i.us.us.5, %if.then.i.i.us.us.5 ] - %484 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %480 - %arrayidx12.i.i.us.us.5 = getelementptr inbounds float, float* %7, i64 %484 - %485 = load float, float* %arrayidx12.i.i.us.us.5, align 4, !tbaa !12 - %mul13.i.i.us.us.5 = fmul float %17, %485 - %486 = mul nsw i64 %indvars.iv.next.i.i3.us.us.5, %34 - %487 = add nsw i64 %486, %482 - %arrayidx17.i.i.us.us.5 = getelementptr inbounds float, float* %10, i64 %487 - %488 = load float, float* %arrayidx17.i.i.us.us.5, align 4, !tbaa !12 - %489 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.5, float %488, float %483) #2 - store float %489, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !21 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %490 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 1 - %exitcond.not.5 = icmp eq i64 %490, 32 - br i1 %exitcond.not.5, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !19 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %491 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %491, 6 - %cmp.i.i.us.6 = icmp sgt i32 %25, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %29, %conv2.i.i.us.6 - %mul9.i.i.us.6 = mul nsw i32 %33, %conv2.i.i.us.6 - %492 = sext i32 %mul9.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %502, %if.end.i.i.us.us.6 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %29, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.6 - %493 = load float, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12 - %mul6.i.i.us.us.6 = fmul float %21, %493 - store float %mul6.i.i.us.us.6, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.6 = shl i64 %add1.i.i.i.us.us.6, 32 - %494 = ashr exact i64 %sext.i.i.us.us.6, 32 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %495 = phi float [ %501, %for.body.i.i.us.us.6 ], [ %mul6.i.i.us.us.6, %if.then.i.i.us.us.6 ] - %496 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %492 - %arrayidx12.i.i.us.us.6 = getelementptr inbounds float, float* %7, i64 %496 - %497 = load float, float* %arrayidx12.i.i.us.us.6, align 4, !tbaa !12 - %mul13.i.i.us.us.6 = fmul float %17, %497 - %498 = mul nsw i64 %indvars.iv.next.i.i3.us.us.6, %34 - %499 = add nsw i64 %498, %494 - %arrayidx17.i.i.us.us.6 = getelementptr inbounds float, float* %10, i64 %499 - %500 = load float, float* %arrayidx17.i.i.us.us.6, align 4, !tbaa !12 - %501 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.6, float %500, float %495) #2 - store float %501, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !21 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %502 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 1 - %exitcond.not.6 = icmp eq i64 %502, 32 - br i1 %exitcond.not.6, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !19 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %503 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %503, 7 - %cmp.i.i.us.7 = icmp sgt i32 %25, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %29, %conv2.i.i.us.7 - %mul9.i.i.us.7 = mul nsw i32 %33, %conv2.i.i.us.7 - %504 = sext i32 %mul9.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_gemm.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %514, %if.end.i.i.us.us.7 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %29, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.us.7 - %505 = load float, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12 - %mul6.i.i.us.us.7 = fmul float %21, %505 - store float %mul6.i.i.us.us.7, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %sext.i.i.us.us.7 = shl i64 %add1.i.i.i.us.us.7, 32 - %506 = ashr exact i64 %sext.i.i.us.us.7, 32 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %507 = phi float [ %513, %for.body.i.i.us.us.7 ], [ %mul6.i.i.us.us.7, %if.then.i.i.us.us.7 ] - %508 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %504 - %arrayidx12.i.i.us.us.7 = getelementptr inbounds float, float* %7, i64 %508 - %509 = load float, float* %arrayidx12.i.i.us.us.7, align 4, !tbaa !12 - %mul13.i.i.us.us.7 = fmul float %17, %509 - %510 = mul nsw i64 %indvars.iv.next.i.i3.us.us.7, %34 - %511 = add nsw i64 %510, %506 - %arrayidx17.i.i.us.us.7 = getelementptr inbounds float, float* %10, i64 %511 - %512 = load float, float* %arrayidx17.i.i.us.us.7, align 4, !tbaa !12 - %513 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.7, float %512, float %507) #2 - store float %513, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !21 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %514 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 1 - %exitcond.not.7 = icmp eq i64 %514, 32 - br i1 %exitcond.not.7, label %_pocl_kernel_gemm.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !19 - -if.then.i.i.us.7.1: ; preds = %if.end.i.i.us.7 - %add.i.i.us.7.1 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.7.1 - %515 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %mul6.i.i.us.7.1 = fmul float %21, %515 - store float %mul6.i.i.us.7.1, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.1 - -if.end.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.i.i.us.7 - %516 = or i64 %_local_id_x.i.0.us.7, 2 - %add1.i.i.i.us.7.2 = add nuw nsw i64 %516, %mul.i.i.i - %conv.i.i.us.7.2 = trunc i64 %add1.i.i.i.us.7.2 to i32 - %cmp4.i.i.us.7.2 = icmp sgt i32 %29, %conv.i.i.us.7.2 - br i1 %cmp4.i.i.us.7.2, label %if.then.i.i.us.7.2, label %if.end.i.i.us.7.2 - -if.then.i.i.us.7.2: ; preds = %if.end.i.i.us.7.1 - %add.i.i.us.7.2 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.2 - %idxprom.i.i.us.7.2 = sext i32 %add.i.i.us.7.2 to i64 - %arrayidx.i.i.us.7.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.7.2 - %517 = load float, float* %arrayidx.i.i.us.7.2, align 4, !tbaa !12 - %mul6.i.i.us.7.2 = fmul float %21, %517 - store float %mul6.i.i.us.7.2, float* %arrayidx.i.i.us.7.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.2 - -if.end.i.i.us.7.2: ; preds = %if.then.i.i.us.7.2, %if.end.i.i.us.7.1 - %518 = or i64 %_local_id_x.i.0.us.7, 3 - %add1.i.i.i.us.7.3 = add nuw nsw i64 %518, %mul.i.i.i - %conv.i.i.us.7.3 = trunc i64 %add1.i.i.i.us.7.3 to i32 - %cmp4.i.i.us.7.3 = icmp sgt i32 %29, %conv.i.i.us.7.3 - br i1 %cmp4.i.i.us.7.3, label %if.then.i.i.us.7.3, label %if.end.i.i.us.7.3 - -if.then.i.i.us.7.3: ; preds = %if.end.i.i.us.7.2 - %add.i.i.us.7.3 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.3 - %idxprom.i.i.us.7.3 = sext i32 %add.i.i.us.7.3 to i64 - %arrayidx.i.i.us.7.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.7.3 - %519 = load float, float* %arrayidx.i.i.us.7.3, align 4, !tbaa !12 - %mul6.i.i.us.7.3 = fmul float %21, %519 - store float %mul6.i.i.us.7.3, float* %arrayidx.i.i.us.7.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.3 - -if.end.i.i.us.7.3: ; preds = %if.then.i.i.us.7.3, %if.end.i.i.us.7.2 - %520 = add nuw nsw i64 %_local_id_x.i.0.us.7, 4 - %exitcond34.7.not.3 = icmp eq i64 %520, 32 - br i1 %exitcond34.7.not.3, label %_pocl_kernel_gemm.exit.loopexit238, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !40 - -if.then.i.i.us.6.1: ; preds = %if.end.i.i.us.6 - %add.i.i.us.6.1 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.6.1 - %521 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %mul6.i.i.us.6.1 = fmul float %21, %521 - store float %mul6.i.i.us.6.1, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.1 - -if.end.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.i.i.us.6 - %522 = or i64 %_local_id_x.i.0.us.6, 2 - %add1.i.i.i.us.6.2 = add nuw nsw i64 %522, %mul.i.i.i - %conv.i.i.us.6.2 = trunc i64 %add1.i.i.i.us.6.2 to i32 - %cmp4.i.i.us.6.2 = icmp sgt i32 %29, %conv.i.i.us.6.2 - br i1 %cmp4.i.i.us.6.2, label %if.then.i.i.us.6.2, label %if.end.i.i.us.6.2 - -if.then.i.i.us.6.2: ; preds = %if.end.i.i.us.6.1 - %add.i.i.us.6.2 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.2 - %idxprom.i.i.us.6.2 = sext i32 %add.i.i.us.6.2 to i64 - %arrayidx.i.i.us.6.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.6.2 - %523 = load float, float* %arrayidx.i.i.us.6.2, align 4, !tbaa !12 - %mul6.i.i.us.6.2 = fmul float %21, %523 - store float %mul6.i.i.us.6.2, float* %arrayidx.i.i.us.6.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.2 - -if.end.i.i.us.6.2: ; preds = %if.then.i.i.us.6.2, %if.end.i.i.us.6.1 - %524 = or i64 %_local_id_x.i.0.us.6, 3 - %add1.i.i.i.us.6.3 = add nuw nsw i64 %524, %mul.i.i.i - %conv.i.i.us.6.3 = trunc i64 %add1.i.i.i.us.6.3 to i32 - %cmp4.i.i.us.6.3 = icmp sgt i32 %29, %conv.i.i.us.6.3 - br i1 %cmp4.i.i.us.6.3, label %if.then.i.i.us.6.3, label %if.end.i.i.us.6.3 - -if.then.i.i.us.6.3: ; preds = %if.end.i.i.us.6.2 - %add.i.i.us.6.3 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.3 - %idxprom.i.i.us.6.3 = sext i32 %add.i.i.us.6.3 to i64 - %arrayidx.i.i.us.6.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.6.3 - %525 = load float, float* %arrayidx.i.i.us.6.3, align 4, !tbaa !12 - %mul6.i.i.us.6.3 = fmul float %21, %525 - store float %mul6.i.i.us.6.3, float* %arrayidx.i.i.us.6.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.3 - -if.end.i.i.us.6.3: ; preds = %if.then.i.i.us.6.3, %if.end.i.i.us.6.2 - %526 = add nuw nsw i64 %_local_id_x.i.0.us.6, 4 - %exitcond34.6.not.3 = icmp eq i64 %526, 32 - br i1 %exitcond34.6.not.3, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !41 - -if.then.i.i.us.5.1: ; preds = %if.end.i.i.us.5 - %add.i.i.us.5.1 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.5.1 - %527 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %mul6.i.i.us.5.1 = fmul float %21, %527 - store float %mul6.i.i.us.5.1, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.1 - -if.end.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.i.i.us.5 - %528 = or i64 %_local_id_x.i.0.us.5, 2 - %add1.i.i.i.us.5.2 = add nuw nsw i64 %528, %mul.i.i.i - %conv.i.i.us.5.2 = trunc i64 %add1.i.i.i.us.5.2 to i32 - %cmp4.i.i.us.5.2 = icmp sgt i32 %29, %conv.i.i.us.5.2 - br i1 %cmp4.i.i.us.5.2, label %if.then.i.i.us.5.2, label %if.end.i.i.us.5.2 - -if.then.i.i.us.5.2: ; preds = %if.end.i.i.us.5.1 - %add.i.i.us.5.2 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.2 - %idxprom.i.i.us.5.2 = sext i32 %add.i.i.us.5.2 to i64 - %arrayidx.i.i.us.5.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.5.2 - %529 = load float, float* %arrayidx.i.i.us.5.2, align 4, !tbaa !12 - %mul6.i.i.us.5.2 = fmul float %21, %529 - store float %mul6.i.i.us.5.2, float* %arrayidx.i.i.us.5.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.2 - -if.end.i.i.us.5.2: ; preds = %if.then.i.i.us.5.2, %if.end.i.i.us.5.1 - %530 = or i64 %_local_id_x.i.0.us.5, 3 - %add1.i.i.i.us.5.3 = add nuw nsw i64 %530, %mul.i.i.i - %conv.i.i.us.5.3 = trunc i64 %add1.i.i.i.us.5.3 to i32 - %cmp4.i.i.us.5.3 = icmp sgt i32 %29, %conv.i.i.us.5.3 - br i1 %cmp4.i.i.us.5.3, label %if.then.i.i.us.5.3, label %if.end.i.i.us.5.3 - -if.then.i.i.us.5.3: ; preds = %if.end.i.i.us.5.2 - %add.i.i.us.5.3 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.3 - %idxprom.i.i.us.5.3 = sext i32 %add.i.i.us.5.3 to i64 - %arrayidx.i.i.us.5.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.5.3 - %531 = load float, float* %arrayidx.i.i.us.5.3, align 4, !tbaa !12 - %mul6.i.i.us.5.3 = fmul float %21, %531 - store float %mul6.i.i.us.5.3, float* %arrayidx.i.i.us.5.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.3 - -if.end.i.i.us.5.3: ; preds = %if.then.i.i.us.5.3, %if.end.i.i.us.5.2 - %532 = add nuw nsw i64 %_local_id_x.i.0.us.5, 4 - %exitcond34.5.not.3 = icmp eq i64 %532, 32 - br i1 %exitcond34.5.not.3, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !42 - -if.then.i.i.us.4.1: ; preds = %if.end.i.i.us.4 - %add.i.i.us.4.1 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.4.1 - %533 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %mul6.i.i.us.4.1 = fmul float %21, %533 - store float %mul6.i.i.us.4.1, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.1 - -if.end.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.i.i.us.4 - %534 = or i64 %_local_id_x.i.0.us.4, 2 - %add1.i.i.i.us.4.2 = add nuw nsw i64 %534, %mul.i.i.i - %conv.i.i.us.4.2 = trunc i64 %add1.i.i.i.us.4.2 to i32 - %cmp4.i.i.us.4.2 = icmp sgt i32 %29, %conv.i.i.us.4.2 - br i1 %cmp4.i.i.us.4.2, label %if.then.i.i.us.4.2, label %if.end.i.i.us.4.2 - -if.then.i.i.us.4.2: ; preds = %if.end.i.i.us.4.1 - %add.i.i.us.4.2 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.2 - %idxprom.i.i.us.4.2 = sext i32 %add.i.i.us.4.2 to i64 - %arrayidx.i.i.us.4.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.4.2 - %535 = load float, float* %arrayidx.i.i.us.4.2, align 4, !tbaa !12 - %mul6.i.i.us.4.2 = fmul float %21, %535 - store float %mul6.i.i.us.4.2, float* %arrayidx.i.i.us.4.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.2 - -if.end.i.i.us.4.2: ; preds = %if.then.i.i.us.4.2, %if.end.i.i.us.4.1 - %536 = or i64 %_local_id_x.i.0.us.4, 3 - %add1.i.i.i.us.4.3 = add nuw nsw i64 %536, %mul.i.i.i - %conv.i.i.us.4.3 = trunc i64 %add1.i.i.i.us.4.3 to i32 - %cmp4.i.i.us.4.3 = icmp sgt i32 %29, %conv.i.i.us.4.3 - br i1 %cmp4.i.i.us.4.3, label %if.then.i.i.us.4.3, label %if.end.i.i.us.4.3 - -if.then.i.i.us.4.3: ; preds = %if.end.i.i.us.4.2 - %add.i.i.us.4.3 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.3 - %idxprom.i.i.us.4.3 = sext i32 %add.i.i.us.4.3 to i64 - %arrayidx.i.i.us.4.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.4.3 - %537 = load float, float* %arrayidx.i.i.us.4.3, align 4, !tbaa !12 - %mul6.i.i.us.4.3 = fmul float %21, %537 - store float %mul6.i.i.us.4.3, float* %arrayidx.i.i.us.4.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.3 - -if.end.i.i.us.4.3: ; preds = %if.then.i.i.us.4.3, %if.end.i.i.us.4.2 - %538 = add nuw nsw i64 %_local_id_x.i.0.us.4, 4 - %exitcond34.4.not.3 = icmp eq i64 %538, 32 - br i1 %exitcond34.4.not.3, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !43 - -if.then.i.i.us.3.1: ; preds = %if.end.i.i.us.3 - %add.i.i.us.3.1 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3.1 - %539 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %mul6.i.i.us.3.1 = fmul float %21, %539 - store float %mul6.i.i.us.3.1, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.1 - -if.end.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.i.i.us.3 - %540 = or i64 %_local_id_x.i.0.us.3, 2 - %add1.i.i.i.us.3.2 = add nuw nsw i64 %540, %mul.i.i.i - %conv.i.i.us.3.2 = trunc i64 %add1.i.i.i.us.3.2 to i32 - %cmp4.i.i.us.3.2 = icmp sgt i32 %29, %conv.i.i.us.3.2 - br i1 %cmp4.i.i.us.3.2, label %if.then.i.i.us.3.2, label %if.end.i.i.us.3.2 - -if.then.i.i.us.3.2: ; preds = %if.end.i.i.us.3.1 - %add.i.i.us.3.2 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.2 - %idxprom.i.i.us.3.2 = sext i32 %add.i.i.us.3.2 to i64 - %arrayidx.i.i.us.3.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3.2 - %541 = load float, float* %arrayidx.i.i.us.3.2, align 4, !tbaa !12 - %mul6.i.i.us.3.2 = fmul float %21, %541 - store float %mul6.i.i.us.3.2, float* %arrayidx.i.i.us.3.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.2 - -if.end.i.i.us.3.2: ; preds = %if.then.i.i.us.3.2, %if.end.i.i.us.3.1 - %542 = or i64 %_local_id_x.i.0.us.3, 3 - %add1.i.i.i.us.3.3 = add nuw nsw i64 %542, %mul.i.i.i - %conv.i.i.us.3.3 = trunc i64 %add1.i.i.i.us.3.3 to i32 - %cmp4.i.i.us.3.3 = icmp sgt i32 %29, %conv.i.i.us.3.3 - br i1 %cmp4.i.i.us.3.3, label %if.then.i.i.us.3.3, label %if.end.i.i.us.3.3 - -if.then.i.i.us.3.3: ; preds = %if.end.i.i.us.3.2 - %add.i.i.us.3.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.3 - %idxprom.i.i.us.3.3 = sext i32 %add.i.i.us.3.3 to i64 - %arrayidx.i.i.us.3.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3.3 - %543 = load float, float* %arrayidx.i.i.us.3.3, align 4, !tbaa !12 - %mul6.i.i.us.3.3 = fmul float %21, %543 - store float %mul6.i.i.us.3.3, float* %arrayidx.i.i.us.3.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.3 - -if.end.i.i.us.3.3: ; preds = %if.then.i.i.us.3.3, %if.end.i.i.us.3.2 - %544 = add nuw nsw i64 %_local_id_x.i.0.us.3, 4 - %exitcond34.3.not.3 = icmp eq i64 %544, 32 - br i1 %exitcond34.3.not.3, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !44 - -if.then.i.i.us.2.1: ; preds = %if.end.i.i.us.2 - %add.i.i.us.2.1 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2.1 - %545 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %mul6.i.i.us.2.1 = fmul float %21, %545 - store float %mul6.i.i.us.2.1, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.1 - -if.end.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.i.i.us.2 - %546 = or i64 %_local_id_x.i.0.us.2, 2 - %add1.i.i.i.us.2.2 = add nuw nsw i64 %546, %mul.i.i.i - %conv.i.i.us.2.2 = trunc i64 %add1.i.i.i.us.2.2 to i32 - %cmp4.i.i.us.2.2 = icmp sgt i32 %29, %conv.i.i.us.2.2 - br i1 %cmp4.i.i.us.2.2, label %if.then.i.i.us.2.2, label %if.end.i.i.us.2.2 - -if.then.i.i.us.2.2: ; preds = %if.end.i.i.us.2.1 - %add.i.i.us.2.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.2 - %idxprom.i.i.us.2.2 = sext i32 %add.i.i.us.2.2 to i64 - %arrayidx.i.i.us.2.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2.2 - %547 = load float, float* %arrayidx.i.i.us.2.2, align 4, !tbaa !12 - %mul6.i.i.us.2.2 = fmul float %21, %547 - store float %mul6.i.i.us.2.2, float* %arrayidx.i.i.us.2.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.2 - -if.end.i.i.us.2.2: ; preds = %if.then.i.i.us.2.2, %if.end.i.i.us.2.1 - %548 = or i64 %_local_id_x.i.0.us.2, 3 - %add1.i.i.i.us.2.3 = add nuw nsw i64 %548, %mul.i.i.i - %conv.i.i.us.2.3 = trunc i64 %add1.i.i.i.us.2.3 to i32 - %cmp4.i.i.us.2.3 = icmp sgt i32 %29, %conv.i.i.us.2.3 - br i1 %cmp4.i.i.us.2.3, label %if.then.i.i.us.2.3, label %if.end.i.i.us.2.3 - -if.then.i.i.us.2.3: ; preds = %if.end.i.i.us.2.2 - %add.i.i.us.2.3 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.3 - %idxprom.i.i.us.2.3 = sext i32 %add.i.i.us.2.3 to i64 - %arrayidx.i.i.us.2.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2.3 - %549 = load float, float* %arrayidx.i.i.us.2.3, align 4, !tbaa !12 - %mul6.i.i.us.2.3 = fmul float %21, %549 - store float %mul6.i.i.us.2.3, float* %arrayidx.i.i.us.2.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.3 - -if.end.i.i.us.2.3: ; preds = %if.then.i.i.us.2.3, %if.end.i.i.us.2.2 - %550 = add nuw nsw i64 %_local_id_x.i.0.us.2, 4 - %exitcond34.2.not.3 = icmp eq i64 %550, 32 - br i1 %exitcond34.2.not.3, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !45 - -if.then.i.i.us.1.1: ; preds = %if.end.i.i.us.1 - %add.i.i.us.1.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1.1 - %551 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %mul6.i.i.us.1.1 = fmul float %21, %551 - store float %mul6.i.i.us.1.1, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.1 - -if.end.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.i.i.us.1 - %552 = or i64 %_local_id_x.i.0.us.1, 2 - %add1.i.i.i.us.1.2 = add nuw nsw i64 %552, %mul.i.i.i - %conv.i.i.us.1.2 = trunc i64 %add1.i.i.i.us.1.2 to i32 - %cmp4.i.i.us.1.2 = icmp sgt i32 %29, %conv.i.i.us.1.2 - br i1 %cmp4.i.i.us.1.2, label %if.then.i.i.us.1.2, label %if.end.i.i.us.1.2 - -if.then.i.i.us.1.2: ; preds = %if.end.i.i.us.1.1 - %add.i.i.us.1.2 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.2 - %idxprom.i.i.us.1.2 = sext i32 %add.i.i.us.1.2 to i64 - %arrayidx.i.i.us.1.2 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1.2 - %553 = load float, float* %arrayidx.i.i.us.1.2, align 4, !tbaa !12 - %mul6.i.i.us.1.2 = fmul float %21, %553 - store float %mul6.i.i.us.1.2, float* %arrayidx.i.i.us.1.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.2 - -if.end.i.i.us.1.2: ; preds = %if.then.i.i.us.1.2, %if.end.i.i.us.1.1 - %554 = or i64 %_local_id_x.i.0.us.1, 3 - %add1.i.i.i.us.1.3 = add nuw nsw i64 %554, %mul.i.i.i - %conv.i.i.us.1.3 = trunc i64 %add1.i.i.i.us.1.3 to i32 - %cmp4.i.i.us.1.3 = icmp sgt i32 %29, %conv.i.i.us.1.3 - br i1 %cmp4.i.i.us.1.3, label %if.then.i.i.us.1.3, label %if.end.i.i.us.1.3 - -if.then.i.i.us.1.3: ; preds = %if.end.i.i.us.1.2 - %add.i.i.us.1.3 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.3 - %idxprom.i.i.us.1.3 = sext i32 %add.i.i.us.1.3 to i64 - %arrayidx.i.i.us.1.3 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1.3 - %555 = load float, float* %arrayidx.i.i.us.1.3, align 4, !tbaa !12 - %mul6.i.i.us.1.3 = fmul float %21, %555 - store float %mul6.i.i.us.1.3, float* %arrayidx.i.i.us.1.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.3 - -if.end.i.i.us.1.3: ; preds = %if.then.i.i.us.1.3, %if.end.i.i.us.1.2 - %556 = add nuw nsw i64 %_local_id_x.i.0.us.1, 4 - %exitcond34.1.not.3 = icmp eq i64 %556, 32 - br i1 %exitcond34.1.not.3, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !46 - -if.then.i.i.us.1214: ; preds = %if.end.i.i.us - %add.i.i.us.1210 = add nsw i32 %mul.i.i.us, %conv.i.i.us.1207 - %idxprom.i.i.us.1211 = sext i32 %add.i.i.us.1210 to i64 - %arrayidx.i.i.us.1212 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.1211 - %557 = load float, float* %arrayidx.i.i.us.1212, align 4, !tbaa !12 - %mul6.i.i.us.1213 = fmul float %21, %557 - store float %mul6.i.i.us.1213, float* %arrayidx.i.i.us.1212, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1215 - -if.end.i.i.us.1215: ; preds = %if.then.i.i.us.1214, %if.end.i.i.us - %558 = or i64 %_local_id_x.i.0.us, 2 - %add1.i.i.i.us.2217 = add nuw nsw i64 %558, %mul.i.i.i - %conv.i.i.us.2218 = trunc i64 %add1.i.i.i.us.2217 to i32 - %cmp4.i.i.us.2219 = icmp sgt i32 %29, %conv.i.i.us.2218 - br i1 %cmp4.i.i.us.2219, label %if.then.i.i.us.2225, label %if.end.i.i.us.2226 - -if.then.i.i.us.2225: ; preds = %if.end.i.i.us.1215 - %add.i.i.us.2221 = add nsw i32 %mul.i.i.us, %conv.i.i.us.2218 - %idxprom.i.i.us.2222 = sext i32 %add.i.i.us.2221 to i64 - %arrayidx.i.i.us.2223 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.2222 - %559 = load float, float* %arrayidx.i.i.us.2223, align 4, !tbaa !12 - %mul6.i.i.us.2224 = fmul float %21, %559 - store float %mul6.i.i.us.2224, float* %arrayidx.i.i.us.2223, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2226 - -if.end.i.i.us.2226: ; preds = %if.then.i.i.us.2225, %if.end.i.i.us.1215 - %560 = or i64 %_local_id_x.i.0.us, 3 - %add1.i.i.i.us.3228 = add nuw nsw i64 %560, %mul.i.i.i - %conv.i.i.us.3229 = trunc i64 %add1.i.i.i.us.3228 to i32 - %cmp4.i.i.us.3230 = icmp sgt i32 %29, %conv.i.i.us.3229 - br i1 %cmp4.i.i.us.3230, label %if.then.i.i.us.3236, label %if.end.i.i.us.3237 - -if.then.i.i.us.3236: ; preds = %if.end.i.i.us.2226 - %add.i.i.us.3232 = add nsw i32 %mul.i.i.us, %conv.i.i.us.3229 - %idxprom.i.i.us.3233 = sext i32 %add.i.i.us.3232 to i64 - %arrayidx.i.i.us.3234 = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us.3233 - %561 = load float, float* %arrayidx.i.i.us.3234, align 4, !tbaa !12 - %mul6.i.i.us.3235 = fmul float %21, %561 - store float %mul6.i.i.us.3235, float* %arrayidx.i.i.us.3234, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3237 - -if.end.i.i.us.3237: ; preds = %if.then.i.i.us.3236, %if.end.i.i.us.2226 - %562 = add nuw nsw i64 %_local_id_x.i.0.us, 4 - %exitcond34.not.3 = icmp eq i64 %562, 32 - br i1 %exitcond34.not.3, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !47 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #4 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } -attributes #3 = { argmemonly nounwind readonly willreturn } -attributes #4 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE", !"DATA_TYPE", !"int", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"float", !"float", !"int", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !"", !"", !""} -!10 = !{!"a", !"b", !"c", !"alpha", !"beta", !"ni", !"nj", !"nk"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17, !18} -!17 = distinct !{} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.parallel_accesses", !17} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.unroll.disable"} -!23 = distinct !{!23, !20, !24} -!24 = !{!"llvm.loop.isvectorized", i32 1} -!25 = distinct !{!25, !20, !24} -!26 = distinct !{!26, !20, !24} -!27 = distinct !{!27, !20, !24} -!28 = distinct !{!28, !20, !24} -!29 = distinct !{!29, !20, !24} -!30 = distinct !{!30, !20, !24} -!31 = distinct !{!31, !20, !24} -!32 = distinct !{!32, !20, !24} -!33 = distinct !{!33, !20, !24} -!34 = distinct !{!34, !20, !24} -!35 = distinct !{!35, !20, !24} -!36 = distinct !{!36, !20, !24} -!37 = distinct !{!37, !20, !24} -!38 = distinct !{!38, !20, !24} -!39 = distinct !{!39, !20, !24} -!40 = distinct !{!40, !20, !24} -!41 = distinct !{!41, !20, !24} -!42 = distinct !{!42, !20, !24} -!43 = distinct !{!43, !20, !24} -!44 = distinct !{!44, !20, !24} -!45 = distinct !{!45, !20, !24} -!46 = distinct !{!46, !20, !24} -!47 = distinct !{!47, !20, !24} diff --git a/pocl_irs/gemver_kernel1.ll b/pocl_irs/gemver_kernel1.ll deleted file mode 100644 index 38b7966..0000000 --- a/pocl_irs/gemver_kernel1.ll +++ /dev/null @@ -1,6053 +0,0 @@ -; ModuleID = './OM/KBEGAJBBAALIMPLKDCOKKOGOMEPCEPLPNLGKF/gemver_kernel1/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_gemver_kernel1(float* nocapture %0, float* nocapture readonly %1, float* nocapture readonly %2, float* nocapture readonly %3, float* nocapture readonly %4, i32 %5, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %6, i64 %7, i64 %8, i64 %9) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { -pregion_for_entry.pregion_for_init.i: - %mul.i.i = shl i64 %7, 5 - %mul3.i.i = shl i64 %8, 3 - %conv2.i = trunc i64 %mul3.i.i to i32 - %cmp.i = icmp slt i32 %conv2.i, %5 - %sext.i = shl i64 %8, 35 - %idxprom.i = ashr exact i64 %sext.i, 32 - %arrayidx.i = getelementptr inbounds float, float* %3, i64 %idxprom.i - %arrayidx9.i = getelementptr inbounds float, float* %4, i64 %idxprom.i - %mul.i = mul nsw i32 %conv2.i, %5 - br i1 %cmp.i, label %vector.scevcheck, label %pregion_for_end.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i - %10 = trunc i64 %8 to i32 - %11 = mul i32 %10, %5 - %12 = shl i32 %11, 3 - %13 = trunc i64 %7 to i32 - %14 = shl i32 %13, 5 - %15 = add i32 %12, %14 - %16 = icmp sgt i32 %15, 2147483616 - br i1 %16, label %pregion_for_entry.entry.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %sext508 = shl i64 %8, 35 - %17 = ashr exact i64 %sext508, 32 - %scevgep = getelementptr float, float* %3, i64 %17 - %scevgep3 = bitcast float* %scevgep to i8* - %uglygep = getelementptr i8, i8* %scevgep3, i64 1 - %18 = trunc i64 %8 to i32 - %19 = mul i32 %18, %5 - %20 = shl i32 %19, 3 - %21 = trunc i64 %7 to i32 - %22 = shl i32 %21, 5 - %23 = add i32 %20, %22 - %24 = sext i32 %23 to i64 - %scevgep4 = getelementptr float, float* %0, i64 %24 - %scevgep45 = bitcast float* %scevgep4 to i8* - %25 = add nsw i64 %24, 32 - %scevgep6 = getelementptr float, float* %0, i64 %25 - %26 = sext i32 %22 to i64 - %scevgep8 = getelementptr float, float* %1, i64 %26 - %27 = add nsw i64 %26, 32 - %scevgep10 = getelementptr float, float* %1, i64 %27 - %scevgep12 = getelementptr float, float* %4, i64 %17 - %scevgep1213 = bitcast float* %scevgep12 to i8* - %uglygep14 = getelementptr i8, i8* %scevgep1213, i64 1 - %scevgep15 = getelementptr float, float* %2, i64 %26 - %scevgep17 = getelementptr float, float* %2, i64 %27 - %bound0 = icmp ult float* %arrayidx.i, %scevgep6 - %bound1 = icmp ugt i8* %uglygep, %scevgep45 - %found.conflict = and i1 %bound0, %bound1 - %bound019 = icmp ult float* %scevgep8, %scevgep6 - %bound120 = icmp ult float* %scevgep4, %scevgep10 - %found.conflict21 = and i1 %bound019, %bound120 - %conflict.rdx = or i1 %found.conflict, %found.conflict21 - %bound023 = icmp ult float* %arrayidx9.i, %scevgep6 - %bound124 = icmp ugt i8* %uglygep14, %scevgep45 - %found.conflict25 = and i1 %bound023, %bound124 - %conflict.rdx26 = or i1 %conflict.rdx, %found.conflict25 - %bound027 = icmp ult float* %scevgep15, %scevgep6 - %bound128 = icmp ult float* %scevgep4, %scevgep17 - %found.conflict29 = and i1 %bound027, %bound128 - %conflict.rdx30 = or i1 %conflict.rdx26, %found.conflict29 - br i1 %conflict.rdx30, label %pregion_for_entry.entry.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert31 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat32 = shufflevector <8 x i32> %broadcast.splatinsert31, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert33 = insertelement <8 x float*> undef, float* %arrayidx.i, i32 0 - %broadcast.splat34 = shufflevector <8 x float*> %broadcast.splatinsert33, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert35 = insertelement <8 x float*> undef, float* %arrayidx9.i, i32 0 - %broadcast.splat36 = shufflevector <8 x float*> %broadcast.splatinsert35, <8 x float*> undef, <8 x i32> zeroinitializer - %28 = or <8 x i64> %broadcast.splat, - %29 = trunc <8 x i64> %28 to <8 x i32> - %30 = icmp sgt <8 x i32> %broadcast.splat32, %29 - %wide.masked.gather = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat34, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %31 = extractelement <8 x i64> %28, i32 0 - %32 = shl i64 %31, 32 - %33 = ashr exact i64 %32, 32 - %34 = getelementptr inbounds float, float* %1, i64 %33 - %35 = bitcast float* %34 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %35, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !21, !noalias !19 - %wide.masked.gather37 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat36, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !23, !noalias !19 - %36 = getelementptr inbounds float, float* %2, i64 %33 - %37 = bitcast float* %36 to <8 x float>* - %wide.masked.load38 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %37, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !25, !noalias !19 - %38 = fmul <8 x float> %wide.masked.gather37, %wide.masked.load38 - %39 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather, <8 x float> %wide.masked.load, <8 x float> %38) - %40 = extractelement <8 x i32> %29, i32 0 - %41 = add nsw i32 %mul.i, %40 - %42 = sext i32 %41 to i64 - %43 = getelementptr inbounds float, float* %0, i64 %42 - %44 = bitcast float* %43 to <8 x float>* - %wide.masked.load39 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %44, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !19 - %45 = fadd <8 x float> %wide.masked.load39, %39 - %46 = bitcast float* %43 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %45, <8 x float>* %46, i32 4, <8 x i1> %30), !tbaa !12, !alias.scope !19, !llvm.access.group !27 - %47 = or <8 x i64> %broadcast.splat, - %48 = trunc <8 x i64> %47 to <8 x i32> - %49 = icmp sgt <8 x i32> %broadcast.splat32, %48 - %wide.masked.gather.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat34, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %50 = extractelement <8 x i64> %47, i32 0 - %51 = shl i64 %50, 32 - %52 = ashr exact i64 %51, 32 - %53 = getelementptr inbounds float, float* %1, i64 %52 - %54 = bitcast float* %53 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %54, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !21, !noalias !19 - %wide.masked.gather37.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat36, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !23, !noalias !19 - %55 = getelementptr inbounds float, float* %2, i64 %52 - %56 = bitcast float* %55 to <8 x float>* - %wide.masked.load38.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %56, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !25, !noalias !19 - %57 = fmul <8 x float> %wide.masked.gather37.1, %wide.masked.load38.1 - %58 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather.1, <8 x float> %wide.masked.load.1, <8 x float> %57) - %59 = extractelement <8 x i32> %48, i32 0 - %60 = add nsw i32 %mul.i, %59 - %61 = sext i32 %60 to i64 - %62 = getelementptr inbounds float, float* %0, i64 %61 - %63 = bitcast float* %62 to <8 x float>* - %wide.masked.load39.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %63, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !19 - %64 = fadd <8 x float> %wide.masked.load39.1, %58 - %65 = bitcast float* %62 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %64, <8 x float>* %65, i32 4, <8 x i1> %49), !tbaa !12, !alias.scope !19, !llvm.access.group !27 - %66 = or <8 x i64> %broadcast.splat, - %67 = trunc <8 x i64> %66 to <8 x i32> - %68 = icmp sgt <8 x i32> %broadcast.splat32, %67 - %wide.masked.gather.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat34, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %69 = extractelement <8 x i64> %66, i32 0 - %70 = shl i64 %69, 32 - %71 = ashr exact i64 %70, 32 - %72 = getelementptr inbounds float, float* %1, i64 %71 - %73 = bitcast float* %72 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %73, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !21, !noalias !19 - %wide.masked.gather37.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat36, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !23, !noalias !19 - %74 = getelementptr inbounds float, float* %2, i64 %71 - %75 = bitcast float* %74 to <8 x float>* - %wide.masked.load38.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %75, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !25, !noalias !19 - %76 = fmul <8 x float> %wide.masked.gather37.2, %wide.masked.load38.2 - %77 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather.2, <8 x float> %wide.masked.load.2, <8 x float> %76) - %78 = extractelement <8 x i32> %67, i32 0 - %79 = add nsw i32 %mul.i, %78 - %80 = sext i32 %79 to i64 - %81 = getelementptr inbounds float, float* %0, i64 %80 - %82 = bitcast float* %81 to <8 x float>* - %wide.masked.load39.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %82, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !19 - %83 = fadd <8 x float> %wide.masked.load39.2, %77 - %84 = bitcast float* %81 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %83, <8 x float>* %84, i32 4, <8 x i1> %68), !tbaa !12, !alias.scope !19, !llvm.access.group !27 - %85 = or <8 x i64> %broadcast.splat, - %86 = trunc <8 x i64> %85 to <8 x i32> - %87 = icmp sgt <8 x i32> %broadcast.splat32, %86 - %wide.masked.gather.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat34, i32 4, <8 x i1> %87, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %88 = extractelement <8 x i64> %85, i32 0 - %89 = shl i64 %88, 32 - %90 = ashr exact i64 %89, 32 - %91 = getelementptr inbounds float, float* %1, i64 %90 - %92 = bitcast float* %91 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %92, i32 4, <8 x i1> %87, <8 x float> undef), !tbaa !12, !alias.scope !21, !noalias !19 - %wide.masked.gather37.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat36, i32 4, <8 x i1> %87, <8 x float> undef), !tbaa !12, !alias.scope !23, !noalias !19 - %93 = getelementptr inbounds float, float* %2, i64 %90 - %94 = bitcast float* %93 to <8 x float>* - %wide.masked.load38.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %94, i32 4, <8 x i1> %87, <8 x float> undef), !tbaa !12, !alias.scope !25, !noalias !19 - %95 = fmul <8 x float> %wide.masked.gather37.3, %wide.masked.load38.3 - %96 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather.3, <8 x float> %wide.masked.load.3, <8 x float> %95) - %97 = extractelement <8 x i32> %86, i32 0 - %98 = add nsw i32 %mul.i, %97 - %99 = sext i32 %98 to i64 - %100 = getelementptr inbounds float, float* %0, i64 %99 - %101 = bitcast float* %100 to <8 x float>* - %wide.masked.load39.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %101, i32 4, <8 x i1> %87, <8 x float> undef), !tbaa !12, !alias.scope !19 - %102 = fadd <8 x float> %wide.masked.load39.3, %96 - %103 = bitcast float* %100 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %102, <8 x float>* %103, i32 4, <8 x i1> %87), !tbaa !12, !alias.scope !19, !llvm.access.group !27 - br label %pregion_for_end.i - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us.1532, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.us.preheader ], [ %852, %if.end.r_exit.i.us.1532 ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %5 - br i1 %cmp4.i.us, label %if.then.i.us, label %if.end.r_exit.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %104 = load float, float* %arrayidx.i, align 4, !tbaa !12 - %sext26.i.us = shl i64 %add1.i.i.us, 32 - %idxprom6.i.us = ashr exact i64 %sext26.i.us, 32 - %arrayidx7.i.us = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us - %105 = load float, float* %arrayidx7.i.us, align 4, !tbaa !12 - %106 = load float, float* %arrayidx9.i, align 4, !tbaa !12 - %arrayidx11.i.us = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us - %107 = load float, float* %arrayidx11.i.us, align 4, !tbaa !12 - %mul12.i.us = fmul float %106, %107 - %108 = tail call float @llvm.fmuladd.f32(float %104, float %105, float %mul12.i.us) #6 - %add.i.us = add nsw i32 %mul.i, %conv.i.us - %idxprom13.i.us = sext i32 %add.i.us to i64 - %arrayidx14.i.us = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us - %109 = load float, float* %arrayidx14.i.us, align 4, !tbaa !12 - %add15.i.us = fadd float %109, %108 - store float %add15.i.us, float* %arrayidx14.i.us, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us - %110 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1518 = add nuw nsw i64 %110, %mul.i.i - %conv.i.us.1519 = trunc i64 %add1.i.i.us.1518 to i32 - %cmp4.i.us.1520 = icmp slt i32 %conv.i.us.1519, %5 - br i1 %cmp4.i.us.1520, label %if.then.i.us.1531, label %if.end.r_exit.i.us.1532 - -pregion_for_end.i.loopexit: ; preds = %if.end.r_exit.i.us.1532 - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i - %add6.i.i.1 = or i64 %mul3.i.i, 1 - %conv2.i.1 = trunc i64 %add6.i.i.1 to i32 - %cmp.i.1 = icmp slt i32 %conv2.i.1, %5 - %sext.i.1 = shl i64 %add6.i.i.1, 32 - %idxprom.i.1 = ashr exact i64 %sext.i.1, 32 - %arrayidx.i.1 = getelementptr inbounds float, float* %3, i64 %idxprom.i.1 - %arrayidx9.i.1 = getelementptr inbounds float, float* %4, i64 %idxprom.i.1 - %mul.i.1 = mul nsw i32 %conv2.i.1, %5 - br i1 %cmp.i.1, label %vector.scevcheck47, label %pregion_for_end.i.1 - -vector.scevcheck47: ; preds = %pregion_for_end.i - %111 = mul i32 %conv2.i.1, %5 - %112 = trunc i64 %7 to i32 - %113 = shl i32 %112, 5 - %114 = add i32 %111, %113 - %115 = icmp sgt i32 %114, 2147483616 - br i1 %115, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.memcheck85 - -pregion_for_entry.entry.i.us.1.preheader: ; preds = %vector.memcheck85, %vector.scevcheck47 - br label %pregion_for_entry.entry.i.us.1 - -vector.memcheck85: ; preds = %vector.scevcheck47 - %sext507 = shl i64 %8, 35 - %116 = ashr exact i64 %sext507, 32 - %117 = or i64 %116, 1 - %scevgep49 = getelementptr float, float* %3, i64 %117 - %scevgep4950 = bitcast float* %scevgep49 to i8* - %uglygep51 = getelementptr i8, i8* %scevgep4950, i64 1 - %118 = mul i32 %conv2.i.1, %5 - %119 = trunc i64 %7 to i32 - %120 = shl i32 %119, 5 - %121 = add i32 %118, %120 - %122 = sext i32 %121 to i64 - %scevgep52 = getelementptr float, float* %0, i64 %122 - %scevgep5253 = bitcast float* %scevgep52 to i8* - %123 = add nsw i64 %122, 32 - %scevgep54 = getelementptr float, float* %0, i64 %123 - %124 = sext i32 %120 to i64 - %scevgep56 = getelementptr float, float* %1, i64 %124 - %125 = add nsw i64 %124, 32 - %scevgep58 = getelementptr float, float* %1, i64 %125 - %scevgep60 = getelementptr float, float* %4, i64 %117 - %scevgep6061 = bitcast float* %scevgep60 to i8* - %uglygep62 = getelementptr i8, i8* %scevgep6061, i64 1 - %scevgep63 = getelementptr float, float* %2, i64 %124 - %scevgep65 = getelementptr float, float* %2, i64 %125 - %bound068 = icmp ult float* %arrayidx.i.1, %scevgep54 - %bound169 = icmp ugt i8* %uglygep51, %scevgep5253 - %found.conflict70 = and i1 %bound068, %bound169 - %bound071 = icmp ult float* %scevgep56, %scevgep54 - %bound172 = icmp ult float* %scevgep52, %scevgep58 - %found.conflict73 = and i1 %bound071, %bound172 - %conflict.rdx74 = or i1 %found.conflict70, %found.conflict73 - %bound076 = icmp ult float* %arrayidx9.i.1, %scevgep54 - %bound177 = icmp ugt i8* %uglygep62, %scevgep5253 - %found.conflict78 = and i1 %bound076, %bound177 - %conflict.rdx79 = or i1 %conflict.rdx74, %found.conflict78 - %bound080 = icmp ult float* %scevgep63, %scevgep54 - %bound181 = icmp ult float* %scevgep52, %scevgep65 - %found.conflict82 = and i1 %bound080, %bound181 - %conflict.rdx83 = or i1 %conflict.rdx79, %found.conflict82 - br i1 %conflict.rdx83, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.ph86 - -vector.ph86: ; preds = %vector.memcheck85 - %broadcast.splatinsert93 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat94 = shufflevector <8 x i64> %broadcast.splatinsert93, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert95 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat96 = shufflevector <8 x i32> %broadcast.splatinsert95, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert97 = insertelement <8 x float*> undef, float* %arrayidx.i.1, i32 0 - %broadcast.splat98 = shufflevector <8 x float*> %broadcast.splatinsert97, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert101 = insertelement <8 x float*> undef, float* %arrayidx9.i.1, i32 0 - %broadcast.splat102 = shufflevector <8 x float*> %broadcast.splatinsert101, <8 x float*> undef, <8 x i32> zeroinitializer - %126 = or <8 x i64> %broadcast.splat94, - %127 = trunc <8 x i64> %126 to <8 x i32> - %128 = icmp sgt <8 x i32> %broadcast.splat96, %127 - %wide.masked.gather99 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat98, i32 4, <8 x i1> %128, <8 x float> undef), !tbaa !12, !alias.scope !30, !noalias !33 - %129 = extractelement <8 x i64> %126, i32 0 - %130 = shl i64 %129, 32 - %131 = ashr exact i64 %130, 32 - %132 = getelementptr inbounds float, float* %1, i64 %131 - %133 = bitcast float* %132 to <8 x float>* - %wide.masked.load100 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %133, i32 4, <8 x i1> %128, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !33 - %wide.masked.gather103 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat102, i32 4, <8 x i1> %128, <8 x float> undef), !tbaa !12, !alias.scope !37, !noalias !33 - %134 = getelementptr inbounds float, float* %2, i64 %131 - %135 = bitcast float* %134 to <8 x float>* - %wide.masked.load104 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %135, i32 4, <8 x i1> %128, <8 x float> undef), !tbaa !12, !alias.scope !39, !noalias !33 - %136 = fmul <8 x float> %wide.masked.gather103, %wide.masked.load104 - %137 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather99, <8 x float> %wide.masked.load100, <8 x float> %136) - %138 = extractelement <8 x i32> %127, i32 0 - %139 = add nsw i32 %mul.i.1, %138 - %140 = sext i32 %139 to i64 - %141 = getelementptr inbounds float, float* %0, i64 %140 - %142 = bitcast float* %141 to <8 x float>* - %wide.masked.load105 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %142, i32 4, <8 x i1> %128, <8 x float> undef), !tbaa !12, !alias.scope !33 - %143 = fadd <8 x float> %wide.masked.load105, %137 - %144 = bitcast float* %141 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %143, <8 x float>* %144, i32 4, <8 x i1> %128), !tbaa !12, !alias.scope !33, !llvm.access.group !27 - %145 = or <8 x i64> %broadcast.splat94, - %146 = trunc <8 x i64> %145 to <8 x i32> - %147 = icmp sgt <8 x i32> %broadcast.splat96, %146 - %wide.masked.gather99.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat98, i32 4, <8 x i1> %147, <8 x float> undef), !tbaa !12, !alias.scope !30, !noalias !33 - %148 = extractelement <8 x i64> %145, i32 0 - %149 = shl i64 %148, 32 - %150 = ashr exact i64 %149, 32 - %151 = getelementptr inbounds float, float* %1, i64 %150 - %152 = bitcast float* %151 to <8 x float>* - %wide.masked.load100.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %152, i32 4, <8 x i1> %147, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !33 - %wide.masked.gather103.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat102, i32 4, <8 x i1> %147, <8 x float> undef), !tbaa !12, !alias.scope !37, !noalias !33 - %153 = getelementptr inbounds float, float* %2, i64 %150 - %154 = bitcast float* %153 to <8 x float>* - %wide.masked.load104.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %154, i32 4, <8 x i1> %147, <8 x float> undef), !tbaa !12, !alias.scope !39, !noalias !33 - %155 = fmul <8 x float> %wide.masked.gather103.1, %wide.masked.load104.1 - %156 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather99.1, <8 x float> %wide.masked.load100.1, <8 x float> %155) - %157 = extractelement <8 x i32> %146, i32 0 - %158 = add nsw i32 %mul.i.1, %157 - %159 = sext i32 %158 to i64 - %160 = getelementptr inbounds float, float* %0, i64 %159 - %161 = bitcast float* %160 to <8 x float>* - %wide.masked.load105.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %161, i32 4, <8 x i1> %147, <8 x float> undef), !tbaa !12, !alias.scope !33 - %162 = fadd <8 x float> %wide.masked.load105.1, %156 - %163 = bitcast float* %160 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %162, <8 x float>* %163, i32 4, <8 x i1> %147), !tbaa !12, !alias.scope !33, !llvm.access.group !27 - %164 = or <8 x i64> %broadcast.splat94, - %165 = trunc <8 x i64> %164 to <8 x i32> - %166 = icmp sgt <8 x i32> %broadcast.splat96, %165 - %wide.masked.gather99.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat98, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12, !alias.scope !30, !noalias !33 - %167 = extractelement <8 x i64> %164, i32 0 - %168 = shl i64 %167, 32 - %169 = ashr exact i64 %168, 32 - %170 = getelementptr inbounds float, float* %1, i64 %169 - %171 = bitcast float* %170 to <8 x float>* - %wide.masked.load100.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %171, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !33 - %wide.masked.gather103.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat102, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12, !alias.scope !37, !noalias !33 - %172 = getelementptr inbounds float, float* %2, i64 %169 - %173 = bitcast float* %172 to <8 x float>* - %wide.masked.load104.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %173, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12, !alias.scope !39, !noalias !33 - %174 = fmul <8 x float> %wide.masked.gather103.2, %wide.masked.load104.2 - %175 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather99.2, <8 x float> %wide.masked.load100.2, <8 x float> %174) - %176 = extractelement <8 x i32> %165, i32 0 - %177 = add nsw i32 %mul.i.1, %176 - %178 = sext i32 %177 to i64 - %179 = getelementptr inbounds float, float* %0, i64 %178 - %180 = bitcast float* %179 to <8 x float>* - %wide.masked.load105.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %180, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12, !alias.scope !33 - %181 = fadd <8 x float> %wide.masked.load105.2, %175 - %182 = bitcast float* %179 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %181, <8 x float>* %182, i32 4, <8 x i1> %166), !tbaa !12, !alias.scope !33, !llvm.access.group !27 - %183 = or <8 x i64> %broadcast.splat94, - %184 = trunc <8 x i64> %183 to <8 x i32> - %185 = icmp sgt <8 x i32> %broadcast.splat96, %184 - %wide.masked.gather99.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat98, i32 4, <8 x i1> %185, <8 x float> undef), !tbaa !12, !alias.scope !30, !noalias !33 - %186 = extractelement <8 x i64> %183, i32 0 - %187 = shl i64 %186, 32 - %188 = ashr exact i64 %187, 32 - %189 = getelementptr inbounds float, float* %1, i64 %188 - %190 = bitcast float* %189 to <8 x float>* - %wide.masked.load100.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %190, i32 4, <8 x i1> %185, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !33 - %wide.masked.gather103.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat102, i32 4, <8 x i1> %185, <8 x float> undef), !tbaa !12, !alias.scope !37, !noalias !33 - %191 = getelementptr inbounds float, float* %2, i64 %188 - %192 = bitcast float* %191 to <8 x float>* - %wide.masked.load104.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %192, i32 4, <8 x i1> %185, <8 x float> undef), !tbaa !12, !alias.scope !39, !noalias !33 - %193 = fmul <8 x float> %wide.masked.gather103.3, %wide.masked.load104.3 - %194 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather99.3, <8 x float> %wide.masked.load100.3, <8 x float> %193) - %195 = extractelement <8 x i32> %184, i32 0 - %196 = add nsw i32 %mul.i.1, %195 - %197 = sext i32 %196 to i64 - %198 = getelementptr inbounds float, float* %0, i64 %197 - %199 = bitcast float* %198 to <8 x float>* - %wide.masked.load105.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %199, i32 4, <8 x i1> %185, <8 x float> undef), !tbaa !12, !alias.scope !33 - %200 = fadd <8 x float> %wide.masked.load105.3, %194 - %201 = bitcast float* %198 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %200, <8 x float>* %201, i32 4, <8 x i1> %185), !tbaa !12, !alias.scope !33, !llvm.access.group !27 - br label %pregion_for_end.i.1 - -pregion_for_entry.entry.i.us.1: ; preds = %if.end.r_exit.i.us.1.1, %pregion_for_entry.entry.i.us.1.preheader - %_local_id_x.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.us.1.preheader ], [ %845, %if.end.r_exit.i.us.1.1 ] - %add1.i.i.us.1 = add nuw nsw i64 %_local_id_x.0.us.1, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp4.i.us.1 = icmp slt i32 %conv.i.us.1, %5 - br i1 %cmp4.i.us.1, label %if.then.i.us.1, label %if.end.r_exit.i.us.1 - -if.then.i.us.1: ; preds = %pregion_for_entry.entry.i.us.1 - %202 = load float, float* %arrayidx.i.1, align 4, !tbaa !12 - %sext26.i.us.1 = shl i64 %add1.i.i.us.1, 32 - %idxprom6.i.us.1 = ashr exact i64 %sext26.i.us.1, 32 - %arrayidx7.i.us.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.1 - %203 = load float, float* %arrayidx7.i.us.1, align 4, !tbaa !12 - %204 = load float, float* %arrayidx9.i.1, align 4, !tbaa !12 - %arrayidx11.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.1 - %205 = load float, float* %arrayidx11.i.us.1, align 4, !tbaa !12 - %mul12.i.us.1 = fmul float %204, %205 - %206 = tail call float @llvm.fmuladd.f32(float %202, float %203, float %mul12.i.us.1) #6 - %add.i.us.1 = add nsw i32 %mul.i.1, %conv.i.us.1 - %idxprom13.i.us.1 = sext i32 %add.i.us.1 to i64 - %arrayidx14.i.us.1 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.1 - %207 = load float, float* %arrayidx14.i.us.1, align 4, !tbaa !12 - %add15.i.us.1 = fadd float %207, %206 - store float %add15.i.us.1, float* %arrayidx14.i.us.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.1 - -if.end.r_exit.i.us.1: ; preds = %if.then.i.us.1, %pregion_for_entry.entry.i.us.1 - %208 = or i64 %_local_id_x.0.us.1, 1 - %add1.i.i.us.1.1 = add nuw nsw i64 %208, %mul.i.i - %conv.i.us.1.1 = trunc i64 %add1.i.i.us.1.1 to i32 - %cmp4.i.us.1.1 = icmp slt i32 %conv.i.us.1.1, %5 - br i1 %cmp4.i.us.1.1, label %if.then.i.us.1.1, label %if.end.r_exit.i.us.1.1 - -pregion_for_end.i.1.loopexit: ; preds = %if.end.r_exit.i.us.1.1 - br label %pregion_for_end.i.1 - -pregion_for_end.i.1: ; preds = %pregion_for_end.i.1.loopexit, %vector.ph86, %pregion_for_end.i - %add6.i.i.2 = or i64 %mul3.i.i, 2 - %conv2.i.2 = trunc i64 %add6.i.i.2 to i32 - %cmp.i.2 = icmp slt i32 %conv2.i.2, %5 - %sext.i.2 = shl i64 %add6.i.i.2, 32 - %idxprom.i.2 = ashr exact i64 %sext.i.2, 32 - %arrayidx.i.2 = getelementptr inbounds float, float* %3, i64 %idxprom.i.2 - %arrayidx9.i.2 = getelementptr inbounds float, float* %4, i64 %idxprom.i.2 - %mul.i.2 = mul nsw i32 %conv2.i.2, %5 - br i1 %cmp.i.2, label %vector.scevcheck113, label %pregion_for_end.i.2 - -vector.scevcheck113: ; preds = %pregion_for_end.i.1 - %209 = mul i32 %conv2.i.2, %5 - %210 = trunc i64 %7 to i32 - %211 = shl i32 %210, 5 - %212 = add i32 %209, %211 - %213 = icmp sgt i32 %212, 2147483616 - br i1 %213, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.memcheck151 - -pregion_for_entry.entry.i.us.2.preheader: ; preds = %vector.memcheck151, %vector.scevcheck113 - br label %pregion_for_entry.entry.i.us.2 - -vector.memcheck151: ; preds = %vector.scevcheck113 - %sext506 = shl i64 %8, 35 - %214 = ashr exact i64 %sext506, 32 - %215 = or i64 %214, 2 - %scevgep115 = getelementptr float, float* %3, i64 %215 - %scevgep115116 = bitcast float* %scevgep115 to i8* - %uglygep117 = getelementptr i8, i8* %scevgep115116, i64 1 - %216 = mul i32 %conv2.i.2, %5 - %217 = trunc i64 %7 to i32 - %218 = shl i32 %217, 5 - %219 = add i32 %216, %218 - %220 = sext i32 %219 to i64 - %scevgep118 = getelementptr float, float* %0, i64 %220 - %scevgep118119 = bitcast float* %scevgep118 to i8* - %221 = add nsw i64 %220, 32 - %scevgep120 = getelementptr float, float* %0, i64 %221 - %222 = sext i32 %218 to i64 - %scevgep122 = getelementptr float, float* %1, i64 %222 - %223 = add nsw i64 %222, 32 - %scevgep124 = getelementptr float, float* %1, i64 %223 - %scevgep126 = getelementptr float, float* %4, i64 %215 - %scevgep126127 = bitcast float* %scevgep126 to i8* - %uglygep128 = getelementptr i8, i8* %scevgep126127, i64 1 - %scevgep129 = getelementptr float, float* %2, i64 %222 - %scevgep131 = getelementptr float, float* %2, i64 %223 - %bound0134 = icmp ult float* %arrayidx.i.2, %scevgep120 - %bound1135 = icmp ugt i8* %uglygep117, %scevgep118119 - %found.conflict136 = and i1 %bound0134, %bound1135 - %bound0137 = icmp ult float* %scevgep122, %scevgep120 - %bound1138 = icmp ult float* %scevgep118, %scevgep124 - %found.conflict139 = and i1 %bound0137, %bound1138 - %conflict.rdx140 = or i1 %found.conflict136, %found.conflict139 - %bound0142 = icmp ult float* %arrayidx9.i.2, %scevgep120 - %bound1143 = icmp ugt i8* %uglygep128, %scevgep118119 - %found.conflict144 = and i1 %bound0142, %bound1143 - %conflict.rdx145 = or i1 %conflict.rdx140, %found.conflict144 - %bound0146 = icmp ult float* %scevgep129, %scevgep120 - %bound1147 = icmp ult float* %scevgep118, %scevgep131 - %found.conflict148 = and i1 %bound0146, %bound1147 - %conflict.rdx149 = or i1 %conflict.rdx145, %found.conflict148 - br i1 %conflict.rdx149, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.ph152 - -vector.ph152: ; preds = %vector.memcheck151 - %broadcast.splatinsert159 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat160 = shufflevector <8 x i64> %broadcast.splatinsert159, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert161 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat162 = shufflevector <8 x i32> %broadcast.splatinsert161, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert163 = insertelement <8 x float*> undef, float* %arrayidx.i.2, i32 0 - %broadcast.splat164 = shufflevector <8 x float*> %broadcast.splatinsert163, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert167 = insertelement <8 x float*> undef, float* %arrayidx9.i.2, i32 0 - %broadcast.splat168 = shufflevector <8 x float*> %broadcast.splatinsert167, <8 x float*> undef, <8 x i32> zeroinitializer - %224 = or <8 x i64> %broadcast.splat160, - %225 = trunc <8 x i64> %224 to <8 x i32> - %226 = icmp sgt <8 x i32> %broadcast.splat162, %225 - %wide.masked.gather165 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat164, i32 4, <8 x i1> %226, <8 x float> undef), !tbaa !12, !alias.scope !41, !noalias !44 - %227 = extractelement <8 x i64> %224, i32 0 - %228 = shl i64 %227, 32 - %229 = ashr exact i64 %228, 32 - %230 = getelementptr inbounds float, float* %1, i64 %229 - %231 = bitcast float* %230 to <8 x float>* - %wide.masked.load166 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %231, i32 4, <8 x i1> %226, <8 x float> undef), !tbaa !12, !alias.scope !46, !noalias !44 - %wide.masked.gather169 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat168, i32 4, <8 x i1> %226, <8 x float> undef), !tbaa !12, !alias.scope !48, !noalias !44 - %232 = getelementptr inbounds float, float* %2, i64 %229 - %233 = bitcast float* %232 to <8 x float>* - %wide.masked.load170 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %233, i32 4, <8 x i1> %226, <8 x float> undef), !tbaa !12, !alias.scope !50, !noalias !44 - %234 = fmul <8 x float> %wide.masked.gather169, %wide.masked.load170 - %235 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather165, <8 x float> %wide.masked.load166, <8 x float> %234) - %236 = extractelement <8 x i32> %225, i32 0 - %237 = add nsw i32 %mul.i.2, %236 - %238 = sext i32 %237 to i64 - %239 = getelementptr inbounds float, float* %0, i64 %238 - %240 = bitcast float* %239 to <8 x float>* - %wide.masked.load171 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %240, i32 4, <8 x i1> %226, <8 x float> undef), !tbaa !12, !alias.scope !44 - %241 = fadd <8 x float> %wide.masked.load171, %235 - %242 = bitcast float* %239 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %241, <8 x float>* %242, i32 4, <8 x i1> %226), !tbaa !12, !alias.scope !44, !llvm.access.group !27 - %243 = or <8 x i64> %broadcast.splat160, - %244 = trunc <8 x i64> %243 to <8 x i32> - %245 = icmp sgt <8 x i32> %broadcast.splat162, %244 - %wide.masked.gather165.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat164, i32 4, <8 x i1> %245, <8 x float> undef), !tbaa !12, !alias.scope !41, !noalias !44 - %246 = extractelement <8 x i64> %243, i32 0 - %247 = shl i64 %246, 32 - %248 = ashr exact i64 %247, 32 - %249 = getelementptr inbounds float, float* %1, i64 %248 - %250 = bitcast float* %249 to <8 x float>* - %wide.masked.load166.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %250, i32 4, <8 x i1> %245, <8 x float> undef), !tbaa !12, !alias.scope !46, !noalias !44 - %wide.masked.gather169.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat168, i32 4, <8 x i1> %245, <8 x float> undef), !tbaa !12, !alias.scope !48, !noalias !44 - %251 = getelementptr inbounds float, float* %2, i64 %248 - %252 = bitcast float* %251 to <8 x float>* - %wide.masked.load170.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %252, i32 4, <8 x i1> %245, <8 x float> undef), !tbaa !12, !alias.scope !50, !noalias !44 - %253 = fmul <8 x float> %wide.masked.gather169.1, %wide.masked.load170.1 - %254 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather165.1, <8 x float> %wide.masked.load166.1, <8 x float> %253) - %255 = extractelement <8 x i32> %244, i32 0 - %256 = add nsw i32 %mul.i.2, %255 - %257 = sext i32 %256 to i64 - %258 = getelementptr inbounds float, float* %0, i64 %257 - %259 = bitcast float* %258 to <8 x float>* - %wide.masked.load171.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %259, i32 4, <8 x i1> %245, <8 x float> undef), !tbaa !12, !alias.scope !44 - %260 = fadd <8 x float> %wide.masked.load171.1, %254 - %261 = bitcast float* %258 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %260, <8 x float>* %261, i32 4, <8 x i1> %245), !tbaa !12, !alias.scope !44, !llvm.access.group !27 - %262 = or <8 x i64> %broadcast.splat160, - %263 = trunc <8 x i64> %262 to <8 x i32> - %264 = icmp sgt <8 x i32> %broadcast.splat162, %263 - %wide.masked.gather165.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat164, i32 4, <8 x i1> %264, <8 x float> undef), !tbaa !12, !alias.scope !41, !noalias !44 - %265 = extractelement <8 x i64> %262, i32 0 - %266 = shl i64 %265, 32 - %267 = ashr exact i64 %266, 32 - %268 = getelementptr inbounds float, float* %1, i64 %267 - %269 = bitcast float* %268 to <8 x float>* - %wide.masked.load166.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %269, i32 4, <8 x i1> %264, <8 x float> undef), !tbaa !12, !alias.scope !46, !noalias !44 - %wide.masked.gather169.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat168, i32 4, <8 x i1> %264, <8 x float> undef), !tbaa !12, !alias.scope !48, !noalias !44 - %270 = getelementptr inbounds float, float* %2, i64 %267 - %271 = bitcast float* %270 to <8 x float>* - %wide.masked.load170.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %271, i32 4, <8 x i1> %264, <8 x float> undef), !tbaa !12, !alias.scope !50, !noalias !44 - %272 = fmul <8 x float> %wide.masked.gather169.2, %wide.masked.load170.2 - %273 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather165.2, <8 x float> %wide.masked.load166.2, <8 x float> %272) - %274 = extractelement <8 x i32> %263, i32 0 - %275 = add nsw i32 %mul.i.2, %274 - %276 = sext i32 %275 to i64 - %277 = getelementptr inbounds float, float* %0, i64 %276 - %278 = bitcast float* %277 to <8 x float>* - %wide.masked.load171.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %278, i32 4, <8 x i1> %264, <8 x float> undef), !tbaa !12, !alias.scope !44 - %279 = fadd <8 x float> %wide.masked.load171.2, %273 - %280 = bitcast float* %277 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %279, <8 x float>* %280, i32 4, <8 x i1> %264), !tbaa !12, !alias.scope !44, !llvm.access.group !27 - %281 = or <8 x i64> %broadcast.splat160, - %282 = trunc <8 x i64> %281 to <8 x i32> - %283 = icmp sgt <8 x i32> %broadcast.splat162, %282 - %wide.masked.gather165.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat164, i32 4, <8 x i1> %283, <8 x float> undef), !tbaa !12, !alias.scope !41, !noalias !44 - %284 = extractelement <8 x i64> %281, i32 0 - %285 = shl i64 %284, 32 - %286 = ashr exact i64 %285, 32 - %287 = getelementptr inbounds float, float* %1, i64 %286 - %288 = bitcast float* %287 to <8 x float>* - %wide.masked.load166.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %288, i32 4, <8 x i1> %283, <8 x float> undef), !tbaa !12, !alias.scope !46, !noalias !44 - %wide.masked.gather169.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat168, i32 4, <8 x i1> %283, <8 x float> undef), !tbaa !12, !alias.scope !48, !noalias !44 - %289 = getelementptr inbounds float, float* %2, i64 %286 - %290 = bitcast float* %289 to <8 x float>* - %wide.masked.load170.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %290, i32 4, <8 x i1> %283, <8 x float> undef), !tbaa !12, !alias.scope !50, !noalias !44 - %291 = fmul <8 x float> %wide.masked.gather169.3, %wide.masked.load170.3 - %292 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather165.3, <8 x float> %wide.masked.load166.3, <8 x float> %291) - %293 = extractelement <8 x i32> %282, i32 0 - %294 = add nsw i32 %mul.i.2, %293 - %295 = sext i32 %294 to i64 - %296 = getelementptr inbounds float, float* %0, i64 %295 - %297 = bitcast float* %296 to <8 x float>* - %wide.masked.load171.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %297, i32 4, <8 x i1> %283, <8 x float> undef), !tbaa !12, !alias.scope !44 - %298 = fadd <8 x float> %wide.masked.load171.3, %292 - %299 = bitcast float* %296 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %298, <8 x float>* %299, i32 4, <8 x i1> %283), !tbaa !12, !alias.scope !44, !llvm.access.group !27 - br label %pregion_for_end.i.2 - -pregion_for_entry.entry.i.us.2: ; preds = %if.end.r_exit.i.us.2.1, %pregion_for_entry.entry.i.us.2.preheader - %_local_id_x.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.us.2.preheader ], [ %838, %if.end.r_exit.i.us.2.1 ] - %add1.i.i.us.2 = add nuw nsw i64 %_local_id_x.0.us.2, %mul.i.i - %conv.i.us.2 = trunc i64 %add1.i.i.us.2 to i32 - %cmp4.i.us.2 = icmp slt i32 %conv.i.us.2, %5 - br i1 %cmp4.i.us.2, label %if.then.i.us.2, label %if.end.r_exit.i.us.2 - -if.then.i.us.2: ; preds = %pregion_for_entry.entry.i.us.2 - %300 = load float, float* %arrayidx.i.2, align 4, !tbaa !12 - %sext26.i.us.2 = shl i64 %add1.i.i.us.2, 32 - %idxprom6.i.us.2 = ashr exact i64 %sext26.i.us.2, 32 - %arrayidx7.i.us.2 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.2 - %301 = load float, float* %arrayidx7.i.us.2, align 4, !tbaa !12 - %302 = load float, float* %arrayidx9.i.2, align 4, !tbaa !12 - %arrayidx11.i.us.2 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.2 - %303 = load float, float* %arrayidx11.i.us.2, align 4, !tbaa !12 - %mul12.i.us.2 = fmul float %302, %303 - %304 = tail call float @llvm.fmuladd.f32(float %300, float %301, float %mul12.i.us.2) #6 - %add.i.us.2 = add nsw i32 %mul.i.2, %conv.i.us.2 - %idxprom13.i.us.2 = sext i32 %add.i.us.2 to i64 - %arrayidx14.i.us.2 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.2 - %305 = load float, float* %arrayidx14.i.us.2, align 4, !tbaa !12 - %add15.i.us.2 = fadd float %305, %304 - store float %add15.i.us.2, float* %arrayidx14.i.us.2, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.2 - -if.end.r_exit.i.us.2: ; preds = %if.then.i.us.2, %pregion_for_entry.entry.i.us.2 - %306 = or i64 %_local_id_x.0.us.2, 1 - %add1.i.i.us.2.1 = add nuw nsw i64 %306, %mul.i.i - %conv.i.us.2.1 = trunc i64 %add1.i.i.us.2.1 to i32 - %cmp4.i.us.2.1 = icmp slt i32 %conv.i.us.2.1, %5 - br i1 %cmp4.i.us.2.1, label %if.then.i.us.2.1, label %if.end.r_exit.i.us.2.1 - -pregion_for_end.i.2.loopexit: ; preds = %if.end.r_exit.i.us.2.1 - br label %pregion_for_end.i.2 - -pregion_for_end.i.2: ; preds = %pregion_for_end.i.2.loopexit, %vector.ph152, %pregion_for_end.i.1 - %add6.i.i.3 = or i64 %mul3.i.i, 3 - %conv2.i.3 = trunc i64 %add6.i.i.3 to i32 - %cmp.i.3 = icmp slt i32 %conv2.i.3, %5 - %sext.i.3 = shl i64 %add6.i.i.3, 32 - %idxprom.i.3 = ashr exact i64 %sext.i.3, 32 - %arrayidx.i.3 = getelementptr inbounds float, float* %3, i64 %idxprom.i.3 - %arrayidx9.i.3 = getelementptr inbounds float, float* %4, i64 %idxprom.i.3 - %mul.i.3 = mul nsw i32 %conv2.i.3, %5 - br i1 %cmp.i.3, label %vector.scevcheck179, label %pregion_for_end.i.3 - -vector.scevcheck179: ; preds = %pregion_for_end.i.2 - %307 = mul i32 %conv2.i.3, %5 - %308 = trunc i64 %7 to i32 - %309 = shl i32 %308, 5 - %310 = add i32 %307, %309 - %311 = icmp sgt i32 %310, 2147483616 - br i1 %311, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.memcheck217 - -pregion_for_entry.entry.i.us.3.preheader: ; preds = %vector.memcheck217, %vector.scevcheck179 - br label %pregion_for_entry.entry.i.us.3 - -vector.memcheck217: ; preds = %vector.scevcheck179 - %sext505 = shl i64 %8, 35 - %312 = ashr exact i64 %sext505, 32 - %313 = or i64 %312, 3 - %scevgep181 = getelementptr float, float* %3, i64 %313 - %scevgep181182 = bitcast float* %scevgep181 to i8* - %uglygep183 = getelementptr i8, i8* %scevgep181182, i64 1 - %314 = mul i32 %conv2.i.3, %5 - %315 = trunc i64 %7 to i32 - %316 = shl i32 %315, 5 - %317 = add i32 %314, %316 - %318 = sext i32 %317 to i64 - %scevgep184 = getelementptr float, float* %0, i64 %318 - %scevgep184185 = bitcast float* %scevgep184 to i8* - %319 = add nsw i64 %318, 32 - %scevgep186 = getelementptr float, float* %0, i64 %319 - %320 = sext i32 %316 to i64 - %scevgep188 = getelementptr float, float* %1, i64 %320 - %321 = add nsw i64 %320, 32 - %scevgep190 = getelementptr float, float* %1, i64 %321 - %scevgep192 = getelementptr float, float* %4, i64 %313 - %scevgep192193 = bitcast float* %scevgep192 to i8* - %uglygep194 = getelementptr i8, i8* %scevgep192193, i64 1 - %scevgep195 = getelementptr float, float* %2, i64 %320 - %scevgep197 = getelementptr float, float* %2, i64 %321 - %bound0200 = icmp ult float* %arrayidx.i.3, %scevgep186 - %bound1201 = icmp ugt i8* %uglygep183, %scevgep184185 - %found.conflict202 = and i1 %bound0200, %bound1201 - %bound0203 = icmp ult float* %scevgep188, %scevgep186 - %bound1204 = icmp ult float* %scevgep184, %scevgep190 - %found.conflict205 = and i1 %bound0203, %bound1204 - %conflict.rdx206 = or i1 %found.conflict202, %found.conflict205 - %bound0208 = icmp ult float* %arrayidx9.i.3, %scevgep186 - %bound1209 = icmp ugt i8* %uglygep194, %scevgep184185 - %found.conflict210 = and i1 %bound0208, %bound1209 - %conflict.rdx211 = or i1 %conflict.rdx206, %found.conflict210 - %bound0212 = icmp ult float* %scevgep195, %scevgep186 - %bound1213 = icmp ult float* %scevgep184, %scevgep197 - %found.conflict214 = and i1 %bound0212, %bound1213 - %conflict.rdx215 = or i1 %conflict.rdx211, %found.conflict214 - br i1 %conflict.rdx215, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.ph218 - -vector.ph218: ; preds = %vector.memcheck217 - %broadcast.splatinsert225 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat226 = shufflevector <8 x i64> %broadcast.splatinsert225, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert227 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat228 = shufflevector <8 x i32> %broadcast.splatinsert227, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert229 = insertelement <8 x float*> undef, float* %arrayidx.i.3, i32 0 - %broadcast.splat230 = shufflevector <8 x float*> %broadcast.splatinsert229, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert233 = insertelement <8 x float*> undef, float* %arrayidx9.i.3, i32 0 - %broadcast.splat234 = shufflevector <8 x float*> %broadcast.splatinsert233, <8 x float*> undef, <8 x i32> zeroinitializer - %322 = or <8 x i64> %broadcast.splat226, - %323 = trunc <8 x i64> %322 to <8 x i32> - %324 = icmp sgt <8 x i32> %broadcast.splat228, %323 - %wide.masked.gather231 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat230, i32 4, <8 x i1> %324, <8 x float> undef), !tbaa !12, !alias.scope !52, !noalias !55 - %325 = extractelement <8 x i64> %322, i32 0 - %326 = shl i64 %325, 32 - %327 = ashr exact i64 %326, 32 - %328 = getelementptr inbounds float, float* %1, i64 %327 - %329 = bitcast float* %328 to <8 x float>* - %wide.masked.load232 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %329, i32 4, <8 x i1> %324, <8 x float> undef), !tbaa !12, !alias.scope !57, !noalias !55 - %wide.masked.gather235 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat234, i32 4, <8 x i1> %324, <8 x float> undef), !tbaa !12, !alias.scope !59, !noalias !55 - %330 = getelementptr inbounds float, float* %2, i64 %327 - %331 = bitcast float* %330 to <8 x float>* - %wide.masked.load236 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %331, i32 4, <8 x i1> %324, <8 x float> undef), !tbaa !12, !alias.scope !61, !noalias !55 - %332 = fmul <8 x float> %wide.masked.gather235, %wide.masked.load236 - %333 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather231, <8 x float> %wide.masked.load232, <8 x float> %332) - %334 = extractelement <8 x i32> %323, i32 0 - %335 = add nsw i32 %mul.i.3, %334 - %336 = sext i32 %335 to i64 - %337 = getelementptr inbounds float, float* %0, i64 %336 - %338 = bitcast float* %337 to <8 x float>* - %wide.masked.load237 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %338, i32 4, <8 x i1> %324, <8 x float> undef), !tbaa !12, !alias.scope !55 - %339 = fadd <8 x float> %wide.masked.load237, %333 - %340 = bitcast float* %337 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %339, <8 x float>* %340, i32 4, <8 x i1> %324), !tbaa !12, !alias.scope !55, !llvm.access.group !27 - %341 = or <8 x i64> %broadcast.splat226, - %342 = trunc <8 x i64> %341 to <8 x i32> - %343 = icmp sgt <8 x i32> %broadcast.splat228, %342 - %wide.masked.gather231.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat230, i32 4, <8 x i1> %343, <8 x float> undef), !tbaa !12, !alias.scope !52, !noalias !55 - %344 = extractelement <8 x i64> %341, i32 0 - %345 = shl i64 %344, 32 - %346 = ashr exact i64 %345, 32 - %347 = getelementptr inbounds float, float* %1, i64 %346 - %348 = bitcast float* %347 to <8 x float>* - %wide.masked.load232.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %348, i32 4, <8 x i1> %343, <8 x float> undef), !tbaa !12, !alias.scope !57, !noalias !55 - %wide.masked.gather235.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat234, i32 4, <8 x i1> %343, <8 x float> undef), !tbaa !12, !alias.scope !59, !noalias !55 - %349 = getelementptr inbounds float, float* %2, i64 %346 - %350 = bitcast float* %349 to <8 x float>* - %wide.masked.load236.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %350, i32 4, <8 x i1> %343, <8 x float> undef), !tbaa !12, !alias.scope !61, !noalias !55 - %351 = fmul <8 x float> %wide.masked.gather235.1, %wide.masked.load236.1 - %352 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather231.1, <8 x float> %wide.masked.load232.1, <8 x float> %351) - %353 = extractelement <8 x i32> %342, i32 0 - %354 = add nsw i32 %mul.i.3, %353 - %355 = sext i32 %354 to i64 - %356 = getelementptr inbounds float, float* %0, i64 %355 - %357 = bitcast float* %356 to <8 x float>* - %wide.masked.load237.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %357, i32 4, <8 x i1> %343, <8 x float> undef), !tbaa !12, !alias.scope !55 - %358 = fadd <8 x float> %wide.masked.load237.1, %352 - %359 = bitcast float* %356 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %358, <8 x float>* %359, i32 4, <8 x i1> %343), !tbaa !12, !alias.scope !55, !llvm.access.group !27 - %360 = or <8 x i64> %broadcast.splat226, - %361 = trunc <8 x i64> %360 to <8 x i32> - %362 = icmp sgt <8 x i32> %broadcast.splat228, %361 - %wide.masked.gather231.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat230, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !52, !noalias !55 - %363 = extractelement <8 x i64> %360, i32 0 - %364 = shl i64 %363, 32 - %365 = ashr exact i64 %364, 32 - %366 = getelementptr inbounds float, float* %1, i64 %365 - %367 = bitcast float* %366 to <8 x float>* - %wide.masked.load232.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %367, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !57, !noalias !55 - %wide.masked.gather235.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat234, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !59, !noalias !55 - %368 = getelementptr inbounds float, float* %2, i64 %365 - %369 = bitcast float* %368 to <8 x float>* - %wide.masked.load236.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %369, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !61, !noalias !55 - %370 = fmul <8 x float> %wide.masked.gather235.2, %wide.masked.load236.2 - %371 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather231.2, <8 x float> %wide.masked.load232.2, <8 x float> %370) - %372 = extractelement <8 x i32> %361, i32 0 - %373 = add nsw i32 %mul.i.3, %372 - %374 = sext i32 %373 to i64 - %375 = getelementptr inbounds float, float* %0, i64 %374 - %376 = bitcast float* %375 to <8 x float>* - %wide.masked.load237.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %376, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !55 - %377 = fadd <8 x float> %wide.masked.load237.2, %371 - %378 = bitcast float* %375 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %377, <8 x float>* %378, i32 4, <8 x i1> %362), !tbaa !12, !alias.scope !55, !llvm.access.group !27 - %379 = or <8 x i64> %broadcast.splat226, - %380 = trunc <8 x i64> %379 to <8 x i32> - %381 = icmp sgt <8 x i32> %broadcast.splat228, %380 - %wide.masked.gather231.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat230, i32 4, <8 x i1> %381, <8 x float> undef), !tbaa !12, !alias.scope !52, !noalias !55 - %382 = extractelement <8 x i64> %379, i32 0 - %383 = shl i64 %382, 32 - %384 = ashr exact i64 %383, 32 - %385 = getelementptr inbounds float, float* %1, i64 %384 - %386 = bitcast float* %385 to <8 x float>* - %wide.masked.load232.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %386, i32 4, <8 x i1> %381, <8 x float> undef), !tbaa !12, !alias.scope !57, !noalias !55 - %wide.masked.gather235.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat234, i32 4, <8 x i1> %381, <8 x float> undef), !tbaa !12, !alias.scope !59, !noalias !55 - %387 = getelementptr inbounds float, float* %2, i64 %384 - %388 = bitcast float* %387 to <8 x float>* - %wide.masked.load236.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %388, i32 4, <8 x i1> %381, <8 x float> undef), !tbaa !12, !alias.scope !61, !noalias !55 - %389 = fmul <8 x float> %wide.masked.gather235.3, %wide.masked.load236.3 - %390 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather231.3, <8 x float> %wide.masked.load232.3, <8 x float> %389) - %391 = extractelement <8 x i32> %380, i32 0 - %392 = add nsw i32 %mul.i.3, %391 - %393 = sext i32 %392 to i64 - %394 = getelementptr inbounds float, float* %0, i64 %393 - %395 = bitcast float* %394 to <8 x float>* - %wide.masked.load237.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %395, i32 4, <8 x i1> %381, <8 x float> undef), !tbaa !12, !alias.scope !55 - %396 = fadd <8 x float> %wide.masked.load237.3, %390 - %397 = bitcast float* %394 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %396, <8 x float>* %397, i32 4, <8 x i1> %381), !tbaa !12, !alias.scope !55, !llvm.access.group !27 - br label %pregion_for_end.i.3 - -pregion_for_entry.entry.i.us.3: ; preds = %if.end.r_exit.i.us.3.1, %pregion_for_entry.entry.i.us.3.preheader - %_local_id_x.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.us.3.preheader ], [ %831, %if.end.r_exit.i.us.3.1 ] - %add1.i.i.us.3 = add nuw nsw i64 %_local_id_x.0.us.3, %mul.i.i - %conv.i.us.3 = trunc i64 %add1.i.i.us.3 to i32 - %cmp4.i.us.3 = icmp slt i32 %conv.i.us.3, %5 - br i1 %cmp4.i.us.3, label %if.then.i.us.3, label %if.end.r_exit.i.us.3 - -if.then.i.us.3: ; preds = %pregion_for_entry.entry.i.us.3 - %398 = load float, float* %arrayidx.i.3, align 4, !tbaa !12 - %sext26.i.us.3 = shl i64 %add1.i.i.us.3, 32 - %idxprom6.i.us.3 = ashr exact i64 %sext26.i.us.3, 32 - %arrayidx7.i.us.3 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.3 - %399 = load float, float* %arrayidx7.i.us.3, align 4, !tbaa !12 - %400 = load float, float* %arrayidx9.i.3, align 4, !tbaa !12 - %arrayidx11.i.us.3 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.3 - %401 = load float, float* %arrayidx11.i.us.3, align 4, !tbaa !12 - %mul12.i.us.3 = fmul float %400, %401 - %402 = tail call float @llvm.fmuladd.f32(float %398, float %399, float %mul12.i.us.3) #6 - %add.i.us.3 = add nsw i32 %mul.i.3, %conv.i.us.3 - %idxprom13.i.us.3 = sext i32 %add.i.us.3 to i64 - %arrayidx14.i.us.3 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.3 - %403 = load float, float* %arrayidx14.i.us.3, align 4, !tbaa !12 - %add15.i.us.3 = fadd float %403, %402 - store float %add15.i.us.3, float* %arrayidx14.i.us.3, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.3 - -if.end.r_exit.i.us.3: ; preds = %if.then.i.us.3, %pregion_for_entry.entry.i.us.3 - %404 = or i64 %_local_id_x.0.us.3, 1 - %add1.i.i.us.3.1 = add nuw nsw i64 %404, %mul.i.i - %conv.i.us.3.1 = trunc i64 %add1.i.i.us.3.1 to i32 - %cmp4.i.us.3.1 = icmp slt i32 %conv.i.us.3.1, %5 - br i1 %cmp4.i.us.3.1, label %if.then.i.us.3.1, label %if.end.r_exit.i.us.3.1 - -pregion_for_end.i.3.loopexit: ; preds = %if.end.r_exit.i.us.3.1 - br label %pregion_for_end.i.3 - -pregion_for_end.i.3: ; preds = %pregion_for_end.i.3.loopexit, %vector.ph218, %pregion_for_end.i.2 - %add6.i.i.4 = or i64 %mul3.i.i, 4 - %conv2.i.4 = trunc i64 %add6.i.i.4 to i32 - %cmp.i.4 = icmp slt i32 %conv2.i.4, %5 - %sext.i.4 = shl i64 %add6.i.i.4, 32 - %idxprom.i.4 = ashr exact i64 %sext.i.4, 32 - %arrayidx.i.4 = getelementptr inbounds float, float* %3, i64 %idxprom.i.4 - %arrayidx9.i.4 = getelementptr inbounds float, float* %4, i64 %idxprom.i.4 - %mul.i.4 = mul nsw i32 %conv2.i.4, %5 - br i1 %cmp.i.4, label %vector.scevcheck245, label %pregion_for_end.i.4 - -vector.scevcheck245: ; preds = %pregion_for_end.i.3 - %405 = mul i32 %conv2.i.4, %5 - %406 = trunc i64 %7 to i32 - %407 = shl i32 %406, 5 - %408 = add i32 %405, %407 - %409 = icmp sgt i32 %408, 2147483616 - br i1 %409, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.memcheck283 - -pregion_for_entry.entry.i.us.4.preheader: ; preds = %vector.memcheck283, %vector.scevcheck245 - br label %pregion_for_entry.entry.i.us.4 - -vector.memcheck283: ; preds = %vector.scevcheck245 - %sext504 = shl i64 %8, 35 - %410 = ashr exact i64 %sext504, 32 - %411 = or i64 %410, 4 - %scevgep247 = getelementptr float, float* %3, i64 %411 - %scevgep247248 = bitcast float* %scevgep247 to i8* - %uglygep249 = getelementptr i8, i8* %scevgep247248, i64 1 - %412 = mul i32 %conv2.i.4, %5 - %413 = trunc i64 %7 to i32 - %414 = shl i32 %413, 5 - %415 = add i32 %412, %414 - %416 = sext i32 %415 to i64 - %scevgep250 = getelementptr float, float* %0, i64 %416 - %scevgep250251 = bitcast float* %scevgep250 to i8* - %417 = add nsw i64 %416, 32 - %scevgep252 = getelementptr float, float* %0, i64 %417 - %418 = sext i32 %414 to i64 - %scevgep254 = getelementptr float, float* %1, i64 %418 - %419 = add nsw i64 %418, 32 - %scevgep256 = getelementptr float, float* %1, i64 %419 - %scevgep258 = getelementptr float, float* %4, i64 %411 - %scevgep258259 = bitcast float* %scevgep258 to i8* - %uglygep260 = getelementptr i8, i8* %scevgep258259, i64 1 - %scevgep261 = getelementptr float, float* %2, i64 %418 - %scevgep263 = getelementptr float, float* %2, i64 %419 - %bound0266 = icmp ult float* %arrayidx.i.4, %scevgep252 - %bound1267 = icmp ugt i8* %uglygep249, %scevgep250251 - %found.conflict268 = and i1 %bound0266, %bound1267 - %bound0269 = icmp ult float* %scevgep254, %scevgep252 - %bound1270 = icmp ult float* %scevgep250, %scevgep256 - %found.conflict271 = and i1 %bound0269, %bound1270 - %conflict.rdx272 = or i1 %found.conflict268, %found.conflict271 - %bound0274 = icmp ult float* %arrayidx9.i.4, %scevgep252 - %bound1275 = icmp ugt i8* %uglygep260, %scevgep250251 - %found.conflict276 = and i1 %bound0274, %bound1275 - %conflict.rdx277 = or i1 %conflict.rdx272, %found.conflict276 - %bound0278 = icmp ult float* %scevgep261, %scevgep252 - %bound1279 = icmp ult float* %scevgep250, %scevgep263 - %found.conflict280 = and i1 %bound0278, %bound1279 - %conflict.rdx281 = or i1 %conflict.rdx277, %found.conflict280 - br i1 %conflict.rdx281, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.ph284 - -vector.ph284: ; preds = %vector.memcheck283 - %broadcast.splatinsert291 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat292 = shufflevector <8 x i64> %broadcast.splatinsert291, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert293 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat294 = shufflevector <8 x i32> %broadcast.splatinsert293, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert295 = insertelement <8 x float*> undef, float* %arrayidx.i.4, i32 0 - %broadcast.splat296 = shufflevector <8 x float*> %broadcast.splatinsert295, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert299 = insertelement <8 x float*> undef, float* %arrayidx9.i.4, i32 0 - %broadcast.splat300 = shufflevector <8 x float*> %broadcast.splatinsert299, <8 x float*> undef, <8 x i32> zeroinitializer - %420 = or <8 x i64> %broadcast.splat292, - %421 = trunc <8 x i64> %420 to <8 x i32> - %422 = icmp sgt <8 x i32> %broadcast.splat294, %421 - %wide.masked.gather297 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat296, i32 4, <8 x i1> %422, <8 x float> undef), !tbaa !12, !alias.scope !63, !noalias !66 - %423 = extractelement <8 x i64> %420, i32 0 - %424 = shl i64 %423, 32 - %425 = ashr exact i64 %424, 32 - %426 = getelementptr inbounds float, float* %1, i64 %425 - %427 = bitcast float* %426 to <8 x float>* - %wide.masked.load298 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %427, i32 4, <8 x i1> %422, <8 x float> undef), !tbaa !12, !alias.scope !68, !noalias !66 - %wide.masked.gather301 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat300, i32 4, <8 x i1> %422, <8 x float> undef), !tbaa !12, !alias.scope !70, !noalias !66 - %428 = getelementptr inbounds float, float* %2, i64 %425 - %429 = bitcast float* %428 to <8 x float>* - %wide.masked.load302 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %429, i32 4, <8 x i1> %422, <8 x float> undef), !tbaa !12, !alias.scope !72, !noalias !66 - %430 = fmul <8 x float> %wide.masked.gather301, %wide.masked.load302 - %431 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather297, <8 x float> %wide.masked.load298, <8 x float> %430) - %432 = extractelement <8 x i32> %421, i32 0 - %433 = add nsw i32 %mul.i.4, %432 - %434 = sext i32 %433 to i64 - %435 = getelementptr inbounds float, float* %0, i64 %434 - %436 = bitcast float* %435 to <8 x float>* - %wide.masked.load303 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %436, i32 4, <8 x i1> %422, <8 x float> undef), !tbaa !12, !alias.scope !66 - %437 = fadd <8 x float> %wide.masked.load303, %431 - %438 = bitcast float* %435 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %437, <8 x float>* %438, i32 4, <8 x i1> %422), !tbaa !12, !alias.scope !66, !llvm.access.group !27 - %439 = or <8 x i64> %broadcast.splat292, - %440 = trunc <8 x i64> %439 to <8 x i32> - %441 = icmp sgt <8 x i32> %broadcast.splat294, %440 - %wide.masked.gather297.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat296, i32 4, <8 x i1> %441, <8 x float> undef), !tbaa !12, !alias.scope !63, !noalias !66 - %442 = extractelement <8 x i64> %439, i32 0 - %443 = shl i64 %442, 32 - %444 = ashr exact i64 %443, 32 - %445 = getelementptr inbounds float, float* %1, i64 %444 - %446 = bitcast float* %445 to <8 x float>* - %wide.masked.load298.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %446, i32 4, <8 x i1> %441, <8 x float> undef), !tbaa !12, !alias.scope !68, !noalias !66 - %wide.masked.gather301.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat300, i32 4, <8 x i1> %441, <8 x float> undef), !tbaa !12, !alias.scope !70, !noalias !66 - %447 = getelementptr inbounds float, float* %2, i64 %444 - %448 = bitcast float* %447 to <8 x float>* - %wide.masked.load302.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %448, i32 4, <8 x i1> %441, <8 x float> undef), !tbaa !12, !alias.scope !72, !noalias !66 - %449 = fmul <8 x float> %wide.masked.gather301.1, %wide.masked.load302.1 - %450 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather297.1, <8 x float> %wide.masked.load298.1, <8 x float> %449) - %451 = extractelement <8 x i32> %440, i32 0 - %452 = add nsw i32 %mul.i.4, %451 - %453 = sext i32 %452 to i64 - %454 = getelementptr inbounds float, float* %0, i64 %453 - %455 = bitcast float* %454 to <8 x float>* - %wide.masked.load303.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %455, i32 4, <8 x i1> %441, <8 x float> undef), !tbaa !12, !alias.scope !66 - %456 = fadd <8 x float> %wide.masked.load303.1, %450 - %457 = bitcast float* %454 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %456, <8 x float>* %457, i32 4, <8 x i1> %441), !tbaa !12, !alias.scope !66, !llvm.access.group !27 - %458 = or <8 x i64> %broadcast.splat292, - %459 = trunc <8 x i64> %458 to <8 x i32> - %460 = icmp sgt <8 x i32> %broadcast.splat294, %459 - %wide.masked.gather297.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat296, i32 4, <8 x i1> %460, <8 x float> undef), !tbaa !12, !alias.scope !63, !noalias !66 - %461 = extractelement <8 x i64> %458, i32 0 - %462 = shl i64 %461, 32 - %463 = ashr exact i64 %462, 32 - %464 = getelementptr inbounds float, float* %1, i64 %463 - %465 = bitcast float* %464 to <8 x float>* - %wide.masked.load298.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %465, i32 4, <8 x i1> %460, <8 x float> undef), !tbaa !12, !alias.scope !68, !noalias !66 - %wide.masked.gather301.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat300, i32 4, <8 x i1> %460, <8 x float> undef), !tbaa !12, !alias.scope !70, !noalias !66 - %466 = getelementptr inbounds float, float* %2, i64 %463 - %467 = bitcast float* %466 to <8 x float>* - %wide.masked.load302.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %467, i32 4, <8 x i1> %460, <8 x float> undef), !tbaa !12, !alias.scope !72, !noalias !66 - %468 = fmul <8 x float> %wide.masked.gather301.2, %wide.masked.load302.2 - %469 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather297.2, <8 x float> %wide.masked.load298.2, <8 x float> %468) - %470 = extractelement <8 x i32> %459, i32 0 - %471 = add nsw i32 %mul.i.4, %470 - %472 = sext i32 %471 to i64 - %473 = getelementptr inbounds float, float* %0, i64 %472 - %474 = bitcast float* %473 to <8 x float>* - %wide.masked.load303.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %474, i32 4, <8 x i1> %460, <8 x float> undef), !tbaa !12, !alias.scope !66 - %475 = fadd <8 x float> %wide.masked.load303.2, %469 - %476 = bitcast float* %473 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %475, <8 x float>* %476, i32 4, <8 x i1> %460), !tbaa !12, !alias.scope !66, !llvm.access.group !27 - %477 = or <8 x i64> %broadcast.splat292, - %478 = trunc <8 x i64> %477 to <8 x i32> - %479 = icmp sgt <8 x i32> %broadcast.splat294, %478 - %wide.masked.gather297.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat296, i32 4, <8 x i1> %479, <8 x float> undef), !tbaa !12, !alias.scope !63, !noalias !66 - %480 = extractelement <8 x i64> %477, i32 0 - %481 = shl i64 %480, 32 - %482 = ashr exact i64 %481, 32 - %483 = getelementptr inbounds float, float* %1, i64 %482 - %484 = bitcast float* %483 to <8 x float>* - %wide.masked.load298.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %484, i32 4, <8 x i1> %479, <8 x float> undef), !tbaa !12, !alias.scope !68, !noalias !66 - %wide.masked.gather301.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat300, i32 4, <8 x i1> %479, <8 x float> undef), !tbaa !12, !alias.scope !70, !noalias !66 - %485 = getelementptr inbounds float, float* %2, i64 %482 - %486 = bitcast float* %485 to <8 x float>* - %wide.masked.load302.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %486, i32 4, <8 x i1> %479, <8 x float> undef), !tbaa !12, !alias.scope !72, !noalias !66 - %487 = fmul <8 x float> %wide.masked.gather301.3, %wide.masked.load302.3 - %488 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather297.3, <8 x float> %wide.masked.load298.3, <8 x float> %487) - %489 = extractelement <8 x i32> %478, i32 0 - %490 = add nsw i32 %mul.i.4, %489 - %491 = sext i32 %490 to i64 - %492 = getelementptr inbounds float, float* %0, i64 %491 - %493 = bitcast float* %492 to <8 x float>* - %wide.masked.load303.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %493, i32 4, <8 x i1> %479, <8 x float> undef), !tbaa !12, !alias.scope !66 - %494 = fadd <8 x float> %wide.masked.load303.3, %488 - %495 = bitcast float* %492 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %494, <8 x float>* %495, i32 4, <8 x i1> %479), !tbaa !12, !alias.scope !66, !llvm.access.group !27 - br label %pregion_for_end.i.4 - -pregion_for_entry.entry.i.us.4: ; preds = %if.end.r_exit.i.us.4.1, %pregion_for_entry.entry.i.us.4.preheader - %_local_id_x.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.us.4.preheader ], [ %824, %if.end.r_exit.i.us.4.1 ] - %add1.i.i.us.4 = add nuw nsw i64 %_local_id_x.0.us.4, %mul.i.i - %conv.i.us.4 = trunc i64 %add1.i.i.us.4 to i32 - %cmp4.i.us.4 = icmp slt i32 %conv.i.us.4, %5 - br i1 %cmp4.i.us.4, label %if.then.i.us.4, label %if.end.r_exit.i.us.4 - -if.then.i.us.4: ; preds = %pregion_for_entry.entry.i.us.4 - %496 = load float, float* %arrayidx.i.4, align 4, !tbaa !12 - %sext26.i.us.4 = shl i64 %add1.i.i.us.4, 32 - %idxprom6.i.us.4 = ashr exact i64 %sext26.i.us.4, 32 - %arrayidx7.i.us.4 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.4 - %497 = load float, float* %arrayidx7.i.us.4, align 4, !tbaa !12 - %498 = load float, float* %arrayidx9.i.4, align 4, !tbaa !12 - %arrayidx11.i.us.4 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.4 - %499 = load float, float* %arrayidx11.i.us.4, align 4, !tbaa !12 - %mul12.i.us.4 = fmul float %498, %499 - %500 = tail call float @llvm.fmuladd.f32(float %496, float %497, float %mul12.i.us.4) #6 - %add.i.us.4 = add nsw i32 %mul.i.4, %conv.i.us.4 - %idxprom13.i.us.4 = sext i32 %add.i.us.4 to i64 - %arrayidx14.i.us.4 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.4 - %501 = load float, float* %arrayidx14.i.us.4, align 4, !tbaa !12 - %add15.i.us.4 = fadd float %501, %500 - store float %add15.i.us.4, float* %arrayidx14.i.us.4, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.4 - -if.end.r_exit.i.us.4: ; preds = %if.then.i.us.4, %pregion_for_entry.entry.i.us.4 - %502 = or i64 %_local_id_x.0.us.4, 1 - %add1.i.i.us.4.1 = add nuw nsw i64 %502, %mul.i.i - %conv.i.us.4.1 = trunc i64 %add1.i.i.us.4.1 to i32 - %cmp4.i.us.4.1 = icmp slt i32 %conv.i.us.4.1, %5 - br i1 %cmp4.i.us.4.1, label %if.then.i.us.4.1, label %if.end.r_exit.i.us.4.1 - -pregion_for_end.i.4.loopexit: ; preds = %if.end.r_exit.i.us.4.1 - br label %pregion_for_end.i.4 - -pregion_for_end.i.4: ; preds = %pregion_for_end.i.4.loopexit, %vector.ph284, %pregion_for_end.i.3 - %add6.i.i.5 = or i64 %mul3.i.i, 5 - %conv2.i.5 = trunc i64 %add6.i.i.5 to i32 - %cmp.i.5 = icmp slt i32 %conv2.i.5, %5 - %sext.i.5 = shl i64 %add6.i.i.5, 32 - %idxprom.i.5 = ashr exact i64 %sext.i.5, 32 - %arrayidx.i.5 = getelementptr inbounds float, float* %3, i64 %idxprom.i.5 - %arrayidx9.i.5 = getelementptr inbounds float, float* %4, i64 %idxprom.i.5 - %mul.i.5 = mul nsw i32 %conv2.i.5, %5 - br i1 %cmp.i.5, label %vector.scevcheck311, label %pregion_for_end.i.5 - -vector.scevcheck311: ; preds = %pregion_for_end.i.4 - %503 = mul i32 %conv2.i.5, %5 - %504 = trunc i64 %7 to i32 - %505 = shl i32 %504, 5 - %506 = add i32 %503, %505 - %507 = icmp sgt i32 %506, 2147483616 - br i1 %507, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.memcheck349 - -pregion_for_entry.entry.i.us.5.preheader: ; preds = %vector.memcheck349, %vector.scevcheck311 - br label %pregion_for_entry.entry.i.us.5 - -vector.memcheck349: ; preds = %vector.scevcheck311 - %sext503 = shl i64 %8, 35 - %508 = ashr exact i64 %sext503, 32 - %509 = or i64 %508, 5 - %scevgep313 = getelementptr float, float* %3, i64 %509 - %scevgep313314 = bitcast float* %scevgep313 to i8* - %uglygep315 = getelementptr i8, i8* %scevgep313314, i64 1 - %510 = mul i32 %conv2.i.5, %5 - %511 = trunc i64 %7 to i32 - %512 = shl i32 %511, 5 - %513 = add i32 %510, %512 - %514 = sext i32 %513 to i64 - %scevgep316 = getelementptr float, float* %0, i64 %514 - %scevgep316317 = bitcast float* %scevgep316 to i8* - %515 = add nsw i64 %514, 32 - %scevgep318 = getelementptr float, float* %0, i64 %515 - %516 = sext i32 %512 to i64 - %scevgep320 = getelementptr float, float* %1, i64 %516 - %517 = add nsw i64 %516, 32 - %scevgep322 = getelementptr float, float* %1, i64 %517 - %scevgep324 = getelementptr float, float* %4, i64 %509 - %scevgep324325 = bitcast float* %scevgep324 to i8* - %uglygep326 = getelementptr i8, i8* %scevgep324325, i64 1 - %scevgep327 = getelementptr float, float* %2, i64 %516 - %scevgep329 = getelementptr float, float* %2, i64 %517 - %bound0332 = icmp ult float* %arrayidx.i.5, %scevgep318 - %bound1333 = icmp ugt i8* %uglygep315, %scevgep316317 - %found.conflict334 = and i1 %bound0332, %bound1333 - %bound0335 = icmp ult float* %scevgep320, %scevgep318 - %bound1336 = icmp ult float* %scevgep316, %scevgep322 - %found.conflict337 = and i1 %bound0335, %bound1336 - %conflict.rdx338 = or i1 %found.conflict334, %found.conflict337 - %bound0340 = icmp ult float* %arrayidx9.i.5, %scevgep318 - %bound1341 = icmp ugt i8* %uglygep326, %scevgep316317 - %found.conflict342 = and i1 %bound0340, %bound1341 - %conflict.rdx343 = or i1 %conflict.rdx338, %found.conflict342 - %bound0344 = icmp ult float* %scevgep327, %scevgep318 - %bound1345 = icmp ult float* %scevgep316, %scevgep329 - %found.conflict346 = and i1 %bound0344, %bound1345 - %conflict.rdx347 = or i1 %conflict.rdx343, %found.conflict346 - br i1 %conflict.rdx347, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.ph350 - -vector.ph350: ; preds = %vector.memcheck349 - %broadcast.splatinsert357 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat358 = shufflevector <8 x i64> %broadcast.splatinsert357, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert359 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat360 = shufflevector <8 x i32> %broadcast.splatinsert359, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert361 = insertelement <8 x float*> undef, float* %arrayidx.i.5, i32 0 - %broadcast.splat362 = shufflevector <8 x float*> %broadcast.splatinsert361, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert365 = insertelement <8 x float*> undef, float* %arrayidx9.i.5, i32 0 - %broadcast.splat366 = shufflevector <8 x float*> %broadcast.splatinsert365, <8 x float*> undef, <8 x i32> zeroinitializer - %518 = or <8 x i64> %broadcast.splat358, - %519 = trunc <8 x i64> %518 to <8 x i32> - %520 = icmp sgt <8 x i32> %broadcast.splat360, %519 - %wide.masked.gather363 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat362, i32 4, <8 x i1> %520, <8 x float> undef), !tbaa !12, !alias.scope !74, !noalias !77 - %521 = extractelement <8 x i64> %518, i32 0 - %522 = shl i64 %521, 32 - %523 = ashr exact i64 %522, 32 - %524 = getelementptr inbounds float, float* %1, i64 %523 - %525 = bitcast float* %524 to <8 x float>* - %wide.masked.load364 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %525, i32 4, <8 x i1> %520, <8 x float> undef), !tbaa !12, !alias.scope !79, !noalias !77 - %wide.masked.gather367 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat366, i32 4, <8 x i1> %520, <8 x float> undef), !tbaa !12, !alias.scope !81, !noalias !77 - %526 = getelementptr inbounds float, float* %2, i64 %523 - %527 = bitcast float* %526 to <8 x float>* - %wide.masked.load368 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %527, i32 4, <8 x i1> %520, <8 x float> undef), !tbaa !12, !alias.scope !83, !noalias !77 - %528 = fmul <8 x float> %wide.masked.gather367, %wide.masked.load368 - %529 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather363, <8 x float> %wide.masked.load364, <8 x float> %528) - %530 = extractelement <8 x i32> %519, i32 0 - %531 = add nsw i32 %mul.i.5, %530 - %532 = sext i32 %531 to i64 - %533 = getelementptr inbounds float, float* %0, i64 %532 - %534 = bitcast float* %533 to <8 x float>* - %wide.masked.load369 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %534, i32 4, <8 x i1> %520, <8 x float> undef), !tbaa !12, !alias.scope !77 - %535 = fadd <8 x float> %wide.masked.load369, %529 - %536 = bitcast float* %533 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %535, <8 x float>* %536, i32 4, <8 x i1> %520), !tbaa !12, !alias.scope !77, !llvm.access.group !27 - %537 = or <8 x i64> %broadcast.splat358, - %538 = trunc <8 x i64> %537 to <8 x i32> - %539 = icmp sgt <8 x i32> %broadcast.splat360, %538 - %wide.masked.gather363.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat362, i32 4, <8 x i1> %539, <8 x float> undef), !tbaa !12, !alias.scope !74, !noalias !77 - %540 = extractelement <8 x i64> %537, i32 0 - %541 = shl i64 %540, 32 - %542 = ashr exact i64 %541, 32 - %543 = getelementptr inbounds float, float* %1, i64 %542 - %544 = bitcast float* %543 to <8 x float>* - %wide.masked.load364.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %544, i32 4, <8 x i1> %539, <8 x float> undef), !tbaa !12, !alias.scope !79, !noalias !77 - %wide.masked.gather367.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat366, i32 4, <8 x i1> %539, <8 x float> undef), !tbaa !12, !alias.scope !81, !noalias !77 - %545 = getelementptr inbounds float, float* %2, i64 %542 - %546 = bitcast float* %545 to <8 x float>* - %wide.masked.load368.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %546, i32 4, <8 x i1> %539, <8 x float> undef), !tbaa !12, !alias.scope !83, !noalias !77 - %547 = fmul <8 x float> %wide.masked.gather367.1, %wide.masked.load368.1 - %548 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather363.1, <8 x float> %wide.masked.load364.1, <8 x float> %547) - %549 = extractelement <8 x i32> %538, i32 0 - %550 = add nsw i32 %mul.i.5, %549 - %551 = sext i32 %550 to i64 - %552 = getelementptr inbounds float, float* %0, i64 %551 - %553 = bitcast float* %552 to <8 x float>* - %wide.masked.load369.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %553, i32 4, <8 x i1> %539, <8 x float> undef), !tbaa !12, !alias.scope !77 - %554 = fadd <8 x float> %wide.masked.load369.1, %548 - %555 = bitcast float* %552 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %554, <8 x float>* %555, i32 4, <8 x i1> %539), !tbaa !12, !alias.scope !77, !llvm.access.group !27 - %556 = or <8 x i64> %broadcast.splat358, - %557 = trunc <8 x i64> %556 to <8 x i32> - %558 = icmp sgt <8 x i32> %broadcast.splat360, %557 - %wide.masked.gather363.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat362, i32 4, <8 x i1> %558, <8 x float> undef), !tbaa !12, !alias.scope !74, !noalias !77 - %559 = extractelement <8 x i64> %556, i32 0 - %560 = shl i64 %559, 32 - %561 = ashr exact i64 %560, 32 - %562 = getelementptr inbounds float, float* %1, i64 %561 - %563 = bitcast float* %562 to <8 x float>* - %wide.masked.load364.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %563, i32 4, <8 x i1> %558, <8 x float> undef), !tbaa !12, !alias.scope !79, !noalias !77 - %wide.masked.gather367.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat366, i32 4, <8 x i1> %558, <8 x float> undef), !tbaa !12, !alias.scope !81, !noalias !77 - %564 = getelementptr inbounds float, float* %2, i64 %561 - %565 = bitcast float* %564 to <8 x float>* - %wide.masked.load368.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %565, i32 4, <8 x i1> %558, <8 x float> undef), !tbaa !12, !alias.scope !83, !noalias !77 - %566 = fmul <8 x float> %wide.masked.gather367.2, %wide.masked.load368.2 - %567 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather363.2, <8 x float> %wide.masked.load364.2, <8 x float> %566) - %568 = extractelement <8 x i32> %557, i32 0 - %569 = add nsw i32 %mul.i.5, %568 - %570 = sext i32 %569 to i64 - %571 = getelementptr inbounds float, float* %0, i64 %570 - %572 = bitcast float* %571 to <8 x float>* - %wide.masked.load369.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %572, i32 4, <8 x i1> %558, <8 x float> undef), !tbaa !12, !alias.scope !77 - %573 = fadd <8 x float> %wide.masked.load369.2, %567 - %574 = bitcast float* %571 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %573, <8 x float>* %574, i32 4, <8 x i1> %558), !tbaa !12, !alias.scope !77, !llvm.access.group !27 - %575 = or <8 x i64> %broadcast.splat358, - %576 = trunc <8 x i64> %575 to <8 x i32> - %577 = icmp sgt <8 x i32> %broadcast.splat360, %576 - %wide.masked.gather363.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat362, i32 4, <8 x i1> %577, <8 x float> undef), !tbaa !12, !alias.scope !74, !noalias !77 - %578 = extractelement <8 x i64> %575, i32 0 - %579 = shl i64 %578, 32 - %580 = ashr exact i64 %579, 32 - %581 = getelementptr inbounds float, float* %1, i64 %580 - %582 = bitcast float* %581 to <8 x float>* - %wide.masked.load364.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %582, i32 4, <8 x i1> %577, <8 x float> undef), !tbaa !12, !alias.scope !79, !noalias !77 - %wide.masked.gather367.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat366, i32 4, <8 x i1> %577, <8 x float> undef), !tbaa !12, !alias.scope !81, !noalias !77 - %583 = getelementptr inbounds float, float* %2, i64 %580 - %584 = bitcast float* %583 to <8 x float>* - %wide.masked.load368.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %584, i32 4, <8 x i1> %577, <8 x float> undef), !tbaa !12, !alias.scope !83, !noalias !77 - %585 = fmul <8 x float> %wide.masked.gather367.3, %wide.masked.load368.3 - %586 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather363.3, <8 x float> %wide.masked.load364.3, <8 x float> %585) - %587 = extractelement <8 x i32> %576, i32 0 - %588 = add nsw i32 %mul.i.5, %587 - %589 = sext i32 %588 to i64 - %590 = getelementptr inbounds float, float* %0, i64 %589 - %591 = bitcast float* %590 to <8 x float>* - %wide.masked.load369.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %591, i32 4, <8 x i1> %577, <8 x float> undef), !tbaa !12, !alias.scope !77 - %592 = fadd <8 x float> %wide.masked.load369.3, %586 - %593 = bitcast float* %590 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %592, <8 x float>* %593, i32 4, <8 x i1> %577), !tbaa !12, !alias.scope !77, !llvm.access.group !27 - br label %pregion_for_end.i.5 - -pregion_for_entry.entry.i.us.5: ; preds = %if.end.r_exit.i.us.5.1, %pregion_for_entry.entry.i.us.5.preheader - %_local_id_x.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.us.5.preheader ], [ %817, %if.end.r_exit.i.us.5.1 ] - %add1.i.i.us.5 = add nuw nsw i64 %_local_id_x.0.us.5, %mul.i.i - %conv.i.us.5 = trunc i64 %add1.i.i.us.5 to i32 - %cmp4.i.us.5 = icmp slt i32 %conv.i.us.5, %5 - br i1 %cmp4.i.us.5, label %if.then.i.us.5, label %if.end.r_exit.i.us.5 - -if.then.i.us.5: ; preds = %pregion_for_entry.entry.i.us.5 - %594 = load float, float* %arrayidx.i.5, align 4, !tbaa !12 - %sext26.i.us.5 = shl i64 %add1.i.i.us.5, 32 - %idxprom6.i.us.5 = ashr exact i64 %sext26.i.us.5, 32 - %arrayidx7.i.us.5 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.5 - %595 = load float, float* %arrayidx7.i.us.5, align 4, !tbaa !12 - %596 = load float, float* %arrayidx9.i.5, align 4, !tbaa !12 - %arrayidx11.i.us.5 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.5 - %597 = load float, float* %arrayidx11.i.us.5, align 4, !tbaa !12 - %mul12.i.us.5 = fmul float %596, %597 - %598 = tail call float @llvm.fmuladd.f32(float %594, float %595, float %mul12.i.us.5) #6 - %add.i.us.5 = add nsw i32 %mul.i.5, %conv.i.us.5 - %idxprom13.i.us.5 = sext i32 %add.i.us.5 to i64 - %arrayidx14.i.us.5 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.5 - %599 = load float, float* %arrayidx14.i.us.5, align 4, !tbaa !12 - %add15.i.us.5 = fadd float %599, %598 - store float %add15.i.us.5, float* %arrayidx14.i.us.5, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.5 - -if.end.r_exit.i.us.5: ; preds = %if.then.i.us.5, %pregion_for_entry.entry.i.us.5 - %600 = or i64 %_local_id_x.0.us.5, 1 - %add1.i.i.us.5.1 = add nuw nsw i64 %600, %mul.i.i - %conv.i.us.5.1 = trunc i64 %add1.i.i.us.5.1 to i32 - %cmp4.i.us.5.1 = icmp slt i32 %conv.i.us.5.1, %5 - br i1 %cmp4.i.us.5.1, label %if.then.i.us.5.1, label %if.end.r_exit.i.us.5.1 - -pregion_for_end.i.5.loopexit: ; preds = %if.end.r_exit.i.us.5.1 - br label %pregion_for_end.i.5 - -pregion_for_end.i.5: ; preds = %pregion_for_end.i.5.loopexit, %vector.ph350, %pregion_for_end.i.4 - %add6.i.i.6 = or i64 %mul3.i.i, 6 - %conv2.i.6 = trunc i64 %add6.i.i.6 to i32 - %cmp.i.6 = icmp slt i32 %conv2.i.6, %5 - %sext.i.6 = shl i64 %add6.i.i.6, 32 - %idxprom.i.6 = ashr exact i64 %sext.i.6, 32 - %arrayidx.i.6 = getelementptr inbounds float, float* %3, i64 %idxprom.i.6 - %arrayidx9.i.6 = getelementptr inbounds float, float* %4, i64 %idxprom.i.6 - %mul.i.6 = mul nsw i32 %conv2.i.6, %5 - br i1 %cmp.i.6, label %vector.scevcheck377, label %pregion_for_end.i.6 - -vector.scevcheck377: ; preds = %pregion_for_end.i.5 - %601 = mul i32 %conv2.i.6, %5 - %602 = trunc i64 %7 to i32 - %603 = shl i32 %602, 5 - %604 = add i32 %601, %603 - %605 = icmp sgt i32 %604, 2147483616 - br i1 %605, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.memcheck415 - -pregion_for_entry.entry.i.us.6.preheader: ; preds = %vector.memcheck415, %vector.scevcheck377 - br label %pregion_for_entry.entry.i.us.6 - -vector.memcheck415: ; preds = %vector.scevcheck377 - %sext502 = shl i64 %8, 35 - %606 = ashr exact i64 %sext502, 32 - %607 = or i64 %606, 6 - %scevgep379 = getelementptr float, float* %3, i64 %607 - %scevgep379380 = bitcast float* %scevgep379 to i8* - %uglygep381 = getelementptr i8, i8* %scevgep379380, i64 1 - %608 = mul i32 %conv2.i.6, %5 - %609 = trunc i64 %7 to i32 - %610 = shl i32 %609, 5 - %611 = add i32 %608, %610 - %612 = sext i32 %611 to i64 - %scevgep382 = getelementptr float, float* %0, i64 %612 - %scevgep382383 = bitcast float* %scevgep382 to i8* - %613 = add nsw i64 %612, 32 - %scevgep384 = getelementptr float, float* %0, i64 %613 - %614 = sext i32 %610 to i64 - %scevgep386 = getelementptr float, float* %1, i64 %614 - %615 = add nsw i64 %614, 32 - %scevgep388 = getelementptr float, float* %1, i64 %615 - %scevgep390 = getelementptr float, float* %4, i64 %607 - %scevgep390391 = bitcast float* %scevgep390 to i8* - %uglygep392 = getelementptr i8, i8* %scevgep390391, i64 1 - %scevgep393 = getelementptr float, float* %2, i64 %614 - %scevgep395 = getelementptr float, float* %2, i64 %615 - %bound0398 = icmp ult float* %arrayidx.i.6, %scevgep384 - %bound1399 = icmp ugt i8* %uglygep381, %scevgep382383 - %found.conflict400 = and i1 %bound0398, %bound1399 - %bound0401 = icmp ult float* %scevgep386, %scevgep384 - %bound1402 = icmp ult float* %scevgep382, %scevgep388 - %found.conflict403 = and i1 %bound0401, %bound1402 - %conflict.rdx404 = or i1 %found.conflict400, %found.conflict403 - %bound0406 = icmp ult float* %arrayidx9.i.6, %scevgep384 - %bound1407 = icmp ugt i8* %uglygep392, %scevgep382383 - %found.conflict408 = and i1 %bound0406, %bound1407 - %conflict.rdx409 = or i1 %conflict.rdx404, %found.conflict408 - %bound0410 = icmp ult float* %scevgep393, %scevgep384 - %bound1411 = icmp ult float* %scevgep382, %scevgep395 - %found.conflict412 = and i1 %bound0410, %bound1411 - %conflict.rdx413 = or i1 %conflict.rdx409, %found.conflict412 - br i1 %conflict.rdx413, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.ph416 - -vector.ph416: ; preds = %vector.memcheck415 - %broadcast.splatinsert423 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat424 = shufflevector <8 x i64> %broadcast.splatinsert423, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert425 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat426 = shufflevector <8 x i32> %broadcast.splatinsert425, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert427 = insertelement <8 x float*> undef, float* %arrayidx.i.6, i32 0 - %broadcast.splat428 = shufflevector <8 x float*> %broadcast.splatinsert427, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert431 = insertelement <8 x float*> undef, float* %arrayidx9.i.6, i32 0 - %broadcast.splat432 = shufflevector <8 x float*> %broadcast.splatinsert431, <8 x float*> undef, <8 x i32> zeroinitializer - %616 = or <8 x i64> %broadcast.splat424, - %617 = trunc <8 x i64> %616 to <8 x i32> - %618 = icmp sgt <8 x i32> %broadcast.splat426, %617 - %wide.masked.gather429 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat428, i32 4, <8 x i1> %618, <8 x float> undef), !tbaa !12, !alias.scope !85, !noalias !88 - %619 = extractelement <8 x i64> %616, i32 0 - %620 = shl i64 %619, 32 - %621 = ashr exact i64 %620, 32 - %622 = getelementptr inbounds float, float* %1, i64 %621 - %623 = bitcast float* %622 to <8 x float>* - %wide.masked.load430 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %623, i32 4, <8 x i1> %618, <8 x float> undef), !tbaa !12, !alias.scope !90, !noalias !88 - %wide.masked.gather433 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat432, i32 4, <8 x i1> %618, <8 x float> undef), !tbaa !12, !alias.scope !92, !noalias !88 - %624 = getelementptr inbounds float, float* %2, i64 %621 - %625 = bitcast float* %624 to <8 x float>* - %wide.masked.load434 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %625, i32 4, <8 x i1> %618, <8 x float> undef), !tbaa !12, !alias.scope !94, !noalias !88 - %626 = fmul <8 x float> %wide.masked.gather433, %wide.masked.load434 - %627 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather429, <8 x float> %wide.masked.load430, <8 x float> %626) - %628 = extractelement <8 x i32> %617, i32 0 - %629 = add nsw i32 %mul.i.6, %628 - %630 = sext i32 %629 to i64 - %631 = getelementptr inbounds float, float* %0, i64 %630 - %632 = bitcast float* %631 to <8 x float>* - %wide.masked.load435 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %632, i32 4, <8 x i1> %618, <8 x float> undef), !tbaa !12, !alias.scope !88 - %633 = fadd <8 x float> %wide.masked.load435, %627 - %634 = bitcast float* %631 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %633, <8 x float>* %634, i32 4, <8 x i1> %618), !tbaa !12, !alias.scope !88, !llvm.access.group !27 - %635 = or <8 x i64> %broadcast.splat424, - %636 = trunc <8 x i64> %635 to <8 x i32> - %637 = icmp sgt <8 x i32> %broadcast.splat426, %636 - %wide.masked.gather429.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat428, i32 4, <8 x i1> %637, <8 x float> undef), !tbaa !12, !alias.scope !85, !noalias !88 - %638 = extractelement <8 x i64> %635, i32 0 - %639 = shl i64 %638, 32 - %640 = ashr exact i64 %639, 32 - %641 = getelementptr inbounds float, float* %1, i64 %640 - %642 = bitcast float* %641 to <8 x float>* - %wide.masked.load430.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %642, i32 4, <8 x i1> %637, <8 x float> undef), !tbaa !12, !alias.scope !90, !noalias !88 - %wide.masked.gather433.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat432, i32 4, <8 x i1> %637, <8 x float> undef), !tbaa !12, !alias.scope !92, !noalias !88 - %643 = getelementptr inbounds float, float* %2, i64 %640 - %644 = bitcast float* %643 to <8 x float>* - %wide.masked.load434.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %644, i32 4, <8 x i1> %637, <8 x float> undef), !tbaa !12, !alias.scope !94, !noalias !88 - %645 = fmul <8 x float> %wide.masked.gather433.1, %wide.masked.load434.1 - %646 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather429.1, <8 x float> %wide.masked.load430.1, <8 x float> %645) - %647 = extractelement <8 x i32> %636, i32 0 - %648 = add nsw i32 %mul.i.6, %647 - %649 = sext i32 %648 to i64 - %650 = getelementptr inbounds float, float* %0, i64 %649 - %651 = bitcast float* %650 to <8 x float>* - %wide.masked.load435.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %651, i32 4, <8 x i1> %637, <8 x float> undef), !tbaa !12, !alias.scope !88 - %652 = fadd <8 x float> %wide.masked.load435.1, %646 - %653 = bitcast float* %650 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %652, <8 x float>* %653, i32 4, <8 x i1> %637), !tbaa !12, !alias.scope !88, !llvm.access.group !27 - %654 = or <8 x i64> %broadcast.splat424, - %655 = trunc <8 x i64> %654 to <8 x i32> - %656 = icmp sgt <8 x i32> %broadcast.splat426, %655 - %wide.masked.gather429.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat428, i32 4, <8 x i1> %656, <8 x float> undef), !tbaa !12, !alias.scope !85, !noalias !88 - %657 = extractelement <8 x i64> %654, i32 0 - %658 = shl i64 %657, 32 - %659 = ashr exact i64 %658, 32 - %660 = getelementptr inbounds float, float* %1, i64 %659 - %661 = bitcast float* %660 to <8 x float>* - %wide.masked.load430.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %661, i32 4, <8 x i1> %656, <8 x float> undef), !tbaa !12, !alias.scope !90, !noalias !88 - %wide.masked.gather433.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat432, i32 4, <8 x i1> %656, <8 x float> undef), !tbaa !12, !alias.scope !92, !noalias !88 - %662 = getelementptr inbounds float, float* %2, i64 %659 - %663 = bitcast float* %662 to <8 x float>* - %wide.masked.load434.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %663, i32 4, <8 x i1> %656, <8 x float> undef), !tbaa !12, !alias.scope !94, !noalias !88 - %664 = fmul <8 x float> %wide.masked.gather433.2, %wide.masked.load434.2 - %665 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather429.2, <8 x float> %wide.masked.load430.2, <8 x float> %664) - %666 = extractelement <8 x i32> %655, i32 0 - %667 = add nsw i32 %mul.i.6, %666 - %668 = sext i32 %667 to i64 - %669 = getelementptr inbounds float, float* %0, i64 %668 - %670 = bitcast float* %669 to <8 x float>* - %wide.masked.load435.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %670, i32 4, <8 x i1> %656, <8 x float> undef), !tbaa !12, !alias.scope !88 - %671 = fadd <8 x float> %wide.masked.load435.2, %665 - %672 = bitcast float* %669 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %671, <8 x float>* %672, i32 4, <8 x i1> %656), !tbaa !12, !alias.scope !88, !llvm.access.group !27 - %673 = or <8 x i64> %broadcast.splat424, - %674 = trunc <8 x i64> %673 to <8 x i32> - %675 = icmp sgt <8 x i32> %broadcast.splat426, %674 - %wide.masked.gather429.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat428, i32 4, <8 x i1> %675, <8 x float> undef), !tbaa !12, !alias.scope !85, !noalias !88 - %676 = extractelement <8 x i64> %673, i32 0 - %677 = shl i64 %676, 32 - %678 = ashr exact i64 %677, 32 - %679 = getelementptr inbounds float, float* %1, i64 %678 - %680 = bitcast float* %679 to <8 x float>* - %wide.masked.load430.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %680, i32 4, <8 x i1> %675, <8 x float> undef), !tbaa !12, !alias.scope !90, !noalias !88 - %wide.masked.gather433.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat432, i32 4, <8 x i1> %675, <8 x float> undef), !tbaa !12, !alias.scope !92, !noalias !88 - %681 = getelementptr inbounds float, float* %2, i64 %678 - %682 = bitcast float* %681 to <8 x float>* - %wide.masked.load434.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %682, i32 4, <8 x i1> %675, <8 x float> undef), !tbaa !12, !alias.scope !94, !noalias !88 - %683 = fmul <8 x float> %wide.masked.gather433.3, %wide.masked.load434.3 - %684 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather429.3, <8 x float> %wide.masked.load430.3, <8 x float> %683) - %685 = extractelement <8 x i32> %674, i32 0 - %686 = add nsw i32 %mul.i.6, %685 - %687 = sext i32 %686 to i64 - %688 = getelementptr inbounds float, float* %0, i64 %687 - %689 = bitcast float* %688 to <8 x float>* - %wide.masked.load435.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %689, i32 4, <8 x i1> %675, <8 x float> undef), !tbaa !12, !alias.scope !88 - %690 = fadd <8 x float> %wide.masked.load435.3, %684 - %691 = bitcast float* %688 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %690, <8 x float>* %691, i32 4, <8 x i1> %675), !tbaa !12, !alias.scope !88, !llvm.access.group !27 - br label %pregion_for_end.i.6 - -pregion_for_entry.entry.i.us.6: ; preds = %if.end.r_exit.i.us.6.1, %pregion_for_entry.entry.i.us.6.preheader - %_local_id_x.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.us.6.preheader ], [ %810, %if.end.r_exit.i.us.6.1 ] - %add1.i.i.us.6 = add nuw nsw i64 %_local_id_x.0.us.6, %mul.i.i - %conv.i.us.6 = trunc i64 %add1.i.i.us.6 to i32 - %cmp4.i.us.6 = icmp slt i32 %conv.i.us.6, %5 - br i1 %cmp4.i.us.6, label %if.then.i.us.6, label %if.end.r_exit.i.us.6 - -if.then.i.us.6: ; preds = %pregion_for_entry.entry.i.us.6 - %692 = load float, float* %arrayidx.i.6, align 4, !tbaa !12 - %sext26.i.us.6 = shl i64 %add1.i.i.us.6, 32 - %idxprom6.i.us.6 = ashr exact i64 %sext26.i.us.6, 32 - %arrayidx7.i.us.6 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.6 - %693 = load float, float* %arrayidx7.i.us.6, align 4, !tbaa !12 - %694 = load float, float* %arrayidx9.i.6, align 4, !tbaa !12 - %arrayidx11.i.us.6 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.6 - %695 = load float, float* %arrayidx11.i.us.6, align 4, !tbaa !12 - %mul12.i.us.6 = fmul float %694, %695 - %696 = tail call float @llvm.fmuladd.f32(float %692, float %693, float %mul12.i.us.6) #6 - %add.i.us.6 = add nsw i32 %mul.i.6, %conv.i.us.6 - %idxprom13.i.us.6 = sext i32 %add.i.us.6 to i64 - %arrayidx14.i.us.6 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.6 - %697 = load float, float* %arrayidx14.i.us.6, align 4, !tbaa !12 - %add15.i.us.6 = fadd float %697, %696 - store float %add15.i.us.6, float* %arrayidx14.i.us.6, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.6 - -if.end.r_exit.i.us.6: ; preds = %if.then.i.us.6, %pregion_for_entry.entry.i.us.6 - %698 = or i64 %_local_id_x.0.us.6, 1 - %add1.i.i.us.6.1 = add nuw nsw i64 %698, %mul.i.i - %conv.i.us.6.1 = trunc i64 %add1.i.i.us.6.1 to i32 - %cmp4.i.us.6.1 = icmp slt i32 %conv.i.us.6.1, %5 - br i1 %cmp4.i.us.6.1, label %if.then.i.us.6.1, label %if.end.r_exit.i.us.6.1 - -pregion_for_end.i.6.loopexit: ; preds = %if.end.r_exit.i.us.6.1 - br label %pregion_for_end.i.6 - -pregion_for_end.i.6: ; preds = %pregion_for_end.i.6.loopexit, %vector.ph416, %pregion_for_end.i.5 - %add6.i.i.7 = or i64 %mul3.i.i, 7 - %conv2.i.7 = trunc i64 %add6.i.i.7 to i32 - %cmp.i.7 = icmp slt i32 %conv2.i.7, %5 - %sext.i.7 = shl i64 %add6.i.i.7, 32 - %idxprom.i.7 = ashr exact i64 %sext.i.7, 32 - %arrayidx.i.7 = getelementptr inbounds float, float* %3, i64 %idxprom.i.7 - %arrayidx9.i.7 = getelementptr inbounds float, float* %4, i64 %idxprom.i.7 - %mul.i.7 = mul nsw i32 %conv2.i.7, %5 - br i1 %cmp.i.7, label %vector.scevcheck443, label %pregion_for_end.i.7 - -vector.scevcheck443: ; preds = %pregion_for_end.i.6 - %699 = mul i32 %conv2.i.7, %5 - %700 = trunc i64 %7 to i32 - %701 = shl i32 %700, 5 - %702 = add i32 %699, %701 - %703 = icmp sgt i32 %702, 2147483616 - br i1 %703, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.memcheck481 - -pregion_for_entry.entry.i.us.7.preheader: ; preds = %vector.memcheck481, %vector.scevcheck443 - br label %pregion_for_entry.entry.i.us.7 - -vector.memcheck481: ; preds = %vector.scevcheck443 - %sext = shl i64 %8, 35 - %704 = ashr exact i64 %sext, 32 - %705 = or i64 %704, 7 - %scevgep445 = getelementptr float, float* %3, i64 %705 - %scevgep445446 = bitcast float* %scevgep445 to i8* - %uglygep447 = getelementptr i8, i8* %scevgep445446, i64 1 - %706 = mul i32 %conv2.i.7, %5 - %707 = trunc i64 %7 to i32 - %708 = shl i32 %707, 5 - %709 = add i32 %706, %708 - %710 = sext i32 %709 to i64 - %scevgep448 = getelementptr float, float* %0, i64 %710 - %scevgep448449 = bitcast float* %scevgep448 to i8* - %711 = add nsw i64 %710, 32 - %scevgep450 = getelementptr float, float* %0, i64 %711 - %712 = sext i32 %708 to i64 - %scevgep452 = getelementptr float, float* %1, i64 %712 - %713 = add nsw i64 %712, 32 - %scevgep454 = getelementptr float, float* %1, i64 %713 - %scevgep456 = getelementptr float, float* %4, i64 %705 - %scevgep456457 = bitcast float* %scevgep456 to i8* - %uglygep458 = getelementptr i8, i8* %scevgep456457, i64 1 - %scevgep459 = getelementptr float, float* %2, i64 %712 - %scevgep461 = getelementptr float, float* %2, i64 %713 - %bound0464 = icmp ult float* %arrayidx.i.7, %scevgep450 - %bound1465 = icmp ugt i8* %uglygep447, %scevgep448449 - %found.conflict466 = and i1 %bound0464, %bound1465 - %bound0467 = icmp ult float* %scevgep452, %scevgep450 - %bound1468 = icmp ult float* %scevgep448, %scevgep454 - %found.conflict469 = and i1 %bound0467, %bound1468 - %conflict.rdx470 = or i1 %found.conflict466, %found.conflict469 - %bound0472 = icmp ult float* %arrayidx9.i.7, %scevgep450 - %bound1473 = icmp ugt i8* %uglygep458, %scevgep448449 - %found.conflict474 = and i1 %bound0472, %bound1473 - %conflict.rdx475 = or i1 %conflict.rdx470, %found.conflict474 - %bound0476 = icmp ult float* %scevgep459, %scevgep450 - %bound1477 = icmp ult float* %scevgep448, %scevgep461 - %found.conflict478 = and i1 %bound0476, %bound1477 - %conflict.rdx479 = or i1 %conflict.rdx475, %found.conflict478 - br i1 %conflict.rdx479, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.ph482 - -vector.ph482: ; preds = %vector.memcheck481 - %broadcast.splatinsert489 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat490 = shufflevector <8 x i64> %broadcast.splatinsert489, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert491 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat492 = shufflevector <8 x i32> %broadcast.splatinsert491, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert493 = insertelement <8 x float*> undef, float* %arrayidx.i.7, i32 0 - %broadcast.splat494 = shufflevector <8 x float*> %broadcast.splatinsert493, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert497 = insertelement <8 x float*> undef, float* %arrayidx9.i.7, i32 0 - %broadcast.splat498 = shufflevector <8 x float*> %broadcast.splatinsert497, <8 x float*> undef, <8 x i32> zeroinitializer - %714 = or <8 x i64> %broadcast.splat490, - %715 = trunc <8 x i64> %714 to <8 x i32> - %716 = icmp sgt <8 x i32> %broadcast.splat492, %715 - %wide.masked.gather495 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat494, i32 4, <8 x i1> %716, <8 x float> undef), !tbaa !12, !alias.scope !96, !noalias !99 - %717 = extractelement <8 x i64> %714, i32 0 - %718 = shl i64 %717, 32 - %719 = ashr exact i64 %718, 32 - %720 = getelementptr inbounds float, float* %1, i64 %719 - %721 = bitcast float* %720 to <8 x float>* - %wide.masked.load496 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %721, i32 4, <8 x i1> %716, <8 x float> undef), !tbaa !12, !alias.scope !101, !noalias !99 - %wide.masked.gather499 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat498, i32 4, <8 x i1> %716, <8 x float> undef), !tbaa !12, !alias.scope !103, !noalias !99 - %722 = getelementptr inbounds float, float* %2, i64 %719 - %723 = bitcast float* %722 to <8 x float>* - %wide.masked.load500 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %723, i32 4, <8 x i1> %716, <8 x float> undef), !tbaa !12, !alias.scope !105, !noalias !99 - %724 = fmul <8 x float> %wide.masked.gather499, %wide.masked.load500 - %725 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather495, <8 x float> %wide.masked.load496, <8 x float> %724) - %726 = extractelement <8 x i32> %715, i32 0 - %727 = add nsw i32 %mul.i.7, %726 - %728 = sext i32 %727 to i64 - %729 = getelementptr inbounds float, float* %0, i64 %728 - %730 = bitcast float* %729 to <8 x float>* - %wide.masked.load501 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %730, i32 4, <8 x i1> %716, <8 x float> undef), !tbaa !12, !alias.scope !99 - %731 = fadd <8 x float> %wide.masked.load501, %725 - %732 = bitcast float* %729 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %731, <8 x float>* %732, i32 4, <8 x i1> %716), !tbaa !12, !alias.scope !99, !llvm.access.group !27 - %733 = or <8 x i64> %broadcast.splat490, - %734 = trunc <8 x i64> %733 to <8 x i32> - %735 = icmp sgt <8 x i32> %broadcast.splat492, %734 - %wide.masked.gather495.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat494, i32 4, <8 x i1> %735, <8 x float> undef), !tbaa !12, !alias.scope !96, !noalias !99 - %736 = extractelement <8 x i64> %733, i32 0 - %737 = shl i64 %736, 32 - %738 = ashr exact i64 %737, 32 - %739 = getelementptr inbounds float, float* %1, i64 %738 - %740 = bitcast float* %739 to <8 x float>* - %wide.masked.load496.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %740, i32 4, <8 x i1> %735, <8 x float> undef), !tbaa !12, !alias.scope !101, !noalias !99 - %wide.masked.gather499.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat498, i32 4, <8 x i1> %735, <8 x float> undef), !tbaa !12, !alias.scope !103, !noalias !99 - %741 = getelementptr inbounds float, float* %2, i64 %738 - %742 = bitcast float* %741 to <8 x float>* - %wide.masked.load500.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %742, i32 4, <8 x i1> %735, <8 x float> undef), !tbaa !12, !alias.scope !105, !noalias !99 - %743 = fmul <8 x float> %wide.masked.gather499.1, %wide.masked.load500.1 - %744 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather495.1, <8 x float> %wide.masked.load496.1, <8 x float> %743) - %745 = extractelement <8 x i32> %734, i32 0 - %746 = add nsw i32 %mul.i.7, %745 - %747 = sext i32 %746 to i64 - %748 = getelementptr inbounds float, float* %0, i64 %747 - %749 = bitcast float* %748 to <8 x float>* - %wide.masked.load501.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %749, i32 4, <8 x i1> %735, <8 x float> undef), !tbaa !12, !alias.scope !99 - %750 = fadd <8 x float> %wide.masked.load501.1, %744 - %751 = bitcast float* %748 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %750, <8 x float>* %751, i32 4, <8 x i1> %735), !tbaa !12, !alias.scope !99, !llvm.access.group !27 - %752 = or <8 x i64> %broadcast.splat490, - %753 = trunc <8 x i64> %752 to <8 x i32> - %754 = icmp sgt <8 x i32> %broadcast.splat492, %753 - %wide.masked.gather495.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat494, i32 4, <8 x i1> %754, <8 x float> undef), !tbaa !12, !alias.scope !96, !noalias !99 - %755 = extractelement <8 x i64> %752, i32 0 - %756 = shl i64 %755, 32 - %757 = ashr exact i64 %756, 32 - %758 = getelementptr inbounds float, float* %1, i64 %757 - %759 = bitcast float* %758 to <8 x float>* - %wide.masked.load496.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %759, i32 4, <8 x i1> %754, <8 x float> undef), !tbaa !12, !alias.scope !101, !noalias !99 - %wide.masked.gather499.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat498, i32 4, <8 x i1> %754, <8 x float> undef), !tbaa !12, !alias.scope !103, !noalias !99 - %760 = getelementptr inbounds float, float* %2, i64 %757 - %761 = bitcast float* %760 to <8 x float>* - %wide.masked.load500.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %761, i32 4, <8 x i1> %754, <8 x float> undef), !tbaa !12, !alias.scope !105, !noalias !99 - %762 = fmul <8 x float> %wide.masked.gather499.2, %wide.masked.load500.2 - %763 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather495.2, <8 x float> %wide.masked.load496.2, <8 x float> %762) - %764 = extractelement <8 x i32> %753, i32 0 - %765 = add nsw i32 %mul.i.7, %764 - %766 = sext i32 %765 to i64 - %767 = getelementptr inbounds float, float* %0, i64 %766 - %768 = bitcast float* %767 to <8 x float>* - %wide.masked.load501.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %768, i32 4, <8 x i1> %754, <8 x float> undef), !tbaa !12, !alias.scope !99 - %769 = fadd <8 x float> %wide.masked.load501.2, %763 - %770 = bitcast float* %767 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %769, <8 x float>* %770, i32 4, <8 x i1> %754), !tbaa !12, !alias.scope !99, !llvm.access.group !27 - %771 = or <8 x i64> %broadcast.splat490, - %772 = trunc <8 x i64> %771 to <8 x i32> - %773 = icmp sgt <8 x i32> %broadcast.splat492, %772 - %wide.masked.gather495.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat494, i32 4, <8 x i1> %773, <8 x float> undef), !tbaa !12, !alias.scope !96, !noalias !99 - %774 = extractelement <8 x i64> %771, i32 0 - %775 = shl i64 %774, 32 - %776 = ashr exact i64 %775, 32 - %777 = getelementptr inbounds float, float* %1, i64 %776 - %778 = bitcast float* %777 to <8 x float>* - %wide.masked.load496.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %778, i32 4, <8 x i1> %773, <8 x float> undef), !tbaa !12, !alias.scope !101, !noalias !99 - %wide.masked.gather499.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat498, i32 4, <8 x i1> %773, <8 x float> undef), !tbaa !12, !alias.scope !103, !noalias !99 - %779 = getelementptr inbounds float, float* %2, i64 %776 - %780 = bitcast float* %779 to <8 x float>* - %wide.masked.load500.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %780, i32 4, <8 x i1> %773, <8 x float> undef), !tbaa !12, !alias.scope !105, !noalias !99 - %781 = fmul <8 x float> %wide.masked.gather499.3, %wide.masked.load500.3 - %782 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather495.3, <8 x float> %wide.masked.load496.3, <8 x float> %781) - %783 = extractelement <8 x i32> %772, i32 0 - %784 = add nsw i32 %mul.i.7, %783 - %785 = sext i32 %784 to i64 - %786 = getelementptr inbounds float, float* %0, i64 %785 - %787 = bitcast float* %786 to <8 x float>* - %wide.masked.load501.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %787, i32 4, <8 x i1> %773, <8 x float> undef), !tbaa !12, !alias.scope !99 - %788 = fadd <8 x float> %wide.masked.load501.3, %782 - %789 = bitcast float* %786 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %788, <8 x float>* %789, i32 4, <8 x i1> %773), !tbaa !12, !alias.scope !99, !llvm.access.group !27 - br label %pregion_for_end.i.7 - -pregion_for_entry.entry.i.us.7: ; preds = %if.end.r_exit.i.us.7.1, %pregion_for_entry.entry.i.us.7.preheader - %_local_id_x.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.us.7.preheader ], [ %803, %if.end.r_exit.i.us.7.1 ] - %add1.i.i.us.7 = add nuw nsw i64 %_local_id_x.0.us.7, %mul.i.i - %conv.i.us.7 = trunc i64 %add1.i.i.us.7 to i32 - %cmp4.i.us.7 = icmp slt i32 %conv.i.us.7, %5 - br i1 %cmp4.i.us.7, label %if.then.i.us.7, label %if.end.r_exit.i.us.7 - -if.then.i.us.7: ; preds = %pregion_for_entry.entry.i.us.7 - %790 = load float, float* %arrayidx.i.7, align 4, !tbaa !12 - %sext26.i.us.7 = shl i64 %add1.i.i.us.7, 32 - %idxprom6.i.us.7 = ashr exact i64 %sext26.i.us.7, 32 - %arrayidx7.i.us.7 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.7 - %791 = load float, float* %arrayidx7.i.us.7, align 4, !tbaa !12 - %792 = load float, float* %arrayidx9.i.7, align 4, !tbaa !12 - %arrayidx11.i.us.7 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.7 - %793 = load float, float* %arrayidx11.i.us.7, align 4, !tbaa !12 - %mul12.i.us.7 = fmul float %792, %793 - %794 = tail call float @llvm.fmuladd.f32(float %790, float %791, float %mul12.i.us.7) #6 - %add.i.us.7 = add nsw i32 %mul.i.7, %conv.i.us.7 - %idxprom13.i.us.7 = sext i32 %add.i.us.7 to i64 - %arrayidx14.i.us.7 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.7 - %795 = load float, float* %arrayidx14.i.us.7, align 4, !tbaa !12 - %add15.i.us.7 = fadd float %795, %794 - store float %add15.i.us.7, float* %arrayidx14.i.us.7, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.7 - -if.end.r_exit.i.us.7: ; preds = %if.then.i.us.7, %pregion_for_entry.entry.i.us.7 - %796 = or i64 %_local_id_x.0.us.7, 1 - %add1.i.i.us.7.1 = add nuw nsw i64 %796, %mul.i.i - %conv.i.us.7.1 = trunc i64 %add1.i.i.us.7.1 to i32 - %cmp4.i.us.7.1 = icmp slt i32 %conv.i.us.7.1, %5 - br i1 %cmp4.i.us.7.1, label %if.then.i.us.7.1, label %if.end.r_exit.i.us.7.1 - -pregion_for_end.i.7.loopexit: ; preds = %if.end.r_exit.i.us.7.1 - br label %pregion_for_end.i.7 - -pregion_for_end.i.7: ; preds = %pregion_for_end.i.7.loopexit, %vector.ph482, %pregion_for_end.i.6 - ret void - -if.then.i.us.7.1: ; preds = %if.end.r_exit.i.us.7 - %797 = load float, float* %arrayidx.i.7, align 4, !tbaa !12 - %sext26.i.us.7.1 = shl i64 %add1.i.i.us.7.1, 32 - %idxprom6.i.us.7.1 = ashr exact i64 %sext26.i.us.7.1, 32 - %arrayidx7.i.us.7.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.7.1 - %798 = load float, float* %arrayidx7.i.us.7.1, align 4, !tbaa !12 - %799 = load float, float* %arrayidx9.i.7, align 4, !tbaa !12 - %arrayidx11.i.us.7.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.7.1 - %800 = load float, float* %arrayidx11.i.us.7.1, align 4, !tbaa !12 - %mul12.i.us.7.1 = fmul float %799, %800 - %801 = tail call float @llvm.fmuladd.f32(float %797, float %798, float %mul12.i.us.7.1) #6 - %add.i.us.7.1 = add nsw i32 %mul.i.7, %conv.i.us.7.1 - %idxprom13.i.us.7.1 = sext i32 %add.i.us.7.1 to i64 - %arrayidx14.i.us.7.1 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.7.1 - %802 = load float, float* %arrayidx14.i.us.7.1, align 4, !tbaa !12 - %add15.i.us.7.1 = fadd float %802, %801 - store float %add15.i.us.7.1, float* %arrayidx14.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.7.1 - -if.end.r_exit.i.us.7.1: ; preds = %if.then.i.us.7.1, %if.end.r_exit.i.us.7 - %803 = add nuw nsw i64 %_local_id_x.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %803, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.7.loopexit, label %pregion_for_entry.entry.i.us.7, !llvm.loop !107 - -if.then.i.us.6.1: ; preds = %if.end.r_exit.i.us.6 - %804 = load float, float* %arrayidx.i.6, align 4, !tbaa !12 - %sext26.i.us.6.1 = shl i64 %add1.i.i.us.6.1, 32 - %idxprom6.i.us.6.1 = ashr exact i64 %sext26.i.us.6.1, 32 - %arrayidx7.i.us.6.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.6.1 - %805 = load float, float* %arrayidx7.i.us.6.1, align 4, !tbaa !12 - %806 = load float, float* %arrayidx9.i.6, align 4, !tbaa !12 - %arrayidx11.i.us.6.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.6.1 - %807 = load float, float* %arrayidx11.i.us.6.1, align 4, !tbaa !12 - %mul12.i.us.6.1 = fmul float %806, %807 - %808 = tail call float @llvm.fmuladd.f32(float %804, float %805, float %mul12.i.us.6.1) #6 - %add.i.us.6.1 = add nsw i32 %mul.i.6, %conv.i.us.6.1 - %idxprom13.i.us.6.1 = sext i32 %add.i.us.6.1 to i64 - %arrayidx14.i.us.6.1 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.6.1 - %809 = load float, float* %arrayidx14.i.us.6.1, align 4, !tbaa !12 - %add15.i.us.6.1 = fadd float %809, %808 - store float %add15.i.us.6.1, float* %arrayidx14.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.6.1 - -if.end.r_exit.i.us.6.1: ; preds = %if.then.i.us.6.1, %if.end.r_exit.i.us.6 - %810 = add nuw nsw i64 %_local_id_x.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %810, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.6.loopexit, label %pregion_for_entry.entry.i.us.6, !llvm.loop !110 - -if.then.i.us.5.1: ; preds = %if.end.r_exit.i.us.5 - %811 = load float, float* %arrayidx.i.5, align 4, !tbaa !12 - %sext26.i.us.5.1 = shl i64 %add1.i.i.us.5.1, 32 - %idxprom6.i.us.5.1 = ashr exact i64 %sext26.i.us.5.1, 32 - %arrayidx7.i.us.5.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.5.1 - %812 = load float, float* %arrayidx7.i.us.5.1, align 4, !tbaa !12 - %813 = load float, float* %arrayidx9.i.5, align 4, !tbaa !12 - %arrayidx11.i.us.5.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.5.1 - %814 = load float, float* %arrayidx11.i.us.5.1, align 4, !tbaa !12 - %mul12.i.us.5.1 = fmul float %813, %814 - %815 = tail call float @llvm.fmuladd.f32(float %811, float %812, float %mul12.i.us.5.1) #6 - %add.i.us.5.1 = add nsw i32 %mul.i.5, %conv.i.us.5.1 - %idxprom13.i.us.5.1 = sext i32 %add.i.us.5.1 to i64 - %arrayidx14.i.us.5.1 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.5.1 - %816 = load float, float* %arrayidx14.i.us.5.1, align 4, !tbaa !12 - %add15.i.us.5.1 = fadd float %816, %815 - store float %add15.i.us.5.1, float* %arrayidx14.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.5.1 - -if.end.r_exit.i.us.5.1: ; preds = %if.then.i.us.5.1, %if.end.r_exit.i.us.5 - %817 = add nuw nsw i64 %_local_id_x.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %817, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.5.loopexit, label %pregion_for_entry.entry.i.us.5, !llvm.loop !111 - -if.then.i.us.4.1: ; preds = %if.end.r_exit.i.us.4 - %818 = load float, float* %arrayidx.i.4, align 4, !tbaa !12 - %sext26.i.us.4.1 = shl i64 %add1.i.i.us.4.1, 32 - %idxprom6.i.us.4.1 = ashr exact i64 %sext26.i.us.4.1, 32 - %arrayidx7.i.us.4.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.4.1 - %819 = load float, float* %arrayidx7.i.us.4.1, align 4, !tbaa !12 - %820 = load float, float* %arrayidx9.i.4, align 4, !tbaa !12 - %arrayidx11.i.us.4.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.4.1 - %821 = load float, float* %arrayidx11.i.us.4.1, align 4, !tbaa !12 - %mul12.i.us.4.1 = fmul float %820, %821 - %822 = tail call float @llvm.fmuladd.f32(float %818, float %819, float %mul12.i.us.4.1) #6 - %add.i.us.4.1 = add nsw i32 %mul.i.4, %conv.i.us.4.1 - %idxprom13.i.us.4.1 = sext i32 %add.i.us.4.1 to i64 - %arrayidx14.i.us.4.1 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.4.1 - %823 = load float, float* %arrayidx14.i.us.4.1, align 4, !tbaa !12 - %add15.i.us.4.1 = fadd float %823, %822 - store float %add15.i.us.4.1, float* %arrayidx14.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.4.1 - -if.end.r_exit.i.us.4.1: ; preds = %if.then.i.us.4.1, %if.end.r_exit.i.us.4 - %824 = add nuw nsw i64 %_local_id_x.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %824, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.4.loopexit, label %pregion_for_entry.entry.i.us.4, !llvm.loop !112 - -if.then.i.us.3.1: ; preds = %if.end.r_exit.i.us.3 - %825 = load float, float* %arrayidx.i.3, align 4, !tbaa !12 - %sext26.i.us.3.1 = shl i64 %add1.i.i.us.3.1, 32 - %idxprom6.i.us.3.1 = ashr exact i64 %sext26.i.us.3.1, 32 - %arrayidx7.i.us.3.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.3.1 - %826 = load float, float* %arrayidx7.i.us.3.1, align 4, !tbaa !12 - %827 = load float, float* %arrayidx9.i.3, align 4, !tbaa !12 - %arrayidx11.i.us.3.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.3.1 - %828 = load float, float* %arrayidx11.i.us.3.1, align 4, !tbaa !12 - %mul12.i.us.3.1 = fmul float %827, %828 - %829 = tail call float @llvm.fmuladd.f32(float %825, float %826, float %mul12.i.us.3.1) #6 - %add.i.us.3.1 = add nsw i32 %mul.i.3, %conv.i.us.3.1 - %idxprom13.i.us.3.1 = sext i32 %add.i.us.3.1 to i64 - %arrayidx14.i.us.3.1 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.3.1 - %830 = load float, float* %arrayidx14.i.us.3.1, align 4, !tbaa !12 - %add15.i.us.3.1 = fadd float %830, %829 - store float %add15.i.us.3.1, float* %arrayidx14.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.3.1 - -if.end.r_exit.i.us.3.1: ; preds = %if.then.i.us.3.1, %if.end.r_exit.i.us.3 - %831 = add nuw nsw i64 %_local_id_x.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %831, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.3.loopexit, label %pregion_for_entry.entry.i.us.3, !llvm.loop !113 - -if.then.i.us.2.1: ; preds = %if.end.r_exit.i.us.2 - %832 = load float, float* %arrayidx.i.2, align 4, !tbaa !12 - %sext26.i.us.2.1 = shl i64 %add1.i.i.us.2.1, 32 - %idxprom6.i.us.2.1 = ashr exact i64 %sext26.i.us.2.1, 32 - %arrayidx7.i.us.2.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.2.1 - %833 = load float, float* %arrayidx7.i.us.2.1, align 4, !tbaa !12 - %834 = load float, float* %arrayidx9.i.2, align 4, !tbaa !12 - %arrayidx11.i.us.2.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.2.1 - %835 = load float, float* %arrayidx11.i.us.2.1, align 4, !tbaa !12 - %mul12.i.us.2.1 = fmul float %834, %835 - %836 = tail call float @llvm.fmuladd.f32(float %832, float %833, float %mul12.i.us.2.1) #6 - %add.i.us.2.1 = add nsw i32 %mul.i.2, %conv.i.us.2.1 - %idxprom13.i.us.2.1 = sext i32 %add.i.us.2.1 to i64 - %arrayidx14.i.us.2.1 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.2.1 - %837 = load float, float* %arrayidx14.i.us.2.1, align 4, !tbaa !12 - %add15.i.us.2.1 = fadd float %837, %836 - store float %add15.i.us.2.1, float* %arrayidx14.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.2.1 - -if.end.r_exit.i.us.2.1: ; preds = %if.then.i.us.2.1, %if.end.r_exit.i.us.2 - %838 = add nuw nsw i64 %_local_id_x.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %838, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.2.loopexit, label %pregion_for_entry.entry.i.us.2, !llvm.loop !114 - -if.then.i.us.1.1: ; preds = %if.end.r_exit.i.us.1 - %839 = load float, float* %arrayidx.i.1, align 4, !tbaa !12 - %sext26.i.us.1.1 = shl i64 %add1.i.i.us.1.1, 32 - %idxprom6.i.us.1.1 = ashr exact i64 %sext26.i.us.1.1, 32 - %arrayidx7.i.us.1.1 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.1.1 - %840 = load float, float* %arrayidx7.i.us.1.1, align 4, !tbaa !12 - %841 = load float, float* %arrayidx9.i.1, align 4, !tbaa !12 - %arrayidx11.i.us.1.1 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.1.1 - %842 = load float, float* %arrayidx11.i.us.1.1, align 4, !tbaa !12 - %mul12.i.us.1.1 = fmul float %841, %842 - %843 = tail call float @llvm.fmuladd.f32(float %839, float %840, float %mul12.i.us.1.1) #6 - %add.i.us.1.1 = add nsw i32 %mul.i.1, %conv.i.us.1.1 - %idxprom13.i.us.1.1 = sext i32 %add.i.us.1.1 to i64 - %arrayidx14.i.us.1.1 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.1.1 - %844 = load float, float* %arrayidx14.i.us.1.1, align 4, !tbaa !12 - %add15.i.us.1.1 = fadd float %844, %843 - store float %add15.i.us.1.1, float* %arrayidx14.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.1.1 - -if.end.r_exit.i.us.1.1: ; preds = %if.then.i.us.1.1, %if.end.r_exit.i.us.1 - %845 = add nuw nsw i64 %_local_id_x.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %845, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.1.loopexit, label %pregion_for_entry.entry.i.us.1, !llvm.loop !115 - -if.then.i.us.1531: ; preds = %if.end.r_exit.i.us - %846 = load float, float* %arrayidx.i, align 4, !tbaa !12 - %sext26.i.us.1522 = shl i64 %add1.i.i.us.1518, 32 - %idxprom6.i.us.1523 = ashr exact i64 %sext26.i.us.1522, 32 - %arrayidx7.i.us.1524 = getelementptr inbounds float, float* %1, i64 %idxprom6.i.us.1523 - %847 = load float, float* %arrayidx7.i.us.1524, align 4, !tbaa !12 - %848 = load float, float* %arrayidx9.i, align 4, !tbaa !12 - %arrayidx11.i.us.1525 = getelementptr inbounds float, float* %2, i64 %idxprom6.i.us.1523 - %849 = load float, float* %arrayidx11.i.us.1525, align 4, !tbaa !12 - %mul12.i.us.1526 = fmul float %848, %849 - %850 = tail call float @llvm.fmuladd.f32(float %846, float %847, float %mul12.i.us.1526) #6 - %add.i.us.1527 = add nsw i32 %mul.i, %conv.i.us.1519 - %idxprom13.i.us.1528 = sext i32 %add.i.us.1527 to i64 - %arrayidx14.i.us.1529 = getelementptr inbounds float, float* %0, i64 %idxprom13.i.us.1528 - %851 = load float, float* %arrayidx14.i.us.1529, align 4, !tbaa !12 - %add15.i.us.1530 = fadd float %851, %850 - store float %add15.i.us.1530, float* %arrayidx14.i.us.1529, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.us.1532 - -if.end.r_exit.i.us.1532: ; preds = %if.then.i.us.1531, %if.end.r_exit.i.us - %852 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %852, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !116 -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_gemver_kernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { -pregion_for_entry.pregion_for_init.i.i: - %5 = bitcast i8** %0 to float*** - %6 = load float**, float*** %5, align 8 - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float*** - %10 = load float**, float*** %9, align 8 - %11 = load float*, float** %10, align 8 - %12 = getelementptr i8*, i8** %0, i64 2 - %13 = bitcast i8** %12 to float*** - %14 = load float**, float*** %13, align 8 - %15 = load float*, float** %14, align 8 - %16 = getelementptr i8*, i8** %0, i64 3 - %17 = bitcast i8** %16 to float*** - %18 = load float**, float*** %17, align 8 - %19 = load float*, float** %18, align 8 - %20 = getelementptr i8*, i8** %0, i64 4 - %21 = bitcast i8** %20 to float*** - %22 = load float**, float*** %21, align 8 - %23 = load float*, float** %22, align 8 - %24 = getelementptr i8*, i8** %0, i64 5 - %25 = bitcast i8** %24 to i32** - %26 = load i32*, i32** %25, align 8 - %27 = load i32, i32* %26, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %conv2.i.i = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %27, %conv2.i.i - %sext.i.i = shl i64 %3, 35 - %idxprom.i.i = ashr exact i64 %sext.i.i, 32 - %arrayidx.i.i = getelementptr inbounds float, float* %19, i64 %idxprom.i.i - %arrayidx9.i.i = getelementptr inbounds float, float* %23, i64 %idxprom.i.i - %mul.i.i = mul nsw i32 %27, %conv2.i.i - br i1 %cmp.i.i, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %28 = trunc i64 %3 to i32 - %29 = mul i32 %27, %28 - %30 = shl i32 %29, 3 - %31 = trunc i64 %2 to i32 - %32 = shl i32 %31, 5 - %33 = add i32 %30, %32 - %34 = icmp sgt i32 %33, 2147483616 - br i1 %34, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %sext508 = shl i64 %3, 35 - %35 = ashr exact i64 %sext508, 32 - %scevgep = getelementptr float, float* %19, i64 %35 - %scevgep3 = bitcast float* %scevgep to i8* - %uglygep = getelementptr i8, i8* %scevgep3, i64 1 - %36 = trunc i64 %3 to i32 - %37 = mul i32 %27, %36 - %38 = shl i32 %37, 3 - %39 = trunc i64 %2 to i32 - %40 = shl i32 %39, 5 - %41 = add i32 %38, %40 - %42 = sext i32 %41 to i64 - %scevgep4 = getelementptr float, float* %7, i64 %42 - %scevgep45 = bitcast float* %scevgep4 to i8* - %43 = add nsw i64 %42, 32 - %scevgep6 = getelementptr float, float* %7, i64 %43 - %44 = sext i32 %40 to i64 - %scevgep8 = getelementptr float, float* %11, i64 %44 - %45 = add nsw i64 %44, 32 - %scevgep10 = getelementptr float, float* %11, i64 %45 - %scevgep12 = getelementptr float, float* %23, i64 %35 - %scevgep1213 = bitcast float* %scevgep12 to i8* - %uglygep14 = getelementptr i8, i8* %scevgep1213, i64 1 - %scevgep15 = getelementptr float, float* %15, i64 %44 - %scevgep17 = getelementptr float, float* %15, i64 %45 - %bound0 = icmp ult float* %arrayidx.i.i, %scevgep6 - %bound1 = icmp ugt i8* %uglygep, %scevgep45 - %found.conflict = and i1 %bound0, %bound1 - %bound019 = icmp ult float* %scevgep8, %scevgep6 - %bound120 = icmp ult float* %scevgep4, %scevgep10 - %found.conflict21 = and i1 %bound019, %bound120 - %conflict.rdx = or i1 %found.conflict, %found.conflict21 - %bound023 = icmp ult float* %arrayidx9.i.i, %scevgep6 - %bound124 = icmp ugt i8* %uglygep14, %scevgep45 - %found.conflict25 = and i1 %bound023, %bound124 - %conflict.rdx26 = or i1 %conflict.rdx, %found.conflict25 - %bound027 = icmp ult float* %scevgep15, %scevgep6 - %bound128 = icmp ult float* %scevgep4, %scevgep17 - %found.conflict29 = and i1 %bound027, %bound128 - %conflict.rdx30 = or i1 %conflict.rdx26, %found.conflict29 - br i1 %conflict.rdx30, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert31 = insertelement <8 x i32> undef, i32 %27, i32 0 - %broadcast.splat32 = shufflevector <8 x i32> %broadcast.splatinsert31, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert33 = insertelement <8 x float*> undef, float* %arrayidx.i.i, i32 0 - %broadcast.splat34 = shufflevector <8 x float*> %broadcast.splatinsert33, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert35 = insertelement <8 x float*> undef, float* %arrayidx9.i.i, i32 0 - %broadcast.splat36 = shufflevector <8 x float*> %broadcast.splatinsert35, <8 x float*> undef, <8 x i32> zeroinitializer - %46 = or <8 x i64> %broadcast.splat, - %47 = trunc <8 x i64> %46 to <8 x i32> - %48 = icmp sgt <8 x i32> %broadcast.splat32, %47 - %wide.masked.gather = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat34, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %49 = extractelement <8 x i64> %46, i32 0 - %50 = shl i64 %49, 32 - %51 = ashr exact i64 %50, 32 - %52 = getelementptr inbounds float, float* %11, i64 %51 - %53 = bitcast float* %52 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %53, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !122, !noalias !120 - %wide.masked.gather37 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat36, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !124, !noalias !120 - %54 = getelementptr inbounds float, float* %15, i64 %51 - %55 = bitcast float* %54 to <8 x float>* - %wide.masked.load38 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %55, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !126, !noalias !120 - %56 = fmul <8 x float> %wide.masked.gather37, %wide.masked.load38 - %57 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather, <8 x float> %wide.masked.load, <8 x float> %56) - %58 = extractelement <8 x i32> %47, i32 0 - %59 = add nsw i32 %mul.i.i, %58 - %60 = sext i32 %59 to i64 - %61 = getelementptr inbounds float, float* %7, i64 %60 - %62 = bitcast float* %61 to <8 x float>* - %wide.masked.load39 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %62, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !120 - %63 = fadd <8 x float> %wide.masked.load39, %57 - %64 = bitcast float* %61 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %63, <8 x float>* %64, i32 4, <8 x i1> %48), !tbaa !12, !alias.scope !120, !llvm.access.group !27 - %65 = or <8 x i64> %broadcast.splat, - %66 = trunc <8 x i64> %65 to <8 x i32> - %67 = icmp sgt <8 x i32> %broadcast.splat32, %66 - %wide.masked.gather.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat34, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %68 = extractelement <8 x i64> %65, i32 0 - %69 = shl i64 %68, 32 - %70 = ashr exact i64 %69, 32 - %71 = getelementptr inbounds float, float* %11, i64 %70 - %72 = bitcast float* %71 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %72, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12, !alias.scope !122, !noalias !120 - %wide.masked.gather37.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat36, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12, !alias.scope !124, !noalias !120 - %73 = getelementptr inbounds float, float* %15, i64 %70 - %74 = bitcast float* %73 to <8 x float>* - %wide.masked.load38.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %74, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12, !alias.scope !126, !noalias !120 - %75 = fmul <8 x float> %wide.masked.gather37.1, %wide.masked.load38.1 - %76 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather.1, <8 x float> %wide.masked.load.1, <8 x float> %75) - %77 = extractelement <8 x i32> %66, i32 0 - %78 = add nsw i32 %mul.i.i, %77 - %79 = sext i32 %78 to i64 - %80 = getelementptr inbounds float, float* %7, i64 %79 - %81 = bitcast float* %80 to <8 x float>* - %wide.masked.load39.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %81, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12, !alias.scope !120 - %82 = fadd <8 x float> %wide.masked.load39.1, %76 - %83 = bitcast float* %80 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %82, <8 x float>* %83, i32 4, <8 x i1> %67), !tbaa !12, !alias.scope !120, !llvm.access.group !27 - %84 = or <8 x i64> %broadcast.splat, - %85 = trunc <8 x i64> %84 to <8 x i32> - %86 = icmp sgt <8 x i32> %broadcast.splat32, %85 - %wide.masked.gather.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat34, i32 4, <8 x i1> %86, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %87 = extractelement <8 x i64> %84, i32 0 - %88 = shl i64 %87, 32 - %89 = ashr exact i64 %88, 32 - %90 = getelementptr inbounds float, float* %11, i64 %89 - %91 = bitcast float* %90 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %91, i32 4, <8 x i1> %86, <8 x float> undef), !tbaa !12, !alias.scope !122, !noalias !120 - %wide.masked.gather37.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat36, i32 4, <8 x i1> %86, <8 x float> undef), !tbaa !12, !alias.scope !124, !noalias !120 - %92 = getelementptr inbounds float, float* %15, i64 %89 - %93 = bitcast float* %92 to <8 x float>* - %wide.masked.load38.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %93, i32 4, <8 x i1> %86, <8 x float> undef), !tbaa !12, !alias.scope !126, !noalias !120 - %94 = fmul <8 x float> %wide.masked.gather37.2, %wide.masked.load38.2 - %95 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather.2, <8 x float> %wide.masked.load.2, <8 x float> %94) - %96 = extractelement <8 x i32> %85, i32 0 - %97 = add nsw i32 %mul.i.i, %96 - %98 = sext i32 %97 to i64 - %99 = getelementptr inbounds float, float* %7, i64 %98 - %100 = bitcast float* %99 to <8 x float>* - %wide.masked.load39.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %100, i32 4, <8 x i1> %86, <8 x float> undef), !tbaa !12, !alias.scope !120 - %101 = fadd <8 x float> %wide.masked.load39.2, %95 - %102 = bitcast float* %99 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %101, <8 x float>* %102, i32 4, <8 x i1> %86), !tbaa !12, !alias.scope !120, !llvm.access.group !27 - %103 = or <8 x i64> %broadcast.splat, - %104 = trunc <8 x i64> %103 to <8 x i32> - %105 = icmp sgt <8 x i32> %broadcast.splat32, %104 - %wide.masked.gather.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat34, i32 4, <8 x i1> %105, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %106 = extractelement <8 x i64> %103, i32 0 - %107 = shl i64 %106, 32 - %108 = ashr exact i64 %107, 32 - %109 = getelementptr inbounds float, float* %11, i64 %108 - %110 = bitcast float* %109 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %110, i32 4, <8 x i1> %105, <8 x float> undef), !tbaa !12, !alias.scope !122, !noalias !120 - %wide.masked.gather37.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat36, i32 4, <8 x i1> %105, <8 x float> undef), !tbaa !12, !alias.scope !124, !noalias !120 - %111 = getelementptr inbounds float, float* %15, i64 %108 - %112 = bitcast float* %111 to <8 x float>* - %wide.masked.load38.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %112, i32 4, <8 x i1> %105, <8 x float> undef), !tbaa !12, !alias.scope !126, !noalias !120 - %113 = fmul <8 x float> %wide.masked.gather37.3, %wide.masked.load38.3 - %114 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather.3, <8 x float> %wide.masked.load.3, <8 x float> %113) - %115 = extractelement <8 x i32> %104, i32 0 - %116 = add nsw i32 %mul.i.i, %115 - %117 = sext i32 %116 to i64 - %118 = getelementptr inbounds float, float* %7, i64 %117 - %119 = bitcast float* %118 to <8 x float>* - %wide.masked.load39.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %119, i32 4, <8 x i1> %105, <8 x float> undef), !tbaa !12, !alias.scope !120 - %120 = fadd <8 x float> %wide.masked.load39.3, %114 - %121 = bitcast float* %118 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %120, <8 x float>* %121, i32 4, <8 x i1> %105), !tbaa !12, !alias.scope !120, !llvm.access.group !27 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1532, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.preheader ], [ %870, %if.end.r_exit.i.i.us.1532 ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %27, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %122 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %sext26.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom6.i.i.us = ashr exact i64 %sext26.i.i.us, 32 - %arrayidx7.i.i.us = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us - %123 = load float, float* %arrayidx7.i.i.us, align 4, !tbaa !12 - %124 = load float, float* %arrayidx9.i.i, align 4, !tbaa !12 - %arrayidx11.i.i.us = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us - %125 = load float, float* %arrayidx11.i.i.us, align 4, !tbaa !12 - %mul12.i.i.us = fmul float %124, %125 - %126 = tail call float @llvm.fmuladd.f32(float %122, float %123, float %mul12.i.i.us) #6 - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom13.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx14.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us - %127 = load float, float* %arrayidx14.i.i.us, align 4, !tbaa !12 - %add15.i.i.us = fadd float %127, %126 - store float %add15.i.i.us, float* %arrayidx14.i.i.us, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %128 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1518 = add nuw nsw i64 %128, %mul.i.i.i - %conv.i.i.us.1519 = trunc i64 %add1.i.i.i.us.1518 to i32 - %cmp4.i.i.us.1520 = icmp sgt i32 %27, %conv.i.i.us.1519 - br i1 %cmp4.i.i.us.1520, label %if.then.i.i.us.1531, label %if.end.r_exit.i.i.us.1532 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.r_exit.i.i.us.1532 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i - %add6.i.i.i.1 = or i64 %mul3.i.i.i, 1 - %conv2.i.i.1 = trunc i64 %add6.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %27, %conv2.i.i.1 - %sext.i.i.1 = shl i64 %add6.i.i.i.1, 32 - %idxprom.i.i.1 = ashr exact i64 %sext.i.i.1, 32 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.1 - %arrayidx9.i.i.1 = getelementptr inbounds float, float* %23, i64 %idxprom.i.i.1 - %mul.i.i.1 = mul nsw i32 %27, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck47, label %pregion_for_end.i.i.1 - -vector.scevcheck47: ; preds = %pregion_for_end.i.i - %129 = mul i32 %27, %conv2.i.i.1 - %130 = trunc i64 %2 to i32 - %131 = shl i32 %130, 5 - %132 = add i32 %129, %131 - %133 = icmp sgt i32 %132, 2147483616 - br i1 %133, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.memcheck85 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.memcheck85, %vector.scevcheck47 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.memcheck85: ; preds = %vector.scevcheck47 - %sext507 = shl i64 %3, 35 - %134 = ashr exact i64 %sext507, 32 - %135 = or i64 %134, 1 - %scevgep49 = getelementptr float, float* %19, i64 %135 - %scevgep4950 = bitcast float* %scevgep49 to i8* - %uglygep51 = getelementptr i8, i8* %scevgep4950, i64 1 - %136 = mul i32 %27, %conv2.i.i.1 - %137 = trunc i64 %2 to i32 - %138 = shl i32 %137, 5 - %139 = add i32 %136, %138 - %140 = sext i32 %139 to i64 - %scevgep52 = getelementptr float, float* %7, i64 %140 - %scevgep5253 = bitcast float* %scevgep52 to i8* - %141 = add nsw i64 %140, 32 - %scevgep54 = getelementptr float, float* %7, i64 %141 - %142 = sext i32 %138 to i64 - %scevgep56 = getelementptr float, float* %11, i64 %142 - %143 = add nsw i64 %142, 32 - %scevgep58 = getelementptr float, float* %11, i64 %143 - %scevgep60 = getelementptr float, float* %23, i64 %135 - %scevgep6061 = bitcast float* %scevgep60 to i8* - %uglygep62 = getelementptr i8, i8* %scevgep6061, i64 1 - %scevgep63 = getelementptr float, float* %15, i64 %142 - %scevgep65 = getelementptr float, float* %15, i64 %143 - %bound068 = icmp ult float* %arrayidx.i.i.1, %scevgep54 - %bound169 = icmp ugt i8* %uglygep51, %scevgep5253 - %found.conflict70 = and i1 %bound068, %bound169 - %bound071 = icmp ult float* %scevgep56, %scevgep54 - %bound172 = icmp ult float* %scevgep52, %scevgep58 - %found.conflict73 = and i1 %bound071, %bound172 - %conflict.rdx74 = or i1 %found.conflict70, %found.conflict73 - %bound076 = icmp ult float* %arrayidx9.i.i.1, %scevgep54 - %bound177 = icmp ugt i8* %uglygep62, %scevgep5253 - %found.conflict78 = and i1 %bound076, %bound177 - %conflict.rdx79 = or i1 %conflict.rdx74, %found.conflict78 - %bound080 = icmp ult float* %scevgep63, %scevgep54 - %bound181 = icmp ult float* %scevgep52, %scevgep65 - %found.conflict82 = and i1 %bound080, %bound181 - %conflict.rdx83 = or i1 %conflict.rdx79, %found.conflict82 - br i1 %conflict.rdx83, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph86 - -vector.ph86: ; preds = %vector.memcheck85 - %broadcast.splatinsert93 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat94 = shufflevector <8 x i64> %broadcast.splatinsert93, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert95 = insertelement <8 x i32> undef, i32 %27, i32 0 - %broadcast.splat96 = shufflevector <8 x i32> %broadcast.splatinsert95, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert97 = insertelement <8 x float*> undef, float* %arrayidx.i.i.1, i32 0 - %broadcast.splat98 = shufflevector <8 x float*> %broadcast.splatinsert97, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert101 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.1, i32 0 - %broadcast.splat102 = shufflevector <8 x float*> %broadcast.splatinsert101, <8 x float*> undef, <8 x i32> zeroinitializer - %144 = or <8 x i64> %broadcast.splat94, - %145 = trunc <8 x i64> %144 to <8 x i32> - %146 = icmp sgt <8 x i32> %broadcast.splat96, %145 - %wide.masked.gather99 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat98, i32 4, <8 x i1> %146, <8 x float> undef), !tbaa !12, !alias.scope !128, !noalias !131 - %147 = extractelement <8 x i64> %144, i32 0 - %148 = shl i64 %147, 32 - %149 = ashr exact i64 %148, 32 - %150 = getelementptr inbounds float, float* %11, i64 %149 - %151 = bitcast float* %150 to <8 x float>* - %wide.masked.load100 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %151, i32 4, <8 x i1> %146, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !131 - %wide.masked.gather103 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat102, i32 4, <8 x i1> %146, <8 x float> undef), !tbaa !12, !alias.scope !135, !noalias !131 - %152 = getelementptr inbounds float, float* %15, i64 %149 - %153 = bitcast float* %152 to <8 x float>* - %wide.masked.load104 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %153, i32 4, <8 x i1> %146, <8 x float> undef), !tbaa !12, !alias.scope !137, !noalias !131 - %154 = fmul <8 x float> %wide.masked.gather103, %wide.masked.load104 - %155 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather99, <8 x float> %wide.masked.load100, <8 x float> %154) - %156 = extractelement <8 x i32> %145, i32 0 - %157 = add nsw i32 %mul.i.i.1, %156 - %158 = sext i32 %157 to i64 - %159 = getelementptr inbounds float, float* %7, i64 %158 - %160 = bitcast float* %159 to <8 x float>* - %wide.masked.load105 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %160, i32 4, <8 x i1> %146, <8 x float> undef), !tbaa !12, !alias.scope !131 - %161 = fadd <8 x float> %wide.masked.load105, %155 - %162 = bitcast float* %159 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %161, <8 x float>* %162, i32 4, <8 x i1> %146), !tbaa !12, !alias.scope !131, !llvm.access.group !27 - %163 = or <8 x i64> %broadcast.splat94, - %164 = trunc <8 x i64> %163 to <8 x i32> - %165 = icmp sgt <8 x i32> %broadcast.splat96, %164 - %wide.masked.gather99.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat98, i32 4, <8 x i1> %165, <8 x float> undef), !tbaa !12, !alias.scope !128, !noalias !131 - %166 = extractelement <8 x i64> %163, i32 0 - %167 = shl i64 %166, 32 - %168 = ashr exact i64 %167, 32 - %169 = getelementptr inbounds float, float* %11, i64 %168 - %170 = bitcast float* %169 to <8 x float>* - %wide.masked.load100.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %170, i32 4, <8 x i1> %165, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !131 - %wide.masked.gather103.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat102, i32 4, <8 x i1> %165, <8 x float> undef), !tbaa !12, !alias.scope !135, !noalias !131 - %171 = getelementptr inbounds float, float* %15, i64 %168 - %172 = bitcast float* %171 to <8 x float>* - %wide.masked.load104.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %172, i32 4, <8 x i1> %165, <8 x float> undef), !tbaa !12, !alias.scope !137, !noalias !131 - %173 = fmul <8 x float> %wide.masked.gather103.1, %wide.masked.load104.1 - %174 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather99.1, <8 x float> %wide.masked.load100.1, <8 x float> %173) - %175 = extractelement <8 x i32> %164, i32 0 - %176 = add nsw i32 %mul.i.i.1, %175 - %177 = sext i32 %176 to i64 - %178 = getelementptr inbounds float, float* %7, i64 %177 - %179 = bitcast float* %178 to <8 x float>* - %wide.masked.load105.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %179, i32 4, <8 x i1> %165, <8 x float> undef), !tbaa !12, !alias.scope !131 - %180 = fadd <8 x float> %wide.masked.load105.1, %174 - %181 = bitcast float* %178 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %180, <8 x float>* %181, i32 4, <8 x i1> %165), !tbaa !12, !alias.scope !131, !llvm.access.group !27 - %182 = or <8 x i64> %broadcast.splat94, - %183 = trunc <8 x i64> %182 to <8 x i32> - %184 = icmp sgt <8 x i32> %broadcast.splat96, %183 - %wide.masked.gather99.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat98, i32 4, <8 x i1> %184, <8 x float> undef), !tbaa !12, !alias.scope !128, !noalias !131 - %185 = extractelement <8 x i64> %182, i32 0 - %186 = shl i64 %185, 32 - %187 = ashr exact i64 %186, 32 - %188 = getelementptr inbounds float, float* %11, i64 %187 - %189 = bitcast float* %188 to <8 x float>* - %wide.masked.load100.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %189, i32 4, <8 x i1> %184, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !131 - %wide.masked.gather103.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat102, i32 4, <8 x i1> %184, <8 x float> undef), !tbaa !12, !alias.scope !135, !noalias !131 - %190 = getelementptr inbounds float, float* %15, i64 %187 - %191 = bitcast float* %190 to <8 x float>* - %wide.masked.load104.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %191, i32 4, <8 x i1> %184, <8 x float> undef), !tbaa !12, !alias.scope !137, !noalias !131 - %192 = fmul <8 x float> %wide.masked.gather103.2, %wide.masked.load104.2 - %193 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather99.2, <8 x float> %wide.masked.load100.2, <8 x float> %192) - %194 = extractelement <8 x i32> %183, i32 0 - %195 = add nsw i32 %mul.i.i.1, %194 - %196 = sext i32 %195 to i64 - %197 = getelementptr inbounds float, float* %7, i64 %196 - %198 = bitcast float* %197 to <8 x float>* - %wide.masked.load105.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %198, i32 4, <8 x i1> %184, <8 x float> undef), !tbaa !12, !alias.scope !131 - %199 = fadd <8 x float> %wide.masked.load105.2, %193 - %200 = bitcast float* %197 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %199, <8 x float>* %200, i32 4, <8 x i1> %184), !tbaa !12, !alias.scope !131, !llvm.access.group !27 - %201 = or <8 x i64> %broadcast.splat94, - %202 = trunc <8 x i64> %201 to <8 x i32> - %203 = icmp sgt <8 x i32> %broadcast.splat96, %202 - %wide.masked.gather99.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat98, i32 4, <8 x i1> %203, <8 x float> undef), !tbaa !12, !alias.scope !128, !noalias !131 - %204 = extractelement <8 x i64> %201, i32 0 - %205 = shl i64 %204, 32 - %206 = ashr exact i64 %205, 32 - %207 = getelementptr inbounds float, float* %11, i64 %206 - %208 = bitcast float* %207 to <8 x float>* - %wide.masked.load100.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %208, i32 4, <8 x i1> %203, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !131 - %wide.masked.gather103.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat102, i32 4, <8 x i1> %203, <8 x float> undef), !tbaa !12, !alias.scope !135, !noalias !131 - %209 = getelementptr inbounds float, float* %15, i64 %206 - %210 = bitcast float* %209 to <8 x float>* - %wide.masked.load104.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %210, i32 4, <8 x i1> %203, <8 x float> undef), !tbaa !12, !alias.scope !137, !noalias !131 - %211 = fmul <8 x float> %wide.masked.gather103.3, %wide.masked.load104.3 - %212 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather99.3, <8 x float> %wide.masked.load100.3, <8 x float> %211) - %213 = extractelement <8 x i32> %202, i32 0 - %214 = add nsw i32 %mul.i.i.1, %213 - %215 = sext i32 %214 to i64 - %216 = getelementptr inbounds float, float* %7, i64 %215 - %217 = bitcast float* %216 to <8 x float>* - %wide.masked.load105.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %217, i32 4, <8 x i1> %203, <8 x float> undef), !tbaa !12, !alias.scope !131 - %218 = fadd <8 x float> %wide.masked.load105.3, %212 - %219 = bitcast float* %216 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %218, <8 x float>* %219, i32 4, <8 x i1> %203), !tbaa !12, !alias.scope !131, !llvm.access.group !27 - br label %pregion_for_end.i.i.1 - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.1, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ], [ %863, %if.end.r_exit.i.i.us.1.1 ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %27, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %220 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %sext26.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom6.i.i.us.1 = ashr exact i64 %sext26.i.i.us.1, 32 - %arrayidx7.i.i.us.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.1 - %221 = load float, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12 - %222 = load float, float* %arrayidx9.i.i.1, align 4, !tbaa !12 - %arrayidx11.i.i.us.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.1 - %223 = load float, float* %arrayidx11.i.i.us.1, align 4, !tbaa !12 - %mul12.i.i.us.1 = fmul float %222, %223 - %224 = tail call float @llvm.fmuladd.f32(float %220, float %221, float %mul12.i.i.us.1) #6 - %add.i.i.us.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom13.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx14.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.1 - %225 = load float, float* %arrayidx14.i.i.us.1, align 4, !tbaa !12 - %add15.i.i.us.1 = fadd float %225, %224 - store float %add15.i.i.us.1, float* %arrayidx14.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %226 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %226, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %27, %conv.i.i.us.1.1 - br i1 %cmp4.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.r_exit.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.r_exit.i.i.us.1.1 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph86, %pregion_for_end.i.i - %add6.i.i.i.2 = or i64 %mul3.i.i.i, 2 - %conv2.i.i.2 = trunc i64 %add6.i.i.i.2 to i32 - %cmp.i.i.2 = icmp sgt i32 %27, %conv2.i.i.2 - %sext.i.i.2 = shl i64 %add6.i.i.i.2, 32 - %idxprom.i.i.2 = ashr exact i64 %sext.i.i.2, 32 - %arrayidx.i.i.2 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.2 - %arrayidx9.i.i.2 = getelementptr inbounds float, float* %23, i64 %idxprom.i.i.2 - %mul.i.i.2 = mul nsw i32 %27, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck113, label %pregion_for_end.i.i.2 - -vector.scevcheck113: ; preds = %pregion_for_end.i.i.1 - %227 = mul i32 %27, %conv2.i.i.2 - %228 = trunc i64 %2 to i32 - %229 = shl i32 %228, 5 - %230 = add i32 %227, %229 - %231 = icmp sgt i32 %230, 2147483616 - br i1 %231, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.memcheck151 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.memcheck151, %vector.scevcheck113 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.memcheck151: ; preds = %vector.scevcheck113 - %sext506 = shl i64 %3, 35 - %232 = ashr exact i64 %sext506, 32 - %233 = or i64 %232, 2 - %scevgep115 = getelementptr float, float* %19, i64 %233 - %scevgep115116 = bitcast float* %scevgep115 to i8* - %uglygep117 = getelementptr i8, i8* %scevgep115116, i64 1 - %234 = mul i32 %27, %conv2.i.i.2 - %235 = trunc i64 %2 to i32 - %236 = shl i32 %235, 5 - %237 = add i32 %234, %236 - %238 = sext i32 %237 to i64 - %scevgep118 = getelementptr float, float* %7, i64 %238 - %scevgep118119 = bitcast float* %scevgep118 to i8* - %239 = add nsw i64 %238, 32 - %scevgep120 = getelementptr float, float* %7, i64 %239 - %240 = sext i32 %236 to i64 - %scevgep122 = getelementptr float, float* %11, i64 %240 - %241 = add nsw i64 %240, 32 - %scevgep124 = getelementptr float, float* %11, i64 %241 - %scevgep126 = getelementptr float, float* %23, i64 %233 - %scevgep126127 = bitcast float* %scevgep126 to i8* - %uglygep128 = getelementptr i8, i8* %scevgep126127, i64 1 - %scevgep129 = getelementptr float, float* %15, i64 %240 - %scevgep131 = getelementptr float, float* %15, i64 %241 - %bound0134 = icmp ult float* %arrayidx.i.i.2, %scevgep120 - %bound1135 = icmp ugt i8* %uglygep117, %scevgep118119 - %found.conflict136 = and i1 %bound0134, %bound1135 - %bound0137 = icmp ult float* %scevgep122, %scevgep120 - %bound1138 = icmp ult float* %scevgep118, %scevgep124 - %found.conflict139 = and i1 %bound0137, %bound1138 - %conflict.rdx140 = or i1 %found.conflict136, %found.conflict139 - %bound0142 = icmp ult float* %arrayidx9.i.i.2, %scevgep120 - %bound1143 = icmp ugt i8* %uglygep128, %scevgep118119 - %found.conflict144 = and i1 %bound0142, %bound1143 - %conflict.rdx145 = or i1 %conflict.rdx140, %found.conflict144 - %bound0146 = icmp ult float* %scevgep129, %scevgep120 - %bound1147 = icmp ult float* %scevgep118, %scevgep131 - %found.conflict148 = and i1 %bound0146, %bound1147 - %conflict.rdx149 = or i1 %conflict.rdx145, %found.conflict148 - br i1 %conflict.rdx149, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph152 - -vector.ph152: ; preds = %vector.memcheck151 - %broadcast.splatinsert159 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat160 = shufflevector <8 x i64> %broadcast.splatinsert159, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert161 = insertelement <8 x i32> undef, i32 %27, i32 0 - %broadcast.splat162 = shufflevector <8 x i32> %broadcast.splatinsert161, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert163 = insertelement <8 x float*> undef, float* %arrayidx.i.i.2, i32 0 - %broadcast.splat164 = shufflevector <8 x float*> %broadcast.splatinsert163, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert167 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.2, i32 0 - %broadcast.splat168 = shufflevector <8 x float*> %broadcast.splatinsert167, <8 x float*> undef, <8 x i32> zeroinitializer - %242 = or <8 x i64> %broadcast.splat160, - %243 = trunc <8 x i64> %242 to <8 x i32> - %244 = icmp sgt <8 x i32> %broadcast.splat162, %243 - %wide.masked.gather165 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat164, i32 4, <8 x i1> %244, <8 x float> undef), !tbaa !12, !alias.scope !139, !noalias !142 - %245 = extractelement <8 x i64> %242, i32 0 - %246 = shl i64 %245, 32 - %247 = ashr exact i64 %246, 32 - %248 = getelementptr inbounds float, float* %11, i64 %247 - %249 = bitcast float* %248 to <8 x float>* - %wide.masked.load166 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %249, i32 4, <8 x i1> %244, <8 x float> undef), !tbaa !12, !alias.scope !144, !noalias !142 - %wide.masked.gather169 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat168, i32 4, <8 x i1> %244, <8 x float> undef), !tbaa !12, !alias.scope !146, !noalias !142 - %250 = getelementptr inbounds float, float* %15, i64 %247 - %251 = bitcast float* %250 to <8 x float>* - %wide.masked.load170 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %251, i32 4, <8 x i1> %244, <8 x float> undef), !tbaa !12, !alias.scope !148, !noalias !142 - %252 = fmul <8 x float> %wide.masked.gather169, %wide.masked.load170 - %253 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather165, <8 x float> %wide.masked.load166, <8 x float> %252) - %254 = extractelement <8 x i32> %243, i32 0 - %255 = add nsw i32 %mul.i.i.2, %254 - %256 = sext i32 %255 to i64 - %257 = getelementptr inbounds float, float* %7, i64 %256 - %258 = bitcast float* %257 to <8 x float>* - %wide.masked.load171 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %258, i32 4, <8 x i1> %244, <8 x float> undef), !tbaa !12, !alias.scope !142 - %259 = fadd <8 x float> %wide.masked.load171, %253 - %260 = bitcast float* %257 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %259, <8 x float>* %260, i32 4, <8 x i1> %244), !tbaa !12, !alias.scope !142, !llvm.access.group !27 - %261 = or <8 x i64> %broadcast.splat160, - %262 = trunc <8 x i64> %261 to <8 x i32> - %263 = icmp sgt <8 x i32> %broadcast.splat162, %262 - %wide.masked.gather165.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat164, i32 4, <8 x i1> %263, <8 x float> undef), !tbaa !12, !alias.scope !139, !noalias !142 - %264 = extractelement <8 x i64> %261, i32 0 - %265 = shl i64 %264, 32 - %266 = ashr exact i64 %265, 32 - %267 = getelementptr inbounds float, float* %11, i64 %266 - %268 = bitcast float* %267 to <8 x float>* - %wide.masked.load166.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %268, i32 4, <8 x i1> %263, <8 x float> undef), !tbaa !12, !alias.scope !144, !noalias !142 - %wide.masked.gather169.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat168, i32 4, <8 x i1> %263, <8 x float> undef), !tbaa !12, !alias.scope !146, !noalias !142 - %269 = getelementptr inbounds float, float* %15, i64 %266 - %270 = bitcast float* %269 to <8 x float>* - %wide.masked.load170.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %270, i32 4, <8 x i1> %263, <8 x float> undef), !tbaa !12, !alias.scope !148, !noalias !142 - %271 = fmul <8 x float> %wide.masked.gather169.1, %wide.masked.load170.1 - %272 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather165.1, <8 x float> %wide.masked.load166.1, <8 x float> %271) - %273 = extractelement <8 x i32> %262, i32 0 - %274 = add nsw i32 %mul.i.i.2, %273 - %275 = sext i32 %274 to i64 - %276 = getelementptr inbounds float, float* %7, i64 %275 - %277 = bitcast float* %276 to <8 x float>* - %wide.masked.load171.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %277, i32 4, <8 x i1> %263, <8 x float> undef), !tbaa !12, !alias.scope !142 - %278 = fadd <8 x float> %wide.masked.load171.1, %272 - %279 = bitcast float* %276 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %278, <8 x float>* %279, i32 4, <8 x i1> %263), !tbaa !12, !alias.scope !142, !llvm.access.group !27 - %280 = or <8 x i64> %broadcast.splat160, - %281 = trunc <8 x i64> %280 to <8 x i32> - %282 = icmp sgt <8 x i32> %broadcast.splat162, %281 - %wide.masked.gather165.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat164, i32 4, <8 x i1> %282, <8 x float> undef), !tbaa !12, !alias.scope !139, !noalias !142 - %283 = extractelement <8 x i64> %280, i32 0 - %284 = shl i64 %283, 32 - %285 = ashr exact i64 %284, 32 - %286 = getelementptr inbounds float, float* %11, i64 %285 - %287 = bitcast float* %286 to <8 x float>* - %wide.masked.load166.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %287, i32 4, <8 x i1> %282, <8 x float> undef), !tbaa !12, !alias.scope !144, !noalias !142 - %wide.masked.gather169.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat168, i32 4, <8 x i1> %282, <8 x float> undef), !tbaa !12, !alias.scope !146, !noalias !142 - %288 = getelementptr inbounds float, float* %15, i64 %285 - %289 = bitcast float* %288 to <8 x float>* - %wide.masked.load170.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %289, i32 4, <8 x i1> %282, <8 x float> undef), !tbaa !12, !alias.scope !148, !noalias !142 - %290 = fmul <8 x float> %wide.masked.gather169.2, %wide.masked.load170.2 - %291 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather165.2, <8 x float> %wide.masked.load166.2, <8 x float> %290) - %292 = extractelement <8 x i32> %281, i32 0 - %293 = add nsw i32 %mul.i.i.2, %292 - %294 = sext i32 %293 to i64 - %295 = getelementptr inbounds float, float* %7, i64 %294 - %296 = bitcast float* %295 to <8 x float>* - %wide.masked.load171.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %296, i32 4, <8 x i1> %282, <8 x float> undef), !tbaa !12, !alias.scope !142 - %297 = fadd <8 x float> %wide.masked.load171.2, %291 - %298 = bitcast float* %295 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %297, <8 x float>* %298, i32 4, <8 x i1> %282), !tbaa !12, !alias.scope !142, !llvm.access.group !27 - %299 = or <8 x i64> %broadcast.splat160, - %300 = trunc <8 x i64> %299 to <8 x i32> - %301 = icmp sgt <8 x i32> %broadcast.splat162, %300 - %wide.masked.gather165.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat164, i32 4, <8 x i1> %301, <8 x float> undef), !tbaa !12, !alias.scope !139, !noalias !142 - %302 = extractelement <8 x i64> %299, i32 0 - %303 = shl i64 %302, 32 - %304 = ashr exact i64 %303, 32 - %305 = getelementptr inbounds float, float* %11, i64 %304 - %306 = bitcast float* %305 to <8 x float>* - %wide.masked.load166.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %306, i32 4, <8 x i1> %301, <8 x float> undef), !tbaa !12, !alias.scope !144, !noalias !142 - %wide.masked.gather169.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat168, i32 4, <8 x i1> %301, <8 x float> undef), !tbaa !12, !alias.scope !146, !noalias !142 - %307 = getelementptr inbounds float, float* %15, i64 %304 - %308 = bitcast float* %307 to <8 x float>* - %wide.masked.load170.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %308, i32 4, <8 x i1> %301, <8 x float> undef), !tbaa !12, !alias.scope !148, !noalias !142 - %309 = fmul <8 x float> %wide.masked.gather169.3, %wide.masked.load170.3 - %310 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather165.3, <8 x float> %wide.masked.load166.3, <8 x float> %309) - %311 = extractelement <8 x i32> %300, i32 0 - %312 = add nsw i32 %mul.i.i.2, %311 - %313 = sext i32 %312 to i64 - %314 = getelementptr inbounds float, float* %7, i64 %313 - %315 = bitcast float* %314 to <8 x float>* - %wide.masked.load171.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %315, i32 4, <8 x i1> %301, <8 x float> undef), !tbaa !12, !alias.scope !142 - %316 = fadd <8 x float> %wide.masked.load171.3, %310 - %317 = bitcast float* %314 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %316, <8 x float>* %317, i32 4, <8 x i1> %301), !tbaa !12, !alias.scope !142, !llvm.access.group !27 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.r_exit.i.i.us.2.1, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ], [ %856, %if.end.r_exit.i.i.us.2.1 ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %27, %conv.i.i.us.2 - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.r_exit.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %318 = load float, float* %arrayidx.i.i.2, align 4, !tbaa !12 - %sext26.i.i.us.2 = shl i64 %add1.i.i.i.us.2, 32 - %idxprom6.i.i.us.2 = ashr exact i64 %sext26.i.i.us.2, 32 - %arrayidx7.i.i.us.2 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.2 - %319 = load float, float* %arrayidx7.i.i.us.2, align 4, !tbaa !12 - %320 = load float, float* %arrayidx9.i.i.2, align 4, !tbaa !12 - %arrayidx11.i.i.us.2 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.2 - %321 = load float, float* %arrayidx11.i.i.us.2, align 4, !tbaa !12 - %mul12.i.i.us.2 = fmul float %320, %321 - %322 = tail call float @llvm.fmuladd.f32(float %318, float %319, float %mul12.i.i.us.2) #6 - %add.i.i.us.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom13.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx14.i.i.us.2 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.2 - %323 = load float, float* %arrayidx14.i.i.us.2, align 4, !tbaa !12 - %add15.i.i.us.2 = fadd float %323, %322 - store float %add15.i.i.us.2, float* %arrayidx14.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.2 - -if.end.r_exit.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %324 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %324, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %27, %conv.i.i.us.2.1 - br i1 %cmp4.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.r_exit.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.r_exit.i.i.us.2.1 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph152, %pregion_for_end.i.i.1 - %add6.i.i.i.3 = or i64 %mul3.i.i.i, 3 - %conv2.i.i.3 = trunc i64 %add6.i.i.i.3 to i32 - %cmp.i.i.3 = icmp sgt i32 %27, %conv2.i.i.3 - %sext.i.i.3 = shl i64 %add6.i.i.i.3, 32 - %idxprom.i.i.3 = ashr exact i64 %sext.i.i.3, 32 - %arrayidx.i.i.3 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.3 - %arrayidx9.i.i.3 = getelementptr inbounds float, float* %23, i64 %idxprom.i.i.3 - %mul.i.i.3 = mul nsw i32 %27, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck179, label %pregion_for_end.i.i.3 - -vector.scevcheck179: ; preds = %pregion_for_end.i.i.2 - %325 = mul i32 %27, %conv2.i.i.3 - %326 = trunc i64 %2 to i32 - %327 = shl i32 %326, 5 - %328 = add i32 %325, %327 - %329 = icmp sgt i32 %328, 2147483616 - br i1 %329, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.memcheck217 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.memcheck217, %vector.scevcheck179 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.memcheck217: ; preds = %vector.scevcheck179 - %sext505 = shl i64 %3, 35 - %330 = ashr exact i64 %sext505, 32 - %331 = or i64 %330, 3 - %scevgep181 = getelementptr float, float* %19, i64 %331 - %scevgep181182 = bitcast float* %scevgep181 to i8* - %uglygep183 = getelementptr i8, i8* %scevgep181182, i64 1 - %332 = mul i32 %27, %conv2.i.i.3 - %333 = trunc i64 %2 to i32 - %334 = shl i32 %333, 5 - %335 = add i32 %332, %334 - %336 = sext i32 %335 to i64 - %scevgep184 = getelementptr float, float* %7, i64 %336 - %scevgep184185 = bitcast float* %scevgep184 to i8* - %337 = add nsw i64 %336, 32 - %scevgep186 = getelementptr float, float* %7, i64 %337 - %338 = sext i32 %334 to i64 - %scevgep188 = getelementptr float, float* %11, i64 %338 - %339 = add nsw i64 %338, 32 - %scevgep190 = getelementptr float, float* %11, i64 %339 - %scevgep192 = getelementptr float, float* %23, i64 %331 - %scevgep192193 = bitcast float* %scevgep192 to i8* - %uglygep194 = getelementptr i8, i8* %scevgep192193, i64 1 - %scevgep195 = getelementptr float, float* %15, i64 %338 - %scevgep197 = getelementptr float, float* %15, i64 %339 - %bound0200 = icmp ult float* %arrayidx.i.i.3, %scevgep186 - %bound1201 = icmp ugt i8* %uglygep183, %scevgep184185 - %found.conflict202 = and i1 %bound0200, %bound1201 - %bound0203 = icmp ult float* %scevgep188, %scevgep186 - %bound1204 = icmp ult float* %scevgep184, %scevgep190 - %found.conflict205 = and i1 %bound0203, %bound1204 - %conflict.rdx206 = or i1 %found.conflict202, %found.conflict205 - %bound0208 = icmp ult float* %arrayidx9.i.i.3, %scevgep186 - %bound1209 = icmp ugt i8* %uglygep194, %scevgep184185 - %found.conflict210 = and i1 %bound0208, %bound1209 - %conflict.rdx211 = or i1 %conflict.rdx206, %found.conflict210 - %bound0212 = icmp ult float* %scevgep195, %scevgep186 - %bound1213 = icmp ult float* %scevgep184, %scevgep197 - %found.conflict214 = and i1 %bound0212, %bound1213 - %conflict.rdx215 = or i1 %conflict.rdx211, %found.conflict214 - br i1 %conflict.rdx215, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph218 - -vector.ph218: ; preds = %vector.memcheck217 - %broadcast.splatinsert225 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat226 = shufflevector <8 x i64> %broadcast.splatinsert225, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert227 = insertelement <8 x i32> undef, i32 %27, i32 0 - %broadcast.splat228 = shufflevector <8 x i32> %broadcast.splatinsert227, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert229 = insertelement <8 x float*> undef, float* %arrayidx.i.i.3, i32 0 - %broadcast.splat230 = shufflevector <8 x float*> %broadcast.splatinsert229, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert233 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.3, i32 0 - %broadcast.splat234 = shufflevector <8 x float*> %broadcast.splatinsert233, <8 x float*> undef, <8 x i32> zeroinitializer - %340 = or <8 x i64> %broadcast.splat226, - %341 = trunc <8 x i64> %340 to <8 x i32> - %342 = icmp sgt <8 x i32> %broadcast.splat228, %341 - %wide.masked.gather231 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat230, i32 4, <8 x i1> %342, <8 x float> undef), !tbaa !12, !alias.scope !150, !noalias !153 - %343 = extractelement <8 x i64> %340, i32 0 - %344 = shl i64 %343, 32 - %345 = ashr exact i64 %344, 32 - %346 = getelementptr inbounds float, float* %11, i64 %345 - %347 = bitcast float* %346 to <8 x float>* - %wide.masked.load232 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %347, i32 4, <8 x i1> %342, <8 x float> undef), !tbaa !12, !alias.scope !155, !noalias !153 - %wide.masked.gather235 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat234, i32 4, <8 x i1> %342, <8 x float> undef), !tbaa !12, !alias.scope !157, !noalias !153 - %348 = getelementptr inbounds float, float* %15, i64 %345 - %349 = bitcast float* %348 to <8 x float>* - %wide.masked.load236 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %349, i32 4, <8 x i1> %342, <8 x float> undef), !tbaa !12, !alias.scope !159, !noalias !153 - %350 = fmul <8 x float> %wide.masked.gather235, %wide.masked.load236 - %351 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather231, <8 x float> %wide.masked.load232, <8 x float> %350) - %352 = extractelement <8 x i32> %341, i32 0 - %353 = add nsw i32 %mul.i.i.3, %352 - %354 = sext i32 %353 to i64 - %355 = getelementptr inbounds float, float* %7, i64 %354 - %356 = bitcast float* %355 to <8 x float>* - %wide.masked.load237 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %356, i32 4, <8 x i1> %342, <8 x float> undef), !tbaa !12, !alias.scope !153 - %357 = fadd <8 x float> %wide.masked.load237, %351 - %358 = bitcast float* %355 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %357, <8 x float>* %358, i32 4, <8 x i1> %342), !tbaa !12, !alias.scope !153, !llvm.access.group !27 - %359 = or <8 x i64> %broadcast.splat226, - %360 = trunc <8 x i64> %359 to <8 x i32> - %361 = icmp sgt <8 x i32> %broadcast.splat228, %360 - %wide.masked.gather231.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat230, i32 4, <8 x i1> %361, <8 x float> undef), !tbaa !12, !alias.scope !150, !noalias !153 - %362 = extractelement <8 x i64> %359, i32 0 - %363 = shl i64 %362, 32 - %364 = ashr exact i64 %363, 32 - %365 = getelementptr inbounds float, float* %11, i64 %364 - %366 = bitcast float* %365 to <8 x float>* - %wide.masked.load232.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %366, i32 4, <8 x i1> %361, <8 x float> undef), !tbaa !12, !alias.scope !155, !noalias !153 - %wide.masked.gather235.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat234, i32 4, <8 x i1> %361, <8 x float> undef), !tbaa !12, !alias.scope !157, !noalias !153 - %367 = getelementptr inbounds float, float* %15, i64 %364 - %368 = bitcast float* %367 to <8 x float>* - %wide.masked.load236.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %368, i32 4, <8 x i1> %361, <8 x float> undef), !tbaa !12, !alias.scope !159, !noalias !153 - %369 = fmul <8 x float> %wide.masked.gather235.1, %wide.masked.load236.1 - %370 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather231.1, <8 x float> %wide.masked.load232.1, <8 x float> %369) - %371 = extractelement <8 x i32> %360, i32 0 - %372 = add nsw i32 %mul.i.i.3, %371 - %373 = sext i32 %372 to i64 - %374 = getelementptr inbounds float, float* %7, i64 %373 - %375 = bitcast float* %374 to <8 x float>* - %wide.masked.load237.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %375, i32 4, <8 x i1> %361, <8 x float> undef), !tbaa !12, !alias.scope !153 - %376 = fadd <8 x float> %wide.masked.load237.1, %370 - %377 = bitcast float* %374 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %376, <8 x float>* %377, i32 4, <8 x i1> %361), !tbaa !12, !alias.scope !153, !llvm.access.group !27 - %378 = or <8 x i64> %broadcast.splat226, - %379 = trunc <8 x i64> %378 to <8 x i32> - %380 = icmp sgt <8 x i32> %broadcast.splat228, %379 - %wide.masked.gather231.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat230, i32 4, <8 x i1> %380, <8 x float> undef), !tbaa !12, !alias.scope !150, !noalias !153 - %381 = extractelement <8 x i64> %378, i32 0 - %382 = shl i64 %381, 32 - %383 = ashr exact i64 %382, 32 - %384 = getelementptr inbounds float, float* %11, i64 %383 - %385 = bitcast float* %384 to <8 x float>* - %wide.masked.load232.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %385, i32 4, <8 x i1> %380, <8 x float> undef), !tbaa !12, !alias.scope !155, !noalias !153 - %wide.masked.gather235.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat234, i32 4, <8 x i1> %380, <8 x float> undef), !tbaa !12, !alias.scope !157, !noalias !153 - %386 = getelementptr inbounds float, float* %15, i64 %383 - %387 = bitcast float* %386 to <8 x float>* - %wide.masked.load236.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %387, i32 4, <8 x i1> %380, <8 x float> undef), !tbaa !12, !alias.scope !159, !noalias !153 - %388 = fmul <8 x float> %wide.masked.gather235.2, %wide.masked.load236.2 - %389 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather231.2, <8 x float> %wide.masked.load232.2, <8 x float> %388) - %390 = extractelement <8 x i32> %379, i32 0 - %391 = add nsw i32 %mul.i.i.3, %390 - %392 = sext i32 %391 to i64 - %393 = getelementptr inbounds float, float* %7, i64 %392 - %394 = bitcast float* %393 to <8 x float>* - %wide.masked.load237.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %394, i32 4, <8 x i1> %380, <8 x float> undef), !tbaa !12, !alias.scope !153 - %395 = fadd <8 x float> %wide.masked.load237.2, %389 - %396 = bitcast float* %393 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %395, <8 x float>* %396, i32 4, <8 x i1> %380), !tbaa !12, !alias.scope !153, !llvm.access.group !27 - %397 = or <8 x i64> %broadcast.splat226, - %398 = trunc <8 x i64> %397 to <8 x i32> - %399 = icmp sgt <8 x i32> %broadcast.splat228, %398 - %wide.masked.gather231.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat230, i32 4, <8 x i1> %399, <8 x float> undef), !tbaa !12, !alias.scope !150, !noalias !153 - %400 = extractelement <8 x i64> %397, i32 0 - %401 = shl i64 %400, 32 - %402 = ashr exact i64 %401, 32 - %403 = getelementptr inbounds float, float* %11, i64 %402 - %404 = bitcast float* %403 to <8 x float>* - %wide.masked.load232.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %404, i32 4, <8 x i1> %399, <8 x float> undef), !tbaa !12, !alias.scope !155, !noalias !153 - %wide.masked.gather235.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat234, i32 4, <8 x i1> %399, <8 x float> undef), !tbaa !12, !alias.scope !157, !noalias !153 - %405 = getelementptr inbounds float, float* %15, i64 %402 - %406 = bitcast float* %405 to <8 x float>* - %wide.masked.load236.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %406, i32 4, <8 x i1> %399, <8 x float> undef), !tbaa !12, !alias.scope !159, !noalias !153 - %407 = fmul <8 x float> %wide.masked.gather235.3, %wide.masked.load236.3 - %408 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather231.3, <8 x float> %wide.masked.load232.3, <8 x float> %407) - %409 = extractelement <8 x i32> %398, i32 0 - %410 = add nsw i32 %mul.i.i.3, %409 - %411 = sext i32 %410 to i64 - %412 = getelementptr inbounds float, float* %7, i64 %411 - %413 = bitcast float* %412 to <8 x float>* - %wide.masked.load237.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %413, i32 4, <8 x i1> %399, <8 x float> undef), !tbaa !12, !alias.scope !153 - %414 = fadd <8 x float> %wide.masked.load237.3, %408 - %415 = bitcast float* %412 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %414, <8 x float>* %415, i32 4, <8 x i1> %399), !tbaa !12, !alias.scope !153, !llvm.access.group !27 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.r_exit.i.i.us.3.1, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ], [ %849, %if.end.r_exit.i.i.us.3.1 ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %27, %conv.i.i.us.3 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.r_exit.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %416 = load float, float* %arrayidx.i.i.3, align 4, !tbaa !12 - %sext26.i.i.us.3 = shl i64 %add1.i.i.i.us.3, 32 - %idxprom6.i.i.us.3 = ashr exact i64 %sext26.i.i.us.3, 32 - %arrayidx7.i.i.us.3 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.3 - %417 = load float, float* %arrayidx7.i.i.us.3, align 4, !tbaa !12 - %418 = load float, float* %arrayidx9.i.i.3, align 4, !tbaa !12 - %arrayidx11.i.i.us.3 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.3 - %419 = load float, float* %arrayidx11.i.i.us.3, align 4, !tbaa !12 - %mul12.i.i.us.3 = fmul float %418, %419 - %420 = tail call float @llvm.fmuladd.f32(float %416, float %417, float %mul12.i.i.us.3) #6 - %add.i.i.us.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom13.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx14.i.i.us.3 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.3 - %421 = load float, float* %arrayidx14.i.i.us.3, align 4, !tbaa !12 - %add15.i.i.us.3 = fadd float %421, %420 - store float %add15.i.i.us.3, float* %arrayidx14.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.3 - -if.end.r_exit.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %422 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %422, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %27, %conv.i.i.us.3.1 - br i1 %cmp4.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.r_exit.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.r_exit.i.i.us.3.1 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph218, %pregion_for_end.i.i.2 - %add6.i.i.i.4 = or i64 %mul3.i.i.i, 4 - %conv2.i.i.4 = trunc i64 %add6.i.i.i.4 to i32 - %cmp.i.i.4 = icmp sgt i32 %27, %conv2.i.i.4 - %sext.i.i.4 = shl i64 %add6.i.i.i.4, 32 - %idxprom.i.i.4 = ashr exact i64 %sext.i.i.4, 32 - %arrayidx.i.i.4 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.4 - %arrayidx9.i.i.4 = getelementptr inbounds float, float* %23, i64 %idxprom.i.i.4 - %mul.i.i.4 = mul nsw i32 %27, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck245, label %pregion_for_end.i.i.4 - -vector.scevcheck245: ; preds = %pregion_for_end.i.i.3 - %423 = mul i32 %27, %conv2.i.i.4 - %424 = trunc i64 %2 to i32 - %425 = shl i32 %424, 5 - %426 = add i32 %423, %425 - %427 = icmp sgt i32 %426, 2147483616 - br i1 %427, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.memcheck283 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.memcheck283, %vector.scevcheck245 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.memcheck283: ; preds = %vector.scevcheck245 - %sext504 = shl i64 %3, 35 - %428 = ashr exact i64 %sext504, 32 - %429 = or i64 %428, 4 - %scevgep247 = getelementptr float, float* %19, i64 %429 - %scevgep247248 = bitcast float* %scevgep247 to i8* - %uglygep249 = getelementptr i8, i8* %scevgep247248, i64 1 - %430 = mul i32 %27, %conv2.i.i.4 - %431 = trunc i64 %2 to i32 - %432 = shl i32 %431, 5 - %433 = add i32 %430, %432 - %434 = sext i32 %433 to i64 - %scevgep250 = getelementptr float, float* %7, i64 %434 - %scevgep250251 = bitcast float* %scevgep250 to i8* - %435 = add nsw i64 %434, 32 - %scevgep252 = getelementptr float, float* %7, i64 %435 - %436 = sext i32 %432 to i64 - %scevgep254 = getelementptr float, float* %11, i64 %436 - %437 = add nsw i64 %436, 32 - %scevgep256 = getelementptr float, float* %11, i64 %437 - %scevgep258 = getelementptr float, float* %23, i64 %429 - %scevgep258259 = bitcast float* %scevgep258 to i8* - %uglygep260 = getelementptr i8, i8* %scevgep258259, i64 1 - %scevgep261 = getelementptr float, float* %15, i64 %436 - %scevgep263 = getelementptr float, float* %15, i64 %437 - %bound0266 = icmp ult float* %arrayidx.i.i.4, %scevgep252 - %bound1267 = icmp ugt i8* %uglygep249, %scevgep250251 - %found.conflict268 = and i1 %bound0266, %bound1267 - %bound0269 = icmp ult float* %scevgep254, %scevgep252 - %bound1270 = icmp ult float* %scevgep250, %scevgep256 - %found.conflict271 = and i1 %bound0269, %bound1270 - %conflict.rdx272 = or i1 %found.conflict268, %found.conflict271 - %bound0274 = icmp ult float* %arrayidx9.i.i.4, %scevgep252 - %bound1275 = icmp ugt i8* %uglygep260, %scevgep250251 - %found.conflict276 = and i1 %bound0274, %bound1275 - %conflict.rdx277 = or i1 %conflict.rdx272, %found.conflict276 - %bound0278 = icmp ult float* %scevgep261, %scevgep252 - %bound1279 = icmp ult float* %scevgep250, %scevgep263 - %found.conflict280 = and i1 %bound0278, %bound1279 - %conflict.rdx281 = or i1 %conflict.rdx277, %found.conflict280 - br i1 %conflict.rdx281, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph284 - -vector.ph284: ; preds = %vector.memcheck283 - %broadcast.splatinsert291 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat292 = shufflevector <8 x i64> %broadcast.splatinsert291, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert293 = insertelement <8 x i32> undef, i32 %27, i32 0 - %broadcast.splat294 = shufflevector <8 x i32> %broadcast.splatinsert293, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert295 = insertelement <8 x float*> undef, float* %arrayidx.i.i.4, i32 0 - %broadcast.splat296 = shufflevector <8 x float*> %broadcast.splatinsert295, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert299 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.4, i32 0 - %broadcast.splat300 = shufflevector <8 x float*> %broadcast.splatinsert299, <8 x float*> undef, <8 x i32> zeroinitializer - %438 = or <8 x i64> %broadcast.splat292, - %439 = trunc <8 x i64> %438 to <8 x i32> - %440 = icmp sgt <8 x i32> %broadcast.splat294, %439 - %wide.masked.gather297 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat296, i32 4, <8 x i1> %440, <8 x float> undef), !tbaa !12, !alias.scope !161, !noalias !164 - %441 = extractelement <8 x i64> %438, i32 0 - %442 = shl i64 %441, 32 - %443 = ashr exact i64 %442, 32 - %444 = getelementptr inbounds float, float* %11, i64 %443 - %445 = bitcast float* %444 to <8 x float>* - %wide.masked.load298 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %445, i32 4, <8 x i1> %440, <8 x float> undef), !tbaa !12, !alias.scope !166, !noalias !164 - %wide.masked.gather301 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat300, i32 4, <8 x i1> %440, <8 x float> undef), !tbaa !12, !alias.scope !168, !noalias !164 - %446 = getelementptr inbounds float, float* %15, i64 %443 - %447 = bitcast float* %446 to <8 x float>* - %wide.masked.load302 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %447, i32 4, <8 x i1> %440, <8 x float> undef), !tbaa !12, !alias.scope !170, !noalias !164 - %448 = fmul <8 x float> %wide.masked.gather301, %wide.masked.load302 - %449 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather297, <8 x float> %wide.masked.load298, <8 x float> %448) - %450 = extractelement <8 x i32> %439, i32 0 - %451 = add nsw i32 %mul.i.i.4, %450 - %452 = sext i32 %451 to i64 - %453 = getelementptr inbounds float, float* %7, i64 %452 - %454 = bitcast float* %453 to <8 x float>* - %wide.masked.load303 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %454, i32 4, <8 x i1> %440, <8 x float> undef), !tbaa !12, !alias.scope !164 - %455 = fadd <8 x float> %wide.masked.load303, %449 - %456 = bitcast float* %453 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %455, <8 x float>* %456, i32 4, <8 x i1> %440), !tbaa !12, !alias.scope !164, !llvm.access.group !27 - %457 = or <8 x i64> %broadcast.splat292, - %458 = trunc <8 x i64> %457 to <8 x i32> - %459 = icmp sgt <8 x i32> %broadcast.splat294, %458 - %wide.masked.gather297.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat296, i32 4, <8 x i1> %459, <8 x float> undef), !tbaa !12, !alias.scope !161, !noalias !164 - %460 = extractelement <8 x i64> %457, i32 0 - %461 = shl i64 %460, 32 - %462 = ashr exact i64 %461, 32 - %463 = getelementptr inbounds float, float* %11, i64 %462 - %464 = bitcast float* %463 to <8 x float>* - %wide.masked.load298.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %464, i32 4, <8 x i1> %459, <8 x float> undef), !tbaa !12, !alias.scope !166, !noalias !164 - %wide.masked.gather301.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat300, i32 4, <8 x i1> %459, <8 x float> undef), !tbaa !12, !alias.scope !168, !noalias !164 - %465 = getelementptr inbounds float, float* %15, i64 %462 - %466 = bitcast float* %465 to <8 x float>* - %wide.masked.load302.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %466, i32 4, <8 x i1> %459, <8 x float> undef), !tbaa !12, !alias.scope !170, !noalias !164 - %467 = fmul <8 x float> %wide.masked.gather301.1, %wide.masked.load302.1 - %468 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather297.1, <8 x float> %wide.masked.load298.1, <8 x float> %467) - %469 = extractelement <8 x i32> %458, i32 0 - %470 = add nsw i32 %mul.i.i.4, %469 - %471 = sext i32 %470 to i64 - %472 = getelementptr inbounds float, float* %7, i64 %471 - %473 = bitcast float* %472 to <8 x float>* - %wide.masked.load303.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %473, i32 4, <8 x i1> %459, <8 x float> undef), !tbaa !12, !alias.scope !164 - %474 = fadd <8 x float> %wide.masked.load303.1, %468 - %475 = bitcast float* %472 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %474, <8 x float>* %475, i32 4, <8 x i1> %459), !tbaa !12, !alias.scope !164, !llvm.access.group !27 - %476 = or <8 x i64> %broadcast.splat292, - %477 = trunc <8 x i64> %476 to <8 x i32> - %478 = icmp sgt <8 x i32> %broadcast.splat294, %477 - %wide.masked.gather297.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat296, i32 4, <8 x i1> %478, <8 x float> undef), !tbaa !12, !alias.scope !161, !noalias !164 - %479 = extractelement <8 x i64> %476, i32 0 - %480 = shl i64 %479, 32 - %481 = ashr exact i64 %480, 32 - %482 = getelementptr inbounds float, float* %11, i64 %481 - %483 = bitcast float* %482 to <8 x float>* - %wide.masked.load298.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %483, i32 4, <8 x i1> %478, <8 x float> undef), !tbaa !12, !alias.scope !166, !noalias !164 - %wide.masked.gather301.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat300, i32 4, <8 x i1> %478, <8 x float> undef), !tbaa !12, !alias.scope !168, !noalias !164 - %484 = getelementptr inbounds float, float* %15, i64 %481 - %485 = bitcast float* %484 to <8 x float>* - %wide.masked.load302.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %485, i32 4, <8 x i1> %478, <8 x float> undef), !tbaa !12, !alias.scope !170, !noalias !164 - %486 = fmul <8 x float> %wide.masked.gather301.2, %wide.masked.load302.2 - %487 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather297.2, <8 x float> %wide.masked.load298.2, <8 x float> %486) - %488 = extractelement <8 x i32> %477, i32 0 - %489 = add nsw i32 %mul.i.i.4, %488 - %490 = sext i32 %489 to i64 - %491 = getelementptr inbounds float, float* %7, i64 %490 - %492 = bitcast float* %491 to <8 x float>* - %wide.masked.load303.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %492, i32 4, <8 x i1> %478, <8 x float> undef), !tbaa !12, !alias.scope !164 - %493 = fadd <8 x float> %wide.masked.load303.2, %487 - %494 = bitcast float* %491 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %493, <8 x float>* %494, i32 4, <8 x i1> %478), !tbaa !12, !alias.scope !164, !llvm.access.group !27 - %495 = or <8 x i64> %broadcast.splat292, - %496 = trunc <8 x i64> %495 to <8 x i32> - %497 = icmp sgt <8 x i32> %broadcast.splat294, %496 - %wide.masked.gather297.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat296, i32 4, <8 x i1> %497, <8 x float> undef), !tbaa !12, !alias.scope !161, !noalias !164 - %498 = extractelement <8 x i64> %495, i32 0 - %499 = shl i64 %498, 32 - %500 = ashr exact i64 %499, 32 - %501 = getelementptr inbounds float, float* %11, i64 %500 - %502 = bitcast float* %501 to <8 x float>* - %wide.masked.load298.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %502, i32 4, <8 x i1> %497, <8 x float> undef), !tbaa !12, !alias.scope !166, !noalias !164 - %wide.masked.gather301.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat300, i32 4, <8 x i1> %497, <8 x float> undef), !tbaa !12, !alias.scope !168, !noalias !164 - %503 = getelementptr inbounds float, float* %15, i64 %500 - %504 = bitcast float* %503 to <8 x float>* - %wide.masked.load302.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %504, i32 4, <8 x i1> %497, <8 x float> undef), !tbaa !12, !alias.scope !170, !noalias !164 - %505 = fmul <8 x float> %wide.masked.gather301.3, %wide.masked.load302.3 - %506 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather297.3, <8 x float> %wide.masked.load298.3, <8 x float> %505) - %507 = extractelement <8 x i32> %496, i32 0 - %508 = add nsw i32 %mul.i.i.4, %507 - %509 = sext i32 %508 to i64 - %510 = getelementptr inbounds float, float* %7, i64 %509 - %511 = bitcast float* %510 to <8 x float>* - %wide.masked.load303.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %511, i32 4, <8 x i1> %497, <8 x float> undef), !tbaa !12, !alias.scope !164 - %512 = fadd <8 x float> %wide.masked.load303.3, %506 - %513 = bitcast float* %510 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %512, <8 x float>* %513, i32 4, <8 x i1> %497), !tbaa !12, !alias.scope !164, !llvm.access.group !27 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.r_exit.i.i.us.4.1, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ], [ %842, %if.end.r_exit.i.i.us.4.1 ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %27, %conv.i.i.us.4 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.r_exit.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %514 = load float, float* %arrayidx.i.i.4, align 4, !tbaa !12 - %sext26.i.i.us.4 = shl i64 %add1.i.i.i.us.4, 32 - %idxprom6.i.i.us.4 = ashr exact i64 %sext26.i.i.us.4, 32 - %arrayidx7.i.i.us.4 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.4 - %515 = load float, float* %arrayidx7.i.i.us.4, align 4, !tbaa !12 - %516 = load float, float* %arrayidx9.i.i.4, align 4, !tbaa !12 - %arrayidx11.i.i.us.4 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.4 - %517 = load float, float* %arrayidx11.i.i.us.4, align 4, !tbaa !12 - %mul12.i.i.us.4 = fmul float %516, %517 - %518 = tail call float @llvm.fmuladd.f32(float %514, float %515, float %mul12.i.i.us.4) #6 - %add.i.i.us.4 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom13.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx14.i.i.us.4 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.4 - %519 = load float, float* %arrayidx14.i.i.us.4, align 4, !tbaa !12 - %add15.i.i.us.4 = fadd float %519, %518 - store float %add15.i.i.us.4, float* %arrayidx14.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.4 - -if.end.r_exit.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %520 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %520, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %27, %conv.i.i.us.4.1 - br i1 %cmp4.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.r_exit.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.r_exit.i.i.us.4.1 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph284, %pregion_for_end.i.i.3 - %add6.i.i.i.5 = or i64 %mul3.i.i.i, 5 - %conv2.i.i.5 = trunc i64 %add6.i.i.i.5 to i32 - %cmp.i.i.5 = icmp sgt i32 %27, %conv2.i.i.5 - %sext.i.i.5 = shl i64 %add6.i.i.i.5, 32 - %idxprom.i.i.5 = ashr exact i64 %sext.i.i.5, 32 - %arrayidx.i.i.5 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.5 - %arrayidx9.i.i.5 = getelementptr inbounds float, float* %23, i64 %idxprom.i.i.5 - %mul.i.i.5 = mul nsw i32 %27, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck311, label %pregion_for_end.i.i.5 - -vector.scevcheck311: ; preds = %pregion_for_end.i.i.4 - %521 = mul i32 %27, %conv2.i.i.5 - %522 = trunc i64 %2 to i32 - %523 = shl i32 %522, 5 - %524 = add i32 %521, %523 - %525 = icmp sgt i32 %524, 2147483616 - br i1 %525, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.memcheck349 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.memcheck349, %vector.scevcheck311 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.memcheck349: ; preds = %vector.scevcheck311 - %sext503 = shl i64 %3, 35 - %526 = ashr exact i64 %sext503, 32 - %527 = or i64 %526, 5 - %scevgep313 = getelementptr float, float* %19, i64 %527 - %scevgep313314 = bitcast float* %scevgep313 to i8* - %uglygep315 = getelementptr i8, i8* %scevgep313314, i64 1 - %528 = mul i32 %27, %conv2.i.i.5 - %529 = trunc i64 %2 to i32 - %530 = shl i32 %529, 5 - %531 = add i32 %528, %530 - %532 = sext i32 %531 to i64 - %scevgep316 = getelementptr float, float* %7, i64 %532 - %scevgep316317 = bitcast float* %scevgep316 to i8* - %533 = add nsw i64 %532, 32 - %scevgep318 = getelementptr float, float* %7, i64 %533 - %534 = sext i32 %530 to i64 - %scevgep320 = getelementptr float, float* %11, i64 %534 - %535 = add nsw i64 %534, 32 - %scevgep322 = getelementptr float, float* %11, i64 %535 - %scevgep324 = getelementptr float, float* %23, i64 %527 - %scevgep324325 = bitcast float* %scevgep324 to i8* - %uglygep326 = getelementptr i8, i8* %scevgep324325, i64 1 - %scevgep327 = getelementptr float, float* %15, i64 %534 - %scevgep329 = getelementptr float, float* %15, i64 %535 - %bound0332 = icmp ult float* %arrayidx.i.i.5, %scevgep318 - %bound1333 = icmp ugt i8* %uglygep315, %scevgep316317 - %found.conflict334 = and i1 %bound0332, %bound1333 - %bound0335 = icmp ult float* %scevgep320, %scevgep318 - %bound1336 = icmp ult float* %scevgep316, %scevgep322 - %found.conflict337 = and i1 %bound0335, %bound1336 - %conflict.rdx338 = or i1 %found.conflict334, %found.conflict337 - %bound0340 = icmp ult float* %arrayidx9.i.i.5, %scevgep318 - %bound1341 = icmp ugt i8* %uglygep326, %scevgep316317 - %found.conflict342 = and i1 %bound0340, %bound1341 - %conflict.rdx343 = or i1 %conflict.rdx338, %found.conflict342 - %bound0344 = icmp ult float* %scevgep327, %scevgep318 - %bound1345 = icmp ult float* %scevgep316, %scevgep329 - %found.conflict346 = and i1 %bound0344, %bound1345 - %conflict.rdx347 = or i1 %conflict.rdx343, %found.conflict346 - br i1 %conflict.rdx347, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph350 - -vector.ph350: ; preds = %vector.memcheck349 - %broadcast.splatinsert357 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat358 = shufflevector <8 x i64> %broadcast.splatinsert357, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert359 = insertelement <8 x i32> undef, i32 %27, i32 0 - %broadcast.splat360 = shufflevector <8 x i32> %broadcast.splatinsert359, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert361 = insertelement <8 x float*> undef, float* %arrayidx.i.i.5, i32 0 - %broadcast.splat362 = shufflevector <8 x float*> %broadcast.splatinsert361, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert365 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.5, i32 0 - %broadcast.splat366 = shufflevector <8 x float*> %broadcast.splatinsert365, <8 x float*> undef, <8 x i32> zeroinitializer - %536 = or <8 x i64> %broadcast.splat358, - %537 = trunc <8 x i64> %536 to <8 x i32> - %538 = icmp sgt <8 x i32> %broadcast.splat360, %537 - %wide.masked.gather363 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat362, i32 4, <8 x i1> %538, <8 x float> undef), !tbaa !12, !alias.scope !172, !noalias !175 - %539 = extractelement <8 x i64> %536, i32 0 - %540 = shl i64 %539, 32 - %541 = ashr exact i64 %540, 32 - %542 = getelementptr inbounds float, float* %11, i64 %541 - %543 = bitcast float* %542 to <8 x float>* - %wide.masked.load364 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %543, i32 4, <8 x i1> %538, <8 x float> undef), !tbaa !12, !alias.scope !177, !noalias !175 - %wide.masked.gather367 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat366, i32 4, <8 x i1> %538, <8 x float> undef), !tbaa !12, !alias.scope !179, !noalias !175 - %544 = getelementptr inbounds float, float* %15, i64 %541 - %545 = bitcast float* %544 to <8 x float>* - %wide.masked.load368 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %545, i32 4, <8 x i1> %538, <8 x float> undef), !tbaa !12, !alias.scope !181, !noalias !175 - %546 = fmul <8 x float> %wide.masked.gather367, %wide.masked.load368 - %547 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather363, <8 x float> %wide.masked.load364, <8 x float> %546) - %548 = extractelement <8 x i32> %537, i32 0 - %549 = add nsw i32 %mul.i.i.5, %548 - %550 = sext i32 %549 to i64 - %551 = getelementptr inbounds float, float* %7, i64 %550 - %552 = bitcast float* %551 to <8 x float>* - %wide.masked.load369 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %552, i32 4, <8 x i1> %538, <8 x float> undef), !tbaa !12, !alias.scope !175 - %553 = fadd <8 x float> %wide.masked.load369, %547 - %554 = bitcast float* %551 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %553, <8 x float>* %554, i32 4, <8 x i1> %538), !tbaa !12, !alias.scope !175, !llvm.access.group !27 - %555 = or <8 x i64> %broadcast.splat358, - %556 = trunc <8 x i64> %555 to <8 x i32> - %557 = icmp sgt <8 x i32> %broadcast.splat360, %556 - %wide.masked.gather363.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat362, i32 4, <8 x i1> %557, <8 x float> undef), !tbaa !12, !alias.scope !172, !noalias !175 - %558 = extractelement <8 x i64> %555, i32 0 - %559 = shl i64 %558, 32 - %560 = ashr exact i64 %559, 32 - %561 = getelementptr inbounds float, float* %11, i64 %560 - %562 = bitcast float* %561 to <8 x float>* - %wide.masked.load364.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %562, i32 4, <8 x i1> %557, <8 x float> undef), !tbaa !12, !alias.scope !177, !noalias !175 - %wide.masked.gather367.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat366, i32 4, <8 x i1> %557, <8 x float> undef), !tbaa !12, !alias.scope !179, !noalias !175 - %563 = getelementptr inbounds float, float* %15, i64 %560 - %564 = bitcast float* %563 to <8 x float>* - %wide.masked.load368.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %564, i32 4, <8 x i1> %557, <8 x float> undef), !tbaa !12, !alias.scope !181, !noalias !175 - %565 = fmul <8 x float> %wide.masked.gather367.1, %wide.masked.load368.1 - %566 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather363.1, <8 x float> %wide.masked.load364.1, <8 x float> %565) - %567 = extractelement <8 x i32> %556, i32 0 - %568 = add nsw i32 %mul.i.i.5, %567 - %569 = sext i32 %568 to i64 - %570 = getelementptr inbounds float, float* %7, i64 %569 - %571 = bitcast float* %570 to <8 x float>* - %wide.masked.load369.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %571, i32 4, <8 x i1> %557, <8 x float> undef), !tbaa !12, !alias.scope !175 - %572 = fadd <8 x float> %wide.masked.load369.1, %566 - %573 = bitcast float* %570 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %572, <8 x float>* %573, i32 4, <8 x i1> %557), !tbaa !12, !alias.scope !175, !llvm.access.group !27 - %574 = or <8 x i64> %broadcast.splat358, - %575 = trunc <8 x i64> %574 to <8 x i32> - %576 = icmp sgt <8 x i32> %broadcast.splat360, %575 - %wide.masked.gather363.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat362, i32 4, <8 x i1> %576, <8 x float> undef), !tbaa !12, !alias.scope !172, !noalias !175 - %577 = extractelement <8 x i64> %574, i32 0 - %578 = shl i64 %577, 32 - %579 = ashr exact i64 %578, 32 - %580 = getelementptr inbounds float, float* %11, i64 %579 - %581 = bitcast float* %580 to <8 x float>* - %wide.masked.load364.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %581, i32 4, <8 x i1> %576, <8 x float> undef), !tbaa !12, !alias.scope !177, !noalias !175 - %wide.masked.gather367.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat366, i32 4, <8 x i1> %576, <8 x float> undef), !tbaa !12, !alias.scope !179, !noalias !175 - %582 = getelementptr inbounds float, float* %15, i64 %579 - %583 = bitcast float* %582 to <8 x float>* - %wide.masked.load368.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %583, i32 4, <8 x i1> %576, <8 x float> undef), !tbaa !12, !alias.scope !181, !noalias !175 - %584 = fmul <8 x float> %wide.masked.gather367.2, %wide.masked.load368.2 - %585 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather363.2, <8 x float> %wide.masked.load364.2, <8 x float> %584) - %586 = extractelement <8 x i32> %575, i32 0 - %587 = add nsw i32 %mul.i.i.5, %586 - %588 = sext i32 %587 to i64 - %589 = getelementptr inbounds float, float* %7, i64 %588 - %590 = bitcast float* %589 to <8 x float>* - %wide.masked.load369.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %590, i32 4, <8 x i1> %576, <8 x float> undef), !tbaa !12, !alias.scope !175 - %591 = fadd <8 x float> %wide.masked.load369.2, %585 - %592 = bitcast float* %589 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %591, <8 x float>* %592, i32 4, <8 x i1> %576), !tbaa !12, !alias.scope !175, !llvm.access.group !27 - %593 = or <8 x i64> %broadcast.splat358, - %594 = trunc <8 x i64> %593 to <8 x i32> - %595 = icmp sgt <8 x i32> %broadcast.splat360, %594 - %wide.masked.gather363.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat362, i32 4, <8 x i1> %595, <8 x float> undef), !tbaa !12, !alias.scope !172, !noalias !175 - %596 = extractelement <8 x i64> %593, i32 0 - %597 = shl i64 %596, 32 - %598 = ashr exact i64 %597, 32 - %599 = getelementptr inbounds float, float* %11, i64 %598 - %600 = bitcast float* %599 to <8 x float>* - %wide.masked.load364.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %600, i32 4, <8 x i1> %595, <8 x float> undef), !tbaa !12, !alias.scope !177, !noalias !175 - %wide.masked.gather367.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat366, i32 4, <8 x i1> %595, <8 x float> undef), !tbaa !12, !alias.scope !179, !noalias !175 - %601 = getelementptr inbounds float, float* %15, i64 %598 - %602 = bitcast float* %601 to <8 x float>* - %wide.masked.load368.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %602, i32 4, <8 x i1> %595, <8 x float> undef), !tbaa !12, !alias.scope !181, !noalias !175 - %603 = fmul <8 x float> %wide.masked.gather367.3, %wide.masked.load368.3 - %604 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather363.3, <8 x float> %wide.masked.load364.3, <8 x float> %603) - %605 = extractelement <8 x i32> %594, i32 0 - %606 = add nsw i32 %mul.i.i.5, %605 - %607 = sext i32 %606 to i64 - %608 = getelementptr inbounds float, float* %7, i64 %607 - %609 = bitcast float* %608 to <8 x float>* - %wide.masked.load369.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %609, i32 4, <8 x i1> %595, <8 x float> undef), !tbaa !12, !alias.scope !175 - %610 = fadd <8 x float> %wide.masked.load369.3, %604 - %611 = bitcast float* %608 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %610, <8 x float>* %611, i32 4, <8 x i1> %595), !tbaa !12, !alias.scope !175, !llvm.access.group !27 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.r_exit.i.i.us.5.1, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ], [ %835, %if.end.r_exit.i.i.us.5.1 ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %27, %conv.i.i.us.5 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.r_exit.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %612 = load float, float* %arrayidx.i.i.5, align 4, !tbaa !12 - %sext26.i.i.us.5 = shl i64 %add1.i.i.i.us.5, 32 - %idxprom6.i.i.us.5 = ashr exact i64 %sext26.i.i.us.5, 32 - %arrayidx7.i.i.us.5 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.5 - %613 = load float, float* %arrayidx7.i.i.us.5, align 4, !tbaa !12 - %614 = load float, float* %arrayidx9.i.i.5, align 4, !tbaa !12 - %arrayidx11.i.i.us.5 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.5 - %615 = load float, float* %arrayidx11.i.i.us.5, align 4, !tbaa !12 - %mul12.i.i.us.5 = fmul float %614, %615 - %616 = tail call float @llvm.fmuladd.f32(float %612, float %613, float %mul12.i.i.us.5) #6 - %add.i.i.us.5 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom13.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx14.i.i.us.5 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.5 - %617 = load float, float* %arrayidx14.i.i.us.5, align 4, !tbaa !12 - %add15.i.i.us.5 = fadd float %617, %616 - store float %add15.i.i.us.5, float* %arrayidx14.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.5 - -if.end.r_exit.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %618 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %618, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %27, %conv.i.i.us.5.1 - br i1 %cmp4.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.r_exit.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.r_exit.i.i.us.5.1 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph350, %pregion_for_end.i.i.4 - %add6.i.i.i.6 = or i64 %mul3.i.i.i, 6 - %conv2.i.i.6 = trunc i64 %add6.i.i.i.6 to i32 - %cmp.i.i.6 = icmp sgt i32 %27, %conv2.i.i.6 - %sext.i.i.6 = shl i64 %add6.i.i.i.6, 32 - %idxprom.i.i.6 = ashr exact i64 %sext.i.i.6, 32 - %arrayidx.i.i.6 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.6 - %arrayidx9.i.i.6 = getelementptr inbounds float, float* %23, i64 %idxprom.i.i.6 - %mul.i.i.6 = mul nsw i32 %27, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck377, label %pregion_for_end.i.i.6 - -vector.scevcheck377: ; preds = %pregion_for_end.i.i.5 - %619 = mul i32 %27, %conv2.i.i.6 - %620 = trunc i64 %2 to i32 - %621 = shl i32 %620, 5 - %622 = add i32 %619, %621 - %623 = icmp sgt i32 %622, 2147483616 - br i1 %623, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.memcheck415 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.memcheck415, %vector.scevcheck377 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.memcheck415: ; preds = %vector.scevcheck377 - %sext502 = shl i64 %3, 35 - %624 = ashr exact i64 %sext502, 32 - %625 = or i64 %624, 6 - %scevgep379 = getelementptr float, float* %19, i64 %625 - %scevgep379380 = bitcast float* %scevgep379 to i8* - %uglygep381 = getelementptr i8, i8* %scevgep379380, i64 1 - %626 = mul i32 %27, %conv2.i.i.6 - %627 = trunc i64 %2 to i32 - %628 = shl i32 %627, 5 - %629 = add i32 %626, %628 - %630 = sext i32 %629 to i64 - %scevgep382 = getelementptr float, float* %7, i64 %630 - %scevgep382383 = bitcast float* %scevgep382 to i8* - %631 = add nsw i64 %630, 32 - %scevgep384 = getelementptr float, float* %7, i64 %631 - %632 = sext i32 %628 to i64 - %scevgep386 = getelementptr float, float* %11, i64 %632 - %633 = add nsw i64 %632, 32 - %scevgep388 = getelementptr float, float* %11, i64 %633 - %scevgep390 = getelementptr float, float* %23, i64 %625 - %scevgep390391 = bitcast float* %scevgep390 to i8* - %uglygep392 = getelementptr i8, i8* %scevgep390391, i64 1 - %scevgep393 = getelementptr float, float* %15, i64 %632 - %scevgep395 = getelementptr float, float* %15, i64 %633 - %bound0398 = icmp ult float* %arrayidx.i.i.6, %scevgep384 - %bound1399 = icmp ugt i8* %uglygep381, %scevgep382383 - %found.conflict400 = and i1 %bound0398, %bound1399 - %bound0401 = icmp ult float* %scevgep386, %scevgep384 - %bound1402 = icmp ult float* %scevgep382, %scevgep388 - %found.conflict403 = and i1 %bound0401, %bound1402 - %conflict.rdx404 = or i1 %found.conflict400, %found.conflict403 - %bound0406 = icmp ult float* %arrayidx9.i.i.6, %scevgep384 - %bound1407 = icmp ugt i8* %uglygep392, %scevgep382383 - %found.conflict408 = and i1 %bound0406, %bound1407 - %conflict.rdx409 = or i1 %conflict.rdx404, %found.conflict408 - %bound0410 = icmp ult float* %scevgep393, %scevgep384 - %bound1411 = icmp ult float* %scevgep382, %scevgep395 - %found.conflict412 = and i1 %bound0410, %bound1411 - %conflict.rdx413 = or i1 %conflict.rdx409, %found.conflict412 - br i1 %conflict.rdx413, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph416 - -vector.ph416: ; preds = %vector.memcheck415 - %broadcast.splatinsert423 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat424 = shufflevector <8 x i64> %broadcast.splatinsert423, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert425 = insertelement <8 x i32> undef, i32 %27, i32 0 - %broadcast.splat426 = shufflevector <8 x i32> %broadcast.splatinsert425, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert427 = insertelement <8 x float*> undef, float* %arrayidx.i.i.6, i32 0 - %broadcast.splat428 = shufflevector <8 x float*> %broadcast.splatinsert427, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert431 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.6, i32 0 - %broadcast.splat432 = shufflevector <8 x float*> %broadcast.splatinsert431, <8 x float*> undef, <8 x i32> zeroinitializer - %634 = or <8 x i64> %broadcast.splat424, - %635 = trunc <8 x i64> %634 to <8 x i32> - %636 = icmp sgt <8 x i32> %broadcast.splat426, %635 - %wide.masked.gather429 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat428, i32 4, <8 x i1> %636, <8 x float> undef), !tbaa !12, !alias.scope !183, !noalias !186 - %637 = extractelement <8 x i64> %634, i32 0 - %638 = shl i64 %637, 32 - %639 = ashr exact i64 %638, 32 - %640 = getelementptr inbounds float, float* %11, i64 %639 - %641 = bitcast float* %640 to <8 x float>* - %wide.masked.load430 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %641, i32 4, <8 x i1> %636, <8 x float> undef), !tbaa !12, !alias.scope !188, !noalias !186 - %wide.masked.gather433 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat432, i32 4, <8 x i1> %636, <8 x float> undef), !tbaa !12, !alias.scope !190, !noalias !186 - %642 = getelementptr inbounds float, float* %15, i64 %639 - %643 = bitcast float* %642 to <8 x float>* - %wide.masked.load434 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %643, i32 4, <8 x i1> %636, <8 x float> undef), !tbaa !12, !alias.scope !192, !noalias !186 - %644 = fmul <8 x float> %wide.masked.gather433, %wide.masked.load434 - %645 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather429, <8 x float> %wide.masked.load430, <8 x float> %644) - %646 = extractelement <8 x i32> %635, i32 0 - %647 = add nsw i32 %mul.i.i.6, %646 - %648 = sext i32 %647 to i64 - %649 = getelementptr inbounds float, float* %7, i64 %648 - %650 = bitcast float* %649 to <8 x float>* - %wide.masked.load435 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %650, i32 4, <8 x i1> %636, <8 x float> undef), !tbaa !12, !alias.scope !186 - %651 = fadd <8 x float> %wide.masked.load435, %645 - %652 = bitcast float* %649 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %651, <8 x float>* %652, i32 4, <8 x i1> %636), !tbaa !12, !alias.scope !186, !llvm.access.group !27 - %653 = or <8 x i64> %broadcast.splat424, - %654 = trunc <8 x i64> %653 to <8 x i32> - %655 = icmp sgt <8 x i32> %broadcast.splat426, %654 - %wide.masked.gather429.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat428, i32 4, <8 x i1> %655, <8 x float> undef), !tbaa !12, !alias.scope !183, !noalias !186 - %656 = extractelement <8 x i64> %653, i32 0 - %657 = shl i64 %656, 32 - %658 = ashr exact i64 %657, 32 - %659 = getelementptr inbounds float, float* %11, i64 %658 - %660 = bitcast float* %659 to <8 x float>* - %wide.masked.load430.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %660, i32 4, <8 x i1> %655, <8 x float> undef), !tbaa !12, !alias.scope !188, !noalias !186 - %wide.masked.gather433.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat432, i32 4, <8 x i1> %655, <8 x float> undef), !tbaa !12, !alias.scope !190, !noalias !186 - %661 = getelementptr inbounds float, float* %15, i64 %658 - %662 = bitcast float* %661 to <8 x float>* - %wide.masked.load434.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %662, i32 4, <8 x i1> %655, <8 x float> undef), !tbaa !12, !alias.scope !192, !noalias !186 - %663 = fmul <8 x float> %wide.masked.gather433.1, %wide.masked.load434.1 - %664 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather429.1, <8 x float> %wide.masked.load430.1, <8 x float> %663) - %665 = extractelement <8 x i32> %654, i32 0 - %666 = add nsw i32 %mul.i.i.6, %665 - %667 = sext i32 %666 to i64 - %668 = getelementptr inbounds float, float* %7, i64 %667 - %669 = bitcast float* %668 to <8 x float>* - %wide.masked.load435.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %669, i32 4, <8 x i1> %655, <8 x float> undef), !tbaa !12, !alias.scope !186 - %670 = fadd <8 x float> %wide.masked.load435.1, %664 - %671 = bitcast float* %668 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %670, <8 x float>* %671, i32 4, <8 x i1> %655), !tbaa !12, !alias.scope !186, !llvm.access.group !27 - %672 = or <8 x i64> %broadcast.splat424, - %673 = trunc <8 x i64> %672 to <8 x i32> - %674 = icmp sgt <8 x i32> %broadcast.splat426, %673 - %wide.masked.gather429.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat428, i32 4, <8 x i1> %674, <8 x float> undef), !tbaa !12, !alias.scope !183, !noalias !186 - %675 = extractelement <8 x i64> %672, i32 0 - %676 = shl i64 %675, 32 - %677 = ashr exact i64 %676, 32 - %678 = getelementptr inbounds float, float* %11, i64 %677 - %679 = bitcast float* %678 to <8 x float>* - %wide.masked.load430.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %679, i32 4, <8 x i1> %674, <8 x float> undef), !tbaa !12, !alias.scope !188, !noalias !186 - %wide.masked.gather433.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat432, i32 4, <8 x i1> %674, <8 x float> undef), !tbaa !12, !alias.scope !190, !noalias !186 - %680 = getelementptr inbounds float, float* %15, i64 %677 - %681 = bitcast float* %680 to <8 x float>* - %wide.masked.load434.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %681, i32 4, <8 x i1> %674, <8 x float> undef), !tbaa !12, !alias.scope !192, !noalias !186 - %682 = fmul <8 x float> %wide.masked.gather433.2, %wide.masked.load434.2 - %683 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather429.2, <8 x float> %wide.masked.load430.2, <8 x float> %682) - %684 = extractelement <8 x i32> %673, i32 0 - %685 = add nsw i32 %mul.i.i.6, %684 - %686 = sext i32 %685 to i64 - %687 = getelementptr inbounds float, float* %7, i64 %686 - %688 = bitcast float* %687 to <8 x float>* - %wide.masked.load435.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %688, i32 4, <8 x i1> %674, <8 x float> undef), !tbaa !12, !alias.scope !186 - %689 = fadd <8 x float> %wide.masked.load435.2, %683 - %690 = bitcast float* %687 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %689, <8 x float>* %690, i32 4, <8 x i1> %674), !tbaa !12, !alias.scope !186, !llvm.access.group !27 - %691 = or <8 x i64> %broadcast.splat424, - %692 = trunc <8 x i64> %691 to <8 x i32> - %693 = icmp sgt <8 x i32> %broadcast.splat426, %692 - %wide.masked.gather429.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat428, i32 4, <8 x i1> %693, <8 x float> undef), !tbaa !12, !alias.scope !183, !noalias !186 - %694 = extractelement <8 x i64> %691, i32 0 - %695 = shl i64 %694, 32 - %696 = ashr exact i64 %695, 32 - %697 = getelementptr inbounds float, float* %11, i64 %696 - %698 = bitcast float* %697 to <8 x float>* - %wide.masked.load430.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %698, i32 4, <8 x i1> %693, <8 x float> undef), !tbaa !12, !alias.scope !188, !noalias !186 - %wide.masked.gather433.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat432, i32 4, <8 x i1> %693, <8 x float> undef), !tbaa !12, !alias.scope !190, !noalias !186 - %699 = getelementptr inbounds float, float* %15, i64 %696 - %700 = bitcast float* %699 to <8 x float>* - %wide.masked.load434.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %700, i32 4, <8 x i1> %693, <8 x float> undef), !tbaa !12, !alias.scope !192, !noalias !186 - %701 = fmul <8 x float> %wide.masked.gather433.3, %wide.masked.load434.3 - %702 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather429.3, <8 x float> %wide.masked.load430.3, <8 x float> %701) - %703 = extractelement <8 x i32> %692, i32 0 - %704 = add nsw i32 %mul.i.i.6, %703 - %705 = sext i32 %704 to i64 - %706 = getelementptr inbounds float, float* %7, i64 %705 - %707 = bitcast float* %706 to <8 x float>* - %wide.masked.load435.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %707, i32 4, <8 x i1> %693, <8 x float> undef), !tbaa !12, !alias.scope !186 - %708 = fadd <8 x float> %wide.masked.load435.3, %702 - %709 = bitcast float* %706 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %708, <8 x float>* %709, i32 4, <8 x i1> %693), !tbaa !12, !alias.scope !186, !llvm.access.group !27 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.r_exit.i.i.us.6.1, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ], [ %828, %if.end.r_exit.i.i.us.6.1 ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %27, %conv.i.i.us.6 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.r_exit.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %710 = load float, float* %arrayidx.i.i.6, align 4, !tbaa !12 - %sext26.i.i.us.6 = shl i64 %add1.i.i.i.us.6, 32 - %idxprom6.i.i.us.6 = ashr exact i64 %sext26.i.i.us.6, 32 - %arrayidx7.i.i.us.6 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.6 - %711 = load float, float* %arrayidx7.i.i.us.6, align 4, !tbaa !12 - %712 = load float, float* %arrayidx9.i.i.6, align 4, !tbaa !12 - %arrayidx11.i.i.us.6 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.6 - %713 = load float, float* %arrayidx11.i.i.us.6, align 4, !tbaa !12 - %mul12.i.i.us.6 = fmul float %712, %713 - %714 = tail call float @llvm.fmuladd.f32(float %710, float %711, float %mul12.i.i.us.6) #6 - %add.i.i.us.6 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom13.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx14.i.i.us.6 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.6 - %715 = load float, float* %arrayidx14.i.i.us.6, align 4, !tbaa !12 - %add15.i.i.us.6 = fadd float %715, %714 - store float %add15.i.i.us.6, float* %arrayidx14.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.6 - -if.end.r_exit.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %716 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %716, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %27, %conv.i.i.us.6.1 - br i1 %cmp4.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.r_exit.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.r_exit.i.i.us.6.1 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph416, %pregion_for_end.i.i.5 - %add6.i.i.i.7 = or i64 %mul3.i.i.i, 7 - %conv2.i.i.7 = trunc i64 %add6.i.i.i.7 to i32 - %cmp.i.i.7 = icmp sgt i32 %27, %conv2.i.i.7 - %sext.i.i.7 = shl i64 %add6.i.i.i.7, 32 - %idxprom.i.i.7 = ashr exact i64 %sext.i.i.7, 32 - %arrayidx.i.i.7 = getelementptr inbounds float, float* %19, i64 %idxprom.i.i.7 - %arrayidx9.i.i.7 = getelementptr inbounds float, float* %23, i64 %idxprom.i.i.7 - %mul.i.i.7 = mul nsw i32 %27, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck443, label %pregion_for_end.i.i.7 - -vector.scevcheck443: ; preds = %pregion_for_end.i.i.6 - %717 = mul i32 %27, %conv2.i.i.7 - %718 = trunc i64 %2 to i32 - %719 = shl i32 %718, 5 - %720 = add i32 %717, %719 - %721 = icmp sgt i32 %720, 2147483616 - br i1 %721, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.memcheck481 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.memcheck481, %vector.scevcheck443 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.memcheck481: ; preds = %vector.scevcheck443 - %sext = shl i64 %3, 35 - %722 = ashr exact i64 %sext, 32 - %723 = or i64 %722, 7 - %scevgep445 = getelementptr float, float* %19, i64 %723 - %scevgep445446 = bitcast float* %scevgep445 to i8* - %uglygep447 = getelementptr i8, i8* %scevgep445446, i64 1 - %724 = mul i32 %27, %conv2.i.i.7 - %725 = trunc i64 %2 to i32 - %726 = shl i32 %725, 5 - %727 = add i32 %724, %726 - %728 = sext i32 %727 to i64 - %scevgep448 = getelementptr float, float* %7, i64 %728 - %scevgep448449 = bitcast float* %scevgep448 to i8* - %729 = add nsw i64 %728, 32 - %scevgep450 = getelementptr float, float* %7, i64 %729 - %730 = sext i32 %726 to i64 - %scevgep452 = getelementptr float, float* %11, i64 %730 - %731 = add nsw i64 %730, 32 - %scevgep454 = getelementptr float, float* %11, i64 %731 - %scevgep456 = getelementptr float, float* %23, i64 %723 - %scevgep456457 = bitcast float* %scevgep456 to i8* - %uglygep458 = getelementptr i8, i8* %scevgep456457, i64 1 - %scevgep459 = getelementptr float, float* %15, i64 %730 - %scevgep461 = getelementptr float, float* %15, i64 %731 - %bound0464 = icmp ult float* %arrayidx.i.i.7, %scevgep450 - %bound1465 = icmp ugt i8* %uglygep447, %scevgep448449 - %found.conflict466 = and i1 %bound0464, %bound1465 - %bound0467 = icmp ult float* %scevgep452, %scevgep450 - %bound1468 = icmp ult float* %scevgep448, %scevgep454 - %found.conflict469 = and i1 %bound0467, %bound1468 - %conflict.rdx470 = or i1 %found.conflict466, %found.conflict469 - %bound0472 = icmp ult float* %arrayidx9.i.i.7, %scevgep450 - %bound1473 = icmp ugt i8* %uglygep458, %scevgep448449 - %found.conflict474 = and i1 %bound0472, %bound1473 - %conflict.rdx475 = or i1 %conflict.rdx470, %found.conflict474 - %bound0476 = icmp ult float* %scevgep459, %scevgep450 - %bound1477 = icmp ult float* %scevgep448, %scevgep461 - %found.conflict478 = and i1 %bound0476, %bound1477 - %conflict.rdx479 = or i1 %conflict.rdx475, %found.conflict478 - br i1 %conflict.rdx479, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph482 - -vector.ph482: ; preds = %vector.memcheck481 - %broadcast.splatinsert489 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat490 = shufflevector <8 x i64> %broadcast.splatinsert489, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert491 = insertelement <8 x i32> undef, i32 %27, i32 0 - %broadcast.splat492 = shufflevector <8 x i32> %broadcast.splatinsert491, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert493 = insertelement <8 x float*> undef, float* %arrayidx.i.i.7, i32 0 - %broadcast.splat494 = shufflevector <8 x float*> %broadcast.splatinsert493, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert497 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.7, i32 0 - %broadcast.splat498 = shufflevector <8 x float*> %broadcast.splatinsert497, <8 x float*> undef, <8 x i32> zeroinitializer - %732 = or <8 x i64> %broadcast.splat490, - %733 = trunc <8 x i64> %732 to <8 x i32> - %734 = icmp sgt <8 x i32> %broadcast.splat492, %733 - %wide.masked.gather495 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat494, i32 4, <8 x i1> %734, <8 x float> undef), !tbaa !12, !alias.scope !194, !noalias !197 - %735 = extractelement <8 x i64> %732, i32 0 - %736 = shl i64 %735, 32 - %737 = ashr exact i64 %736, 32 - %738 = getelementptr inbounds float, float* %11, i64 %737 - %739 = bitcast float* %738 to <8 x float>* - %wide.masked.load496 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %739, i32 4, <8 x i1> %734, <8 x float> undef), !tbaa !12, !alias.scope !199, !noalias !197 - %wide.masked.gather499 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat498, i32 4, <8 x i1> %734, <8 x float> undef), !tbaa !12, !alias.scope !201, !noalias !197 - %740 = getelementptr inbounds float, float* %15, i64 %737 - %741 = bitcast float* %740 to <8 x float>* - %wide.masked.load500 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %741, i32 4, <8 x i1> %734, <8 x float> undef), !tbaa !12, !alias.scope !203, !noalias !197 - %742 = fmul <8 x float> %wide.masked.gather499, %wide.masked.load500 - %743 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather495, <8 x float> %wide.masked.load496, <8 x float> %742) - %744 = extractelement <8 x i32> %733, i32 0 - %745 = add nsw i32 %mul.i.i.7, %744 - %746 = sext i32 %745 to i64 - %747 = getelementptr inbounds float, float* %7, i64 %746 - %748 = bitcast float* %747 to <8 x float>* - %wide.masked.load501 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %748, i32 4, <8 x i1> %734, <8 x float> undef), !tbaa !12, !alias.scope !197 - %749 = fadd <8 x float> %wide.masked.load501, %743 - %750 = bitcast float* %747 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %749, <8 x float>* %750, i32 4, <8 x i1> %734), !tbaa !12, !alias.scope !197, !llvm.access.group !27 - %751 = or <8 x i64> %broadcast.splat490, - %752 = trunc <8 x i64> %751 to <8 x i32> - %753 = icmp sgt <8 x i32> %broadcast.splat492, %752 - %wide.masked.gather495.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat494, i32 4, <8 x i1> %753, <8 x float> undef), !tbaa !12, !alias.scope !194, !noalias !197 - %754 = extractelement <8 x i64> %751, i32 0 - %755 = shl i64 %754, 32 - %756 = ashr exact i64 %755, 32 - %757 = getelementptr inbounds float, float* %11, i64 %756 - %758 = bitcast float* %757 to <8 x float>* - %wide.masked.load496.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %758, i32 4, <8 x i1> %753, <8 x float> undef), !tbaa !12, !alias.scope !199, !noalias !197 - %wide.masked.gather499.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat498, i32 4, <8 x i1> %753, <8 x float> undef), !tbaa !12, !alias.scope !201, !noalias !197 - %759 = getelementptr inbounds float, float* %15, i64 %756 - %760 = bitcast float* %759 to <8 x float>* - %wide.masked.load500.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %760, i32 4, <8 x i1> %753, <8 x float> undef), !tbaa !12, !alias.scope !203, !noalias !197 - %761 = fmul <8 x float> %wide.masked.gather499.1, %wide.masked.load500.1 - %762 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather495.1, <8 x float> %wide.masked.load496.1, <8 x float> %761) - %763 = extractelement <8 x i32> %752, i32 0 - %764 = add nsw i32 %mul.i.i.7, %763 - %765 = sext i32 %764 to i64 - %766 = getelementptr inbounds float, float* %7, i64 %765 - %767 = bitcast float* %766 to <8 x float>* - %wide.masked.load501.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %767, i32 4, <8 x i1> %753, <8 x float> undef), !tbaa !12, !alias.scope !197 - %768 = fadd <8 x float> %wide.masked.load501.1, %762 - %769 = bitcast float* %766 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %768, <8 x float>* %769, i32 4, <8 x i1> %753), !tbaa !12, !alias.scope !197, !llvm.access.group !27 - %770 = or <8 x i64> %broadcast.splat490, - %771 = trunc <8 x i64> %770 to <8 x i32> - %772 = icmp sgt <8 x i32> %broadcast.splat492, %771 - %wide.masked.gather495.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat494, i32 4, <8 x i1> %772, <8 x float> undef), !tbaa !12, !alias.scope !194, !noalias !197 - %773 = extractelement <8 x i64> %770, i32 0 - %774 = shl i64 %773, 32 - %775 = ashr exact i64 %774, 32 - %776 = getelementptr inbounds float, float* %11, i64 %775 - %777 = bitcast float* %776 to <8 x float>* - %wide.masked.load496.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %777, i32 4, <8 x i1> %772, <8 x float> undef), !tbaa !12, !alias.scope !199, !noalias !197 - %wide.masked.gather499.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat498, i32 4, <8 x i1> %772, <8 x float> undef), !tbaa !12, !alias.scope !201, !noalias !197 - %778 = getelementptr inbounds float, float* %15, i64 %775 - %779 = bitcast float* %778 to <8 x float>* - %wide.masked.load500.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %779, i32 4, <8 x i1> %772, <8 x float> undef), !tbaa !12, !alias.scope !203, !noalias !197 - %780 = fmul <8 x float> %wide.masked.gather499.2, %wide.masked.load500.2 - %781 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather495.2, <8 x float> %wide.masked.load496.2, <8 x float> %780) - %782 = extractelement <8 x i32> %771, i32 0 - %783 = add nsw i32 %mul.i.i.7, %782 - %784 = sext i32 %783 to i64 - %785 = getelementptr inbounds float, float* %7, i64 %784 - %786 = bitcast float* %785 to <8 x float>* - %wide.masked.load501.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %786, i32 4, <8 x i1> %772, <8 x float> undef), !tbaa !12, !alias.scope !197 - %787 = fadd <8 x float> %wide.masked.load501.2, %781 - %788 = bitcast float* %785 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %787, <8 x float>* %788, i32 4, <8 x i1> %772), !tbaa !12, !alias.scope !197, !llvm.access.group !27 - %789 = or <8 x i64> %broadcast.splat490, - %790 = trunc <8 x i64> %789 to <8 x i32> - %791 = icmp sgt <8 x i32> %broadcast.splat492, %790 - %wide.masked.gather495.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat494, i32 4, <8 x i1> %791, <8 x float> undef), !tbaa !12, !alias.scope !194, !noalias !197 - %792 = extractelement <8 x i64> %789, i32 0 - %793 = shl i64 %792, 32 - %794 = ashr exact i64 %793, 32 - %795 = getelementptr inbounds float, float* %11, i64 %794 - %796 = bitcast float* %795 to <8 x float>* - %wide.masked.load496.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %796, i32 4, <8 x i1> %791, <8 x float> undef), !tbaa !12, !alias.scope !199, !noalias !197 - %wide.masked.gather499.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat498, i32 4, <8 x i1> %791, <8 x float> undef), !tbaa !12, !alias.scope !201, !noalias !197 - %797 = getelementptr inbounds float, float* %15, i64 %794 - %798 = bitcast float* %797 to <8 x float>* - %wide.masked.load500.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %798, i32 4, <8 x i1> %791, <8 x float> undef), !tbaa !12, !alias.scope !203, !noalias !197 - %799 = fmul <8 x float> %wide.masked.gather499.3, %wide.masked.load500.3 - %800 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather495.3, <8 x float> %wide.masked.load496.3, <8 x float> %799) - %801 = extractelement <8 x i32> %790, i32 0 - %802 = add nsw i32 %mul.i.i.7, %801 - %803 = sext i32 %802 to i64 - %804 = getelementptr inbounds float, float* %7, i64 %803 - %805 = bitcast float* %804 to <8 x float>* - %wide.masked.load501.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %805, i32 4, <8 x i1> %791, <8 x float> undef), !tbaa !12, !alias.scope !197 - %806 = fadd <8 x float> %wide.masked.load501.3, %800 - %807 = bitcast float* %804 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %806, <8 x float>* %807, i32 4, <8 x i1> %791), !tbaa !12, !alias.scope !197, !llvm.access.group !27 - br label %pregion_for_end.i.i.7 - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.r_exit.i.i.us.7.1, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ], [ %821, %if.end.r_exit.i.i.us.7.1 ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %27, %conv.i.i.us.7 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.r_exit.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %808 = load float, float* %arrayidx.i.i.7, align 4, !tbaa !12 - %sext26.i.i.us.7 = shl i64 %add1.i.i.i.us.7, 32 - %idxprom6.i.i.us.7 = ashr exact i64 %sext26.i.i.us.7, 32 - %arrayidx7.i.i.us.7 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.7 - %809 = load float, float* %arrayidx7.i.i.us.7, align 4, !tbaa !12 - %810 = load float, float* %arrayidx9.i.i.7, align 4, !tbaa !12 - %arrayidx11.i.i.us.7 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.7 - %811 = load float, float* %arrayidx11.i.i.us.7, align 4, !tbaa !12 - %mul12.i.i.us.7 = fmul float %810, %811 - %812 = tail call float @llvm.fmuladd.f32(float %808, float %809, float %mul12.i.i.us.7) #6 - %add.i.i.us.7 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom13.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx14.i.i.us.7 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.7 - %813 = load float, float* %arrayidx14.i.i.us.7, align 4, !tbaa !12 - %add15.i.i.us.7 = fadd float %813, %812 - store float %add15.i.i.us.7, float* %arrayidx14.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.7 - -if.end.r_exit.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %814 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %814, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %27, %conv.i.i.us.7.1 - br i1 %cmp4.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.r_exit.i.i.us.7.1 - -pregion_for_end.i.i.7.loopexit: ; preds = %if.end.r_exit.i.i.us.7.1 - br label %pregion_for_end.i.i.7 - -pregion_for_end.i.i.7: ; preds = %pregion_for_end.i.i.7.loopexit, %vector.ph482, %pregion_for_end.i.i.6 - ret void - -if.then.i.i.us.7.1: ; preds = %if.end.r_exit.i.i.us.7 - %815 = load float, float* %arrayidx.i.i.7, align 4, !tbaa !12 - %sext26.i.i.us.7.1 = shl i64 %add1.i.i.i.us.7.1, 32 - %idxprom6.i.i.us.7.1 = ashr exact i64 %sext26.i.i.us.7.1, 32 - %arrayidx7.i.i.us.7.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.7.1 - %816 = load float, float* %arrayidx7.i.i.us.7.1, align 4, !tbaa !12 - %817 = load float, float* %arrayidx9.i.i.7, align 4, !tbaa !12 - %arrayidx11.i.i.us.7.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.7.1 - %818 = load float, float* %arrayidx11.i.i.us.7.1, align 4, !tbaa !12 - %mul12.i.i.us.7.1 = fmul float %817, %818 - %819 = tail call float @llvm.fmuladd.f32(float %815, float %816, float %mul12.i.i.us.7.1) #6 - %add.i.i.us.7.1 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom13.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx14.i.i.us.7.1 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.7.1 - %820 = load float, float* %arrayidx14.i.i.us.7.1, align 4, !tbaa !12 - %add15.i.i.us.7.1 = fadd float %820, %819 - store float %add15.i.i.us.7.1, float* %arrayidx14.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.7.1 - -if.end.r_exit.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.r_exit.i.i.us.7 - %821 = add nuw nsw i64 %_local_id_x.i.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %821, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.i.7.loopexit, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !205 - -if.then.i.i.us.6.1: ; preds = %if.end.r_exit.i.i.us.6 - %822 = load float, float* %arrayidx.i.i.6, align 4, !tbaa !12 - %sext26.i.i.us.6.1 = shl i64 %add1.i.i.i.us.6.1, 32 - %idxprom6.i.i.us.6.1 = ashr exact i64 %sext26.i.i.us.6.1, 32 - %arrayidx7.i.i.us.6.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.6.1 - %823 = load float, float* %arrayidx7.i.i.us.6.1, align 4, !tbaa !12 - %824 = load float, float* %arrayidx9.i.i.6, align 4, !tbaa !12 - %arrayidx11.i.i.us.6.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.6.1 - %825 = load float, float* %arrayidx11.i.i.us.6.1, align 4, !tbaa !12 - %mul12.i.i.us.6.1 = fmul float %824, %825 - %826 = tail call float @llvm.fmuladd.f32(float %822, float %823, float %mul12.i.i.us.6.1) #6 - %add.i.i.us.6.1 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom13.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx14.i.i.us.6.1 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.6.1 - %827 = load float, float* %arrayidx14.i.i.us.6.1, align 4, !tbaa !12 - %add15.i.i.us.6.1 = fadd float %827, %826 - store float %add15.i.i.us.6.1, float* %arrayidx14.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.6.1 - -if.end.r_exit.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.r_exit.i.i.us.6 - %828 = add nuw nsw i64 %_local_id_x.i.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %828, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !206 - -if.then.i.i.us.5.1: ; preds = %if.end.r_exit.i.i.us.5 - %829 = load float, float* %arrayidx.i.i.5, align 4, !tbaa !12 - %sext26.i.i.us.5.1 = shl i64 %add1.i.i.i.us.5.1, 32 - %idxprom6.i.i.us.5.1 = ashr exact i64 %sext26.i.i.us.5.1, 32 - %arrayidx7.i.i.us.5.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.5.1 - %830 = load float, float* %arrayidx7.i.i.us.5.1, align 4, !tbaa !12 - %831 = load float, float* %arrayidx9.i.i.5, align 4, !tbaa !12 - %arrayidx11.i.i.us.5.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.5.1 - %832 = load float, float* %arrayidx11.i.i.us.5.1, align 4, !tbaa !12 - %mul12.i.i.us.5.1 = fmul float %831, %832 - %833 = tail call float @llvm.fmuladd.f32(float %829, float %830, float %mul12.i.i.us.5.1) #6 - %add.i.i.us.5.1 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom13.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx14.i.i.us.5.1 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.5.1 - %834 = load float, float* %arrayidx14.i.i.us.5.1, align 4, !tbaa !12 - %add15.i.i.us.5.1 = fadd float %834, %833 - store float %add15.i.i.us.5.1, float* %arrayidx14.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.5.1 - -if.end.r_exit.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.r_exit.i.i.us.5 - %835 = add nuw nsw i64 %_local_id_x.i.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %835, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !207 - -if.then.i.i.us.4.1: ; preds = %if.end.r_exit.i.i.us.4 - %836 = load float, float* %arrayidx.i.i.4, align 4, !tbaa !12 - %sext26.i.i.us.4.1 = shl i64 %add1.i.i.i.us.4.1, 32 - %idxprom6.i.i.us.4.1 = ashr exact i64 %sext26.i.i.us.4.1, 32 - %arrayidx7.i.i.us.4.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.4.1 - %837 = load float, float* %arrayidx7.i.i.us.4.1, align 4, !tbaa !12 - %838 = load float, float* %arrayidx9.i.i.4, align 4, !tbaa !12 - %arrayidx11.i.i.us.4.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.4.1 - %839 = load float, float* %arrayidx11.i.i.us.4.1, align 4, !tbaa !12 - %mul12.i.i.us.4.1 = fmul float %838, %839 - %840 = tail call float @llvm.fmuladd.f32(float %836, float %837, float %mul12.i.i.us.4.1) #6 - %add.i.i.us.4.1 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom13.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx14.i.i.us.4.1 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.4.1 - %841 = load float, float* %arrayidx14.i.i.us.4.1, align 4, !tbaa !12 - %add15.i.i.us.4.1 = fadd float %841, %840 - store float %add15.i.i.us.4.1, float* %arrayidx14.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.4.1 - -if.end.r_exit.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.r_exit.i.i.us.4 - %842 = add nuw nsw i64 %_local_id_x.i.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %842, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !208 - -if.then.i.i.us.3.1: ; preds = %if.end.r_exit.i.i.us.3 - %843 = load float, float* %arrayidx.i.i.3, align 4, !tbaa !12 - %sext26.i.i.us.3.1 = shl i64 %add1.i.i.i.us.3.1, 32 - %idxprom6.i.i.us.3.1 = ashr exact i64 %sext26.i.i.us.3.1, 32 - %arrayidx7.i.i.us.3.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.3.1 - %844 = load float, float* %arrayidx7.i.i.us.3.1, align 4, !tbaa !12 - %845 = load float, float* %arrayidx9.i.i.3, align 4, !tbaa !12 - %arrayidx11.i.i.us.3.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.3.1 - %846 = load float, float* %arrayidx11.i.i.us.3.1, align 4, !tbaa !12 - %mul12.i.i.us.3.1 = fmul float %845, %846 - %847 = tail call float @llvm.fmuladd.f32(float %843, float %844, float %mul12.i.i.us.3.1) #6 - %add.i.i.us.3.1 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom13.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx14.i.i.us.3.1 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.3.1 - %848 = load float, float* %arrayidx14.i.i.us.3.1, align 4, !tbaa !12 - %add15.i.i.us.3.1 = fadd float %848, %847 - store float %add15.i.i.us.3.1, float* %arrayidx14.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.3.1 - -if.end.r_exit.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.r_exit.i.i.us.3 - %849 = add nuw nsw i64 %_local_id_x.i.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %849, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !209 - -if.then.i.i.us.2.1: ; preds = %if.end.r_exit.i.i.us.2 - %850 = load float, float* %arrayidx.i.i.2, align 4, !tbaa !12 - %sext26.i.i.us.2.1 = shl i64 %add1.i.i.i.us.2.1, 32 - %idxprom6.i.i.us.2.1 = ashr exact i64 %sext26.i.i.us.2.1, 32 - %arrayidx7.i.i.us.2.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.2.1 - %851 = load float, float* %arrayidx7.i.i.us.2.1, align 4, !tbaa !12 - %852 = load float, float* %arrayidx9.i.i.2, align 4, !tbaa !12 - %arrayidx11.i.i.us.2.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.2.1 - %853 = load float, float* %arrayidx11.i.i.us.2.1, align 4, !tbaa !12 - %mul12.i.i.us.2.1 = fmul float %852, %853 - %854 = tail call float @llvm.fmuladd.f32(float %850, float %851, float %mul12.i.i.us.2.1) #6 - %add.i.i.us.2.1 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom13.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx14.i.i.us.2.1 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.2.1 - %855 = load float, float* %arrayidx14.i.i.us.2.1, align 4, !tbaa !12 - %add15.i.i.us.2.1 = fadd float %855, %854 - store float %add15.i.i.us.2.1, float* %arrayidx14.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.2.1 - -if.end.r_exit.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.r_exit.i.i.us.2 - %856 = add nuw nsw i64 %_local_id_x.i.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %856, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !210 - -if.then.i.i.us.1.1: ; preds = %if.end.r_exit.i.i.us.1 - %857 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %sext26.i.i.us.1.1 = shl i64 %add1.i.i.i.us.1.1, 32 - %idxprom6.i.i.us.1.1 = ashr exact i64 %sext26.i.i.us.1.1, 32 - %arrayidx7.i.i.us.1.1 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.1.1 - %858 = load float, float* %arrayidx7.i.i.us.1.1, align 4, !tbaa !12 - %859 = load float, float* %arrayidx9.i.i.1, align 4, !tbaa !12 - %arrayidx11.i.i.us.1.1 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.1.1 - %860 = load float, float* %arrayidx11.i.i.us.1.1, align 4, !tbaa !12 - %mul12.i.i.us.1.1 = fmul float %859, %860 - %861 = tail call float @llvm.fmuladd.f32(float %857, float %858, float %mul12.i.i.us.1.1) #6 - %add.i.i.us.1.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom13.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx14.i.i.us.1.1 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.1.1 - %862 = load float, float* %arrayidx14.i.i.us.1.1, align 4, !tbaa !12 - %add15.i.i.us.1.1 = fadd float %862, %861 - store float %add15.i.i.us.1.1, float* %arrayidx14.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.1.1 - -if.end.r_exit.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.r_exit.i.i.us.1 - %863 = add nuw nsw i64 %_local_id_x.i.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %863, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !211 - -if.then.i.i.us.1531: ; preds = %if.end.r_exit.i.i.us - %864 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %sext26.i.i.us.1522 = shl i64 %add1.i.i.i.us.1518, 32 - %idxprom6.i.i.us.1523 = ashr exact i64 %sext26.i.i.us.1522, 32 - %arrayidx7.i.i.us.1524 = getelementptr inbounds float, float* %11, i64 %idxprom6.i.i.us.1523 - %865 = load float, float* %arrayidx7.i.i.us.1524, align 4, !tbaa !12 - %866 = load float, float* %arrayidx9.i.i, align 4, !tbaa !12 - %arrayidx11.i.i.us.1525 = getelementptr inbounds float, float* %15, i64 %idxprom6.i.i.us.1523 - %867 = load float, float* %arrayidx11.i.i.us.1525, align 4, !tbaa !12 - %mul12.i.i.us.1526 = fmul float %866, %867 - %868 = tail call float @llvm.fmuladd.f32(float %864, float %865, float %mul12.i.i.us.1526) #6 - %add.i.i.us.1527 = add nsw i32 %mul.i.i, %conv.i.i.us.1519 - %idxprom13.i.i.us.1528 = sext i32 %add.i.i.us.1527 to i64 - %arrayidx14.i.i.us.1529 = getelementptr inbounds float, float* %7, i64 %idxprom13.i.i.us.1528 - %869 = load float, float* %arrayidx14.i.i.us.1529, align 4, !tbaa !12 - %add15.i.i.us.1530 = fadd float %869, %868 - store float %add15.i.i.us.1530, float* %arrayidx14.i.i.us.1529, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.1532 - -if.end.r_exit.i.i.us.1532: ; preds = %if.then.i.i.us.1531, %if.end.r_exit.i.i.us - %870 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %870, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !212 -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_gemver_kernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { -pregion_for_entry.pregion_for_init.i.i: - %5 = bitcast i8** %0 to float** - %6 = load float*, float** %5, align 8 - %7 = getelementptr i8*, i8** %0, i64 1 - %8 = bitcast i8** %7 to float** - %9 = load float*, float** %8, align 8 - %10 = getelementptr i8*, i8** %0, i64 2 - %11 = bitcast i8** %10 to float** - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 3 - %14 = bitcast i8** %13 to float** - %15 = load float*, float** %14, align 8 - %16 = getelementptr i8*, i8** %0, i64 4 - %17 = bitcast i8** %16 to float** - %18 = load float*, float** %17, align 8 - %19 = getelementptr i8*, i8** %0, i64 5 - %20 = bitcast i8** %19 to i32** - %21 = load i32*, i32** %20, align 8 - %22 = load i32, i32* %21, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %conv2.i.i = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %22, %conv2.i.i - %sext.i.i = shl i64 %3, 35 - %idxprom.i.i = ashr exact i64 %sext.i.i, 32 - %arrayidx.i.i = getelementptr inbounds float, float* %15, i64 %idxprom.i.i - %arrayidx9.i.i = getelementptr inbounds float, float* %18, i64 %idxprom.i.i - %mul.i.i = mul nsw i32 %22, %conv2.i.i - br i1 %cmp.i.i, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %23 = trunc i64 %3 to i32 - %24 = mul i32 %22, %23 - %25 = shl i32 %24, 3 - %26 = trunc i64 %2 to i32 - %27 = shl i32 %26, 5 - %28 = add i32 %25, %27 - %29 = icmp sgt i32 %28, 2147483616 - br i1 %29, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %sext508 = shl i64 %3, 35 - %30 = ashr exact i64 %sext508, 32 - %scevgep = getelementptr float, float* %15, i64 %30 - %scevgep3 = bitcast float* %scevgep to i8* - %uglygep = getelementptr i8, i8* %scevgep3, i64 1 - %31 = trunc i64 %3 to i32 - %32 = mul i32 %22, %31 - %33 = shl i32 %32, 3 - %34 = trunc i64 %2 to i32 - %35 = shl i32 %34, 5 - %36 = add i32 %33, %35 - %37 = sext i32 %36 to i64 - %scevgep4 = getelementptr float, float* %6, i64 %37 - %scevgep45 = bitcast float* %scevgep4 to i8* - %38 = add nsw i64 %37, 32 - %scevgep6 = getelementptr float, float* %6, i64 %38 - %39 = sext i32 %35 to i64 - %scevgep8 = getelementptr float, float* %9, i64 %39 - %40 = add nsw i64 %39, 32 - %scevgep10 = getelementptr float, float* %9, i64 %40 - %scevgep12 = getelementptr float, float* %18, i64 %30 - %scevgep1213 = bitcast float* %scevgep12 to i8* - %uglygep14 = getelementptr i8, i8* %scevgep1213, i64 1 - %scevgep15 = getelementptr float, float* %12, i64 %39 - %scevgep17 = getelementptr float, float* %12, i64 %40 - %bound0 = icmp ult float* %arrayidx.i.i, %scevgep6 - %bound1 = icmp ugt i8* %uglygep, %scevgep45 - %found.conflict = and i1 %bound0, %bound1 - %bound019 = icmp ult float* %scevgep8, %scevgep6 - %bound120 = icmp ult float* %scevgep4, %scevgep10 - %found.conflict21 = and i1 %bound019, %bound120 - %conflict.rdx = or i1 %found.conflict, %found.conflict21 - %bound023 = icmp ult float* %arrayidx9.i.i, %scevgep6 - %bound124 = icmp ugt i8* %uglygep14, %scevgep45 - %found.conflict25 = and i1 %bound023, %bound124 - %conflict.rdx26 = or i1 %conflict.rdx, %found.conflict25 - %bound027 = icmp ult float* %scevgep15, %scevgep6 - %bound128 = icmp ult float* %scevgep4, %scevgep17 - %found.conflict29 = and i1 %bound027, %bound128 - %conflict.rdx30 = or i1 %conflict.rdx26, %found.conflict29 - br i1 %conflict.rdx30, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert31 = insertelement <8 x i32> undef, i32 %22, i32 0 - %broadcast.splat32 = shufflevector <8 x i32> %broadcast.splatinsert31, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert33 = insertelement <8 x float*> undef, float* %arrayidx.i.i, i32 0 - %broadcast.splat34 = shufflevector <8 x float*> %broadcast.splatinsert33, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert35 = insertelement <8 x float*> undef, float* %arrayidx9.i.i, i32 0 - %broadcast.splat36 = shufflevector <8 x float*> %broadcast.splatinsert35, <8 x float*> undef, <8 x i32> zeroinitializer - %41 = or <8 x i64> %broadcast.splat, - %42 = trunc <8 x i64> %41 to <8 x i32> - %43 = icmp sgt <8 x i32> %broadcast.splat32, %42 - %wide.masked.gather = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat34, i32 4, <8 x i1> %43, <8 x float> undef), !tbaa !12, !alias.scope !213, !noalias !216 - %44 = extractelement <8 x i64> %41, i32 0 - %45 = shl i64 %44, 32 - %46 = ashr exact i64 %45, 32 - %47 = getelementptr inbounds float, float* %9, i64 %46 - %48 = bitcast float* %47 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %48, i32 4, <8 x i1> %43, <8 x float> undef), !tbaa !12, !alias.scope !218, !noalias !216 - %wide.masked.gather37 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat36, i32 4, <8 x i1> %43, <8 x float> undef), !tbaa !12, !alias.scope !220, !noalias !216 - %49 = getelementptr inbounds float, float* %12, i64 %46 - %50 = bitcast float* %49 to <8 x float>* - %wide.masked.load38 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %50, i32 4, <8 x i1> %43, <8 x float> undef), !tbaa !12, !alias.scope !222, !noalias !216 - %51 = fmul <8 x float> %wide.masked.gather37, %wide.masked.load38 - %52 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather, <8 x float> %wide.masked.load, <8 x float> %51) - %53 = extractelement <8 x i32> %42, i32 0 - %54 = add nsw i32 %mul.i.i, %53 - %55 = sext i32 %54 to i64 - %56 = getelementptr inbounds float, float* %6, i64 %55 - %57 = bitcast float* %56 to <8 x float>* - %wide.masked.load39 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %57, i32 4, <8 x i1> %43, <8 x float> undef), !tbaa !12, !alias.scope !216 - %58 = fadd <8 x float> %wide.masked.load39, %52 - %59 = bitcast float* %56 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %58, <8 x float>* %59, i32 4, <8 x i1> %43), !tbaa !12, !alias.scope !216, !llvm.access.group !27 - %60 = or <8 x i64> %broadcast.splat, - %61 = trunc <8 x i64> %60 to <8 x i32> - %62 = icmp sgt <8 x i32> %broadcast.splat32, %61 - %wide.masked.gather.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat34, i32 4, <8 x i1> %62, <8 x float> undef), !tbaa !12, !alias.scope !213, !noalias !216 - %63 = extractelement <8 x i64> %60, i32 0 - %64 = shl i64 %63, 32 - %65 = ashr exact i64 %64, 32 - %66 = getelementptr inbounds float, float* %9, i64 %65 - %67 = bitcast float* %66 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %67, i32 4, <8 x i1> %62, <8 x float> undef), !tbaa !12, !alias.scope !218, !noalias !216 - %wide.masked.gather37.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat36, i32 4, <8 x i1> %62, <8 x float> undef), !tbaa !12, !alias.scope !220, !noalias !216 - %68 = getelementptr inbounds float, float* %12, i64 %65 - %69 = bitcast float* %68 to <8 x float>* - %wide.masked.load38.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %69, i32 4, <8 x i1> %62, <8 x float> undef), !tbaa !12, !alias.scope !222, !noalias !216 - %70 = fmul <8 x float> %wide.masked.gather37.1, %wide.masked.load38.1 - %71 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather.1, <8 x float> %wide.masked.load.1, <8 x float> %70) - %72 = extractelement <8 x i32> %61, i32 0 - %73 = add nsw i32 %mul.i.i, %72 - %74 = sext i32 %73 to i64 - %75 = getelementptr inbounds float, float* %6, i64 %74 - %76 = bitcast float* %75 to <8 x float>* - %wide.masked.load39.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %76, i32 4, <8 x i1> %62, <8 x float> undef), !tbaa !12, !alias.scope !216 - %77 = fadd <8 x float> %wide.masked.load39.1, %71 - %78 = bitcast float* %75 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %77, <8 x float>* %78, i32 4, <8 x i1> %62), !tbaa !12, !alias.scope !216, !llvm.access.group !27 - %79 = or <8 x i64> %broadcast.splat, - %80 = trunc <8 x i64> %79 to <8 x i32> - %81 = icmp sgt <8 x i32> %broadcast.splat32, %80 - %wide.masked.gather.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat34, i32 4, <8 x i1> %81, <8 x float> undef), !tbaa !12, !alias.scope !213, !noalias !216 - %82 = extractelement <8 x i64> %79, i32 0 - %83 = shl i64 %82, 32 - %84 = ashr exact i64 %83, 32 - %85 = getelementptr inbounds float, float* %9, i64 %84 - %86 = bitcast float* %85 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %86, i32 4, <8 x i1> %81, <8 x float> undef), !tbaa !12, !alias.scope !218, !noalias !216 - %wide.masked.gather37.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat36, i32 4, <8 x i1> %81, <8 x float> undef), !tbaa !12, !alias.scope !220, !noalias !216 - %87 = getelementptr inbounds float, float* %12, i64 %84 - %88 = bitcast float* %87 to <8 x float>* - %wide.masked.load38.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %88, i32 4, <8 x i1> %81, <8 x float> undef), !tbaa !12, !alias.scope !222, !noalias !216 - %89 = fmul <8 x float> %wide.masked.gather37.2, %wide.masked.load38.2 - %90 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather.2, <8 x float> %wide.masked.load.2, <8 x float> %89) - %91 = extractelement <8 x i32> %80, i32 0 - %92 = add nsw i32 %mul.i.i, %91 - %93 = sext i32 %92 to i64 - %94 = getelementptr inbounds float, float* %6, i64 %93 - %95 = bitcast float* %94 to <8 x float>* - %wide.masked.load39.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %95, i32 4, <8 x i1> %81, <8 x float> undef), !tbaa !12, !alias.scope !216 - %96 = fadd <8 x float> %wide.masked.load39.2, %90 - %97 = bitcast float* %94 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %96, <8 x float>* %97, i32 4, <8 x i1> %81), !tbaa !12, !alias.scope !216, !llvm.access.group !27 - %98 = or <8 x i64> %broadcast.splat, - %99 = trunc <8 x i64> %98 to <8 x i32> - %100 = icmp sgt <8 x i32> %broadcast.splat32, %99 - %wide.masked.gather.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat34, i32 4, <8 x i1> %100, <8 x float> undef), !tbaa !12, !alias.scope !213, !noalias !216 - %101 = extractelement <8 x i64> %98, i32 0 - %102 = shl i64 %101, 32 - %103 = ashr exact i64 %102, 32 - %104 = getelementptr inbounds float, float* %9, i64 %103 - %105 = bitcast float* %104 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %105, i32 4, <8 x i1> %100, <8 x float> undef), !tbaa !12, !alias.scope !218, !noalias !216 - %wide.masked.gather37.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat36, i32 4, <8 x i1> %100, <8 x float> undef), !tbaa !12, !alias.scope !220, !noalias !216 - %106 = getelementptr inbounds float, float* %12, i64 %103 - %107 = bitcast float* %106 to <8 x float>* - %wide.masked.load38.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %107, i32 4, <8 x i1> %100, <8 x float> undef), !tbaa !12, !alias.scope !222, !noalias !216 - %108 = fmul <8 x float> %wide.masked.gather37.3, %wide.masked.load38.3 - %109 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather.3, <8 x float> %wide.masked.load.3, <8 x float> %108) - %110 = extractelement <8 x i32> %99, i32 0 - %111 = add nsw i32 %mul.i.i, %110 - %112 = sext i32 %111 to i64 - %113 = getelementptr inbounds float, float* %6, i64 %112 - %114 = bitcast float* %113 to <8 x float>* - %wide.masked.load39.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %114, i32 4, <8 x i1> %100, <8 x float> undef), !tbaa !12, !alias.scope !216 - %115 = fadd <8 x float> %wide.masked.load39.3, %109 - %116 = bitcast float* %113 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %115, <8 x float>* %116, i32 4, <8 x i1> %100), !tbaa !12, !alias.scope !216, !llvm.access.group !27 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1532, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.preheader ], [ %865, %if.end.r_exit.i.i.us.1532 ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %22, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %117 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %sext26.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom6.i.i.us = ashr exact i64 %sext26.i.i.us, 32 - %arrayidx7.i.i.us = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us - %118 = load float, float* %arrayidx7.i.i.us, align 4, !tbaa !12 - %119 = load float, float* %arrayidx9.i.i, align 4, !tbaa !12 - %arrayidx11.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us - %120 = load float, float* %arrayidx11.i.i.us, align 4, !tbaa !12 - %mul12.i.i.us = fmul float %119, %120 - %121 = tail call float @llvm.fmuladd.f32(float %117, float %118, float %mul12.i.i.us) #6 - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom13.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx14.i.i.us = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us - %122 = load float, float* %arrayidx14.i.i.us, align 4, !tbaa !12 - %add15.i.i.us = fadd float %122, %121 - store float %add15.i.i.us, float* %arrayidx14.i.i.us, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %123 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1518 = add nuw nsw i64 %123, %mul.i.i.i - %conv.i.i.us.1519 = trunc i64 %add1.i.i.i.us.1518 to i32 - %cmp4.i.i.us.1520 = icmp sgt i32 %22, %conv.i.i.us.1519 - br i1 %cmp4.i.i.us.1520, label %if.then.i.i.us.1531, label %if.end.r_exit.i.i.us.1532 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.r_exit.i.i.us.1532 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i - %add6.i.i.i.1 = or i64 %mul3.i.i.i, 1 - %conv2.i.i.1 = trunc i64 %add6.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %22, %conv2.i.i.1 - %sext.i.i.1 = shl i64 %add6.i.i.i.1, 32 - %idxprom.i.i.1 = ashr exact i64 %sext.i.i.1, 32 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %15, i64 %idxprom.i.i.1 - %arrayidx9.i.i.1 = getelementptr inbounds float, float* %18, i64 %idxprom.i.i.1 - %mul.i.i.1 = mul nsw i32 %22, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck47, label %pregion_for_end.i.i.1 - -vector.scevcheck47: ; preds = %pregion_for_end.i.i - %124 = mul i32 %22, %conv2.i.i.1 - %125 = trunc i64 %2 to i32 - %126 = shl i32 %125, 5 - %127 = add i32 %124, %126 - %128 = icmp sgt i32 %127, 2147483616 - br i1 %128, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.memcheck85 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.memcheck85, %vector.scevcheck47 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.memcheck85: ; preds = %vector.scevcheck47 - %sext507 = shl i64 %3, 35 - %129 = ashr exact i64 %sext507, 32 - %130 = or i64 %129, 1 - %scevgep49 = getelementptr float, float* %15, i64 %130 - %scevgep4950 = bitcast float* %scevgep49 to i8* - %uglygep51 = getelementptr i8, i8* %scevgep4950, i64 1 - %131 = mul i32 %22, %conv2.i.i.1 - %132 = trunc i64 %2 to i32 - %133 = shl i32 %132, 5 - %134 = add i32 %131, %133 - %135 = sext i32 %134 to i64 - %scevgep52 = getelementptr float, float* %6, i64 %135 - %scevgep5253 = bitcast float* %scevgep52 to i8* - %136 = add nsw i64 %135, 32 - %scevgep54 = getelementptr float, float* %6, i64 %136 - %137 = sext i32 %133 to i64 - %scevgep56 = getelementptr float, float* %9, i64 %137 - %138 = add nsw i64 %137, 32 - %scevgep58 = getelementptr float, float* %9, i64 %138 - %scevgep60 = getelementptr float, float* %18, i64 %130 - %scevgep6061 = bitcast float* %scevgep60 to i8* - %uglygep62 = getelementptr i8, i8* %scevgep6061, i64 1 - %scevgep63 = getelementptr float, float* %12, i64 %137 - %scevgep65 = getelementptr float, float* %12, i64 %138 - %bound068 = icmp ult float* %arrayidx.i.i.1, %scevgep54 - %bound169 = icmp ugt i8* %uglygep51, %scevgep5253 - %found.conflict70 = and i1 %bound068, %bound169 - %bound071 = icmp ult float* %scevgep56, %scevgep54 - %bound172 = icmp ult float* %scevgep52, %scevgep58 - %found.conflict73 = and i1 %bound071, %bound172 - %conflict.rdx74 = or i1 %found.conflict70, %found.conflict73 - %bound076 = icmp ult float* %arrayidx9.i.i.1, %scevgep54 - %bound177 = icmp ugt i8* %uglygep62, %scevgep5253 - %found.conflict78 = and i1 %bound076, %bound177 - %conflict.rdx79 = or i1 %conflict.rdx74, %found.conflict78 - %bound080 = icmp ult float* %scevgep63, %scevgep54 - %bound181 = icmp ult float* %scevgep52, %scevgep65 - %found.conflict82 = and i1 %bound080, %bound181 - %conflict.rdx83 = or i1 %conflict.rdx79, %found.conflict82 - br i1 %conflict.rdx83, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph86 - -vector.ph86: ; preds = %vector.memcheck85 - %broadcast.splatinsert93 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat94 = shufflevector <8 x i64> %broadcast.splatinsert93, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert95 = insertelement <8 x i32> undef, i32 %22, i32 0 - %broadcast.splat96 = shufflevector <8 x i32> %broadcast.splatinsert95, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert97 = insertelement <8 x float*> undef, float* %arrayidx.i.i.1, i32 0 - %broadcast.splat98 = shufflevector <8 x float*> %broadcast.splatinsert97, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert101 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.1, i32 0 - %broadcast.splat102 = shufflevector <8 x float*> %broadcast.splatinsert101, <8 x float*> undef, <8 x i32> zeroinitializer - %139 = or <8 x i64> %broadcast.splat94, - %140 = trunc <8 x i64> %139 to <8 x i32> - %141 = icmp sgt <8 x i32> %broadcast.splat96, %140 - %wide.masked.gather99 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat98, i32 4, <8 x i1> %141, <8 x float> undef), !tbaa !12, !alias.scope !224, !noalias !227 - %142 = extractelement <8 x i64> %139, i32 0 - %143 = shl i64 %142, 32 - %144 = ashr exact i64 %143, 32 - %145 = getelementptr inbounds float, float* %9, i64 %144 - %146 = bitcast float* %145 to <8 x float>* - %wide.masked.load100 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %146, i32 4, <8 x i1> %141, <8 x float> undef), !tbaa !12, !alias.scope !229, !noalias !227 - %wide.masked.gather103 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat102, i32 4, <8 x i1> %141, <8 x float> undef), !tbaa !12, !alias.scope !231, !noalias !227 - %147 = getelementptr inbounds float, float* %12, i64 %144 - %148 = bitcast float* %147 to <8 x float>* - %wide.masked.load104 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %148, i32 4, <8 x i1> %141, <8 x float> undef), !tbaa !12, !alias.scope !233, !noalias !227 - %149 = fmul <8 x float> %wide.masked.gather103, %wide.masked.load104 - %150 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather99, <8 x float> %wide.masked.load100, <8 x float> %149) - %151 = extractelement <8 x i32> %140, i32 0 - %152 = add nsw i32 %mul.i.i.1, %151 - %153 = sext i32 %152 to i64 - %154 = getelementptr inbounds float, float* %6, i64 %153 - %155 = bitcast float* %154 to <8 x float>* - %wide.masked.load105 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %155, i32 4, <8 x i1> %141, <8 x float> undef), !tbaa !12, !alias.scope !227 - %156 = fadd <8 x float> %wide.masked.load105, %150 - %157 = bitcast float* %154 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %156, <8 x float>* %157, i32 4, <8 x i1> %141), !tbaa !12, !alias.scope !227, !llvm.access.group !27 - %158 = or <8 x i64> %broadcast.splat94, - %159 = trunc <8 x i64> %158 to <8 x i32> - %160 = icmp sgt <8 x i32> %broadcast.splat96, %159 - %wide.masked.gather99.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat98, i32 4, <8 x i1> %160, <8 x float> undef), !tbaa !12, !alias.scope !224, !noalias !227 - %161 = extractelement <8 x i64> %158, i32 0 - %162 = shl i64 %161, 32 - %163 = ashr exact i64 %162, 32 - %164 = getelementptr inbounds float, float* %9, i64 %163 - %165 = bitcast float* %164 to <8 x float>* - %wide.masked.load100.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %165, i32 4, <8 x i1> %160, <8 x float> undef), !tbaa !12, !alias.scope !229, !noalias !227 - %wide.masked.gather103.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat102, i32 4, <8 x i1> %160, <8 x float> undef), !tbaa !12, !alias.scope !231, !noalias !227 - %166 = getelementptr inbounds float, float* %12, i64 %163 - %167 = bitcast float* %166 to <8 x float>* - %wide.masked.load104.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %167, i32 4, <8 x i1> %160, <8 x float> undef), !tbaa !12, !alias.scope !233, !noalias !227 - %168 = fmul <8 x float> %wide.masked.gather103.1, %wide.masked.load104.1 - %169 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather99.1, <8 x float> %wide.masked.load100.1, <8 x float> %168) - %170 = extractelement <8 x i32> %159, i32 0 - %171 = add nsw i32 %mul.i.i.1, %170 - %172 = sext i32 %171 to i64 - %173 = getelementptr inbounds float, float* %6, i64 %172 - %174 = bitcast float* %173 to <8 x float>* - %wide.masked.load105.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %174, i32 4, <8 x i1> %160, <8 x float> undef), !tbaa !12, !alias.scope !227 - %175 = fadd <8 x float> %wide.masked.load105.1, %169 - %176 = bitcast float* %173 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %175, <8 x float>* %176, i32 4, <8 x i1> %160), !tbaa !12, !alias.scope !227, !llvm.access.group !27 - %177 = or <8 x i64> %broadcast.splat94, - %178 = trunc <8 x i64> %177 to <8 x i32> - %179 = icmp sgt <8 x i32> %broadcast.splat96, %178 - %wide.masked.gather99.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat98, i32 4, <8 x i1> %179, <8 x float> undef), !tbaa !12, !alias.scope !224, !noalias !227 - %180 = extractelement <8 x i64> %177, i32 0 - %181 = shl i64 %180, 32 - %182 = ashr exact i64 %181, 32 - %183 = getelementptr inbounds float, float* %9, i64 %182 - %184 = bitcast float* %183 to <8 x float>* - %wide.masked.load100.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %184, i32 4, <8 x i1> %179, <8 x float> undef), !tbaa !12, !alias.scope !229, !noalias !227 - %wide.masked.gather103.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat102, i32 4, <8 x i1> %179, <8 x float> undef), !tbaa !12, !alias.scope !231, !noalias !227 - %185 = getelementptr inbounds float, float* %12, i64 %182 - %186 = bitcast float* %185 to <8 x float>* - %wide.masked.load104.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %186, i32 4, <8 x i1> %179, <8 x float> undef), !tbaa !12, !alias.scope !233, !noalias !227 - %187 = fmul <8 x float> %wide.masked.gather103.2, %wide.masked.load104.2 - %188 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather99.2, <8 x float> %wide.masked.load100.2, <8 x float> %187) - %189 = extractelement <8 x i32> %178, i32 0 - %190 = add nsw i32 %mul.i.i.1, %189 - %191 = sext i32 %190 to i64 - %192 = getelementptr inbounds float, float* %6, i64 %191 - %193 = bitcast float* %192 to <8 x float>* - %wide.masked.load105.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %193, i32 4, <8 x i1> %179, <8 x float> undef), !tbaa !12, !alias.scope !227 - %194 = fadd <8 x float> %wide.masked.load105.2, %188 - %195 = bitcast float* %192 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %194, <8 x float>* %195, i32 4, <8 x i1> %179), !tbaa !12, !alias.scope !227, !llvm.access.group !27 - %196 = or <8 x i64> %broadcast.splat94, - %197 = trunc <8 x i64> %196 to <8 x i32> - %198 = icmp sgt <8 x i32> %broadcast.splat96, %197 - %wide.masked.gather99.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat98, i32 4, <8 x i1> %198, <8 x float> undef), !tbaa !12, !alias.scope !224, !noalias !227 - %199 = extractelement <8 x i64> %196, i32 0 - %200 = shl i64 %199, 32 - %201 = ashr exact i64 %200, 32 - %202 = getelementptr inbounds float, float* %9, i64 %201 - %203 = bitcast float* %202 to <8 x float>* - %wide.masked.load100.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %203, i32 4, <8 x i1> %198, <8 x float> undef), !tbaa !12, !alias.scope !229, !noalias !227 - %wide.masked.gather103.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat102, i32 4, <8 x i1> %198, <8 x float> undef), !tbaa !12, !alias.scope !231, !noalias !227 - %204 = getelementptr inbounds float, float* %12, i64 %201 - %205 = bitcast float* %204 to <8 x float>* - %wide.masked.load104.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %205, i32 4, <8 x i1> %198, <8 x float> undef), !tbaa !12, !alias.scope !233, !noalias !227 - %206 = fmul <8 x float> %wide.masked.gather103.3, %wide.masked.load104.3 - %207 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather99.3, <8 x float> %wide.masked.load100.3, <8 x float> %206) - %208 = extractelement <8 x i32> %197, i32 0 - %209 = add nsw i32 %mul.i.i.1, %208 - %210 = sext i32 %209 to i64 - %211 = getelementptr inbounds float, float* %6, i64 %210 - %212 = bitcast float* %211 to <8 x float>* - %wide.masked.load105.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %212, i32 4, <8 x i1> %198, <8 x float> undef), !tbaa !12, !alias.scope !227 - %213 = fadd <8 x float> %wide.masked.load105.3, %207 - %214 = bitcast float* %211 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %213, <8 x float>* %214, i32 4, <8 x i1> %198), !tbaa !12, !alias.scope !227, !llvm.access.group !27 - br label %pregion_for_end.i.i.1 - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.1, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ], [ %858, %if.end.r_exit.i.i.us.1.1 ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %22, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %215 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %sext26.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom6.i.i.us.1 = ashr exact i64 %sext26.i.i.us.1, 32 - %arrayidx7.i.i.us.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.1 - %216 = load float, float* %arrayidx7.i.i.us.1, align 4, !tbaa !12 - %217 = load float, float* %arrayidx9.i.i.1, align 4, !tbaa !12 - %arrayidx11.i.i.us.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.1 - %218 = load float, float* %arrayidx11.i.i.us.1, align 4, !tbaa !12 - %mul12.i.i.us.1 = fmul float %217, %218 - %219 = tail call float @llvm.fmuladd.f32(float %215, float %216, float %mul12.i.i.us.1) #6 - %add.i.i.us.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom13.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx14.i.i.us.1 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.1 - %220 = load float, float* %arrayidx14.i.i.us.1, align 4, !tbaa !12 - %add15.i.i.us.1 = fadd float %220, %219 - store float %add15.i.i.us.1, float* %arrayidx14.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %221 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %221, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %22, %conv.i.i.us.1.1 - br i1 %cmp4.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.r_exit.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.r_exit.i.i.us.1.1 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph86, %pregion_for_end.i.i - %add6.i.i.i.2 = or i64 %mul3.i.i.i, 2 - %conv2.i.i.2 = trunc i64 %add6.i.i.i.2 to i32 - %cmp.i.i.2 = icmp sgt i32 %22, %conv2.i.i.2 - %sext.i.i.2 = shl i64 %add6.i.i.i.2, 32 - %idxprom.i.i.2 = ashr exact i64 %sext.i.i.2, 32 - %arrayidx.i.i.2 = getelementptr inbounds float, float* %15, i64 %idxprom.i.i.2 - %arrayidx9.i.i.2 = getelementptr inbounds float, float* %18, i64 %idxprom.i.i.2 - %mul.i.i.2 = mul nsw i32 %22, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck113, label %pregion_for_end.i.i.2 - -vector.scevcheck113: ; preds = %pregion_for_end.i.i.1 - %222 = mul i32 %22, %conv2.i.i.2 - %223 = trunc i64 %2 to i32 - %224 = shl i32 %223, 5 - %225 = add i32 %222, %224 - %226 = icmp sgt i32 %225, 2147483616 - br i1 %226, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.memcheck151 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.memcheck151, %vector.scevcheck113 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.memcheck151: ; preds = %vector.scevcheck113 - %sext506 = shl i64 %3, 35 - %227 = ashr exact i64 %sext506, 32 - %228 = or i64 %227, 2 - %scevgep115 = getelementptr float, float* %15, i64 %228 - %scevgep115116 = bitcast float* %scevgep115 to i8* - %uglygep117 = getelementptr i8, i8* %scevgep115116, i64 1 - %229 = mul i32 %22, %conv2.i.i.2 - %230 = trunc i64 %2 to i32 - %231 = shl i32 %230, 5 - %232 = add i32 %229, %231 - %233 = sext i32 %232 to i64 - %scevgep118 = getelementptr float, float* %6, i64 %233 - %scevgep118119 = bitcast float* %scevgep118 to i8* - %234 = add nsw i64 %233, 32 - %scevgep120 = getelementptr float, float* %6, i64 %234 - %235 = sext i32 %231 to i64 - %scevgep122 = getelementptr float, float* %9, i64 %235 - %236 = add nsw i64 %235, 32 - %scevgep124 = getelementptr float, float* %9, i64 %236 - %scevgep126 = getelementptr float, float* %18, i64 %228 - %scevgep126127 = bitcast float* %scevgep126 to i8* - %uglygep128 = getelementptr i8, i8* %scevgep126127, i64 1 - %scevgep129 = getelementptr float, float* %12, i64 %235 - %scevgep131 = getelementptr float, float* %12, i64 %236 - %bound0134 = icmp ult float* %arrayidx.i.i.2, %scevgep120 - %bound1135 = icmp ugt i8* %uglygep117, %scevgep118119 - %found.conflict136 = and i1 %bound0134, %bound1135 - %bound0137 = icmp ult float* %scevgep122, %scevgep120 - %bound1138 = icmp ult float* %scevgep118, %scevgep124 - %found.conflict139 = and i1 %bound0137, %bound1138 - %conflict.rdx140 = or i1 %found.conflict136, %found.conflict139 - %bound0142 = icmp ult float* %arrayidx9.i.i.2, %scevgep120 - %bound1143 = icmp ugt i8* %uglygep128, %scevgep118119 - %found.conflict144 = and i1 %bound0142, %bound1143 - %conflict.rdx145 = or i1 %conflict.rdx140, %found.conflict144 - %bound0146 = icmp ult float* %scevgep129, %scevgep120 - %bound1147 = icmp ult float* %scevgep118, %scevgep131 - %found.conflict148 = and i1 %bound0146, %bound1147 - %conflict.rdx149 = or i1 %conflict.rdx145, %found.conflict148 - br i1 %conflict.rdx149, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph152 - -vector.ph152: ; preds = %vector.memcheck151 - %broadcast.splatinsert159 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat160 = shufflevector <8 x i64> %broadcast.splatinsert159, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert161 = insertelement <8 x i32> undef, i32 %22, i32 0 - %broadcast.splat162 = shufflevector <8 x i32> %broadcast.splatinsert161, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert163 = insertelement <8 x float*> undef, float* %arrayidx.i.i.2, i32 0 - %broadcast.splat164 = shufflevector <8 x float*> %broadcast.splatinsert163, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert167 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.2, i32 0 - %broadcast.splat168 = shufflevector <8 x float*> %broadcast.splatinsert167, <8 x float*> undef, <8 x i32> zeroinitializer - %237 = or <8 x i64> %broadcast.splat160, - %238 = trunc <8 x i64> %237 to <8 x i32> - %239 = icmp sgt <8 x i32> %broadcast.splat162, %238 - %wide.masked.gather165 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat164, i32 4, <8 x i1> %239, <8 x float> undef), !tbaa !12, !alias.scope !235, !noalias !238 - %240 = extractelement <8 x i64> %237, i32 0 - %241 = shl i64 %240, 32 - %242 = ashr exact i64 %241, 32 - %243 = getelementptr inbounds float, float* %9, i64 %242 - %244 = bitcast float* %243 to <8 x float>* - %wide.masked.load166 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %244, i32 4, <8 x i1> %239, <8 x float> undef), !tbaa !12, !alias.scope !240, !noalias !238 - %wide.masked.gather169 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat168, i32 4, <8 x i1> %239, <8 x float> undef), !tbaa !12, !alias.scope !242, !noalias !238 - %245 = getelementptr inbounds float, float* %12, i64 %242 - %246 = bitcast float* %245 to <8 x float>* - %wide.masked.load170 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %246, i32 4, <8 x i1> %239, <8 x float> undef), !tbaa !12, !alias.scope !244, !noalias !238 - %247 = fmul <8 x float> %wide.masked.gather169, %wide.masked.load170 - %248 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather165, <8 x float> %wide.masked.load166, <8 x float> %247) - %249 = extractelement <8 x i32> %238, i32 0 - %250 = add nsw i32 %mul.i.i.2, %249 - %251 = sext i32 %250 to i64 - %252 = getelementptr inbounds float, float* %6, i64 %251 - %253 = bitcast float* %252 to <8 x float>* - %wide.masked.load171 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %253, i32 4, <8 x i1> %239, <8 x float> undef), !tbaa !12, !alias.scope !238 - %254 = fadd <8 x float> %wide.masked.load171, %248 - %255 = bitcast float* %252 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %254, <8 x float>* %255, i32 4, <8 x i1> %239), !tbaa !12, !alias.scope !238, !llvm.access.group !27 - %256 = or <8 x i64> %broadcast.splat160, - %257 = trunc <8 x i64> %256 to <8 x i32> - %258 = icmp sgt <8 x i32> %broadcast.splat162, %257 - %wide.masked.gather165.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat164, i32 4, <8 x i1> %258, <8 x float> undef), !tbaa !12, !alias.scope !235, !noalias !238 - %259 = extractelement <8 x i64> %256, i32 0 - %260 = shl i64 %259, 32 - %261 = ashr exact i64 %260, 32 - %262 = getelementptr inbounds float, float* %9, i64 %261 - %263 = bitcast float* %262 to <8 x float>* - %wide.masked.load166.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %263, i32 4, <8 x i1> %258, <8 x float> undef), !tbaa !12, !alias.scope !240, !noalias !238 - %wide.masked.gather169.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat168, i32 4, <8 x i1> %258, <8 x float> undef), !tbaa !12, !alias.scope !242, !noalias !238 - %264 = getelementptr inbounds float, float* %12, i64 %261 - %265 = bitcast float* %264 to <8 x float>* - %wide.masked.load170.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %265, i32 4, <8 x i1> %258, <8 x float> undef), !tbaa !12, !alias.scope !244, !noalias !238 - %266 = fmul <8 x float> %wide.masked.gather169.1, %wide.masked.load170.1 - %267 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather165.1, <8 x float> %wide.masked.load166.1, <8 x float> %266) - %268 = extractelement <8 x i32> %257, i32 0 - %269 = add nsw i32 %mul.i.i.2, %268 - %270 = sext i32 %269 to i64 - %271 = getelementptr inbounds float, float* %6, i64 %270 - %272 = bitcast float* %271 to <8 x float>* - %wide.masked.load171.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %272, i32 4, <8 x i1> %258, <8 x float> undef), !tbaa !12, !alias.scope !238 - %273 = fadd <8 x float> %wide.masked.load171.1, %267 - %274 = bitcast float* %271 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %273, <8 x float>* %274, i32 4, <8 x i1> %258), !tbaa !12, !alias.scope !238, !llvm.access.group !27 - %275 = or <8 x i64> %broadcast.splat160, - %276 = trunc <8 x i64> %275 to <8 x i32> - %277 = icmp sgt <8 x i32> %broadcast.splat162, %276 - %wide.masked.gather165.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat164, i32 4, <8 x i1> %277, <8 x float> undef), !tbaa !12, !alias.scope !235, !noalias !238 - %278 = extractelement <8 x i64> %275, i32 0 - %279 = shl i64 %278, 32 - %280 = ashr exact i64 %279, 32 - %281 = getelementptr inbounds float, float* %9, i64 %280 - %282 = bitcast float* %281 to <8 x float>* - %wide.masked.load166.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %282, i32 4, <8 x i1> %277, <8 x float> undef), !tbaa !12, !alias.scope !240, !noalias !238 - %wide.masked.gather169.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat168, i32 4, <8 x i1> %277, <8 x float> undef), !tbaa !12, !alias.scope !242, !noalias !238 - %283 = getelementptr inbounds float, float* %12, i64 %280 - %284 = bitcast float* %283 to <8 x float>* - %wide.masked.load170.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %284, i32 4, <8 x i1> %277, <8 x float> undef), !tbaa !12, !alias.scope !244, !noalias !238 - %285 = fmul <8 x float> %wide.masked.gather169.2, %wide.masked.load170.2 - %286 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather165.2, <8 x float> %wide.masked.load166.2, <8 x float> %285) - %287 = extractelement <8 x i32> %276, i32 0 - %288 = add nsw i32 %mul.i.i.2, %287 - %289 = sext i32 %288 to i64 - %290 = getelementptr inbounds float, float* %6, i64 %289 - %291 = bitcast float* %290 to <8 x float>* - %wide.masked.load171.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %291, i32 4, <8 x i1> %277, <8 x float> undef), !tbaa !12, !alias.scope !238 - %292 = fadd <8 x float> %wide.masked.load171.2, %286 - %293 = bitcast float* %290 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %292, <8 x float>* %293, i32 4, <8 x i1> %277), !tbaa !12, !alias.scope !238, !llvm.access.group !27 - %294 = or <8 x i64> %broadcast.splat160, - %295 = trunc <8 x i64> %294 to <8 x i32> - %296 = icmp sgt <8 x i32> %broadcast.splat162, %295 - %wide.masked.gather165.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat164, i32 4, <8 x i1> %296, <8 x float> undef), !tbaa !12, !alias.scope !235, !noalias !238 - %297 = extractelement <8 x i64> %294, i32 0 - %298 = shl i64 %297, 32 - %299 = ashr exact i64 %298, 32 - %300 = getelementptr inbounds float, float* %9, i64 %299 - %301 = bitcast float* %300 to <8 x float>* - %wide.masked.load166.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %301, i32 4, <8 x i1> %296, <8 x float> undef), !tbaa !12, !alias.scope !240, !noalias !238 - %wide.masked.gather169.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat168, i32 4, <8 x i1> %296, <8 x float> undef), !tbaa !12, !alias.scope !242, !noalias !238 - %302 = getelementptr inbounds float, float* %12, i64 %299 - %303 = bitcast float* %302 to <8 x float>* - %wide.masked.load170.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %303, i32 4, <8 x i1> %296, <8 x float> undef), !tbaa !12, !alias.scope !244, !noalias !238 - %304 = fmul <8 x float> %wide.masked.gather169.3, %wide.masked.load170.3 - %305 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather165.3, <8 x float> %wide.masked.load166.3, <8 x float> %304) - %306 = extractelement <8 x i32> %295, i32 0 - %307 = add nsw i32 %mul.i.i.2, %306 - %308 = sext i32 %307 to i64 - %309 = getelementptr inbounds float, float* %6, i64 %308 - %310 = bitcast float* %309 to <8 x float>* - %wide.masked.load171.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %310, i32 4, <8 x i1> %296, <8 x float> undef), !tbaa !12, !alias.scope !238 - %311 = fadd <8 x float> %wide.masked.load171.3, %305 - %312 = bitcast float* %309 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %311, <8 x float>* %312, i32 4, <8 x i1> %296), !tbaa !12, !alias.scope !238, !llvm.access.group !27 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.r_exit.i.i.us.2.1, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ], [ %851, %if.end.r_exit.i.i.us.2.1 ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %22, %conv.i.i.us.2 - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.r_exit.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %313 = load float, float* %arrayidx.i.i.2, align 4, !tbaa !12 - %sext26.i.i.us.2 = shl i64 %add1.i.i.i.us.2, 32 - %idxprom6.i.i.us.2 = ashr exact i64 %sext26.i.i.us.2, 32 - %arrayidx7.i.i.us.2 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.2 - %314 = load float, float* %arrayidx7.i.i.us.2, align 4, !tbaa !12 - %315 = load float, float* %arrayidx9.i.i.2, align 4, !tbaa !12 - %arrayidx11.i.i.us.2 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.2 - %316 = load float, float* %arrayidx11.i.i.us.2, align 4, !tbaa !12 - %mul12.i.i.us.2 = fmul float %315, %316 - %317 = tail call float @llvm.fmuladd.f32(float %313, float %314, float %mul12.i.i.us.2) #6 - %add.i.i.us.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom13.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx14.i.i.us.2 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.2 - %318 = load float, float* %arrayidx14.i.i.us.2, align 4, !tbaa !12 - %add15.i.i.us.2 = fadd float %318, %317 - store float %add15.i.i.us.2, float* %arrayidx14.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.2 - -if.end.r_exit.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %319 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %319, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %22, %conv.i.i.us.2.1 - br i1 %cmp4.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.r_exit.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.r_exit.i.i.us.2.1 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph152, %pregion_for_end.i.i.1 - %add6.i.i.i.3 = or i64 %mul3.i.i.i, 3 - %conv2.i.i.3 = trunc i64 %add6.i.i.i.3 to i32 - %cmp.i.i.3 = icmp sgt i32 %22, %conv2.i.i.3 - %sext.i.i.3 = shl i64 %add6.i.i.i.3, 32 - %idxprom.i.i.3 = ashr exact i64 %sext.i.i.3, 32 - %arrayidx.i.i.3 = getelementptr inbounds float, float* %15, i64 %idxprom.i.i.3 - %arrayidx9.i.i.3 = getelementptr inbounds float, float* %18, i64 %idxprom.i.i.3 - %mul.i.i.3 = mul nsw i32 %22, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck179, label %pregion_for_end.i.i.3 - -vector.scevcheck179: ; preds = %pregion_for_end.i.i.2 - %320 = mul i32 %22, %conv2.i.i.3 - %321 = trunc i64 %2 to i32 - %322 = shl i32 %321, 5 - %323 = add i32 %320, %322 - %324 = icmp sgt i32 %323, 2147483616 - br i1 %324, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.memcheck217 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.memcheck217, %vector.scevcheck179 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.memcheck217: ; preds = %vector.scevcheck179 - %sext505 = shl i64 %3, 35 - %325 = ashr exact i64 %sext505, 32 - %326 = or i64 %325, 3 - %scevgep181 = getelementptr float, float* %15, i64 %326 - %scevgep181182 = bitcast float* %scevgep181 to i8* - %uglygep183 = getelementptr i8, i8* %scevgep181182, i64 1 - %327 = mul i32 %22, %conv2.i.i.3 - %328 = trunc i64 %2 to i32 - %329 = shl i32 %328, 5 - %330 = add i32 %327, %329 - %331 = sext i32 %330 to i64 - %scevgep184 = getelementptr float, float* %6, i64 %331 - %scevgep184185 = bitcast float* %scevgep184 to i8* - %332 = add nsw i64 %331, 32 - %scevgep186 = getelementptr float, float* %6, i64 %332 - %333 = sext i32 %329 to i64 - %scevgep188 = getelementptr float, float* %9, i64 %333 - %334 = add nsw i64 %333, 32 - %scevgep190 = getelementptr float, float* %9, i64 %334 - %scevgep192 = getelementptr float, float* %18, i64 %326 - %scevgep192193 = bitcast float* %scevgep192 to i8* - %uglygep194 = getelementptr i8, i8* %scevgep192193, i64 1 - %scevgep195 = getelementptr float, float* %12, i64 %333 - %scevgep197 = getelementptr float, float* %12, i64 %334 - %bound0200 = icmp ult float* %arrayidx.i.i.3, %scevgep186 - %bound1201 = icmp ugt i8* %uglygep183, %scevgep184185 - %found.conflict202 = and i1 %bound0200, %bound1201 - %bound0203 = icmp ult float* %scevgep188, %scevgep186 - %bound1204 = icmp ult float* %scevgep184, %scevgep190 - %found.conflict205 = and i1 %bound0203, %bound1204 - %conflict.rdx206 = or i1 %found.conflict202, %found.conflict205 - %bound0208 = icmp ult float* %arrayidx9.i.i.3, %scevgep186 - %bound1209 = icmp ugt i8* %uglygep194, %scevgep184185 - %found.conflict210 = and i1 %bound0208, %bound1209 - %conflict.rdx211 = or i1 %conflict.rdx206, %found.conflict210 - %bound0212 = icmp ult float* %scevgep195, %scevgep186 - %bound1213 = icmp ult float* %scevgep184, %scevgep197 - %found.conflict214 = and i1 %bound0212, %bound1213 - %conflict.rdx215 = or i1 %conflict.rdx211, %found.conflict214 - br i1 %conflict.rdx215, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph218 - -vector.ph218: ; preds = %vector.memcheck217 - %broadcast.splatinsert225 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat226 = shufflevector <8 x i64> %broadcast.splatinsert225, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert227 = insertelement <8 x i32> undef, i32 %22, i32 0 - %broadcast.splat228 = shufflevector <8 x i32> %broadcast.splatinsert227, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert229 = insertelement <8 x float*> undef, float* %arrayidx.i.i.3, i32 0 - %broadcast.splat230 = shufflevector <8 x float*> %broadcast.splatinsert229, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert233 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.3, i32 0 - %broadcast.splat234 = shufflevector <8 x float*> %broadcast.splatinsert233, <8 x float*> undef, <8 x i32> zeroinitializer - %335 = or <8 x i64> %broadcast.splat226, - %336 = trunc <8 x i64> %335 to <8 x i32> - %337 = icmp sgt <8 x i32> %broadcast.splat228, %336 - %wide.masked.gather231 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat230, i32 4, <8 x i1> %337, <8 x float> undef), !tbaa !12, !alias.scope !246, !noalias !249 - %338 = extractelement <8 x i64> %335, i32 0 - %339 = shl i64 %338, 32 - %340 = ashr exact i64 %339, 32 - %341 = getelementptr inbounds float, float* %9, i64 %340 - %342 = bitcast float* %341 to <8 x float>* - %wide.masked.load232 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %342, i32 4, <8 x i1> %337, <8 x float> undef), !tbaa !12, !alias.scope !251, !noalias !249 - %wide.masked.gather235 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat234, i32 4, <8 x i1> %337, <8 x float> undef), !tbaa !12, !alias.scope !253, !noalias !249 - %343 = getelementptr inbounds float, float* %12, i64 %340 - %344 = bitcast float* %343 to <8 x float>* - %wide.masked.load236 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %344, i32 4, <8 x i1> %337, <8 x float> undef), !tbaa !12, !alias.scope !255, !noalias !249 - %345 = fmul <8 x float> %wide.masked.gather235, %wide.masked.load236 - %346 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather231, <8 x float> %wide.masked.load232, <8 x float> %345) - %347 = extractelement <8 x i32> %336, i32 0 - %348 = add nsw i32 %mul.i.i.3, %347 - %349 = sext i32 %348 to i64 - %350 = getelementptr inbounds float, float* %6, i64 %349 - %351 = bitcast float* %350 to <8 x float>* - %wide.masked.load237 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %351, i32 4, <8 x i1> %337, <8 x float> undef), !tbaa !12, !alias.scope !249 - %352 = fadd <8 x float> %wide.masked.load237, %346 - %353 = bitcast float* %350 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %352, <8 x float>* %353, i32 4, <8 x i1> %337), !tbaa !12, !alias.scope !249, !llvm.access.group !27 - %354 = or <8 x i64> %broadcast.splat226, - %355 = trunc <8 x i64> %354 to <8 x i32> - %356 = icmp sgt <8 x i32> %broadcast.splat228, %355 - %wide.masked.gather231.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat230, i32 4, <8 x i1> %356, <8 x float> undef), !tbaa !12, !alias.scope !246, !noalias !249 - %357 = extractelement <8 x i64> %354, i32 0 - %358 = shl i64 %357, 32 - %359 = ashr exact i64 %358, 32 - %360 = getelementptr inbounds float, float* %9, i64 %359 - %361 = bitcast float* %360 to <8 x float>* - %wide.masked.load232.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %361, i32 4, <8 x i1> %356, <8 x float> undef), !tbaa !12, !alias.scope !251, !noalias !249 - %wide.masked.gather235.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat234, i32 4, <8 x i1> %356, <8 x float> undef), !tbaa !12, !alias.scope !253, !noalias !249 - %362 = getelementptr inbounds float, float* %12, i64 %359 - %363 = bitcast float* %362 to <8 x float>* - %wide.masked.load236.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %363, i32 4, <8 x i1> %356, <8 x float> undef), !tbaa !12, !alias.scope !255, !noalias !249 - %364 = fmul <8 x float> %wide.masked.gather235.1, %wide.masked.load236.1 - %365 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather231.1, <8 x float> %wide.masked.load232.1, <8 x float> %364) - %366 = extractelement <8 x i32> %355, i32 0 - %367 = add nsw i32 %mul.i.i.3, %366 - %368 = sext i32 %367 to i64 - %369 = getelementptr inbounds float, float* %6, i64 %368 - %370 = bitcast float* %369 to <8 x float>* - %wide.masked.load237.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %370, i32 4, <8 x i1> %356, <8 x float> undef), !tbaa !12, !alias.scope !249 - %371 = fadd <8 x float> %wide.masked.load237.1, %365 - %372 = bitcast float* %369 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %371, <8 x float>* %372, i32 4, <8 x i1> %356), !tbaa !12, !alias.scope !249, !llvm.access.group !27 - %373 = or <8 x i64> %broadcast.splat226, - %374 = trunc <8 x i64> %373 to <8 x i32> - %375 = icmp sgt <8 x i32> %broadcast.splat228, %374 - %wide.masked.gather231.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat230, i32 4, <8 x i1> %375, <8 x float> undef), !tbaa !12, !alias.scope !246, !noalias !249 - %376 = extractelement <8 x i64> %373, i32 0 - %377 = shl i64 %376, 32 - %378 = ashr exact i64 %377, 32 - %379 = getelementptr inbounds float, float* %9, i64 %378 - %380 = bitcast float* %379 to <8 x float>* - %wide.masked.load232.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %380, i32 4, <8 x i1> %375, <8 x float> undef), !tbaa !12, !alias.scope !251, !noalias !249 - %wide.masked.gather235.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat234, i32 4, <8 x i1> %375, <8 x float> undef), !tbaa !12, !alias.scope !253, !noalias !249 - %381 = getelementptr inbounds float, float* %12, i64 %378 - %382 = bitcast float* %381 to <8 x float>* - %wide.masked.load236.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %382, i32 4, <8 x i1> %375, <8 x float> undef), !tbaa !12, !alias.scope !255, !noalias !249 - %383 = fmul <8 x float> %wide.masked.gather235.2, %wide.masked.load236.2 - %384 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather231.2, <8 x float> %wide.masked.load232.2, <8 x float> %383) - %385 = extractelement <8 x i32> %374, i32 0 - %386 = add nsw i32 %mul.i.i.3, %385 - %387 = sext i32 %386 to i64 - %388 = getelementptr inbounds float, float* %6, i64 %387 - %389 = bitcast float* %388 to <8 x float>* - %wide.masked.load237.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %389, i32 4, <8 x i1> %375, <8 x float> undef), !tbaa !12, !alias.scope !249 - %390 = fadd <8 x float> %wide.masked.load237.2, %384 - %391 = bitcast float* %388 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %390, <8 x float>* %391, i32 4, <8 x i1> %375), !tbaa !12, !alias.scope !249, !llvm.access.group !27 - %392 = or <8 x i64> %broadcast.splat226, - %393 = trunc <8 x i64> %392 to <8 x i32> - %394 = icmp sgt <8 x i32> %broadcast.splat228, %393 - %wide.masked.gather231.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat230, i32 4, <8 x i1> %394, <8 x float> undef), !tbaa !12, !alias.scope !246, !noalias !249 - %395 = extractelement <8 x i64> %392, i32 0 - %396 = shl i64 %395, 32 - %397 = ashr exact i64 %396, 32 - %398 = getelementptr inbounds float, float* %9, i64 %397 - %399 = bitcast float* %398 to <8 x float>* - %wide.masked.load232.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %399, i32 4, <8 x i1> %394, <8 x float> undef), !tbaa !12, !alias.scope !251, !noalias !249 - %wide.masked.gather235.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat234, i32 4, <8 x i1> %394, <8 x float> undef), !tbaa !12, !alias.scope !253, !noalias !249 - %400 = getelementptr inbounds float, float* %12, i64 %397 - %401 = bitcast float* %400 to <8 x float>* - %wide.masked.load236.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %401, i32 4, <8 x i1> %394, <8 x float> undef), !tbaa !12, !alias.scope !255, !noalias !249 - %402 = fmul <8 x float> %wide.masked.gather235.3, %wide.masked.load236.3 - %403 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather231.3, <8 x float> %wide.masked.load232.3, <8 x float> %402) - %404 = extractelement <8 x i32> %393, i32 0 - %405 = add nsw i32 %mul.i.i.3, %404 - %406 = sext i32 %405 to i64 - %407 = getelementptr inbounds float, float* %6, i64 %406 - %408 = bitcast float* %407 to <8 x float>* - %wide.masked.load237.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %408, i32 4, <8 x i1> %394, <8 x float> undef), !tbaa !12, !alias.scope !249 - %409 = fadd <8 x float> %wide.masked.load237.3, %403 - %410 = bitcast float* %407 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %409, <8 x float>* %410, i32 4, <8 x i1> %394), !tbaa !12, !alias.scope !249, !llvm.access.group !27 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.r_exit.i.i.us.3.1, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ], [ %844, %if.end.r_exit.i.i.us.3.1 ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %22, %conv.i.i.us.3 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.r_exit.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %411 = load float, float* %arrayidx.i.i.3, align 4, !tbaa !12 - %sext26.i.i.us.3 = shl i64 %add1.i.i.i.us.3, 32 - %idxprom6.i.i.us.3 = ashr exact i64 %sext26.i.i.us.3, 32 - %arrayidx7.i.i.us.3 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.3 - %412 = load float, float* %arrayidx7.i.i.us.3, align 4, !tbaa !12 - %413 = load float, float* %arrayidx9.i.i.3, align 4, !tbaa !12 - %arrayidx11.i.i.us.3 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.3 - %414 = load float, float* %arrayidx11.i.i.us.3, align 4, !tbaa !12 - %mul12.i.i.us.3 = fmul float %413, %414 - %415 = tail call float @llvm.fmuladd.f32(float %411, float %412, float %mul12.i.i.us.3) #6 - %add.i.i.us.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom13.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx14.i.i.us.3 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.3 - %416 = load float, float* %arrayidx14.i.i.us.3, align 4, !tbaa !12 - %add15.i.i.us.3 = fadd float %416, %415 - store float %add15.i.i.us.3, float* %arrayidx14.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.3 - -if.end.r_exit.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %417 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %417, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %22, %conv.i.i.us.3.1 - br i1 %cmp4.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.r_exit.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.r_exit.i.i.us.3.1 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph218, %pregion_for_end.i.i.2 - %add6.i.i.i.4 = or i64 %mul3.i.i.i, 4 - %conv2.i.i.4 = trunc i64 %add6.i.i.i.4 to i32 - %cmp.i.i.4 = icmp sgt i32 %22, %conv2.i.i.4 - %sext.i.i.4 = shl i64 %add6.i.i.i.4, 32 - %idxprom.i.i.4 = ashr exact i64 %sext.i.i.4, 32 - %arrayidx.i.i.4 = getelementptr inbounds float, float* %15, i64 %idxprom.i.i.4 - %arrayidx9.i.i.4 = getelementptr inbounds float, float* %18, i64 %idxprom.i.i.4 - %mul.i.i.4 = mul nsw i32 %22, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck245, label %pregion_for_end.i.i.4 - -vector.scevcheck245: ; preds = %pregion_for_end.i.i.3 - %418 = mul i32 %22, %conv2.i.i.4 - %419 = trunc i64 %2 to i32 - %420 = shl i32 %419, 5 - %421 = add i32 %418, %420 - %422 = icmp sgt i32 %421, 2147483616 - br i1 %422, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.memcheck283 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.memcheck283, %vector.scevcheck245 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.memcheck283: ; preds = %vector.scevcheck245 - %sext504 = shl i64 %3, 35 - %423 = ashr exact i64 %sext504, 32 - %424 = or i64 %423, 4 - %scevgep247 = getelementptr float, float* %15, i64 %424 - %scevgep247248 = bitcast float* %scevgep247 to i8* - %uglygep249 = getelementptr i8, i8* %scevgep247248, i64 1 - %425 = mul i32 %22, %conv2.i.i.4 - %426 = trunc i64 %2 to i32 - %427 = shl i32 %426, 5 - %428 = add i32 %425, %427 - %429 = sext i32 %428 to i64 - %scevgep250 = getelementptr float, float* %6, i64 %429 - %scevgep250251 = bitcast float* %scevgep250 to i8* - %430 = add nsw i64 %429, 32 - %scevgep252 = getelementptr float, float* %6, i64 %430 - %431 = sext i32 %427 to i64 - %scevgep254 = getelementptr float, float* %9, i64 %431 - %432 = add nsw i64 %431, 32 - %scevgep256 = getelementptr float, float* %9, i64 %432 - %scevgep258 = getelementptr float, float* %18, i64 %424 - %scevgep258259 = bitcast float* %scevgep258 to i8* - %uglygep260 = getelementptr i8, i8* %scevgep258259, i64 1 - %scevgep261 = getelementptr float, float* %12, i64 %431 - %scevgep263 = getelementptr float, float* %12, i64 %432 - %bound0266 = icmp ult float* %arrayidx.i.i.4, %scevgep252 - %bound1267 = icmp ugt i8* %uglygep249, %scevgep250251 - %found.conflict268 = and i1 %bound0266, %bound1267 - %bound0269 = icmp ult float* %scevgep254, %scevgep252 - %bound1270 = icmp ult float* %scevgep250, %scevgep256 - %found.conflict271 = and i1 %bound0269, %bound1270 - %conflict.rdx272 = or i1 %found.conflict268, %found.conflict271 - %bound0274 = icmp ult float* %arrayidx9.i.i.4, %scevgep252 - %bound1275 = icmp ugt i8* %uglygep260, %scevgep250251 - %found.conflict276 = and i1 %bound0274, %bound1275 - %conflict.rdx277 = or i1 %conflict.rdx272, %found.conflict276 - %bound0278 = icmp ult float* %scevgep261, %scevgep252 - %bound1279 = icmp ult float* %scevgep250, %scevgep263 - %found.conflict280 = and i1 %bound0278, %bound1279 - %conflict.rdx281 = or i1 %conflict.rdx277, %found.conflict280 - br i1 %conflict.rdx281, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph284 - -vector.ph284: ; preds = %vector.memcheck283 - %broadcast.splatinsert291 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat292 = shufflevector <8 x i64> %broadcast.splatinsert291, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert293 = insertelement <8 x i32> undef, i32 %22, i32 0 - %broadcast.splat294 = shufflevector <8 x i32> %broadcast.splatinsert293, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert295 = insertelement <8 x float*> undef, float* %arrayidx.i.i.4, i32 0 - %broadcast.splat296 = shufflevector <8 x float*> %broadcast.splatinsert295, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert299 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.4, i32 0 - %broadcast.splat300 = shufflevector <8 x float*> %broadcast.splatinsert299, <8 x float*> undef, <8 x i32> zeroinitializer - %433 = or <8 x i64> %broadcast.splat292, - %434 = trunc <8 x i64> %433 to <8 x i32> - %435 = icmp sgt <8 x i32> %broadcast.splat294, %434 - %wide.masked.gather297 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat296, i32 4, <8 x i1> %435, <8 x float> undef), !tbaa !12, !alias.scope !257, !noalias !260 - %436 = extractelement <8 x i64> %433, i32 0 - %437 = shl i64 %436, 32 - %438 = ashr exact i64 %437, 32 - %439 = getelementptr inbounds float, float* %9, i64 %438 - %440 = bitcast float* %439 to <8 x float>* - %wide.masked.load298 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %440, i32 4, <8 x i1> %435, <8 x float> undef), !tbaa !12, !alias.scope !262, !noalias !260 - %wide.masked.gather301 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat300, i32 4, <8 x i1> %435, <8 x float> undef), !tbaa !12, !alias.scope !264, !noalias !260 - %441 = getelementptr inbounds float, float* %12, i64 %438 - %442 = bitcast float* %441 to <8 x float>* - %wide.masked.load302 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %442, i32 4, <8 x i1> %435, <8 x float> undef), !tbaa !12, !alias.scope !266, !noalias !260 - %443 = fmul <8 x float> %wide.masked.gather301, %wide.masked.load302 - %444 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather297, <8 x float> %wide.masked.load298, <8 x float> %443) - %445 = extractelement <8 x i32> %434, i32 0 - %446 = add nsw i32 %mul.i.i.4, %445 - %447 = sext i32 %446 to i64 - %448 = getelementptr inbounds float, float* %6, i64 %447 - %449 = bitcast float* %448 to <8 x float>* - %wide.masked.load303 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %449, i32 4, <8 x i1> %435, <8 x float> undef), !tbaa !12, !alias.scope !260 - %450 = fadd <8 x float> %wide.masked.load303, %444 - %451 = bitcast float* %448 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %450, <8 x float>* %451, i32 4, <8 x i1> %435), !tbaa !12, !alias.scope !260, !llvm.access.group !27 - %452 = or <8 x i64> %broadcast.splat292, - %453 = trunc <8 x i64> %452 to <8 x i32> - %454 = icmp sgt <8 x i32> %broadcast.splat294, %453 - %wide.masked.gather297.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat296, i32 4, <8 x i1> %454, <8 x float> undef), !tbaa !12, !alias.scope !257, !noalias !260 - %455 = extractelement <8 x i64> %452, i32 0 - %456 = shl i64 %455, 32 - %457 = ashr exact i64 %456, 32 - %458 = getelementptr inbounds float, float* %9, i64 %457 - %459 = bitcast float* %458 to <8 x float>* - %wide.masked.load298.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %459, i32 4, <8 x i1> %454, <8 x float> undef), !tbaa !12, !alias.scope !262, !noalias !260 - %wide.masked.gather301.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat300, i32 4, <8 x i1> %454, <8 x float> undef), !tbaa !12, !alias.scope !264, !noalias !260 - %460 = getelementptr inbounds float, float* %12, i64 %457 - %461 = bitcast float* %460 to <8 x float>* - %wide.masked.load302.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %461, i32 4, <8 x i1> %454, <8 x float> undef), !tbaa !12, !alias.scope !266, !noalias !260 - %462 = fmul <8 x float> %wide.masked.gather301.1, %wide.masked.load302.1 - %463 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather297.1, <8 x float> %wide.masked.load298.1, <8 x float> %462) - %464 = extractelement <8 x i32> %453, i32 0 - %465 = add nsw i32 %mul.i.i.4, %464 - %466 = sext i32 %465 to i64 - %467 = getelementptr inbounds float, float* %6, i64 %466 - %468 = bitcast float* %467 to <8 x float>* - %wide.masked.load303.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %468, i32 4, <8 x i1> %454, <8 x float> undef), !tbaa !12, !alias.scope !260 - %469 = fadd <8 x float> %wide.masked.load303.1, %463 - %470 = bitcast float* %467 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %469, <8 x float>* %470, i32 4, <8 x i1> %454), !tbaa !12, !alias.scope !260, !llvm.access.group !27 - %471 = or <8 x i64> %broadcast.splat292, - %472 = trunc <8 x i64> %471 to <8 x i32> - %473 = icmp sgt <8 x i32> %broadcast.splat294, %472 - %wide.masked.gather297.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat296, i32 4, <8 x i1> %473, <8 x float> undef), !tbaa !12, !alias.scope !257, !noalias !260 - %474 = extractelement <8 x i64> %471, i32 0 - %475 = shl i64 %474, 32 - %476 = ashr exact i64 %475, 32 - %477 = getelementptr inbounds float, float* %9, i64 %476 - %478 = bitcast float* %477 to <8 x float>* - %wide.masked.load298.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %478, i32 4, <8 x i1> %473, <8 x float> undef), !tbaa !12, !alias.scope !262, !noalias !260 - %wide.masked.gather301.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat300, i32 4, <8 x i1> %473, <8 x float> undef), !tbaa !12, !alias.scope !264, !noalias !260 - %479 = getelementptr inbounds float, float* %12, i64 %476 - %480 = bitcast float* %479 to <8 x float>* - %wide.masked.load302.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %480, i32 4, <8 x i1> %473, <8 x float> undef), !tbaa !12, !alias.scope !266, !noalias !260 - %481 = fmul <8 x float> %wide.masked.gather301.2, %wide.masked.load302.2 - %482 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather297.2, <8 x float> %wide.masked.load298.2, <8 x float> %481) - %483 = extractelement <8 x i32> %472, i32 0 - %484 = add nsw i32 %mul.i.i.4, %483 - %485 = sext i32 %484 to i64 - %486 = getelementptr inbounds float, float* %6, i64 %485 - %487 = bitcast float* %486 to <8 x float>* - %wide.masked.load303.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %487, i32 4, <8 x i1> %473, <8 x float> undef), !tbaa !12, !alias.scope !260 - %488 = fadd <8 x float> %wide.masked.load303.2, %482 - %489 = bitcast float* %486 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %488, <8 x float>* %489, i32 4, <8 x i1> %473), !tbaa !12, !alias.scope !260, !llvm.access.group !27 - %490 = or <8 x i64> %broadcast.splat292, - %491 = trunc <8 x i64> %490 to <8 x i32> - %492 = icmp sgt <8 x i32> %broadcast.splat294, %491 - %wide.masked.gather297.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat296, i32 4, <8 x i1> %492, <8 x float> undef), !tbaa !12, !alias.scope !257, !noalias !260 - %493 = extractelement <8 x i64> %490, i32 0 - %494 = shl i64 %493, 32 - %495 = ashr exact i64 %494, 32 - %496 = getelementptr inbounds float, float* %9, i64 %495 - %497 = bitcast float* %496 to <8 x float>* - %wide.masked.load298.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %497, i32 4, <8 x i1> %492, <8 x float> undef), !tbaa !12, !alias.scope !262, !noalias !260 - %wide.masked.gather301.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat300, i32 4, <8 x i1> %492, <8 x float> undef), !tbaa !12, !alias.scope !264, !noalias !260 - %498 = getelementptr inbounds float, float* %12, i64 %495 - %499 = bitcast float* %498 to <8 x float>* - %wide.masked.load302.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %499, i32 4, <8 x i1> %492, <8 x float> undef), !tbaa !12, !alias.scope !266, !noalias !260 - %500 = fmul <8 x float> %wide.masked.gather301.3, %wide.masked.load302.3 - %501 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather297.3, <8 x float> %wide.masked.load298.3, <8 x float> %500) - %502 = extractelement <8 x i32> %491, i32 0 - %503 = add nsw i32 %mul.i.i.4, %502 - %504 = sext i32 %503 to i64 - %505 = getelementptr inbounds float, float* %6, i64 %504 - %506 = bitcast float* %505 to <8 x float>* - %wide.masked.load303.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %506, i32 4, <8 x i1> %492, <8 x float> undef), !tbaa !12, !alias.scope !260 - %507 = fadd <8 x float> %wide.masked.load303.3, %501 - %508 = bitcast float* %505 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %507, <8 x float>* %508, i32 4, <8 x i1> %492), !tbaa !12, !alias.scope !260, !llvm.access.group !27 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.r_exit.i.i.us.4.1, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ], [ %837, %if.end.r_exit.i.i.us.4.1 ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %22, %conv.i.i.us.4 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.r_exit.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %509 = load float, float* %arrayidx.i.i.4, align 4, !tbaa !12 - %sext26.i.i.us.4 = shl i64 %add1.i.i.i.us.4, 32 - %idxprom6.i.i.us.4 = ashr exact i64 %sext26.i.i.us.4, 32 - %arrayidx7.i.i.us.4 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.4 - %510 = load float, float* %arrayidx7.i.i.us.4, align 4, !tbaa !12 - %511 = load float, float* %arrayidx9.i.i.4, align 4, !tbaa !12 - %arrayidx11.i.i.us.4 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.4 - %512 = load float, float* %arrayidx11.i.i.us.4, align 4, !tbaa !12 - %mul12.i.i.us.4 = fmul float %511, %512 - %513 = tail call float @llvm.fmuladd.f32(float %509, float %510, float %mul12.i.i.us.4) #6 - %add.i.i.us.4 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom13.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx14.i.i.us.4 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.4 - %514 = load float, float* %arrayidx14.i.i.us.4, align 4, !tbaa !12 - %add15.i.i.us.4 = fadd float %514, %513 - store float %add15.i.i.us.4, float* %arrayidx14.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.4 - -if.end.r_exit.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %515 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %515, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %22, %conv.i.i.us.4.1 - br i1 %cmp4.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.r_exit.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.r_exit.i.i.us.4.1 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph284, %pregion_for_end.i.i.3 - %add6.i.i.i.5 = or i64 %mul3.i.i.i, 5 - %conv2.i.i.5 = trunc i64 %add6.i.i.i.5 to i32 - %cmp.i.i.5 = icmp sgt i32 %22, %conv2.i.i.5 - %sext.i.i.5 = shl i64 %add6.i.i.i.5, 32 - %idxprom.i.i.5 = ashr exact i64 %sext.i.i.5, 32 - %arrayidx.i.i.5 = getelementptr inbounds float, float* %15, i64 %idxprom.i.i.5 - %arrayidx9.i.i.5 = getelementptr inbounds float, float* %18, i64 %idxprom.i.i.5 - %mul.i.i.5 = mul nsw i32 %22, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck311, label %pregion_for_end.i.i.5 - -vector.scevcheck311: ; preds = %pregion_for_end.i.i.4 - %516 = mul i32 %22, %conv2.i.i.5 - %517 = trunc i64 %2 to i32 - %518 = shl i32 %517, 5 - %519 = add i32 %516, %518 - %520 = icmp sgt i32 %519, 2147483616 - br i1 %520, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.memcheck349 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.memcheck349, %vector.scevcheck311 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.memcheck349: ; preds = %vector.scevcheck311 - %sext503 = shl i64 %3, 35 - %521 = ashr exact i64 %sext503, 32 - %522 = or i64 %521, 5 - %scevgep313 = getelementptr float, float* %15, i64 %522 - %scevgep313314 = bitcast float* %scevgep313 to i8* - %uglygep315 = getelementptr i8, i8* %scevgep313314, i64 1 - %523 = mul i32 %22, %conv2.i.i.5 - %524 = trunc i64 %2 to i32 - %525 = shl i32 %524, 5 - %526 = add i32 %523, %525 - %527 = sext i32 %526 to i64 - %scevgep316 = getelementptr float, float* %6, i64 %527 - %scevgep316317 = bitcast float* %scevgep316 to i8* - %528 = add nsw i64 %527, 32 - %scevgep318 = getelementptr float, float* %6, i64 %528 - %529 = sext i32 %525 to i64 - %scevgep320 = getelementptr float, float* %9, i64 %529 - %530 = add nsw i64 %529, 32 - %scevgep322 = getelementptr float, float* %9, i64 %530 - %scevgep324 = getelementptr float, float* %18, i64 %522 - %scevgep324325 = bitcast float* %scevgep324 to i8* - %uglygep326 = getelementptr i8, i8* %scevgep324325, i64 1 - %scevgep327 = getelementptr float, float* %12, i64 %529 - %scevgep329 = getelementptr float, float* %12, i64 %530 - %bound0332 = icmp ult float* %arrayidx.i.i.5, %scevgep318 - %bound1333 = icmp ugt i8* %uglygep315, %scevgep316317 - %found.conflict334 = and i1 %bound0332, %bound1333 - %bound0335 = icmp ult float* %scevgep320, %scevgep318 - %bound1336 = icmp ult float* %scevgep316, %scevgep322 - %found.conflict337 = and i1 %bound0335, %bound1336 - %conflict.rdx338 = or i1 %found.conflict334, %found.conflict337 - %bound0340 = icmp ult float* %arrayidx9.i.i.5, %scevgep318 - %bound1341 = icmp ugt i8* %uglygep326, %scevgep316317 - %found.conflict342 = and i1 %bound0340, %bound1341 - %conflict.rdx343 = or i1 %conflict.rdx338, %found.conflict342 - %bound0344 = icmp ult float* %scevgep327, %scevgep318 - %bound1345 = icmp ult float* %scevgep316, %scevgep329 - %found.conflict346 = and i1 %bound0344, %bound1345 - %conflict.rdx347 = or i1 %conflict.rdx343, %found.conflict346 - br i1 %conflict.rdx347, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph350 - -vector.ph350: ; preds = %vector.memcheck349 - %broadcast.splatinsert357 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat358 = shufflevector <8 x i64> %broadcast.splatinsert357, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert359 = insertelement <8 x i32> undef, i32 %22, i32 0 - %broadcast.splat360 = shufflevector <8 x i32> %broadcast.splatinsert359, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert361 = insertelement <8 x float*> undef, float* %arrayidx.i.i.5, i32 0 - %broadcast.splat362 = shufflevector <8 x float*> %broadcast.splatinsert361, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert365 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.5, i32 0 - %broadcast.splat366 = shufflevector <8 x float*> %broadcast.splatinsert365, <8 x float*> undef, <8 x i32> zeroinitializer - %531 = or <8 x i64> %broadcast.splat358, - %532 = trunc <8 x i64> %531 to <8 x i32> - %533 = icmp sgt <8 x i32> %broadcast.splat360, %532 - %wide.masked.gather363 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat362, i32 4, <8 x i1> %533, <8 x float> undef), !tbaa !12, !alias.scope !268, !noalias !271 - %534 = extractelement <8 x i64> %531, i32 0 - %535 = shl i64 %534, 32 - %536 = ashr exact i64 %535, 32 - %537 = getelementptr inbounds float, float* %9, i64 %536 - %538 = bitcast float* %537 to <8 x float>* - %wide.masked.load364 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %538, i32 4, <8 x i1> %533, <8 x float> undef), !tbaa !12, !alias.scope !273, !noalias !271 - %wide.masked.gather367 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat366, i32 4, <8 x i1> %533, <8 x float> undef), !tbaa !12, !alias.scope !275, !noalias !271 - %539 = getelementptr inbounds float, float* %12, i64 %536 - %540 = bitcast float* %539 to <8 x float>* - %wide.masked.load368 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %540, i32 4, <8 x i1> %533, <8 x float> undef), !tbaa !12, !alias.scope !277, !noalias !271 - %541 = fmul <8 x float> %wide.masked.gather367, %wide.masked.load368 - %542 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather363, <8 x float> %wide.masked.load364, <8 x float> %541) - %543 = extractelement <8 x i32> %532, i32 0 - %544 = add nsw i32 %mul.i.i.5, %543 - %545 = sext i32 %544 to i64 - %546 = getelementptr inbounds float, float* %6, i64 %545 - %547 = bitcast float* %546 to <8 x float>* - %wide.masked.load369 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %547, i32 4, <8 x i1> %533, <8 x float> undef), !tbaa !12, !alias.scope !271 - %548 = fadd <8 x float> %wide.masked.load369, %542 - %549 = bitcast float* %546 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %548, <8 x float>* %549, i32 4, <8 x i1> %533), !tbaa !12, !alias.scope !271, !llvm.access.group !27 - %550 = or <8 x i64> %broadcast.splat358, - %551 = trunc <8 x i64> %550 to <8 x i32> - %552 = icmp sgt <8 x i32> %broadcast.splat360, %551 - %wide.masked.gather363.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat362, i32 4, <8 x i1> %552, <8 x float> undef), !tbaa !12, !alias.scope !268, !noalias !271 - %553 = extractelement <8 x i64> %550, i32 0 - %554 = shl i64 %553, 32 - %555 = ashr exact i64 %554, 32 - %556 = getelementptr inbounds float, float* %9, i64 %555 - %557 = bitcast float* %556 to <8 x float>* - %wide.masked.load364.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %557, i32 4, <8 x i1> %552, <8 x float> undef), !tbaa !12, !alias.scope !273, !noalias !271 - %wide.masked.gather367.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat366, i32 4, <8 x i1> %552, <8 x float> undef), !tbaa !12, !alias.scope !275, !noalias !271 - %558 = getelementptr inbounds float, float* %12, i64 %555 - %559 = bitcast float* %558 to <8 x float>* - %wide.masked.load368.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %559, i32 4, <8 x i1> %552, <8 x float> undef), !tbaa !12, !alias.scope !277, !noalias !271 - %560 = fmul <8 x float> %wide.masked.gather367.1, %wide.masked.load368.1 - %561 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather363.1, <8 x float> %wide.masked.load364.1, <8 x float> %560) - %562 = extractelement <8 x i32> %551, i32 0 - %563 = add nsw i32 %mul.i.i.5, %562 - %564 = sext i32 %563 to i64 - %565 = getelementptr inbounds float, float* %6, i64 %564 - %566 = bitcast float* %565 to <8 x float>* - %wide.masked.load369.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %566, i32 4, <8 x i1> %552, <8 x float> undef), !tbaa !12, !alias.scope !271 - %567 = fadd <8 x float> %wide.masked.load369.1, %561 - %568 = bitcast float* %565 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %567, <8 x float>* %568, i32 4, <8 x i1> %552), !tbaa !12, !alias.scope !271, !llvm.access.group !27 - %569 = or <8 x i64> %broadcast.splat358, - %570 = trunc <8 x i64> %569 to <8 x i32> - %571 = icmp sgt <8 x i32> %broadcast.splat360, %570 - %wide.masked.gather363.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat362, i32 4, <8 x i1> %571, <8 x float> undef), !tbaa !12, !alias.scope !268, !noalias !271 - %572 = extractelement <8 x i64> %569, i32 0 - %573 = shl i64 %572, 32 - %574 = ashr exact i64 %573, 32 - %575 = getelementptr inbounds float, float* %9, i64 %574 - %576 = bitcast float* %575 to <8 x float>* - %wide.masked.load364.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %576, i32 4, <8 x i1> %571, <8 x float> undef), !tbaa !12, !alias.scope !273, !noalias !271 - %wide.masked.gather367.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat366, i32 4, <8 x i1> %571, <8 x float> undef), !tbaa !12, !alias.scope !275, !noalias !271 - %577 = getelementptr inbounds float, float* %12, i64 %574 - %578 = bitcast float* %577 to <8 x float>* - %wide.masked.load368.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %578, i32 4, <8 x i1> %571, <8 x float> undef), !tbaa !12, !alias.scope !277, !noalias !271 - %579 = fmul <8 x float> %wide.masked.gather367.2, %wide.masked.load368.2 - %580 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather363.2, <8 x float> %wide.masked.load364.2, <8 x float> %579) - %581 = extractelement <8 x i32> %570, i32 0 - %582 = add nsw i32 %mul.i.i.5, %581 - %583 = sext i32 %582 to i64 - %584 = getelementptr inbounds float, float* %6, i64 %583 - %585 = bitcast float* %584 to <8 x float>* - %wide.masked.load369.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %585, i32 4, <8 x i1> %571, <8 x float> undef), !tbaa !12, !alias.scope !271 - %586 = fadd <8 x float> %wide.masked.load369.2, %580 - %587 = bitcast float* %584 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %586, <8 x float>* %587, i32 4, <8 x i1> %571), !tbaa !12, !alias.scope !271, !llvm.access.group !27 - %588 = or <8 x i64> %broadcast.splat358, - %589 = trunc <8 x i64> %588 to <8 x i32> - %590 = icmp sgt <8 x i32> %broadcast.splat360, %589 - %wide.masked.gather363.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat362, i32 4, <8 x i1> %590, <8 x float> undef), !tbaa !12, !alias.scope !268, !noalias !271 - %591 = extractelement <8 x i64> %588, i32 0 - %592 = shl i64 %591, 32 - %593 = ashr exact i64 %592, 32 - %594 = getelementptr inbounds float, float* %9, i64 %593 - %595 = bitcast float* %594 to <8 x float>* - %wide.masked.load364.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %595, i32 4, <8 x i1> %590, <8 x float> undef), !tbaa !12, !alias.scope !273, !noalias !271 - %wide.masked.gather367.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat366, i32 4, <8 x i1> %590, <8 x float> undef), !tbaa !12, !alias.scope !275, !noalias !271 - %596 = getelementptr inbounds float, float* %12, i64 %593 - %597 = bitcast float* %596 to <8 x float>* - %wide.masked.load368.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %597, i32 4, <8 x i1> %590, <8 x float> undef), !tbaa !12, !alias.scope !277, !noalias !271 - %598 = fmul <8 x float> %wide.masked.gather367.3, %wide.masked.load368.3 - %599 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather363.3, <8 x float> %wide.masked.load364.3, <8 x float> %598) - %600 = extractelement <8 x i32> %589, i32 0 - %601 = add nsw i32 %mul.i.i.5, %600 - %602 = sext i32 %601 to i64 - %603 = getelementptr inbounds float, float* %6, i64 %602 - %604 = bitcast float* %603 to <8 x float>* - %wide.masked.load369.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %604, i32 4, <8 x i1> %590, <8 x float> undef), !tbaa !12, !alias.scope !271 - %605 = fadd <8 x float> %wide.masked.load369.3, %599 - %606 = bitcast float* %603 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %605, <8 x float>* %606, i32 4, <8 x i1> %590), !tbaa !12, !alias.scope !271, !llvm.access.group !27 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.r_exit.i.i.us.5.1, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ], [ %830, %if.end.r_exit.i.i.us.5.1 ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %22, %conv.i.i.us.5 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.r_exit.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %607 = load float, float* %arrayidx.i.i.5, align 4, !tbaa !12 - %sext26.i.i.us.5 = shl i64 %add1.i.i.i.us.5, 32 - %idxprom6.i.i.us.5 = ashr exact i64 %sext26.i.i.us.5, 32 - %arrayidx7.i.i.us.5 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.5 - %608 = load float, float* %arrayidx7.i.i.us.5, align 4, !tbaa !12 - %609 = load float, float* %arrayidx9.i.i.5, align 4, !tbaa !12 - %arrayidx11.i.i.us.5 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.5 - %610 = load float, float* %arrayidx11.i.i.us.5, align 4, !tbaa !12 - %mul12.i.i.us.5 = fmul float %609, %610 - %611 = tail call float @llvm.fmuladd.f32(float %607, float %608, float %mul12.i.i.us.5) #6 - %add.i.i.us.5 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom13.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx14.i.i.us.5 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.5 - %612 = load float, float* %arrayidx14.i.i.us.5, align 4, !tbaa !12 - %add15.i.i.us.5 = fadd float %612, %611 - store float %add15.i.i.us.5, float* %arrayidx14.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.5 - -if.end.r_exit.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %613 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %613, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %22, %conv.i.i.us.5.1 - br i1 %cmp4.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.r_exit.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.r_exit.i.i.us.5.1 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph350, %pregion_for_end.i.i.4 - %add6.i.i.i.6 = or i64 %mul3.i.i.i, 6 - %conv2.i.i.6 = trunc i64 %add6.i.i.i.6 to i32 - %cmp.i.i.6 = icmp sgt i32 %22, %conv2.i.i.6 - %sext.i.i.6 = shl i64 %add6.i.i.i.6, 32 - %idxprom.i.i.6 = ashr exact i64 %sext.i.i.6, 32 - %arrayidx.i.i.6 = getelementptr inbounds float, float* %15, i64 %idxprom.i.i.6 - %arrayidx9.i.i.6 = getelementptr inbounds float, float* %18, i64 %idxprom.i.i.6 - %mul.i.i.6 = mul nsw i32 %22, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck377, label %pregion_for_end.i.i.6 - -vector.scevcheck377: ; preds = %pregion_for_end.i.i.5 - %614 = mul i32 %22, %conv2.i.i.6 - %615 = trunc i64 %2 to i32 - %616 = shl i32 %615, 5 - %617 = add i32 %614, %616 - %618 = icmp sgt i32 %617, 2147483616 - br i1 %618, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.memcheck415 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.memcheck415, %vector.scevcheck377 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.memcheck415: ; preds = %vector.scevcheck377 - %sext502 = shl i64 %3, 35 - %619 = ashr exact i64 %sext502, 32 - %620 = or i64 %619, 6 - %scevgep379 = getelementptr float, float* %15, i64 %620 - %scevgep379380 = bitcast float* %scevgep379 to i8* - %uglygep381 = getelementptr i8, i8* %scevgep379380, i64 1 - %621 = mul i32 %22, %conv2.i.i.6 - %622 = trunc i64 %2 to i32 - %623 = shl i32 %622, 5 - %624 = add i32 %621, %623 - %625 = sext i32 %624 to i64 - %scevgep382 = getelementptr float, float* %6, i64 %625 - %scevgep382383 = bitcast float* %scevgep382 to i8* - %626 = add nsw i64 %625, 32 - %scevgep384 = getelementptr float, float* %6, i64 %626 - %627 = sext i32 %623 to i64 - %scevgep386 = getelementptr float, float* %9, i64 %627 - %628 = add nsw i64 %627, 32 - %scevgep388 = getelementptr float, float* %9, i64 %628 - %scevgep390 = getelementptr float, float* %18, i64 %620 - %scevgep390391 = bitcast float* %scevgep390 to i8* - %uglygep392 = getelementptr i8, i8* %scevgep390391, i64 1 - %scevgep393 = getelementptr float, float* %12, i64 %627 - %scevgep395 = getelementptr float, float* %12, i64 %628 - %bound0398 = icmp ult float* %arrayidx.i.i.6, %scevgep384 - %bound1399 = icmp ugt i8* %uglygep381, %scevgep382383 - %found.conflict400 = and i1 %bound0398, %bound1399 - %bound0401 = icmp ult float* %scevgep386, %scevgep384 - %bound1402 = icmp ult float* %scevgep382, %scevgep388 - %found.conflict403 = and i1 %bound0401, %bound1402 - %conflict.rdx404 = or i1 %found.conflict400, %found.conflict403 - %bound0406 = icmp ult float* %arrayidx9.i.i.6, %scevgep384 - %bound1407 = icmp ugt i8* %uglygep392, %scevgep382383 - %found.conflict408 = and i1 %bound0406, %bound1407 - %conflict.rdx409 = or i1 %conflict.rdx404, %found.conflict408 - %bound0410 = icmp ult float* %scevgep393, %scevgep384 - %bound1411 = icmp ult float* %scevgep382, %scevgep395 - %found.conflict412 = and i1 %bound0410, %bound1411 - %conflict.rdx413 = or i1 %conflict.rdx409, %found.conflict412 - br i1 %conflict.rdx413, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph416 - -vector.ph416: ; preds = %vector.memcheck415 - %broadcast.splatinsert423 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat424 = shufflevector <8 x i64> %broadcast.splatinsert423, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert425 = insertelement <8 x i32> undef, i32 %22, i32 0 - %broadcast.splat426 = shufflevector <8 x i32> %broadcast.splatinsert425, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert427 = insertelement <8 x float*> undef, float* %arrayidx.i.i.6, i32 0 - %broadcast.splat428 = shufflevector <8 x float*> %broadcast.splatinsert427, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert431 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.6, i32 0 - %broadcast.splat432 = shufflevector <8 x float*> %broadcast.splatinsert431, <8 x float*> undef, <8 x i32> zeroinitializer - %629 = or <8 x i64> %broadcast.splat424, - %630 = trunc <8 x i64> %629 to <8 x i32> - %631 = icmp sgt <8 x i32> %broadcast.splat426, %630 - %wide.masked.gather429 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat428, i32 4, <8 x i1> %631, <8 x float> undef), !tbaa !12, !alias.scope !279, !noalias !282 - %632 = extractelement <8 x i64> %629, i32 0 - %633 = shl i64 %632, 32 - %634 = ashr exact i64 %633, 32 - %635 = getelementptr inbounds float, float* %9, i64 %634 - %636 = bitcast float* %635 to <8 x float>* - %wide.masked.load430 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %636, i32 4, <8 x i1> %631, <8 x float> undef), !tbaa !12, !alias.scope !284, !noalias !282 - %wide.masked.gather433 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat432, i32 4, <8 x i1> %631, <8 x float> undef), !tbaa !12, !alias.scope !286, !noalias !282 - %637 = getelementptr inbounds float, float* %12, i64 %634 - %638 = bitcast float* %637 to <8 x float>* - %wide.masked.load434 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %638, i32 4, <8 x i1> %631, <8 x float> undef), !tbaa !12, !alias.scope !288, !noalias !282 - %639 = fmul <8 x float> %wide.masked.gather433, %wide.masked.load434 - %640 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather429, <8 x float> %wide.masked.load430, <8 x float> %639) - %641 = extractelement <8 x i32> %630, i32 0 - %642 = add nsw i32 %mul.i.i.6, %641 - %643 = sext i32 %642 to i64 - %644 = getelementptr inbounds float, float* %6, i64 %643 - %645 = bitcast float* %644 to <8 x float>* - %wide.masked.load435 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %645, i32 4, <8 x i1> %631, <8 x float> undef), !tbaa !12, !alias.scope !282 - %646 = fadd <8 x float> %wide.masked.load435, %640 - %647 = bitcast float* %644 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %646, <8 x float>* %647, i32 4, <8 x i1> %631), !tbaa !12, !alias.scope !282, !llvm.access.group !27 - %648 = or <8 x i64> %broadcast.splat424, - %649 = trunc <8 x i64> %648 to <8 x i32> - %650 = icmp sgt <8 x i32> %broadcast.splat426, %649 - %wide.masked.gather429.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat428, i32 4, <8 x i1> %650, <8 x float> undef), !tbaa !12, !alias.scope !279, !noalias !282 - %651 = extractelement <8 x i64> %648, i32 0 - %652 = shl i64 %651, 32 - %653 = ashr exact i64 %652, 32 - %654 = getelementptr inbounds float, float* %9, i64 %653 - %655 = bitcast float* %654 to <8 x float>* - %wide.masked.load430.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %655, i32 4, <8 x i1> %650, <8 x float> undef), !tbaa !12, !alias.scope !284, !noalias !282 - %wide.masked.gather433.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat432, i32 4, <8 x i1> %650, <8 x float> undef), !tbaa !12, !alias.scope !286, !noalias !282 - %656 = getelementptr inbounds float, float* %12, i64 %653 - %657 = bitcast float* %656 to <8 x float>* - %wide.masked.load434.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %657, i32 4, <8 x i1> %650, <8 x float> undef), !tbaa !12, !alias.scope !288, !noalias !282 - %658 = fmul <8 x float> %wide.masked.gather433.1, %wide.masked.load434.1 - %659 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather429.1, <8 x float> %wide.masked.load430.1, <8 x float> %658) - %660 = extractelement <8 x i32> %649, i32 0 - %661 = add nsw i32 %mul.i.i.6, %660 - %662 = sext i32 %661 to i64 - %663 = getelementptr inbounds float, float* %6, i64 %662 - %664 = bitcast float* %663 to <8 x float>* - %wide.masked.load435.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %664, i32 4, <8 x i1> %650, <8 x float> undef), !tbaa !12, !alias.scope !282 - %665 = fadd <8 x float> %wide.masked.load435.1, %659 - %666 = bitcast float* %663 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %665, <8 x float>* %666, i32 4, <8 x i1> %650), !tbaa !12, !alias.scope !282, !llvm.access.group !27 - %667 = or <8 x i64> %broadcast.splat424, - %668 = trunc <8 x i64> %667 to <8 x i32> - %669 = icmp sgt <8 x i32> %broadcast.splat426, %668 - %wide.masked.gather429.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat428, i32 4, <8 x i1> %669, <8 x float> undef), !tbaa !12, !alias.scope !279, !noalias !282 - %670 = extractelement <8 x i64> %667, i32 0 - %671 = shl i64 %670, 32 - %672 = ashr exact i64 %671, 32 - %673 = getelementptr inbounds float, float* %9, i64 %672 - %674 = bitcast float* %673 to <8 x float>* - %wide.masked.load430.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %674, i32 4, <8 x i1> %669, <8 x float> undef), !tbaa !12, !alias.scope !284, !noalias !282 - %wide.masked.gather433.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat432, i32 4, <8 x i1> %669, <8 x float> undef), !tbaa !12, !alias.scope !286, !noalias !282 - %675 = getelementptr inbounds float, float* %12, i64 %672 - %676 = bitcast float* %675 to <8 x float>* - %wide.masked.load434.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %676, i32 4, <8 x i1> %669, <8 x float> undef), !tbaa !12, !alias.scope !288, !noalias !282 - %677 = fmul <8 x float> %wide.masked.gather433.2, %wide.masked.load434.2 - %678 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather429.2, <8 x float> %wide.masked.load430.2, <8 x float> %677) - %679 = extractelement <8 x i32> %668, i32 0 - %680 = add nsw i32 %mul.i.i.6, %679 - %681 = sext i32 %680 to i64 - %682 = getelementptr inbounds float, float* %6, i64 %681 - %683 = bitcast float* %682 to <8 x float>* - %wide.masked.load435.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %683, i32 4, <8 x i1> %669, <8 x float> undef), !tbaa !12, !alias.scope !282 - %684 = fadd <8 x float> %wide.masked.load435.2, %678 - %685 = bitcast float* %682 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %684, <8 x float>* %685, i32 4, <8 x i1> %669), !tbaa !12, !alias.scope !282, !llvm.access.group !27 - %686 = or <8 x i64> %broadcast.splat424, - %687 = trunc <8 x i64> %686 to <8 x i32> - %688 = icmp sgt <8 x i32> %broadcast.splat426, %687 - %wide.masked.gather429.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat428, i32 4, <8 x i1> %688, <8 x float> undef), !tbaa !12, !alias.scope !279, !noalias !282 - %689 = extractelement <8 x i64> %686, i32 0 - %690 = shl i64 %689, 32 - %691 = ashr exact i64 %690, 32 - %692 = getelementptr inbounds float, float* %9, i64 %691 - %693 = bitcast float* %692 to <8 x float>* - %wide.masked.load430.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %693, i32 4, <8 x i1> %688, <8 x float> undef), !tbaa !12, !alias.scope !284, !noalias !282 - %wide.masked.gather433.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat432, i32 4, <8 x i1> %688, <8 x float> undef), !tbaa !12, !alias.scope !286, !noalias !282 - %694 = getelementptr inbounds float, float* %12, i64 %691 - %695 = bitcast float* %694 to <8 x float>* - %wide.masked.load434.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %695, i32 4, <8 x i1> %688, <8 x float> undef), !tbaa !12, !alias.scope !288, !noalias !282 - %696 = fmul <8 x float> %wide.masked.gather433.3, %wide.masked.load434.3 - %697 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather429.3, <8 x float> %wide.masked.load430.3, <8 x float> %696) - %698 = extractelement <8 x i32> %687, i32 0 - %699 = add nsw i32 %mul.i.i.6, %698 - %700 = sext i32 %699 to i64 - %701 = getelementptr inbounds float, float* %6, i64 %700 - %702 = bitcast float* %701 to <8 x float>* - %wide.masked.load435.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %702, i32 4, <8 x i1> %688, <8 x float> undef), !tbaa !12, !alias.scope !282 - %703 = fadd <8 x float> %wide.masked.load435.3, %697 - %704 = bitcast float* %701 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %703, <8 x float>* %704, i32 4, <8 x i1> %688), !tbaa !12, !alias.scope !282, !llvm.access.group !27 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.r_exit.i.i.us.6.1, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ], [ %823, %if.end.r_exit.i.i.us.6.1 ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %22, %conv.i.i.us.6 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.r_exit.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %705 = load float, float* %arrayidx.i.i.6, align 4, !tbaa !12 - %sext26.i.i.us.6 = shl i64 %add1.i.i.i.us.6, 32 - %idxprom6.i.i.us.6 = ashr exact i64 %sext26.i.i.us.6, 32 - %arrayidx7.i.i.us.6 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.6 - %706 = load float, float* %arrayidx7.i.i.us.6, align 4, !tbaa !12 - %707 = load float, float* %arrayidx9.i.i.6, align 4, !tbaa !12 - %arrayidx11.i.i.us.6 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.6 - %708 = load float, float* %arrayidx11.i.i.us.6, align 4, !tbaa !12 - %mul12.i.i.us.6 = fmul float %707, %708 - %709 = tail call float @llvm.fmuladd.f32(float %705, float %706, float %mul12.i.i.us.6) #6 - %add.i.i.us.6 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom13.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx14.i.i.us.6 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.6 - %710 = load float, float* %arrayidx14.i.i.us.6, align 4, !tbaa !12 - %add15.i.i.us.6 = fadd float %710, %709 - store float %add15.i.i.us.6, float* %arrayidx14.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.6 - -if.end.r_exit.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %711 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %711, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %22, %conv.i.i.us.6.1 - br i1 %cmp4.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.r_exit.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.r_exit.i.i.us.6.1 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph416, %pregion_for_end.i.i.5 - %add6.i.i.i.7 = or i64 %mul3.i.i.i, 7 - %conv2.i.i.7 = trunc i64 %add6.i.i.i.7 to i32 - %cmp.i.i.7 = icmp sgt i32 %22, %conv2.i.i.7 - %sext.i.i.7 = shl i64 %add6.i.i.i.7, 32 - %idxprom.i.i.7 = ashr exact i64 %sext.i.i.7, 32 - %arrayidx.i.i.7 = getelementptr inbounds float, float* %15, i64 %idxprom.i.i.7 - %arrayidx9.i.i.7 = getelementptr inbounds float, float* %18, i64 %idxprom.i.i.7 - %mul.i.i.7 = mul nsw i32 %22, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck443, label %pregion_for_end.i.i.7 - -vector.scevcheck443: ; preds = %pregion_for_end.i.i.6 - %712 = mul i32 %22, %conv2.i.i.7 - %713 = trunc i64 %2 to i32 - %714 = shl i32 %713, 5 - %715 = add i32 %712, %714 - %716 = icmp sgt i32 %715, 2147483616 - br i1 %716, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.memcheck481 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.memcheck481, %vector.scevcheck443 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.memcheck481: ; preds = %vector.scevcheck443 - %sext = shl i64 %3, 35 - %717 = ashr exact i64 %sext, 32 - %718 = or i64 %717, 7 - %scevgep445 = getelementptr float, float* %15, i64 %718 - %scevgep445446 = bitcast float* %scevgep445 to i8* - %uglygep447 = getelementptr i8, i8* %scevgep445446, i64 1 - %719 = mul i32 %22, %conv2.i.i.7 - %720 = trunc i64 %2 to i32 - %721 = shl i32 %720, 5 - %722 = add i32 %719, %721 - %723 = sext i32 %722 to i64 - %scevgep448 = getelementptr float, float* %6, i64 %723 - %scevgep448449 = bitcast float* %scevgep448 to i8* - %724 = add nsw i64 %723, 32 - %scevgep450 = getelementptr float, float* %6, i64 %724 - %725 = sext i32 %721 to i64 - %scevgep452 = getelementptr float, float* %9, i64 %725 - %726 = add nsw i64 %725, 32 - %scevgep454 = getelementptr float, float* %9, i64 %726 - %scevgep456 = getelementptr float, float* %18, i64 %718 - %scevgep456457 = bitcast float* %scevgep456 to i8* - %uglygep458 = getelementptr i8, i8* %scevgep456457, i64 1 - %scevgep459 = getelementptr float, float* %12, i64 %725 - %scevgep461 = getelementptr float, float* %12, i64 %726 - %bound0464 = icmp ult float* %arrayidx.i.i.7, %scevgep450 - %bound1465 = icmp ugt i8* %uglygep447, %scevgep448449 - %found.conflict466 = and i1 %bound0464, %bound1465 - %bound0467 = icmp ult float* %scevgep452, %scevgep450 - %bound1468 = icmp ult float* %scevgep448, %scevgep454 - %found.conflict469 = and i1 %bound0467, %bound1468 - %conflict.rdx470 = or i1 %found.conflict466, %found.conflict469 - %bound0472 = icmp ult float* %arrayidx9.i.i.7, %scevgep450 - %bound1473 = icmp ugt i8* %uglygep458, %scevgep448449 - %found.conflict474 = and i1 %bound0472, %bound1473 - %conflict.rdx475 = or i1 %conflict.rdx470, %found.conflict474 - %bound0476 = icmp ult float* %scevgep459, %scevgep450 - %bound1477 = icmp ult float* %scevgep448, %scevgep461 - %found.conflict478 = and i1 %bound0476, %bound1477 - %conflict.rdx479 = or i1 %conflict.rdx475, %found.conflict478 - br i1 %conflict.rdx479, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph482 - -vector.ph482: ; preds = %vector.memcheck481 - %broadcast.splatinsert489 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat490 = shufflevector <8 x i64> %broadcast.splatinsert489, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert491 = insertelement <8 x i32> undef, i32 %22, i32 0 - %broadcast.splat492 = shufflevector <8 x i32> %broadcast.splatinsert491, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert493 = insertelement <8 x float*> undef, float* %arrayidx.i.i.7, i32 0 - %broadcast.splat494 = shufflevector <8 x float*> %broadcast.splatinsert493, <8 x float*> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert497 = insertelement <8 x float*> undef, float* %arrayidx9.i.i.7, i32 0 - %broadcast.splat498 = shufflevector <8 x float*> %broadcast.splatinsert497, <8 x float*> undef, <8 x i32> zeroinitializer - %727 = or <8 x i64> %broadcast.splat490, - %728 = trunc <8 x i64> %727 to <8 x i32> - %729 = icmp sgt <8 x i32> %broadcast.splat492, %728 - %wide.masked.gather495 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat494, i32 4, <8 x i1> %729, <8 x float> undef), !tbaa !12, !alias.scope !290, !noalias !293 - %730 = extractelement <8 x i64> %727, i32 0 - %731 = shl i64 %730, 32 - %732 = ashr exact i64 %731, 32 - %733 = getelementptr inbounds float, float* %9, i64 %732 - %734 = bitcast float* %733 to <8 x float>* - %wide.masked.load496 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %734, i32 4, <8 x i1> %729, <8 x float> undef), !tbaa !12, !alias.scope !295, !noalias !293 - %wide.masked.gather499 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat498, i32 4, <8 x i1> %729, <8 x float> undef), !tbaa !12, !alias.scope !297, !noalias !293 - %735 = getelementptr inbounds float, float* %12, i64 %732 - %736 = bitcast float* %735 to <8 x float>* - %wide.masked.load500 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %736, i32 4, <8 x i1> %729, <8 x float> undef), !tbaa !12, !alias.scope !299, !noalias !293 - %737 = fmul <8 x float> %wide.masked.gather499, %wide.masked.load500 - %738 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather495, <8 x float> %wide.masked.load496, <8 x float> %737) - %739 = extractelement <8 x i32> %728, i32 0 - %740 = add nsw i32 %mul.i.i.7, %739 - %741 = sext i32 %740 to i64 - %742 = getelementptr inbounds float, float* %6, i64 %741 - %743 = bitcast float* %742 to <8 x float>* - %wide.masked.load501 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %743, i32 4, <8 x i1> %729, <8 x float> undef), !tbaa !12, !alias.scope !293 - %744 = fadd <8 x float> %wide.masked.load501, %738 - %745 = bitcast float* %742 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %744, <8 x float>* %745, i32 4, <8 x i1> %729), !tbaa !12, !alias.scope !293, !llvm.access.group !27 - %746 = or <8 x i64> %broadcast.splat490, - %747 = trunc <8 x i64> %746 to <8 x i32> - %748 = icmp sgt <8 x i32> %broadcast.splat492, %747 - %wide.masked.gather495.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat494, i32 4, <8 x i1> %748, <8 x float> undef), !tbaa !12, !alias.scope !290, !noalias !293 - %749 = extractelement <8 x i64> %746, i32 0 - %750 = shl i64 %749, 32 - %751 = ashr exact i64 %750, 32 - %752 = getelementptr inbounds float, float* %9, i64 %751 - %753 = bitcast float* %752 to <8 x float>* - %wide.masked.load496.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %753, i32 4, <8 x i1> %748, <8 x float> undef), !tbaa !12, !alias.scope !295, !noalias !293 - %wide.masked.gather499.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat498, i32 4, <8 x i1> %748, <8 x float> undef), !tbaa !12, !alias.scope !297, !noalias !293 - %754 = getelementptr inbounds float, float* %12, i64 %751 - %755 = bitcast float* %754 to <8 x float>* - %wide.masked.load500.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %755, i32 4, <8 x i1> %748, <8 x float> undef), !tbaa !12, !alias.scope !299, !noalias !293 - %756 = fmul <8 x float> %wide.masked.gather499.1, %wide.masked.load500.1 - %757 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather495.1, <8 x float> %wide.masked.load496.1, <8 x float> %756) - %758 = extractelement <8 x i32> %747, i32 0 - %759 = add nsw i32 %mul.i.i.7, %758 - %760 = sext i32 %759 to i64 - %761 = getelementptr inbounds float, float* %6, i64 %760 - %762 = bitcast float* %761 to <8 x float>* - %wide.masked.load501.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %762, i32 4, <8 x i1> %748, <8 x float> undef), !tbaa !12, !alias.scope !293 - %763 = fadd <8 x float> %wide.masked.load501.1, %757 - %764 = bitcast float* %761 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %763, <8 x float>* %764, i32 4, <8 x i1> %748), !tbaa !12, !alias.scope !293, !llvm.access.group !27 - %765 = or <8 x i64> %broadcast.splat490, - %766 = trunc <8 x i64> %765 to <8 x i32> - %767 = icmp sgt <8 x i32> %broadcast.splat492, %766 - %wide.masked.gather495.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat494, i32 4, <8 x i1> %767, <8 x float> undef), !tbaa !12, !alias.scope !290, !noalias !293 - %768 = extractelement <8 x i64> %765, i32 0 - %769 = shl i64 %768, 32 - %770 = ashr exact i64 %769, 32 - %771 = getelementptr inbounds float, float* %9, i64 %770 - %772 = bitcast float* %771 to <8 x float>* - %wide.masked.load496.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %772, i32 4, <8 x i1> %767, <8 x float> undef), !tbaa !12, !alias.scope !295, !noalias !293 - %wide.masked.gather499.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat498, i32 4, <8 x i1> %767, <8 x float> undef), !tbaa !12, !alias.scope !297, !noalias !293 - %773 = getelementptr inbounds float, float* %12, i64 %770 - %774 = bitcast float* %773 to <8 x float>* - %wide.masked.load500.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %774, i32 4, <8 x i1> %767, <8 x float> undef), !tbaa !12, !alias.scope !299, !noalias !293 - %775 = fmul <8 x float> %wide.masked.gather499.2, %wide.masked.load500.2 - %776 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather495.2, <8 x float> %wide.masked.load496.2, <8 x float> %775) - %777 = extractelement <8 x i32> %766, i32 0 - %778 = add nsw i32 %mul.i.i.7, %777 - %779 = sext i32 %778 to i64 - %780 = getelementptr inbounds float, float* %6, i64 %779 - %781 = bitcast float* %780 to <8 x float>* - %wide.masked.load501.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %781, i32 4, <8 x i1> %767, <8 x float> undef), !tbaa !12, !alias.scope !293 - %782 = fadd <8 x float> %wide.masked.load501.2, %776 - %783 = bitcast float* %780 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %782, <8 x float>* %783, i32 4, <8 x i1> %767), !tbaa !12, !alias.scope !293, !llvm.access.group !27 - %784 = or <8 x i64> %broadcast.splat490, - %785 = trunc <8 x i64> %784 to <8 x i32> - %786 = icmp sgt <8 x i32> %broadcast.splat492, %785 - %wide.masked.gather495.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat494, i32 4, <8 x i1> %786, <8 x float> undef), !tbaa !12, !alias.scope !290, !noalias !293 - %787 = extractelement <8 x i64> %784, i32 0 - %788 = shl i64 %787, 32 - %789 = ashr exact i64 %788, 32 - %790 = getelementptr inbounds float, float* %9, i64 %789 - %791 = bitcast float* %790 to <8 x float>* - %wide.masked.load496.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %791, i32 4, <8 x i1> %786, <8 x float> undef), !tbaa !12, !alias.scope !295, !noalias !293 - %wide.masked.gather499.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat498, i32 4, <8 x i1> %786, <8 x float> undef), !tbaa !12, !alias.scope !297, !noalias !293 - %792 = getelementptr inbounds float, float* %12, i64 %789 - %793 = bitcast float* %792 to <8 x float>* - %wide.masked.load500.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %793, i32 4, <8 x i1> %786, <8 x float> undef), !tbaa !12, !alias.scope !299, !noalias !293 - %794 = fmul <8 x float> %wide.masked.gather499.3, %wide.masked.load500.3 - %795 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %wide.masked.gather495.3, <8 x float> %wide.masked.load496.3, <8 x float> %794) - %796 = extractelement <8 x i32> %785, i32 0 - %797 = add nsw i32 %mul.i.i.7, %796 - %798 = sext i32 %797 to i64 - %799 = getelementptr inbounds float, float* %6, i64 %798 - %800 = bitcast float* %799 to <8 x float>* - %wide.masked.load501.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %800, i32 4, <8 x i1> %786, <8 x float> undef), !tbaa !12, !alias.scope !293 - %801 = fadd <8 x float> %wide.masked.load501.3, %795 - %802 = bitcast float* %799 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %801, <8 x float>* %802, i32 4, <8 x i1> %786), !tbaa !12, !alias.scope !293, !llvm.access.group !27 - br label %pregion_for_end.i.i.7 - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.r_exit.i.i.us.7.1, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ], [ %816, %if.end.r_exit.i.i.us.7.1 ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %22, %conv.i.i.us.7 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.r_exit.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %803 = load float, float* %arrayidx.i.i.7, align 4, !tbaa !12 - %sext26.i.i.us.7 = shl i64 %add1.i.i.i.us.7, 32 - %idxprom6.i.i.us.7 = ashr exact i64 %sext26.i.i.us.7, 32 - %arrayidx7.i.i.us.7 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.7 - %804 = load float, float* %arrayidx7.i.i.us.7, align 4, !tbaa !12 - %805 = load float, float* %arrayidx9.i.i.7, align 4, !tbaa !12 - %arrayidx11.i.i.us.7 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.7 - %806 = load float, float* %arrayidx11.i.i.us.7, align 4, !tbaa !12 - %mul12.i.i.us.7 = fmul float %805, %806 - %807 = tail call float @llvm.fmuladd.f32(float %803, float %804, float %mul12.i.i.us.7) #6 - %add.i.i.us.7 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom13.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx14.i.i.us.7 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.7 - %808 = load float, float* %arrayidx14.i.i.us.7, align 4, !tbaa !12 - %add15.i.i.us.7 = fadd float %808, %807 - store float %add15.i.i.us.7, float* %arrayidx14.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.7 - -if.end.r_exit.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %809 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %809, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %22, %conv.i.i.us.7.1 - br i1 %cmp4.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.r_exit.i.i.us.7.1 - -pregion_for_end.i.i.7.loopexit: ; preds = %if.end.r_exit.i.i.us.7.1 - br label %pregion_for_end.i.i.7 - -pregion_for_end.i.i.7: ; preds = %pregion_for_end.i.i.7.loopexit, %vector.ph482, %pregion_for_end.i.i.6 - ret void - -if.then.i.i.us.7.1: ; preds = %if.end.r_exit.i.i.us.7 - %810 = load float, float* %arrayidx.i.i.7, align 4, !tbaa !12 - %sext26.i.i.us.7.1 = shl i64 %add1.i.i.i.us.7.1, 32 - %idxprom6.i.i.us.7.1 = ashr exact i64 %sext26.i.i.us.7.1, 32 - %arrayidx7.i.i.us.7.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.7.1 - %811 = load float, float* %arrayidx7.i.i.us.7.1, align 4, !tbaa !12 - %812 = load float, float* %arrayidx9.i.i.7, align 4, !tbaa !12 - %arrayidx11.i.i.us.7.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.7.1 - %813 = load float, float* %arrayidx11.i.i.us.7.1, align 4, !tbaa !12 - %mul12.i.i.us.7.1 = fmul float %812, %813 - %814 = tail call float @llvm.fmuladd.f32(float %810, float %811, float %mul12.i.i.us.7.1) #6 - %add.i.i.us.7.1 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom13.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx14.i.i.us.7.1 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.7.1 - %815 = load float, float* %arrayidx14.i.i.us.7.1, align 4, !tbaa !12 - %add15.i.i.us.7.1 = fadd float %815, %814 - store float %add15.i.i.us.7.1, float* %arrayidx14.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.7.1 - -if.end.r_exit.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.r_exit.i.i.us.7 - %816 = add nuw nsw i64 %_local_id_x.i.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %816, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.i.7.loopexit, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !301 - -if.then.i.i.us.6.1: ; preds = %if.end.r_exit.i.i.us.6 - %817 = load float, float* %arrayidx.i.i.6, align 4, !tbaa !12 - %sext26.i.i.us.6.1 = shl i64 %add1.i.i.i.us.6.1, 32 - %idxprom6.i.i.us.6.1 = ashr exact i64 %sext26.i.i.us.6.1, 32 - %arrayidx7.i.i.us.6.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.6.1 - %818 = load float, float* %arrayidx7.i.i.us.6.1, align 4, !tbaa !12 - %819 = load float, float* %arrayidx9.i.i.6, align 4, !tbaa !12 - %arrayidx11.i.i.us.6.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.6.1 - %820 = load float, float* %arrayidx11.i.i.us.6.1, align 4, !tbaa !12 - %mul12.i.i.us.6.1 = fmul float %819, %820 - %821 = tail call float @llvm.fmuladd.f32(float %817, float %818, float %mul12.i.i.us.6.1) #6 - %add.i.i.us.6.1 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom13.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx14.i.i.us.6.1 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.6.1 - %822 = load float, float* %arrayidx14.i.i.us.6.1, align 4, !tbaa !12 - %add15.i.i.us.6.1 = fadd float %822, %821 - store float %add15.i.i.us.6.1, float* %arrayidx14.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.6.1 - -if.end.r_exit.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.r_exit.i.i.us.6 - %823 = add nuw nsw i64 %_local_id_x.i.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %823, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !302 - -if.then.i.i.us.5.1: ; preds = %if.end.r_exit.i.i.us.5 - %824 = load float, float* %arrayidx.i.i.5, align 4, !tbaa !12 - %sext26.i.i.us.5.1 = shl i64 %add1.i.i.i.us.5.1, 32 - %idxprom6.i.i.us.5.1 = ashr exact i64 %sext26.i.i.us.5.1, 32 - %arrayidx7.i.i.us.5.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.5.1 - %825 = load float, float* %arrayidx7.i.i.us.5.1, align 4, !tbaa !12 - %826 = load float, float* %arrayidx9.i.i.5, align 4, !tbaa !12 - %arrayidx11.i.i.us.5.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.5.1 - %827 = load float, float* %arrayidx11.i.i.us.5.1, align 4, !tbaa !12 - %mul12.i.i.us.5.1 = fmul float %826, %827 - %828 = tail call float @llvm.fmuladd.f32(float %824, float %825, float %mul12.i.i.us.5.1) #6 - %add.i.i.us.5.1 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom13.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx14.i.i.us.5.1 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.5.1 - %829 = load float, float* %arrayidx14.i.i.us.5.1, align 4, !tbaa !12 - %add15.i.i.us.5.1 = fadd float %829, %828 - store float %add15.i.i.us.5.1, float* %arrayidx14.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.5.1 - -if.end.r_exit.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.r_exit.i.i.us.5 - %830 = add nuw nsw i64 %_local_id_x.i.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %830, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !303 - -if.then.i.i.us.4.1: ; preds = %if.end.r_exit.i.i.us.4 - %831 = load float, float* %arrayidx.i.i.4, align 4, !tbaa !12 - %sext26.i.i.us.4.1 = shl i64 %add1.i.i.i.us.4.1, 32 - %idxprom6.i.i.us.4.1 = ashr exact i64 %sext26.i.i.us.4.1, 32 - %arrayidx7.i.i.us.4.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.4.1 - %832 = load float, float* %arrayidx7.i.i.us.4.1, align 4, !tbaa !12 - %833 = load float, float* %arrayidx9.i.i.4, align 4, !tbaa !12 - %arrayidx11.i.i.us.4.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.4.1 - %834 = load float, float* %arrayidx11.i.i.us.4.1, align 4, !tbaa !12 - %mul12.i.i.us.4.1 = fmul float %833, %834 - %835 = tail call float @llvm.fmuladd.f32(float %831, float %832, float %mul12.i.i.us.4.1) #6 - %add.i.i.us.4.1 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom13.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx14.i.i.us.4.1 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.4.1 - %836 = load float, float* %arrayidx14.i.i.us.4.1, align 4, !tbaa !12 - %add15.i.i.us.4.1 = fadd float %836, %835 - store float %add15.i.i.us.4.1, float* %arrayidx14.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.4.1 - -if.end.r_exit.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.r_exit.i.i.us.4 - %837 = add nuw nsw i64 %_local_id_x.i.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %837, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !304 - -if.then.i.i.us.3.1: ; preds = %if.end.r_exit.i.i.us.3 - %838 = load float, float* %arrayidx.i.i.3, align 4, !tbaa !12 - %sext26.i.i.us.3.1 = shl i64 %add1.i.i.i.us.3.1, 32 - %idxprom6.i.i.us.3.1 = ashr exact i64 %sext26.i.i.us.3.1, 32 - %arrayidx7.i.i.us.3.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.3.1 - %839 = load float, float* %arrayidx7.i.i.us.3.1, align 4, !tbaa !12 - %840 = load float, float* %arrayidx9.i.i.3, align 4, !tbaa !12 - %arrayidx11.i.i.us.3.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.3.1 - %841 = load float, float* %arrayidx11.i.i.us.3.1, align 4, !tbaa !12 - %mul12.i.i.us.3.1 = fmul float %840, %841 - %842 = tail call float @llvm.fmuladd.f32(float %838, float %839, float %mul12.i.i.us.3.1) #6 - %add.i.i.us.3.1 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom13.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx14.i.i.us.3.1 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.3.1 - %843 = load float, float* %arrayidx14.i.i.us.3.1, align 4, !tbaa !12 - %add15.i.i.us.3.1 = fadd float %843, %842 - store float %add15.i.i.us.3.1, float* %arrayidx14.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.3.1 - -if.end.r_exit.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.r_exit.i.i.us.3 - %844 = add nuw nsw i64 %_local_id_x.i.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %844, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !305 - -if.then.i.i.us.2.1: ; preds = %if.end.r_exit.i.i.us.2 - %845 = load float, float* %arrayidx.i.i.2, align 4, !tbaa !12 - %sext26.i.i.us.2.1 = shl i64 %add1.i.i.i.us.2.1, 32 - %idxprom6.i.i.us.2.1 = ashr exact i64 %sext26.i.i.us.2.1, 32 - %arrayidx7.i.i.us.2.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.2.1 - %846 = load float, float* %arrayidx7.i.i.us.2.1, align 4, !tbaa !12 - %847 = load float, float* %arrayidx9.i.i.2, align 4, !tbaa !12 - %arrayidx11.i.i.us.2.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.2.1 - %848 = load float, float* %arrayidx11.i.i.us.2.1, align 4, !tbaa !12 - %mul12.i.i.us.2.1 = fmul float %847, %848 - %849 = tail call float @llvm.fmuladd.f32(float %845, float %846, float %mul12.i.i.us.2.1) #6 - %add.i.i.us.2.1 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom13.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx14.i.i.us.2.1 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.2.1 - %850 = load float, float* %arrayidx14.i.i.us.2.1, align 4, !tbaa !12 - %add15.i.i.us.2.1 = fadd float %850, %849 - store float %add15.i.i.us.2.1, float* %arrayidx14.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.2.1 - -if.end.r_exit.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.r_exit.i.i.us.2 - %851 = add nuw nsw i64 %_local_id_x.i.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %851, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !306 - -if.then.i.i.us.1.1: ; preds = %if.end.r_exit.i.i.us.1 - %852 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %sext26.i.i.us.1.1 = shl i64 %add1.i.i.i.us.1.1, 32 - %idxprom6.i.i.us.1.1 = ashr exact i64 %sext26.i.i.us.1.1, 32 - %arrayidx7.i.i.us.1.1 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.1.1 - %853 = load float, float* %arrayidx7.i.i.us.1.1, align 4, !tbaa !12 - %854 = load float, float* %arrayidx9.i.i.1, align 4, !tbaa !12 - %arrayidx11.i.i.us.1.1 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.1.1 - %855 = load float, float* %arrayidx11.i.i.us.1.1, align 4, !tbaa !12 - %mul12.i.i.us.1.1 = fmul float %854, %855 - %856 = tail call float @llvm.fmuladd.f32(float %852, float %853, float %mul12.i.i.us.1.1) #6 - %add.i.i.us.1.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom13.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx14.i.i.us.1.1 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.1.1 - %857 = load float, float* %arrayidx14.i.i.us.1.1, align 4, !tbaa !12 - %add15.i.i.us.1.1 = fadd float %857, %856 - store float %add15.i.i.us.1.1, float* %arrayidx14.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.1.1 - -if.end.r_exit.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.r_exit.i.i.us.1 - %858 = add nuw nsw i64 %_local_id_x.i.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %858, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !307 - -if.then.i.i.us.1531: ; preds = %if.end.r_exit.i.i.us - %859 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %sext26.i.i.us.1522 = shl i64 %add1.i.i.i.us.1518, 32 - %idxprom6.i.i.us.1523 = ashr exact i64 %sext26.i.i.us.1522, 32 - %arrayidx7.i.i.us.1524 = getelementptr inbounds float, float* %9, i64 %idxprom6.i.i.us.1523 - %860 = load float, float* %arrayidx7.i.i.us.1524, align 4, !tbaa !12 - %861 = load float, float* %arrayidx9.i.i, align 4, !tbaa !12 - %arrayidx11.i.i.us.1525 = getelementptr inbounds float, float* %12, i64 %idxprom6.i.i.us.1523 - %862 = load float, float* %arrayidx11.i.i.us.1525, align 4, !tbaa !12 - %mul12.i.i.us.1526 = fmul float %861, %862 - %863 = tail call float @llvm.fmuladd.f32(float %859, float %860, float %mul12.i.i.us.1526) #6 - %add.i.i.us.1527 = add nsw i32 %mul.i.i, %conv.i.i.us.1519 - %idxprom13.i.i.us.1528 = sext i32 %add.i.i.us.1527 to i64 - %arrayidx14.i.i.us.1529 = getelementptr inbounds float, float* %6, i64 %idxprom13.i.i.us.1528 - %864 = load float, float* %arrayidx14.i.i.us.1529, align 4, !tbaa !12 - %add15.i.i.us.1530 = fadd float %864, %863 - store float %add15.i.i.us.1530, float* %arrayidx14.i.i.us.1529, align 4, !tbaa !12, !llvm.access.group !27 - br label %if.end.r_exit.i.i.us.1532 - -if.end.r_exit.i.i.us.1532: ; preds = %if.then.i.i.us.1531, %if.end.r_exit.i.i.us - %865 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %865, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !308 -} - -; Function Attrs: nounwind readonly willreturn -declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) #0 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #5 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nofree nounwind } -attributes #3 = { nounwind readonly willreturn } -attributes #4 = { argmemonly nounwind readonly willreturn } -attributes #5 = { argmemonly nounwind willreturn } -attributes #6 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"float*", !"float*", !"int"} -!9 = !{!"", !"", !"", !"", !"", !""} -!10 = !{!"A", !"V1", !"V2", !"U1", !"U2", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20} -!20 = distinct !{!20, !18} -!21 = !{!22} -!22 = distinct !{!22, !18} -!23 = !{!24} -!24 = distinct !{!24, !18} -!25 = !{!26} -!26 = distinct !{!26, !18} -!27 = !{!28, !29} -!28 = distinct !{} -!29 = distinct !{} -!30 = !{!31} -!31 = distinct !{!31, !32} -!32 = distinct !{!32, !"LVerDomain"} -!33 = !{!34} -!34 = distinct !{!34, !32} -!35 = !{!36} -!36 = distinct !{!36, !32} -!37 = !{!38} -!38 = distinct !{!38, !32} -!39 = !{!40} -!40 = distinct !{!40, !32} -!41 = !{!42} -!42 = distinct !{!42, !43} -!43 = distinct !{!43, !"LVerDomain"} -!44 = !{!45} -!45 = distinct !{!45, !43} -!46 = !{!47} -!47 = distinct !{!47, !43} -!48 = !{!49} -!49 = distinct !{!49, !43} -!50 = !{!51} -!51 = distinct !{!51, !43} -!52 = !{!53} -!53 = distinct !{!53, !54} -!54 = distinct !{!54, !"LVerDomain"} -!55 = !{!56} -!56 = distinct !{!56, !54} -!57 = !{!58} -!58 = distinct !{!58, !54} -!59 = !{!60} -!60 = distinct !{!60, !54} -!61 = !{!62} -!62 = distinct !{!62, !54} -!63 = !{!64} -!64 = distinct !{!64, !65} -!65 = distinct !{!65, !"LVerDomain"} -!66 = !{!67} -!67 = distinct !{!67, !65} -!68 = !{!69} -!69 = distinct !{!69, !65} -!70 = !{!71} -!71 = distinct !{!71, !65} -!72 = !{!73} -!73 = distinct !{!73, !65} -!74 = !{!75} -!75 = distinct !{!75, !76} -!76 = distinct !{!76, !"LVerDomain"} -!77 = !{!78} -!78 = distinct !{!78, !76} -!79 = !{!80} -!80 = distinct !{!80, !76} -!81 = !{!82} -!82 = distinct !{!82, !76} -!83 = !{!84} -!84 = distinct !{!84, !76} -!85 = !{!86} -!86 = distinct !{!86, !87} -!87 = distinct !{!87, !"LVerDomain"} -!88 = !{!89} -!89 = distinct !{!89, !87} -!90 = !{!91} -!91 = distinct !{!91, !87} -!92 = !{!93} -!93 = distinct !{!93, !87} -!94 = !{!95} -!95 = distinct !{!95, !87} -!96 = !{!97} -!97 = distinct !{!97, !98} -!98 = distinct !{!98, !"LVerDomain"} -!99 = !{!100} -!100 = distinct !{!100, !98} -!101 = !{!102} -!102 = distinct !{!102, !98} -!103 = !{!104} -!104 = distinct !{!104, !98} -!105 = !{!106} -!106 = distinct !{!106, !98} -!107 = distinct !{!107, !108, !109} -!108 = !{!"llvm.loop.parallel_accesses", !28} -!109 = !{!"llvm.loop.isvectorized", i32 1} -!110 = distinct !{!110, !108, !109} -!111 = distinct !{!111, !108, !109} -!112 = distinct !{!112, !108, !109} -!113 = distinct !{!113, !108, !109} -!114 = distinct !{!114, !108, !109} -!115 = distinct !{!115, !108, !109} -!116 = distinct !{!116, !108, !109} -!117 = !{!118} -!118 = distinct !{!118, !119} -!119 = distinct !{!119, !"LVerDomain"} -!120 = !{!121} -!121 = distinct !{!121, !119} -!122 = !{!123} -!123 = distinct !{!123, !119} -!124 = !{!125} -!125 = distinct !{!125, !119} -!126 = !{!127} -!127 = distinct !{!127, !119} -!128 = !{!129} -!129 = distinct !{!129, !130} -!130 = distinct !{!130, !"LVerDomain"} -!131 = !{!132} -!132 = distinct !{!132, !130} -!133 = !{!134} -!134 = distinct !{!134, !130} -!135 = !{!136} -!136 = distinct !{!136, !130} -!137 = !{!138} -!138 = distinct !{!138, !130} -!139 = !{!140} -!140 = distinct !{!140, !141} -!141 = distinct !{!141, !"LVerDomain"} -!142 = !{!143} -!143 = distinct !{!143, !141} -!144 = !{!145} -!145 = distinct !{!145, !141} -!146 = !{!147} -!147 = distinct !{!147, !141} -!148 = !{!149} -!149 = distinct !{!149, !141} -!150 = !{!151} -!151 = distinct !{!151, !152} -!152 = distinct !{!152, !"LVerDomain"} -!153 = !{!154} -!154 = distinct !{!154, !152} -!155 = !{!156} -!156 = distinct !{!156, !152} -!157 = !{!158} -!158 = distinct !{!158, !152} -!159 = !{!160} -!160 = distinct !{!160, !152} -!161 = !{!162} -!162 = distinct !{!162, !163} -!163 = distinct !{!163, !"LVerDomain"} -!164 = !{!165} -!165 = distinct !{!165, !163} -!166 = !{!167} -!167 = distinct !{!167, !163} -!168 = !{!169} -!169 = distinct !{!169, !163} -!170 = !{!171} -!171 = distinct !{!171, !163} -!172 = !{!173} -!173 = distinct !{!173, !174} -!174 = distinct !{!174, !"LVerDomain"} -!175 = !{!176} -!176 = distinct !{!176, !174} -!177 = !{!178} -!178 = distinct !{!178, !174} -!179 = !{!180} -!180 = distinct !{!180, !174} -!181 = !{!182} -!182 = distinct !{!182, !174} -!183 = !{!184} -!184 = distinct !{!184, !185} -!185 = distinct !{!185, !"LVerDomain"} -!186 = !{!187} -!187 = distinct !{!187, !185} -!188 = !{!189} -!189 = distinct !{!189, !185} -!190 = !{!191} -!191 = distinct !{!191, !185} -!192 = !{!193} -!193 = distinct !{!193, !185} -!194 = !{!195} -!195 = distinct !{!195, !196} -!196 = distinct !{!196, !"LVerDomain"} -!197 = !{!198} -!198 = distinct !{!198, !196} -!199 = !{!200} -!200 = distinct !{!200, !196} -!201 = !{!202} -!202 = distinct !{!202, !196} -!203 = !{!204} -!204 = distinct !{!204, !196} -!205 = distinct !{!205, !108, !109} -!206 = distinct !{!206, !108, !109} -!207 = distinct !{!207, !108, !109} -!208 = distinct !{!208, !108, !109} -!209 = distinct !{!209, !108, !109} -!210 = distinct !{!210, !108, !109} -!211 = distinct !{!211, !108, !109} -!212 = distinct !{!212, !108, !109} -!213 = !{!214} -!214 = distinct !{!214, !215} -!215 = distinct !{!215, !"LVerDomain"} -!216 = !{!217} -!217 = distinct !{!217, !215} -!218 = !{!219} -!219 = distinct !{!219, !215} -!220 = !{!221} -!221 = distinct !{!221, !215} -!222 = !{!223} -!223 = distinct !{!223, !215} -!224 = !{!225} -!225 = distinct !{!225, !226} -!226 = distinct !{!226, !"LVerDomain"} -!227 = !{!228} -!228 = distinct !{!228, !226} -!229 = !{!230} -!230 = distinct !{!230, !226} -!231 = !{!232} -!232 = distinct !{!232, !226} -!233 = !{!234} -!234 = distinct !{!234, !226} -!235 = !{!236} -!236 = distinct !{!236, !237} -!237 = distinct !{!237, !"LVerDomain"} -!238 = !{!239} -!239 = distinct !{!239, !237} -!240 = !{!241} -!241 = distinct !{!241, !237} -!242 = !{!243} -!243 = distinct !{!243, !237} -!244 = !{!245} -!245 = distinct !{!245, !237} -!246 = !{!247} -!247 = distinct !{!247, !248} -!248 = distinct !{!248, !"LVerDomain"} -!249 = !{!250} -!250 = distinct !{!250, !248} -!251 = !{!252} -!252 = distinct !{!252, !248} -!253 = !{!254} -!254 = distinct !{!254, !248} -!255 = !{!256} -!256 = distinct !{!256, !248} -!257 = !{!258} -!258 = distinct !{!258, !259} -!259 = distinct !{!259, !"LVerDomain"} -!260 = !{!261} -!261 = distinct !{!261, !259} -!262 = !{!263} -!263 = distinct !{!263, !259} -!264 = !{!265} -!265 = distinct !{!265, !259} -!266 = !{!267} -!267 = distinct !{!267, !259} -!268 = !{!269} -!269 = distinct !{!269, !270} -!270 = distinct !{!270, !"LVerDomain"} -!271 = !{!272} -!272 = distinct !{!272, !270} -!273 = !{!274} -!274 = distinct !{!274, !270} -!275 = !{!276} -!276 = distinct !{!276, !270} -!277 = !{!278} -!278 = distinct !{!278, !270} -!279 = !{!280} -!280 = distinct !{!280, !281} -!281 = distinct !{!281, !"LVerDomain"} -!282 = !{!283} -!283 = distinct !{!283, !281} -!284 = !{!285} -!285 = distinct !{!285, !281} -!286 = !{!287} -!287 = distinct !{!287, !281} -!288 = !{!289} -!289 = distinct !{!289, !281} -!290 = !{!291} -!291 = distinct !{!291, !292} -!292 = distinct !{!292, !"LVerDomain"} -!293 = !{!294} -!294 = distinct !{!294, !292} -!295 = !{!296} -!296 = distinct !{!296, !292} -!297 = !{!298} -!298 = distinct !{!298, !292} -!299 = !{!300} -!300 = distinct !{!300, !292} -!301 = distinct !{!301, !108, !109} -!302 = distinct !{!302, !108, !109} -!303 = distinct !{!303, !108, !109} -!304 = distinct !{!304, !108, !109} -!305 = distinct !{!305, !108, !109} -!306 = distinct !{!306, !108, !109} -!307 = distinct !{!307, !108, !109} -!308 = distinct !{!308, !108, !109} diff --git a/pocl_irs/gemver_kernel2.ll b/pocl_irs/gemver_kernel2.ll deleted file mode 100644 index daac8cc..0000000 --- a/pocl_irs/gemver_kernel2.ll +++ /dev/null @@ -1,939 +0,0 @@ -; ModuleID = './OM/KBEGAJBBAALIMPLKDCOKKOGOMEPCEPLPNLGKF/gemver_kernel2/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_gemver_kernel2(float* nocapture readonly %0, float* nocapture %1, float* nocapture readonly %2, float* nocapture readonly %3, float %4, i32 %5, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %6, i64 %7, i64 %8, i64 %9) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %7, 8 - %cmp228.i = icmp sgt i32 %5, 0 - %11 = zext i32 %5 to i64 - br i1 %cmp228.i, label %pregion_for_entry.entry.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.us.preheader: ; preds = %10 - br label %pregion_for_entry.entry.i.us - -vector.memcheck: ; preds = %10 - %12 = trunc i64 %7 to i32 - %13 = shl i32 %12, 8 - %14 = sext i32 %13 to i64 - %scevgep = getelementptr float, float* %1, i64 %14 - %15 = add nsw i64 %14, 256 - %scevgep13 = getelementptr float, float* %1, i64 %15 - %scevgep15 = getelementptr float, float* %3, i64 %14 - %scevgep17 = getelementptr float, float* %3, i64 %15 - %bound0 = icmp ult float* %scevgep, %scevgep17 - %bound1 = icmp ult float* %scevgep15, %scevgep13 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.preheader, label %vector.ph - -pregion_for_entry.entry.i.preheader: ; preds = %vector.memcheck - br label %pregion_for_entry.entry.i - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert19 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat20 = shufflevector <8 x i32> %broadcast.splatinsert19, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.3, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.3, %vector.body ] - %16 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %17 = trunc <8 x i64> %16 to <8 x i32> - %18 = icmp sgt <8 x i32> %broadcast.splat20, %17 - %19 = extractelement <8 x i64> %16, i32 0 - %20 = shl i64 %19, 32 - %21 = ashr exact i64 %20, 32 - %22 = getelementptr inbounds float, float* %1, i64 %21 - %23 = bitcast float* %22 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %23, i32 4, <8 x i1> %18, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %24 = getelementptr inbounds float, float* %3, i64 %21 - %25 = bitcast float* %24 to <8 x float>* - %wide.masked.load21 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %25, i32 4, <8 x i1> %18, <8 x float> undef), !tbaa !12, !alias.scope !19 - %26 = fadd <8 x float> %wide.masked.load21, %wide.masked.load - %27 = bitcast float* %22 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %26, <8 x float>* %27, i32 4, <8 x i1> %18), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !21 - %vec.ind.next = add <8 x i64> %vec.ind, - %28 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %29 = trunc <8 x i64> %28 to <8 x i32> - %30 = icmp sgt <8 x i32> %broadcast.splat20, %29 - %31 = extractelement <8 x i64> %28, i32 0 - %32 = shl i64 %31, 32 - %33 = ashr exact i64 %32, 32 - %34 = getelementptr inbounds float, float* %1, i64 %33 - %35 = bitcast float* %34 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %35, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %36 = getelementptr inbounds float, float* %3, i64 %33 - %37 = bitcast float* %36 to <8 x float>* - %wide.masked.load21.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %37, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12, !alias.scope !19 - %38 = fadd <8 x float> %wide.masked.load21.1, %wide.masked.load.1 - %39 = bitcast float* %34 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %38, <8 x float>* %39, i32 4, <8 x i1> %30), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !21 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %40 = add nuw nsw <8 x i64> %vec.ind.next.1, %broadcast.splat - %41 = trunc <8 x i64> %40 to <8 x i32> - %42 = icmp sgt <8 x i32> %broadcast.splat20, %41 - %43 = extractelement <8 x i64> %40, i32 0 - %44 = shl i64 %43, 32 - %45 = ashr exact i64 %44, 32 - %46 = getelementptr inbounds float, float* %1, i64 %45 - %47 = bitcast float* %46 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %47, i32 4, <8 x i1> %42, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %48 = getelementptr inbounds float, float* %3, i64 %45 - %49 = bitcast float* %48 to <8 x float>* - %wide.masked.load21.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %49, i32 4, <8 x i1> %42, <8 x float> undef), !tbaa !12, !alias.scope !19 - %50 = fadd <8 x float> %wide.masked.load21.2, %wide.masked.load.2 - %51 = bitcast float* %46 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %50, <8 x float>* %51, i32 4, <8 x i1> %42), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !21 - %vec.ind.next.2 = add <8 x i64> %vec.ind, - %52 = add nuw nsw <8 x i64> %vec.ind.next.2, %broadcast.splat - %53 = trunc <8 x i64> %52 to <8 x i32> - %54 = icmp sgt <8 x i32> %broadcast.splat20, %53 - %55 = extractelement <8 x i64> %52, i32 0 - %56 = shl i64 %55, 32 - %57 = ashr exact i64 %56, 32 - %58 = getelementptr inbounds float, float* %1, i64 %57 - %59 = bitcast float* %58 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %59, i32 4, <8 x i1> %54, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %60 = getelementptr inbounds float, float* %3, i64 %57 - %61 = bitcast float* %60 to <8 x float>* - %wide.masked.load21.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %61, i32 4, <8 x i1> %54, <8 x float> undef), !tbaa !12, !alias.scope !19 - %62 = fadd <8 x float> %wide.masked.load21.3, %wide.masked.load.3 - %63 = bitcast float* %58 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %62, <8 x float>* %63, i32 4, <8 x i1> %54), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !21 - %index.next.3 = add nuw nsw i64 %index, 32 - %vec.ind.next.3 = add <8 x i64> %vec.ind, - %64 = icmp eq i64 %index.next.3, 256 - br i1 %64, label %gemver_kernel2.exit.loopexit26, label %vector.body, !llvm.loop !23 - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us.1, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %82, %if.end.r_exit.i.us.1 ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %5 - br i1 %cmp.i.us, label %for.cond.preheader.i.us, label %if.end.r_exit.i.us - -for.cond.preheader.i.us: ; preds = %pregion_for_entry.entry.i.us - %sext27.i.us = shl i64 %add1.i.i.us, 32 - %idxprom8.i.us = ashr exact i64 %sext27.i.us, 32 - %arrayidx9.i.us = getelementptr inbounds float, float* %1, i64 %idxprom8.i.us - %.pre.i2.us7 = load float, float* %arrayidx9.i.us, align 4, !tbaa !12 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %for.cond.preheader.i.us - %indvars.iv.next.i5.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %for.cond.preheader.i.us ] - %65 = phi float [ %70, %for.body.i.us ], [ %.pre.i2.us7, %for.cond.preheader.i.us ] - %66 = mul nuw nsw i64 %indvars.iv.next.i5.us, %11 - %67 = add nsw i64 %66, %idxprom8.i.us - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %67 - %68 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %mul4.i.us = fmul float %68, %4 - %arrayidx6.i.us = getelementptr inbounds float, float* %2, i64 %indvars.iv.next.i5.us - %69 = load float, float* %arrayidx6.i.us, align 4, !tbaa !12 - %70 = tail call float @llvm.fmuladd.f32(float %mul4.i.us, float %69, float %65) #2 - store float %70, float* %arrayidx9.i.us, align 4, !tbaa !12, !llvm.access.group !21 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i5.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %11 - br i1 %exitcond.not.i.us, label %for.end.loopexit.i.us, label %for.body.i.us, !llvm.loop !26 - -for.end.loopexit.i.us: ; preds = %for.body.i.us - %.lcssa = phi float [ %70, %for.body.i.us ] - %arrayidx11.i.us = getelementptr inbounds float, float* %3, i64 %idxprom8.i.us - %71 = load float, float* %arrayidx11.i.us, align 4, !tbaa !12 - %add14.i.us = fadd float %.lcssa, %71 - store float %add14.i.us, float* %arrayidx9.i.us, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %for.end.loopexit.i.us, %pregion_for_entry.entry.i.us - %72 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1 = add nuw nsw i64 %72, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp.i.us.1 = icmp slt i32 %conv.i.us.1, %5 - br i1 %cmp.i.us.1, label %for.cond.preheader.i.us.1, label %if.end.r_exit.i.us.1 - -pregion_for_entry.entry.i: ; preds = %if.end.r_exit.i.3, %pregion_for_entry.entry.i.preheader - %_local_id_x.0 = phi i64 [ %88, %if.end.r_exit.i.3 ], [ 0, %pregion_for_entry.entry.i.preheader ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %conv.i = trunc i64 %add1.i.i to i32 - %cmp.i = icmp slt i32 %conv.i, %5 - br i1 %cmp.i, label %for.cond.preheader.i, label %if.end.r_exit.i - -for.cond.preheader.i: ; preds = %pregion_for_entry.entry.i - %sext27.i = shl i64 %add1.i.i, 32 - %idxprom8.i = ashr exact i64 %sext27.i, 32 - %arrayidx9.i = getelementptr inbounds float, float* %1, i64 %idxprom8.i - %.pre1.i16 = load float, float* %arrayidx9.i, align 4, !tbaa !12 - %arrayidx11.i = getelementptr inbounds float, float* %3, i64 %idxprom8.i - %73 = load float, float* %arrayidx11.i, align 4, !tbaa !12 - %add14.i = fadd float %73, %.pre1.i16 - store float %add14.i, float* %arrayidx9.i, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i - -if.end.r_exit.i: ; preds = %for.cond.preheader.i, %pregion_for_entry.entry.i - %74 = or i64 %_local_id_x.0, 1 - %add1.i.i.1 = add nuw nsw i64 %74, %mul.i.i - %conv.i.1 = trunc i64 %add1.i.i.1 to i32 - %cmp.i.1 = icmp slt i32 %conv.i.1, %5 - br i1 %cmp.i.1, label %for.cond.preheader.i.1, label %if.end.r_exit.i.1 - -gemver_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.us.1 - br label %gemver_kernel2.exit - -gemver_kernel2.exit.loopexit25: ; preds = %if.end.r_exit.i.3 - br label %gemver_kernel2.exit - -gemver_kernel2.exit.loopexit26: ; preds = %vector.body - br label %gemver_kernel2.exit - -gemver_kernel2.exit: ; preds = %gemver_kernel2.exit.loopexit26, %gemver_kernel2.exit.loopexit25, %gemver_kernel2.exit.loopexit - ret void - -for.cond.preheader.i.us.1: ; preds = %if.end.r_exit.i.us - %sext27.i.us.1 = shl i64 %add1.i.i.us.1, 32 - %idxprom8.i.us.1 = ashr exact i64 %sext27.i.us.1, 32 - %arrayidx9.i.us.1 = getelementptr inbounds float, float* %1, i64 %idxprom8.i.us.1 - %.pre.i2.us7.1 = load float, float* %arrayidx9.i.us.1, align 4, !tbaa !12 - br label %for.body.i.us.1 - -for.body.i.us.1: ; preds = %for.body.i.us.1, %for.cond.preheader.i.us.1 - %indvars.iv.next.i5.us.1 = phi i64 [ %indvars.iv.next.i.us.1, %for.body.i.us.1 ], [ 0, %for.cond.preheader.i.us.1 ] - %75 = phi float [ %80, %for.body.i.us.1 ], [ %.pre.i2.us7.1, %for.cond.preheader.i.us.1 ] - %76 = mul nuw nsw i64 %indvars.iv.next.i5.us.1, %11 - %77 = add nsw i64 %76, %idxprom8.i.us.1 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %0, i64 %77 - %78 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %mul4.i.us.1 = fmul float %78, %4 - %arrayidx6.i.us.1 = getelementptr inbounds float, float* %2, i64 %indvars.iv.next.i5.us.1 - %79 = load float, float* %arrayidx6.i.us.1, align 4, !tbaa !12 - %80 = tail call float @llvm.fmuladd.f32(float %mul4.i.us.1, float %79, float %75) #2 - store float %80, float* %arrayidx9.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - %indvars.iv.next.i.us.1 = add nuw nsw i64 %indvars.iv.next.i5.us.1, 1 - %exitcond.not.i.us.1 = icmp eq i64 %indvars.iv.next.i.us.1, %11 - br i1 %exitcond.not.i.us.1, label %for.end.loopexit.i.us.1, label %for.body.i.us.1, !llvm.loop !26 - -for.end.loopexit.i.us.1: ; preds = %for.body.i.us.1 - %.lcssa27 = phi float [ %80, %for.body.i.us.1 ] - %arrayidx11.i.us.1 = getelementptr inbounds float, float* %3, i64 %idxprom8.i.us.1 - %81 = load float, float* %arrayidx11.i.us.1, align 4, !tbaa !12 - %add14.i.us.1 = fadd float %.lcssa27, %81 - store float %add14.i.us.1, float* %arrayidx9.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us.1 - -if.end.r_exit.i.us.1: ; preds = %for.end.loopexit.i.us.1, %if.end.r_exit.i.us - %82 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %82, 256 - br i1 %exitcond.not.1, label %gemver_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !28 - -for.cond.preheader.i.1: ; preds = %if.end.r_exit.i - %sext27.i.1 = shl i64 %add1.i.i.1, 32 - %idxprom8.i.1 = ashr exact i64 %sext27.i.1, 32 - %arrayidx9.i.1 = getelementptr inbounds float, float* %1, i64 %idxprom8.i.1 - %.pre1.i16.1 = load float, float* %arrayidx9.i.1, align 4, !tbaa !12 - %arrayidx11.i.1 = getelementptr inbounds float, float* %3, i64 %idxprom8.i.1 - %83 = load float, float* %arrayidx11.i.1, align 4, !tbaa !12 - %add14.i.1 = fadd float %83, %.pre1.i16.1 - store float %add14.i.1, float* %arrayidx9.i.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.1 - -if.end.r_exit.i.1: ; preds = %for.cond.preheader.i.1, %if.end.r_exit.i - %84 = or i64 %_local_id_x.0, 2 - %add1.i.i.2 = add nuw nsw i64 %84, %mul.i.i - %conv.i.2 = trunc i64 %add1.i.i.2 to i32 - %cmp.i.2 = icmp slt i32 %conv.i.2, %5 - br i1 %cmp.i.2, label %for.cond.preheader.i.2, label %if.end.r_exit.i.2 - -for.cond.preheader.i.2: ; preds = %if.end.r_exit.i.1 - %sext27.i.2 = shl i64 %add1.i.i.2, 32 - %idxprom8.i.2 = ashr exact i64 %sext27.i.2, 32 - %arrayidx9.i.2 = getelementptr inbounds float, float* %1, i64 %idxprom8.i.2 - %.pre1.i16.2 = load float, float* %arrayidx9.i.2, align 4, !tbaa !12 - %arrayidx11.i.2 = getelementptr inbounds float, float* %3, i64 %idxprom8.i.2 - %85 = load float, float* %arrayidx11.i.2, align 4, !tbaa !12 - %add14.i.2 = fadd float %85, %.pre1.i16.2 - store float %add14.i.2, float* %arrayidx9.i.2, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.2 - -if.end.r_exit.i.2: ; preds = %for.cond.preheader.i.2, %if.end.r_exit.i.1 - %86 = or i64 %_local_id_x.0, 3 - %add1.i.i.3 = add nuw nsw i64 %86, %mul.i.i - %conv.i.3 = trunc i64 %add1.i.i.3 to i32 - %cmp.i.3 = icmp slt i32 %conv.i.3, %5 - br i1 %cmp.i.3, label %for.cond.preheader.i.3, label %if.end.r_exit.i.3 - -for.cond.preheader.i.3: ; preds = %if.end.r_exit.i.2 - %sext27.i.3 = shl i64 %add1.i.i.3, 32 - %idxprom8.i.3 = ashr exact i64 %sext27.i.3, 32 - %arrayidx9.i.3 = getelementptr inbounds float, float* %1, i64 %idxprom8.i.3 - %.pre1.i16.3 = load float, float* %arrayidx9.i.3, align 4, !tbaa !12 - %arrayidx11.i.3 = getelementptr inbounds float, float* %3, i64 %idxprom8.i.3 - %87 = load float, float* %arrayidx11.i.3, align 4, !tbaa !12 - %add14.i.3 = fadd float %87, %.pre1.i16.3 - store float %add14.i.3, float* %arrayidx9.i.3, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.3 - -if.end.r_exit.i.3: ; preds = %for.cond.preheader.i.3, %if.end.r_exit.i.2 - %88 = add nuw nsw i64 %_local_id_x.0, 4 - %exitcond9.not.3 = icmp eq i64 %88, 256 - br i1 %exitcond9.not.3, label %gemver_kernel2.exit.loopexit25, label %pregion_for_entry.entry.i, !llvm.loop !29 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_gemver_kernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to float*** - %19 = load float**, float*** %18, align 8 - %20 = load float*, float** %19, align 8 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to float** - %23 = load float*, float** %22, align 8 - %24 = load float, float* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 5 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp228.i.i = icmp sgt i32 %28, 0 - %29 = zext i32 %28 to i64 - br i1 %cmp228.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %5 - %30 = trunc i64 %2 to i32 - %31 = shl i32 %30, 8 - %32 = sext i32 %31 to i64 - %scevgep = getelementptr float, float* %12, i64 %32 - %33 = add nsw i64 %32, 256 - %scevgep13 = getelementptr float, float* %12, i64 %33 - %scevgep15 = getelementptr float, float* %20, i64 %32 - %scevgep17 = getelementptr float, float* %20, i64 %33 - %bound0 = icmp ult float* %scevgep, %scevgep17 - %bound1 = icmp ult float* %scevgep15, %scevgep13 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck - br label %pregion_for_entry.entry.i.i - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert19 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat20 = shufflevector <8 x i32> %broadcast.splatinsert19, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.3, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.3, %vector.body ] - %34 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %35 = trunc <8 x i64> %34 to <8 x i32> - %36 = icmp sgt <8 x i32> %broadcast.splat20, %35 - %37 = extractelement <8 x i64> %34, i32 0 - %38 = shl i64 %37, 32 - %39 = ashr exact i64 %38, 32 - %40 = getelementptr inbounds float, float* %12, i64 %39 - %41 = bitcast float* %40 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %41, i32 4, <8 x i1> %36, <8 x float> undef), !tbaa !12, !alias.scope !30, !noalias !33 - %42 = getelementptr inbounds float, float* %20, i64 %39 - %43 = bitcast float* %42 to <8 x float>* - %wide.masked.load21 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %43, i32 4, <8 x i1> %36, <8 x float> undef), !tbaa !12, !alias.scope !33 - %44 = fadd <8 x float> %wide.masked.load21, %wide.masked.load - %45 = bitcast float* %40 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %44, <8 x float>* %45, i32 4, <8 x i1> %36), !tbaa !12, !alias.scope !30, !noalias !33, !llvm.access.group !21 - %vec.ind.next = add <8 x i64> %vec.ind, - %46 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %47 = trunc <8 x i64> %46 to <8 x i32> - %48 = icmp sgt <8 x i32> %broadcast.splat20, %47 - %49 = extractelement <8 x i64> %46, i32 0 - %50 = shl i64 %49, 32 - %51 = ashr exact i64 %50, 32 - %52 = getelementptr inbounds float, float* %12, i64 %51 - %53 = bitcast float* %52 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %53, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !30, !noalias !33 - %54 = getelementptr inbounds float, float* %20, i64 %51 - %55 = bitcast float* %54 to <8 x float>* - %wide.masked.load21.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %55, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !33 - %56 = fadd <8 x float> %wide.masked.load21.1, %wide.masked.load.1 - %57 = bitcast float* %52 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %56, <8 x float>* %57, i32 4, <8 x i1> %48), !tbaa !12, !alias.scope !30, !noalias !33, !llvm.access.group !21 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %58 = add nuw nsw <8 x i64> %vec.ind.next.1, %broadcast.splat - %59 = trunc <8 x i64> %58 to <8 x i32> - %60 = icmp sgt <8 x i32> %broadcast.splat20, %59 - %61 = extractelement <8 x i64> %58, i32 0 - %62 = shl i64 %61, 32 - %63 = ashr exact i64 %62, 32 - %64 = getelementptr inbounds float, float* %12, i64 %63 - %65 = bitcast float* %64 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %65, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !30, !noalias !33 - %66 = getelementptr inbounds float, float* %20, i64 %63 - %67 = bitcast float* %66 to <8 x float>* - %wide.masked.load21.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %67, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !33 - %68 = fadd <8 x float> %wide.masked.load21.2, %wide.masked.load.2 - %69 = bitcast float* %64 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %68, <8 x float>* %69, i32 4, <8 x i1> %60), !tbaa !12, !alias.scope !30, !noalias !33, !llvm.access.group !21 - %vec.ind.next.2 = add <8 x i64> %vec.ind, - %70 = add nuw nsw <8 x i64> %vec.ind.next.2, %broadcast.splat - %71 = trunc <8 x i64> %70 to <8 x i32> - %72 = icmp sgt <8 x i32> %broadcast.splat20, %71 - %73 = extractelement <8 x i64> %70, i32 0 - %74 = shl i64 %73, 32 - %75 = ashr exact i64 %74, 32 - %76 = getelementptr inbounds float, float* %12, i64 %75 - %77 = bitcast float* %76 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %77, i32 4, <8 x i1> %72, <8 x float> undef), !tbaa !12, !alias.scope !30, !noalias !33 - %78 = getelementptr inbounds float, float* %20, i64 %75 - %79 = bitcast float* %78 to <8 x float>* - %wide.masked.load21.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %79, i32 4, <8 x i1> %72, <8 x float> undef), !tbaa !12, !alias.scope !33 - %80 = fadd <8 x float> %wide.masked.load21.3, %wide.masked.load.3 - %81 = bitcast float* %76 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %80, <8 x float>* %81, i32 4, <8 x i1> %72), !tbaa !12, !alias.scope !30, !noalias !33, !llvm.access.group !21 - %index.next.3 = add nuw nsw i64 %index, 32 - %vec.ind.next.3 = add <8 x i64> %vec.ind, - %82 = icmp eq i64 %index.next.3, 256 - br i1 %82, label %_pocl_kernel_gemver_kernel2.exit.loopexit26, label %vector.body, !llvm.loop !35 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %100, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %28, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.cond.preheader.i.i.us, label %if.end.r_exit.i.i.us - -for.cond.preheader.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext27.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom8.i.i.us = ashr exact i64 %sext27.i.i.us, 32 - %arrayidx9.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom8.i.i.us - %.pre.i.i2.us7 = load float, float* %arrayidx9.i.i.us, align 4, !tbaa !12 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.cond.preheader.i.i.us - %indvars.iv.next.i.i5.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.cond.preheader.i.i.us ] - %83 = phi float [ %88, %for.body.i.i.us ], [ %.pre.i.i2.us7, %for.cond.preheader.i.i.us ] - %84 = mul nuw nsw i64 %indvars.iv.next.i.i5.us, %29 - %85 = add nsw i64 %84, %idxprom8.i.i.us - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %85 - %86 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %mul4.i.i.us = fmul float %24, %86 - %arrayidx6.i.i.us = getelementptr inbounds float, float* %16, i64 %indvars.iv.next.i.i5.us - %87 = load float, float* %arrayidx6.i.i.us, align 4, !tbaa !12 - %88 = tail call float @llvm.fmuladd.f32(float %mul4.i.i.us, float %87, float %83) #2 - store float %88, float* %arrayidx9.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i5.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %29 - br i1 %exitcond.not.i.i.us, label %for.end.loopexit.i.i.us, label %for.body.i.i.us, !llvm.loop !26 - -for.end.loopexit.i.i.us: ; preds = %for.body.i.i.us - %.lcssa = phi float [ %88, %for.body.i.i.us ] - %arrayidx11.i.i.us = getelementptr inbounds float, float* %20, i64 %idxprom8.i.i.us - %89 = load float, float* %arrayidx11.i.i.us, align 4, !tbaa !12 - %add14.i.i.us = fadd float %.lcssa, %89 - store float %add14.i.i.us, float* %arrayidx9.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %for.end.loopexit.i.i.us, %pregion_for_entry.entry.i.i.us - %90 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %90, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %28, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %for.cond.preheader.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.3, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ %106, %if.end.r_exit.i.i.3 ], [ 0, %pregion_for_entry.entry.i.i.preheader ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %28, %conv.i.i - br i1 %cmp.i.i, label %for.cond.preheader.i.i, label %if.end.r_exit.i.i - -for.cond.preheader.i.i: ; preds = %pregion_for_entry.entry.i.i - %sext27.i.i = shl i64 %add1.i.i.i, 32 - %idxprom8.i.i = ashr exact i64 %sext27.i.i, 32 - %arrayidx9.i.i = getelementptr inbounds float, float* %12, i64 %idxprom8.i.i - %.pre1.i.i16 = load float, float* %arrayidx9.i.i, align 4, !tbaa !12 - %arrayidx11.i.i = getelementptr inbounds float, float* %20, i64 %idxprom8.i.i - %91 = load float, float* %arrayidx11.i.i, align 4, !tbaa !12 - %add14.i.i = fadd float %91, %.pre1.i.i16 - store float %add14.i.i, float* %arrayidx9.i.i, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %for.cond.preheader.i.i, %pregion_for_entry.entry.i.i - %92 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %92, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %28, %conv.i.i.1 - br i1 %cmp.i.i.1, label %for.cond.preheader.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_gemver_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_gemver_kernel2.exit - -_pocl_kernel_gemver_kernel2.exit.loopexit25: ; preds = %if.end.r_exit.i.i.3 - br label %_pocl_kernel_gemver_kernel2.exit - -_pocl_kernel_gemver_kernel2.exit.loopexit26: ; preds = %vector.body - br label %_pocl_kernel_gemver_kernel2.exit - -_pocl_kernel_gemver_kernel2.exit: ; preds = %_pocl_kernel_gemver_kernel2.exit.loopexit26, %_pocl_kernel_gemver_kernel2.exit.loopexit25, %_pocl_kernel_gemver_kernel2.exit.loopexit - ret void - -for.cond.preheader.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %sext27.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom8.i.i.us.1 = ashr exact i64 %sext27.i.i.us.1, 32 - %arrayidx9.i.i.us.1 = getelementptr inbounds float, float* %12, i64 %idxprom8.i.i.us.1 - %.pre.i.i2.us7.1 = load float, float* %arrayidx9.i.i.us.1, align 4, !tbaa !12 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %for.cond.preheader.i.i.us.1 - %indvars.iv.next.i.i5.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %for.cond.preheader.i.i.us.1 ] - %93 = phi float [ %98, %for.body.i.i.us.1 ], [ %.pre.i.i2.us7.1, %for.cond.preheader.i.i.us.1 ] - %94 = mul nuw nsw i64 %indvars.iv.next.i.i5.us.1, %29 - %95 = add nsw i64 %94, %idxprom8.i.i.us.1 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %8, i64 %95 - %96 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %mul4.i.i.us.1 = fmul float %24, %96 - %arrayidx6.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %indvars.iv.next.i.i5.us.1 - %97 = load float, float* %arrayidx6.i.i.us.1, align 4, !tbaa !12 - %98 = tail call float @llvm.fmuladd.f32(float %mul4.i.i.us.1, float %97, float %93) #2 - store float %98, float* %arrayidx9.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i5.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %29 - br i1 %exitcond.not.i.i.us.1, label %for.end.loopexit.i.i.us.1, label %for.body.i.i.us.1, !llvm.loop !26 - -for.end.loopexit.i.i.us.1: ; preds = %for.body.i.i.us.1 - %.lcssa27 = phi float [ %98, %for.body.i.i.us.1 ] - %arrayidx11.i.i.us.1 = getelementptr inbounds float, float* %20, i64 %idxprom8.i.i.us.1 - %99 = load float, float* %arrayidx11.i.i.us.1, align 4, !tbaa !12 - %add14.i.i.us.1 = fadd float %.lcssa27, %99 - store float %add14.i.i.us.1, float* %arrayidx9.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %for.end.loopexit.i.i.us.1, %if.end.r_exit.i.i.us - %100 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %100, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_gemver_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !28 - -for.cond.preheader.i.i.1: ; preds = %if.end.r_exit.i.i - %sext27.i.i.1 = shl i64 %add1.i.i.i.1, 32 - %idxprom8.i.i.1 = ashr exact i64 %sext27.i.i.1, 32 - %arrayidx9.i.i.1 = getelementptr inbounds float, float* %12, i64 %idxprom8.i.i.1 - %.pre1.i.i16.1 = load float, float* %arrayidx9.i.i.1, align 4, !tbaa !12 - %arrayidx11.i.i.1 = getelementptr inbounds float, float* %20, i64 %idxprom8.i.i.1 - %101 = load float, float* %arrayidx11.i.i.1, align 4, !tbaa !12 - %add14.i.i.1 = fadd float %101, %.pre1.i.i16.1 - store float %add14.i.i.1, float* %arrayidx9.i.i.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %for.cond.preheader.i.i.1, %if.end.r_exit.i.i - %102 = or i64 %_local_id_x.i.0, 2 - %add1.i.i.i.2 = add nuw nsw i64 %102, %mul.i.i.i - %conv.i.i.2 = trunc i64 %add1.i.i.i.2 to i32 - %cmp.i.i.2 = icmp sgt i32 %28, %conv.i.i.2 - br i1 %cmp.i.i.2, label %for.cond.preheader.i.i.2, label %if.end.r_exit.i.i.2 - -for.cond.preheader.i.i.2: ; preds = %if.end.r_exit.i.i.1 - %sext27.i.i.2 = shl i64 %add1.i.i.i.2, 32 - %idxprom8.i.i.2 = ashr exact i64 %sext27.i.i.2, 32 - %arrayidx9.i.i.2 = getelementptr inbounds float, float* %12, i64 %idxprom8.i.i.2 - %.pre1.i.i16.2 = load float, float* %arrayidx9.i.i.2, align 4, !tbaa !12 - %arrayidx11.i.i.2 = getelementptr inbounds float, float* %20, i64 %idxprom8.i.i.2 - %103 = load float, float* %arrayidx11.i.i.2, align 4, !tbaa !12 - %add14.i.i.2 = fadd float %103, %.pre1.i.i16.2 - store float %add14.i.i.2, float* %arrayidx9.i.i.2, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.2 - -if.end.r_exit.i.i.2: ; preds = %for.cond.preheader.i.i.2, %if.end.r_exit.i.i.1 - %104 = or i64 %_local_id_x.i.0, 3 - %add1.i.i.i.3 = add nuw nsw i64 %104, %mul.i.i.i - %conv.i.i.3 = trunc i64 %add1.i.i.i.3 to i32 - %cmp.i.i.3 = icmp sgt i32 %28, %conv.i.i.3 - br i1 %cmp.i.i.3, label %for.cond.preheader.i.i.3, label %if.end.r_exit.i.i.3 - -for.cond.preheader.i.i.3: ; preds = %if.end.r_exit.i.i.2 - %sext27.i.i.3 = shl i64 %add1.i.i.i.3, 32 - %idxprom8.i.i.3 = ashr exact i64 %sext27.i.i.3, 32 - %arrayidx9.i.i.3 = getelementptr inbounds float, float* %12, i64 %idxprom8.i.i.3 - %.pre1.i.i16.3 = load float, float* %arrayidx9.i.i.3, align 4, !tbaa !12 - %arrayidx11.i.i.3 = getelementptr inbounds float, float* %20, i64 %idxprom8.i.i.3 - %105 = load float, float* %arrayidx11.i.i.3, align 4, !tbaa !12 - %add14.i.i.3 = fadd float %105, %.pre1.i.i16.3 - store float %add14.i.i.3, float* %arrayidx9.i.i.3, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.3 - -if.end.r_exit.i.i.3: ; preds = %for.cond.preheader.i.i.3, %if.end.r_exit.i.i.2 - %106 = add nuw nsw i64 %_local_id_x.i.0, 4 - %exitcond9.not.3 = icmp eq i64 %106, 256 - br i1 %exitcond9.not.3, label %_pocl_kernel_gemver_kernel2.exit.loopexit25, label %pregion_for_entry.entry.i.i, !llvm.loop !36 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_gemver_kernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to float** - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 4 - %18 = bitcast i8** %17 to float** - %19 = load float*, float** %18, align 8 - %20 = load float, float* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 5 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp228.i.i = icmp sgt i32 %24, 0 - %25 = zext i32 %24 to i64 - br i1 %cmp228.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %5 - %26 = trunc i64 %2 to i32 - %27 = shl i32 %26, 8 - %28 = sext i32 %27 to i64 - %scevgep = getelementptr float, float* %10, i64 %28 - %29 = add nsw i64 %28, 256 - %scevgep13 = getelementptr float, float* %10, i64 %29 - %scevgep15 = getelementptr float, float* %16, i64 %28 - %scevgep17 = getelementptr float, float* %16, i64 %29 - %bound0 = icmp ult float* %scevgep, %scevgep17 - %bound1 = icmp ult float* %scevgep15, %scevgep13 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck - br label %pregion_for_entry.entry.i.i - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert19 = insertelement <8 x i32> undef, i32 %24, i32 0 - %broadcast.splat20 = shufflevector <8 x i32> %broadcast.splatinsert19, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.3, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.3, %vector.body ] - %30 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %31 = trunc <8 x i64> %30 to <8 x i32> - %32 = icmp sgt <8 x i32> %broadcast.splat20, %31 - %33 = extractelement <8 x i64> %30, i32 0 - %34 = shl i64 %33, 32 - %35 = ashr exact i64 %34, 32 - %36 = getelementptr inbounds float, float* %10, i64 %35 - %37 = bitcast float* %36 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %37, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !37, !noalias !40 - %38 = getelementptr inbounds float, float* %16, i64 %35 - %39 = bitcast float* %38 to <8 x float>* - %wide.masked.load21 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %39, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !40 - %40 = fadd <8 x float> %wide.masked.load21, %wide.masked.load - %41 = bitcast float* %36 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %40, <8 x float>* %41, i32 4, <8 x i1> %32), !tbaa !12, !alias.scope !37, !noalias !40, !llvm.access.group !21 - %vec.ind.next = add <8 x i64> %vec.ind, - %42 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %43 = trunc <8 x i64> %42 to <8 x i32> - %44 = icmp sgt <8 x i32> %broadcast.splat20, %43 - %45 = extractelement <8 x i64> %42, i32 0 - %46 = shl i64 %45, 32 - %47 = ashr exact i64 %46, 32 - %48 = getelementptr inbounds float, float* %10, i64 %47 - %49 = bitcast float* %48 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %49, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12, !alias.scope !37, !noalias !40 - %50 = getelementptr inbounds float, float* %16, i64 %47 - %51 = bitcast float* %50 to <8 x float>* - %wide.masked.load21.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %51, i32 4, <8 x i1> %44, <8 x float> undef), !tbaa !12, !alias.scope !40 - %52 = fadd <8 x float> %wide.masked.load21.1, %wide.masked.load.1 - %53 = bitcast float* %48 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %52, <8 x float>* %53, i32 4, <8 x i1> %44), !tbaa !12, !alias.scope !37, !noalias !40, !llvm.access.group !21 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %54 = add nuw nsw <8 x i64> %vec.ind.next.1, %broadcast.splat - %55 = trunc <8 x i64> %54 to <8 x i32> - %56 = icmp sgt <8 x i32> %broadcast.splat20, %55 - %57 = extractelement <8 x i64> %54, i32 0 - %58 = shl i64 %57, 32 - %59 = ashr exact i64 %58, 32 - %60 = getelementptr inbounds float, float* %10, i64 %59 - %61 = bitcast float* %60 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %61, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12, !alias.scope !37, !noalias !40 - %62 = getelementptr inbounds float, float* %16, i64 %59 - %63 = bitcast float* %62 to <8 x float>* - %wide.masked.load21.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %63, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12, !alias.scope !40 - %64 = fadd <8 x float> %wide.masked.load21.2, %wide.masked.load.2 - %65 = bitcast float* %60 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %64, <8 x float>* %65, i32 4, <8 x i1> %56), !tbaa !12, !alias.scope !37, !noalias !40, !llvm.access.group !21 - %vec.ind.next.2 = add <8 x i64> %vec.ind, - %66 = add nuw nsw <8 x i64> %vec.ind.next.2, %broadcast.splat - %67 = trunc <8 x i64> %66 to <8 x i32> - %68 = icmp sgt <8 x i32> %broadcast.splat20, %67 - %69 = extractelement <8 x i64> %66, i32 0 - %70 = shl i64 %69, 32 - %71 = ashr exact i64 %70, 32 - %72 = getelementptr inbounds float, float* %10, i64 %71 - %73 = bitcast float* %72 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %73, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !37, !noalias !40 - %74 = getelementptr inbounds float, float* %16, i64 %71 - %75 = bitcast float* %74 to <8 x float>* - %wide.masked.load21.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %75, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12, !alias.scope !40 - %76 = fadd <8 x float> %wide.masked.load21.3, %wide.masked.load.3 - %77 = bitcast float* %72 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %76, <8 x float>* %77, i32 4, <8 x i1> %68), !tbaa !12, !alias.scope !37, !noalias !40, !llvm.access.group !21 - %index.next.3 = add nuw nsw i64 %index, 32 - %vec.ind.next.3 = add <8 x i64> %vec.ind, - %78 = icmp eq i64 %index.next.3, 256 - br i1 %78, label %_pocl_kernel_gemver_kernel2.exit.loopexit26, label %vector.body, !llvm.loop !42 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %96, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %24, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.cond.preheader.i.i.us, label %if.end.r_exit.i.i.us - -for.cond.preheader.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext27.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom8.i.i.us = ashr exact i64 %sext27.i.i.us, 32 - %arrayidx9.i.i.us = getelementptr inbounds float, float* %10, i64 %idxprom8.i.i.us - %.pre.i.i2.us7 = load float, float* %arrayidx9.i.i.us, align 4, !tbaa !12 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.cond.preheader.i.i.us - %indvars.iv.next.i.i5.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.cond.preheader.i.i.us ] - %79 = phi float [ %84, %for.body.i.i.us ], [ %.pre.i.i2.us7, %for.cond.preheader.i.i.us ] - %80 = mul nuw nsw i64 %indvars.iv.next.i.i5.us, %25 - %81 = add nsw i64 %80, %idxprom8.i.i.us - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %81 - %82 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %mul4.i.i.us = fmul float %20, %82 - %arrayidx6.i.i.us = getelementptr inbounds float, float* %13, i64 %indvars.iv.next.i.i5.us - %83 = load float, float* %arrayidx6.i.i.us, align 4, !tbaa !12 - %84 = tail call float @llvm.fmuladd.f32(float %mul4.i.i.us, float %83, float %79) #2 - store float %84, float* %arrayidx9.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i5.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %25 - br i1 %exitcond.not.i.i.us, label %for.end.loopexit.i.i.us, label %for.body.i.i.us, !llvm.loop !26 - -for.end.loopexit.i.i.us: ; preds = %for.body.i.i.us - %.lcssa = phi float [ %84, %for.body.i.i.us ] - %arrayidx11.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom8.i.i.us - %85 = load float, float* %arrayidx11.i.i.us, align 4, !tbaa !12 - %add14.i.i.us = fadd float %.lcssa, %85 - store float %add14.i.i.us, float* %arrayidx9.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %for.end.loopexit.i.i.us, %pregion_for_entry.entry.i.i.us - %86 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %86, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %24, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %for.cond.preheader.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.3, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ %102, %if.end.r_exit.i.i.3 ], [ 0, %pregion_for_entry.entry.i.i.preheader ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %24, %conv.i.i - br i1 %cmp.i.i, label %for.cond.preheader.i.i, label %if.end.r_exit.i.i - -for.cond.preheader.i.i: ; preds = %pregion_for_entry.entry.i.i - %sext27.i.i = shl i64 %add1.i.i.i, 32 - %idxprom8.i.i = ashr exact i64 %sext27.i.i, 32 - %arrayidx9.i.i = getelementptr inbounds float, float* %10, i64 %idxprom8.i.i - %.pre1.i.i16 = load float, float* %arrayidx9.i.i, align 4, !tbaa !12 - %arrayidx11.i.i = getelementptr inbounds float, float* %16, i64 %idxprom8.i.i - %87 = load float, float* %arrayidx11.i.i, align 4, !tbaa !12 - %add14.i.i = fadd float %87, %.pre1.i.i16 - store float %add14.i.i, float* %arrayidx9.i.i, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %for.cond.preheader.i.i, %pregion_for_entry.entry.i.i - %88 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %88, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %24, %conv.i.i.1 - br i1 %cmp.i.i.1, label %for.cond.preheader.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_gemver_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_gemver_kernel2.exit - -_pocl_kernel_gemver_kernel2.exit.loopexit25: ; preds = %if.end.r_exit.i.i.3 - br label %_pocl_kernel_gemver_kernel2.exit - -_pocl_kernel_gemver_kernel2.exit.loopexit26: ; preds = %vector.body - br label %_pocl_kernel_gemver_kernel2.exit - -_pocl_kernel_gemver_kernel2.exit: ; preds = %_pocl_kernel_gemver_kernel2.exit.loopexit26, %_pocl_kernel_gemver_kernel2.exit.loopexit25, %_pocl_kernel_gemver_kernel2.exit.loopexit - ret void - -for.cond.preheader.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %sext27.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom8.i.i.us.1 = ashr exact i64 %sext27.i.i.us.1, 32 - %arrayidx9.i.i.us.1 = getelementptr inbounds float, float* %10, i64 %idxprom8.i.i.us.1 - %.pre.i.i2.us7.1 = load float, float* %arrayidx9.i.i.us.1, align 4, !tbaa !12 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %for.cond.preheader.i.i.us.1 - %indvars.iv.next.i.i5.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %for.cond.preheader.i.i.us.1 ] - %89 = phi float [ %94, %for.body.i.i.us.1 ], [ %.pre.i.i2.us7.1, %for.cond.preheader.i.i.us.1 ] - %90 = mul nuw nsw i64 %indvars.iv.next.i.i5.us.1, %25 - %91 = add nsw i64 %90, %idxprom8.i.i.us.1 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %91 - %92 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %mul4.i.i.us.1 = fmul float %20, %92 - %arrayidx6.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %indvars.iv.next.i.i5.us.1 - %93 = load float, float* %arrayidx6.i.i.us.1, align 4, !tbaa !12 - %94 = tail call float @llvm.fmuladd.f32(float %mul4.i.i.us.1, float %93, float %89) #2 - store float %94, float* %arrayidx9.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i5.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %25 - br i1 %exitcond.not.i.i.us.1, label %for.end.loopexit.i.i.us.1, label %for.body.i.i.us.1, !llvm.loop !26 - -for.end.loopexit.i.i.us.1: ; preds = %for.body.i.i.us.1 - %.lcssa27 = phi float [ %94, %for.body.i.i.us.1 ] - %arrayidx11.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom8.i.i.us.1 - %95 = load float, float* %arrayidx11.i.i.us.1, align 4, !tbaa !12 - %add14.i.i.us.1 = fadd float %.lcssa27, %95 - store float %add14.i.i.us.1, float* %arrayidx9.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %for.end.loopexit.i.i.us.1, %if.end.r_exit.i.i.us - %96 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %96, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_gemver_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !28 - -for.cond.preheader.i.i.1: ; preds = %if.end.r_exit.i.i - %sext27.i.i.1 = shl i64 %add1.i.i.i.1, 32 - %idxprom8.i.i.1 = ashr exact i64 %sext27.i.i.1, 32 - %arrayidx9.i.i.1 = getelementptr inbounds float, float* %10, i64 %idxprom8.i.i.1 - %.pre1.i.i16.1 = load float, float* %arrayidx9.i.i.1, align 4, !tbaa !12 - %arrayidx11.i.i.1 = getelementptr inbounds float, float* %16, i64 %idxprom8.i.i.1 - %97 = load float, float* %arrayidx11.i.i.1, align 4, !tbaa !12 - %add14.i.i.1 = fadd float %97, %.pre1.i.i16.1 - store float %add14.i.i.1, float* %arrayidx9.i.i.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %for.cond.preheader.i.i.1, %if.end.r_exit.i.i - %98 = or i64 %_local_id_x.i.0, 2 - %add1.i.i.i.2 = add nuw nsw i64 %98, %mul.i.i.i - %conv.i.i.2 = trunc i64 %add1.i.i.i.2 to i32 - %cmp.i.i.2 = icmp sgt i32 %24, %conv.i.i.2 - br i1 %cmp.i.i.2, label %for.cond.preheader.i.i.2, label %if.end.r_exit.i.i.2 - -for.cond.preheader.i.i.2: ; preds = %if.end.r_exit.i.i.1 - %sext27.i.i.2 = shl i64 %add1.i.i.i.2, 32 - %idxprom8.i.i.2 = ashr exact i64 %sext27.i.i.2, 32 - %arrayidx9.i.i.2 = getelementptr inbounds float, float* %10, i64 %idxprom8.i.i.2 - %.pre1.i.i16.2 = load float, float* %arrayidx9.i.i.2, align 4, !tbaa !12 - %arrayidx11.i.i.2 = getelementptr inbounds float, float* %16, i64 %idxprom8.i.i.2 - %99 = load float, float* %arrayidx11.i.i.2, align 4, !tbaa !12 - %add14.i.i.2 = fadd float %99, %.pre1.i.i16.2 - store float %add14.i.i.2, float* %arrayidx9.i.i.2, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.2 - -if.end.r_exit.i.i.2: ; preds = %for.cond.preheader.i.i.2, %if.end.r_exit.i.i.1 - %100 = or i64 %_local_id_x.i.0, 3 - %add1.i.i.i.3 = add nuw nsw i64 %100, %mul.i.i.i - %conv.i.i.3 = trunc i64 %add1.i.i.i.3 to i32 - %cmp.i.i.3 = icmp sgt i32 %24, %conv.i.i.3 - br i1 %cmp.i.i.3, label %for.cond.preheader.i.i.3, label %if.end.r_exit.i.i.3 - -for.cond.preheader.i.i.3: ; preds = %if.end.r_exit.i.i.2 - %sext27.i.i.3 = shl i64 %add1.i.i.i.3, 32 - %idxprom8.i.i.3 = ashr exact i64 %sext27.i.i.3, 32 - %arrayidx9.i.i.3 = getelementptr inbounds float, float* %10, i64 %idxprom8.i.i.3 - %.pre1.i.i16.3 = load float, float* %arrayidx9.i.i.3, align 4, !tbaa !12 - %arrayidx11.i.i.3 = getelementptr inbounds float, float* %16, i64 %idxprom8.i.i.3 - %101 = load float, float* %arrayidx11.i.i.3, align 4, !tbaa !12 - %add14.i.i.3 = fadd float %101, %.pre1.i.i16.3 - store float %add14.i.i.3, float* %arrayidx9.i.i.3, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.3 - -if.end.r_exit.i.i.3: ; preds = %for.cond.preheader.i.i.3, %if.end.r_exit.i.i.2 - %102 = add nuw nsw i64 %_local_id_x.i.0, 4 - %exitcond9.not.3 = icmp eq i64 %102, 256 - br i1 %exitcond9.not.3, label %_pocl_kernel_gemver_kernel2.exit.loopexit25, label %pregion_for_entry.entry.i.i, !llvm.loop !43 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #4 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } -attributes #3 = { argmemonly nounwind readonly willreturn } -attributes #4 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"float*", !"float", !"int"} -!9 = !{!"", !"", !"", !"", !"", !""} -!10 = !{!"A", !"X", !"Y", !"Z", !"beta", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20} -!20 = distinct !{!20, !18} -!21 = !{!22} -!22 = distinct !{} -!23 = distinct !{!23, !24, !25} -!24 = !{!"llvm.loop.parallel_accesses", !22} -!25 = !{!"llvm.loop.isvectorized", i32 1} -!26 = distinct !{!26, !27} -!27 = !{!"llvm.loop.unroll.disable"} -!28 = distinct !{!28, !24} -!29 = distinct !{!29, !24, !25} -!30 = !{!31} -!31 = distinct !{!31, !32} -!32 = distinct !{!32, !"LVerDomain"} -!33 = !{!34} -!34 = distinct !{!34, !32} -!35 = distinct !{!35, !24, !25} -!36 = distinct !{!36, !24, !25} -!37 = !{!38} -!38 = distinct !{!38, !39} -!39 = distinct !{!39, !"LVerDomain"} -!40 = !{!41} -!41 = distinct !{!41, !39} -!42 = distinct !{!42, !24, !25} -!43 = distinct !{!43, !24, !25} diff --git a/pocl_irs/gemver_kernel3.ll b/pocl_irs/gemver_kernel3.ll deleted file mode 100644 index cbe520c..0000000 --- a/pocl_irs/gemver_kernel3.ll +++ /dev/null @@ -1,344 +0,0 @@ -; ModuleID = './OM/KBEGAJBBAALIMPLKDCOKKOGOMEPCEPLPNLGKF/gemver_kernel3/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_gemver_kernel3(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, float %3, i32 %4, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %5, i64 %6, i64 %7, i64 %8) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %6, 8 - %cmp219.i = icmp sgt i32 %4, 0 - %wide.trip.count.i = zext i32 %4 to i64 - br i1 %cmp219.i, label %pregion_for_entry.entry.i.us.preheader, label %gemver_kernel3.exit - -pregion_for_entry.entry.i.us.preheader: ; preds = %9 - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us.1, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %23, %if.end.r_exit.i.us.1 ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %4 - br i1 %cmp.i.us, label %for.body.lr.ph.i.us, label %if.end.r_exit.i.us - -for.body.lr.ph.i.us: ; preds = %pregion_for_entry.entry.i.us - %mul.i.us = mul nsw i32 %conv.i.us, %4 - %sext.i.us = shl i64 %add1.i.i.us, 32 - %idxprom8.i.us = ashr exact i64 %sext.i.us, 32 - %arrayidx9.i.us = getelementptr inbounds float, float* %2, i64 %idxprom8.i.us - %10 = sext i32 %mul.i.us to i64 - %.pre.i1.us4 = load float, float* %arrayidx9.i.us, align 4, !tbaa !12 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %for.body.lr.ph.i.us - %indvars.iv.next.i3.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %for.body.lr.ph.i.us ] - %11 = phi float [ %15, %for.body.i.us ], [ %.pre.i1.us4, %for.body.lr.ph.i.us ] - %12 = add nsw i64 %indvars.iv.next.i3.us, %10 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %12 - %13 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %mul4.i.us = fmul float %13, %3 - %arrayidx6.i.us = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.i3.us - %14 = load float, float* %arrayidx6.i.us, align 4, !tbaa !12 - %15 = tail call float @llvm.fmuladd.f32(float %mul4.i.us, float %14, float %11) #2 - store float %15, float* %arrayidx9.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i3.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %if.end.r_exit.i.us.loopexit, label %for.body.i.us, !llvm.loop !18 - -if.end.r_exit.i.us.loopexit: ; preds = %for.body.i.us - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %if.end.r_exit.i.us.loopexit, %pregion_for_entry.entry.i.us - %16 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1 = add nuw nsw i64 %16, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp.i.us.1 = icmp slt i32 %conv.i.us.1, %4 - br i1 %cmp.i.us.1, label %for.body.lr.ph.i.us.1, label %if.end.r_exit.i.us.1 - -gemver_kernel3.exit.loopexit: ; preds = %if.end.r_exit.i.us.1 - br label %gemver_kernel3.exit - -gemver_kernel3.exit: ; preds = %gemver_kernel3.exit.loopexit, %9 - ret void - -for.body.lr.ph.i.us.1: ; preds = %if.end.r_exit.i.us - %mul.i.us.1 = mul nsw i32 %conv.i.us.1, %4 - %sext.i.us.1 = shl i64 %add1.i.i.us.1, 32 - %idxprom8.i.us.1 = ashr exact i64 %sext.i.us.1, 32 - %arrayidx9.i.us.1 = getelementptr inbounds float, float* %2, i64 %idxprom8.i.us.1 - %17 = sext i32 %mul.i.us.1 to i64 - %.pre.i1.us4.1 = load float, float* %arrayidx9.i.us.1, align 4, !tbaa !12 - br label %for.body.i.us.1 - -for.body.i.us.1: ; preds = %for.body.i.us.1, %for.body.lr.ph.i.us.1 - %indvars.iv.next.i3.us.1 = phi i64 [ %indvars.iv.next.i.us.1, %for.body.i.us.1 ], [ 0, %for.body.lr.ph.i.us.1 ] - %18 = phi float [ %22, %for.body.i.us.1 ], [ %.pre.i1.us4.1, %for.body.lr.ph.i.us.1 ] - %19 = add nsw i64 %indvars.iv.next.i3.us.1, %17 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %0, i64 %19 - %20 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %mul4.i.us.1 = fmul float %20, %3 - %arrayidx6.i.us.1 = getelementptr inbounds float, float* %1, i64 %indvars.iv.next.i3.us.1 - %21 = load float, float* %arrayidx6.i.us.1, align 4, !tbaa !12 - %22 = tail call float @llvm.fmuladd.f32(float %mul4.i.us.1, float %21, float %18) #2 - store float %22, float* %arrayidx9.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.1, 1 - %exitcond.not.i.us.1 = icmp eq i64 %indvars.iv.next.i.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.1, label %if.end.r_exit.i.us.1.loopexit, label %for.body.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.us.1.loopexit: ; preds = %for.body.i.us.1 - br label %if.end.r_exit.i.us.1 - -if.end.r_exit.i.us.1: ; preds = %if.end.r_exit.i.us.1.loopexit, %if.end.r_exit.i.us - %23 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %23, 256 - br i1 %exitcond.not.1, label %gemver_kernel3.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_gemver_kernel3_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to float** - %19 = load float*, float** %18, align 8 - %20 = load float, float* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp219.i.i = icmp sgt i32 %24, 0 - %wide.trip.count.i.i = zext i32 %24 to i64 - br i1 %cmp219.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %_pocl_kernel_gemver_kernel3.exit - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %38, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %24, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.r_exit.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %24, %conv.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom8.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx9.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom8.i.i.us - %25 = sext i32 %mul.i.i.us to i64 - %.pre.i.i1.us4 = load float, float* %arrayidx9.i.i.us, align 4, !tbaa !12 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i3.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %26 = phi float [ %30, %for.body.i.i.us ], [ %.pre.i.i1.us4, %for.body.lr.ph.i.i.us ] - %27 = add nsw i64 %indvars.iv.next.i.i3.us, %25 - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %27 - %28 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %mul4.i.i.us = fmul float %20, %28 - %arrayidx6.i.i.us = getelementptr inbounds float, float* %12, i64 %indvars.iv.next.i.i3.us - %29 = load float, float* %arrayidx6.i.i.us, align 4, !tbaa !12 - %30 = tail call float @llvm.fmuladd.f32(float %mul4.i.i.us, float %29, float %26) #2 - store float %30, float* %arrayidx9.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i3.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.r_exit.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.r_exit.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.end.r_exit.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %31 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %31, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %24, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %for.body.lr.ph.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -_pocl_kernel_gemver_kernel3.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_gemver_kernel3.exit - -_pocl_kernel_gemver_kernel3.exit: ; preds = %_pocl_kernel_gemver_kernel3.exit.loopexit, %5 - ret void - -for.body.lr.ph.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %mul.i.i.us.1 = mul nsw i32 %24, %conv.i.i.us.1 - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom8.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx9.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %idxprom8.i.i.us.1 - %32 = sext i32 %mul.i.i.us.1 to i64 - %.pre.i.i1.us4.1 = load float, float* %arrayidx9.i.i.us.1, align 4, !tbaa !12 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %for.body.lr.ph.i.i.us.1 - %indvars.iv.next.i.i3.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %for.body.lr.ph.i.i.us.1 ] - %33 = phi float [ %37, %for.body.i.i.us.1 ], [ %.pre.i.i1.us4.1, %for.body.lr.ph.i.i.us.1 ] - %34 = add nsw i64 %indvars.iv.next.i.i3.us.1, %32 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %8, i64 %34 - %35 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %mul4.i.i.us.1 = fmul float %20, %35 - %arrayidx6.i.i.us.1 = getelementptr inbounds float, float* %12, i64 %indvars.iv.next.i.i3.us.1 - %36 = load float, float* %arrayidx6.i.i.us.1, align 4, !tbaa !12 - %37 = tail call float @llvm.fmuladd.f32(float %mul4.i.i.us.1, float %36, float %33) #2 - store float %37, float* %arrayidx9.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.1, label %if.end.r_exit.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.loopexit, %if.end.r_exit.i.i.us - %38 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %38, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_gemver_kernel3.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_gemver_kernel3_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to float** - %16 = load float*, float** %15, align 8 - %17 = load float, float* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp219.i.i = icmp sgt i32 %21, 0 - %wide.trip.count.i.i = zext i32 %21 to i64 - br i1 %cmp219.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %_pocl_kernel_gemver_kernel3.exit - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %35, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %21, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.r_exit.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %21, %conv.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom8.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx9.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom8.i.i.us - %22 = sext i32 %mul.i.i.us to i64 - %.pre.i.i1.us4 = load float, float* %arrayidx9.i.i.us, align 4, !tbaa !12 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i3.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %23 = phi float [ %27, %for.body.i.i.us ], [ %.pre.i.i1.us4, %for.body.lr.ph.i.i.us ] - %24 = add nsw i64 %indvars.iv.next.i.i3.us, %22 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %24 - %25 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %mul4.i.i.us = fmul float %17, %25 - %arrayidx6.i.i.us = getelementptr inbounds float, float* %10, i64 %indvars.iv.next.i.i3.us - %26 = load float, float* %arrayidx6.i.i.us, align 4, !tbaa !12 - %27 = tail call float @llvm.fmuladd.f32(float %mul4.i.i.us, float %26, float %23) #2 - store float %27, float* %arrayidx9.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i3.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.r_exit.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.r_exit.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.end.r_exit.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %28 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %28, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %21, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %for.body.lr.ph.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -_pocl_kernel_gemver_kernel3.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_gemver_kernel3.exit - -_pocl_kernel_gemver_kernel3.exit: ; preds = %_pocl_kernel_gemver_kernel3.exit.loopexit, %5 - ret void - -for.body.lr.ph.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %mul.i.i.us.1 = mul nsw i32 %21, %conv.i.i.us.1 - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom8.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx9.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %idxprom8.i.i.us.1 - %29 = sext i32 %mul.i.i.us.1 to i64 - %.pre.i.i1.us4.1 = load float, float* %arrayidx9.i.i.us.1, align 4, !tbaa !12 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %for.body.lr.ph.i.i.us.1 - %indvars.iv.next.i.i3.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %for.body.lr.ph.i.i.us.1 ] - %30 = phi float [ %34, %for.body.i.i.us.1 ], [ %.pre.i.i1.us4.1, %for.body.lr.ph.i.i.us.1 ] - %31 = add nsw i64 %indvars.iv.next.i.i3.us.1, %29 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %31 - %32 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %mul4.i.i.us.1 = fmul float %17, %32 - %arrayidx6.i.i.us.1 = getelementptr inbounds float, float* %10, i64 %indvars.iv.next.i.i3.us.1 - %33 = load float, float* %arrayidx6.i.i.us.1, align 4, !tbaa !12 - %34 = tail call float @llvm.fmuladd.f32(float %mul4.i.i.us.1, float %33, float %30) #2 - store float %34, float* %arrayidx9.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.1, label %if.end.r_exit.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.loopexit, %if.end.r_exit.i.i.us - %35 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %35, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_gemver_kernel3.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"float", !"int"} -!9 = !{!"", !"", !"", !"", !""} -!10 = !{!"A", !"X", !"w", !"alpha", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = distinct !{!20, !21} -!21 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/gesummv.ll b/pocl_irs/gesummv.ll deleted file mode 100644 index 92097ad..0000000 --- a/pocl_irs/gesummv.ll +++ /dev/null @@ -1,681 +0,0 @@ -; ModuleID = './AJ/DGEGMLMFGPCKGEIFAGKKHFNDNOEDKKFLBPBAA/gesummv_kernel/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_gesummv_kernel(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture readonly %2, float* nocapture %3, float* nocapture %4, float %5, float %6, i32 %7, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %8, i64 %9, i64 %10, i64 %11) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %9, 8 - %cmp248.i = icmp sgt i32 %7, 0 - %wide.trip.count.i = zext i32 %7 to i64 - br i1 %cmp248.i, label %pregion_for_entry.entry.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.us.preheader: ; preds = %12 - br label %pregion_for_entry.entry.i.us - -vector.memcheck: ; preds = %12 - %13 = trunc i64 %9 to i32 - %14 = shl i32 %13, 8 - %15 = sext i32 %14 to i64 - %scevgep = getelementptr float, float* %3, i64 %15 - %16 = add nsw i64 %15, 256 - %scevgep9 = getelementptr float, float* %3, i64 %16 - %scevgep11 = getelementptr float, float* %4, i64 %15 - %scevgep13 = getelementptr float, float* %4, i64 %16 - %bound0 = icmp ult float* %scevgep, %scevgep13 - %bound1 = icmp ult float* %scevgep11, %scevgep9 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.preheader, label %vector.ph - -pregion_for_entry.entry.i.preheader: ; preds = %vector.memcheck - br label %pregion_for_entry.entry.i - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert15 = insertelement <8 x i32> undef, i32 %7, i32 0 - %broadcast.splat16 = shufflevector <8 x i32> %broadcast.splatinsert15, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <8 x float> undef, float %6, i32 0 - %broadcast.splat19 = shufflevector <8 x float> %broadcast.splatinsert18, <8 x float> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert20 = insertelement <8 x float> undef, float %5, i32 0 - %broadcast.splat21 = shufflevector <8 x float> %broadcast.splatinsert20, <8 x float> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %17 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %18 = trunc <8 x i64> %17 to <8 x i32> - %19 = icmp sgt <8 x i32> %broadcast.splat16, %18 - %20 = extractelement <8 x i64> %17, i32 0 - %21 = shl i64 %20, 32 - %22 = ashr exact i64 %21, 32 - %23 = getelementptr inbounds float, float* %3, i64 %22 - %24 = bitcast float* %23 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %24, i32 4, <8 x i1> %19, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %25 = getelementptr inbounds float, float* %4, i64 %22 - %26 = bitcast float* %25 to <8 x float>* - %wide.masked.load17 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %26, i32 4, <8 x i1> %19, <8 x float> undef), !tbaa !12, !alias.scope !19 - %27 = fmul <8 x float> %wide.masked.load, %broadcast.splat19 - %28 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %broadcast.splat21, <8 x float> %wide.masked.load17, <8 x float> %27) - %29 = bitcast float* %23 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %28, <8 x float>* %29, i32 4, <8 x i1> %19), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !21 - %vec.ind.next = add <8 x i64> %vec.ind, - %30 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %31 = trunc <8 x i64> %30 to <8 x i32> - %32 = icmp sgt <8 x i32> %broadcast.splat16, %31 - %33 = extractelement <8 x i64> %30, i32 0 - %34 = shl i64 %33, 32 - %35 = ashr exact i64 %34, 32 - %36 = getelementptr inbounds float, float* %3, i64 %35 - %37 = bitcast float* %36 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %37, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %38 = getelementptr inbounds float, float* %4, i64 %35 - %39 = bitcast float* %38 to <8 x float>* - %wide.masked.load17.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %39, i32 4, <8 x i1> %32, <8 x float> undef), !tbaa !12, !alias.scope !19 - %40 = fmul <8 x float> %wide.masked.load.1, %broadcast.splat19 - %41 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %broadcast.splat21, <8 x float> %wide.masked.load17.1, <8 x float> %40) - %42 = bitcast float* %36 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %41, <8 x float>* %42, i32 4, <8 x i1> %32), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !21 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %43 = icmp eq i64 %index.next.1, 256 - br i1 %43, label %gesummv_kernel.exit.loopexit25, label %vector.body, !llvm.loop !23 - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %56, %if.end.r_exit.i.us ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %7 - br i1 %cmp.i.us, label %for.body.lr.ph.i.us, label %if.end.r_exit.i.us - -for.body.lr.ph.i.us: ; preds = %pregion_for_entry.entry.i.us - %mul.i.us = mul nsw i32 %conv.i.us, %7 - %sext47.i.us = shl i64 %add1.i.i.us, 32 - %idxprom7.i.us = ashr exact i64 %sext47.i.us, 32 - %arrayidx8.i.us = getelementptr inbounds float, float* %4, i64 %idxprom7.i.us - %arrayidx17.i.us = getelementptr inbounds float, float* %3, i64 %idxprom7.i.us - %44 = sext i32 %mul.i.us to i64 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %for.body.lr.ph.i.us - %indvars.iv.next.i2.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %for.body.lr.ph.i.us ] - %45 = add nsw i64 %indvars.iv.next.i2.us, %44 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %45 - %46 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %arrayidx5.i.us = getelementptr inbounds float, float* %2, i64 %indvars.iv.next.i2.us - %47 = load float, float* %arrayidx5.i.us, align 4, !tbaa !12 - %48 = load float, float* %arrayidx8.i.us, align 4, !tbaa !12 - %49 = tail call float @llvm.fmuladd.f32(float %46, float %47, float %48) #2 - store float %49, float* %arrayidx8.i.us, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx12.i.us = getelementptr inbounds float, float* %1, i64 %45 - %50 = load float, float* %arrayidx12.i.us, align 4, !tbaa !12 - %51 = load float, float* %arrayidx5.i.us, align 4, !tbaa !12 - %52 = load float, float* %arrayidx17.i.us, align 4, !tbaa !12 - %53 = tail call float @llvm.fmuladd.f32(float %50, float %51, float %52) #2 - store float %53, float* %arrayidx17.i.us, align 4, !tbaa !12, !llvm.access.group !21 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i2.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %for.end.loopexit.i.us, label %for.body.i.us, !llvm.loop !26 - -for.end.loopexit.i.us: ; preds = %for.body.i.us - %.lcssa = phi float [ %53, %for.body.i.us ] - %54 = load float, float* %arrayidx8.i.us, align 4, !tbaa !12 - %mul23.i.us = fmul float %.lcssa, %6 - %55 = tail call float @llvm.fmuladd.f32(float %5, float %54, float %mul23.i.us) #2 - store float %55, float* %arrayidx17.i.us, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %for.end.loopexit.i.us, %pregion_for_entry.entry.i.us - %56 = add nuw nsw i64 %_local_id_x.0.us, 1 - %exitcond.not = icmp eq i64 %56, 256 - br i1 %exitcond.not, label %gesummv_kernel.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !28 - -pregion_for_entry.entry.i: ; preds = %if.end.r_exit.i.1, %pregion_for_entry.entry.i.preheader - %_local_id_x.0 = phi i64 [ %62, %if.end.r_exit.i.1 ], [ 0, %pregion_for_entry.entry.i.preheader ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %conv.i = trunc i64 %add1.i.i to i32 - %cmp.i = icmp slt i32 %conv.i, %7 - br i1 %cmp.i, label %for.cond.preheader.for.end_crit_edge.i, label %if.end.r_exit.i - -for.cond.preheader.for.end_crit_edge.i: ; preds = %pregion_for_entry.entry.i - %.pre.i = shl i64 %add1.i.i, 32 - %.pre51.i = ashr exact i64 %.pre.i, 32 - %arrayidx22.phi.trans.insert.i = getelementptr inbounds float, float* %3, i64 %.pre51.i - %.pre1.i13 = load float, float* %arrayidx22.phi.trans.insert.i, align 4, !tbaa !12 - %arrayidx19.i = getelementptr inbounds float, float* %4, i64 %.pre51.i - %57 = load float, float* %arrayidx19.i, align 4, !tbaa !12 - %mul23.i = fmul float %.pre1.i13, %6 - %58 = tail call float @llvm.fmuladd.f32(float %5, float %57, float %mul23.i) #2 - store float %58, float* %arrayidx22.phi.trans.insert.i, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i - -if.end.r_exit.i: ; preds = %for.cond.preheader.for.end_crit_edge.i, %pregion_for_entry.entry.i - %59 = or i64 %_local_id_x.0, 1 - %add1.i.i.1 = add nuw nsw i64 %59, %mul.i.i - %conv.i.1 = trunc i64 %add1.i.i.1 to i32 - %cmp.i.1 = icmp slt i32 %conv.i.1, %7 - br i1 %cmp.i.1, label %for.cond.preheader.for.end_crit_edge.i.1, label %if.end.r_exit.i.1 - -gesummv_kernel.exit.loopexit: ; preds = %if.end.r_exit.i.us - br label %gesummv_kernel.exit - -gesummv_kernel.exit.loopexit24: ; preds = %if.end.r_exit.i.1 - br label %gesummv_kernel.exit - -gesummv_kernel.exit.loopexit25: ; preds = %vector.body - br label %gesummv_kernel.exit - -gesummv_kernel.exit: ; preds = %gesummv_kernel.exit.loopexit25, %gesummv_kernel.exit.loopexit24, %gesummv_kernel.exit.loopexit - ret void - -for.cond.preheader.for.end_crit_edge.i.1: ; preds = %if.end.r_exit.i - %.pre.i.1 = shl i64 %add1.i.i.1, 32 - %.pre51.i.1 = ashr exact i64 %.pre.i.1, 32 - %arrayidx22.phi.trans.insert.i.1 = getelementptr inbounds float, float* %3, i64 %.pre51.i.1 - %.pre1.i13.1 = load float, float* %arrayidx22.phi.trans.insert.i.1, align 4, !tbaa !12 - %arrayidx19.i.1 = getelementptr inbounds float, float* %4, i64 %.pre51.i.1 - %60 = load float, float* %arrayidx19.i.1, align 4, !tbaa !12 - %mul23.i.1 = fmul float %.pre1.i13.1, %6 - %61 = tail call float @llvm.fmuladd.f32(float %5, float %60, float %mul23.i.1) #2 - store float %61, float* %arrayidx22.phi.trans.insert.i.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.1 - -if.end.r_exit.i.1: ; preds = %for.cond.preheader.for.end_crit_edge.i.1, %if.end.r_exit.i - %62 = add nuw nsw i64 %_local_id_x.0, 2 - %exitcond5.not.1 = icmp eq i64 %62, 256 - br i1 %exitcond5.not.1, label %gesummv_kernel.exit.loopexit24, label %pregion_for_entry.entry.i, !llvm.loop !29 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_gesummv_kernel_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to float*** - %19 = load float**, float*** %18, align 8 - %20 = load float*, float** %19, align 8 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to float*** - %23 = load float**, float*** %22, align 8 - %24 = load float*, float** %23, align 8 - %25 = getelementptr i8*, i8** %0, i64 5 - %26 = bitcast i8** %25 to float** - %27 = load float*, float** %26, align 8 - %28 = load float, float* %27, align 4 - %29 = getelementptr i8*, i8** %0, i64 6 - %30 = bitcast i8** %29 to float** - %31 = load float*, float** %30, align 8 - %32 = load float, float* %31, align 4 - %33 = getelementptr i8*, i8** %0, i64 7 - %34 = bitcast i8** %33 to i32** - %35 = load i32*, i32** %34, align 8 - %36 = load i32, i32* %35, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp248.i.i = icmp sgt i32 %36, 0 - %wide.trip.count.i.i = zext i32 %36 to i64 - br i1 %cmp248.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %5 - %37 = trunc i64 %2 to i32 - %38 = shl i32 %37, 8 - %39 = sext i32 %38 to i64 - %scevgep = getelementptr float, float* %20, i64 %39 - %40 = add nsw i64 %39, 256 - %scevgep9 = getelementptr float, float* %20, i64 %40 - %scevgep11 = getelementptr float, float* %24, i64 %39 - %scevgep13 = getelementptr float, float* %24, i64 %40 - %bound0 = icmp ult float* %scevgep, %scevgep13 - %bound1 = icmp ult float* %scevgep11, %scevgep9 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck - br label %pregion_for_entry.entry.i.i - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert15 = insertelement <8 x i32> undef, i32 %36, i32 0 - %broadcast.splat16 = shufflevector <8 x i32> %broadcast.splatinsert15, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <8 x float> undef, float %32, i32 0 - %broadcast.splat19 = shufflevector <8 x float> %broadcast.splatinsert18, <8 x float> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert20 = insertelement <8 x float> undef, float %28, i32 0 - %broadcast.splat21 = shufflevector <8 x float> %broadcast.splatinsert20, <8 x float> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %41 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %42 = trunc <8 x i64> %41 to <8 x i32> - %43 = icmp sgt <8 x i32> %broadcast.splat16, %42 - %44 = extractelement <8 x i64> %41, i32 0 - %45 = shl i64 %44, 32 - %46 = ashr exact i64 %45, 32 - %47 = getelementptr inbounds float, float* %20, i64 %46 - %48 = bitcast float* %47 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %48, i32 4, <8 x i1> %43, <8 x float> undef), !tbaa !12, !alias.scope !30, !noalias !33 - %49 = getelementptr inbounds float, float* %24, i64 %46 - %50 = bitcast float* %49 to <8 x float>* - %wide.masked.load17 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %50, i32 4, <8 x i1> %43, <8 x float> undef), !tbaa !12, !alias.scope !33 - %51 = fmul <8 x float> %broadcast.splat19, %wide.masked.load - %52 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %broadcast.splat21, <8 x float> %wide.masked.load17, <8 x float> %51) - %53 = bitcast float* %47 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %52, <8 x float>* %53, i32 4, <8 x i1> %43), !tbaa !12, !alias.scope !30, !noalias !33, !llvm.access.group !21 - %vec.ind.next = add <8 x i64> %vec.ind, - %54 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %55 = trunc <8 x i64> %54 to <8 x i32> - %56 = icmp sgt <8 x i32> %broadcast.splat16, %55 - %57 = extractelement <8 x i64> %54, i32 0 - %58 = shl i64 %57, 32 - %59 = ashr exact i64 %58, 32 - %60 = getelementptr inbounds float, float* %20, i64 %59 - %61 = bitcast float* %60 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %61, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12, !alias.scope !30, !noalias !33 - %62 = getelementptr inbounds float, float* %24, i64 %59 - %63 = bitcast float* %62 to <8 x float>* - %wide.masked.load17.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %63, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12, !alias.scope !33 - %64 = fmul <8 x float> %broadcast.splat19, %wide.masked.load.1 - %65 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %broadcast.splat21, <8 x float> %wide.masked.load17.1, <8 x float> %64) - %66 = bitcast float* %60 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %65, <8 x float>* %66, i32 4, <8 x i1> %56), !tbaa !12, !alias.scope !30, !noalias !33, !llvm.access.group !21 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %67 = icmp eq i64 %index.next.1, 256 - br i1 %67, label %_pocl_kernel_gesummv_kernel.exit.loopexit25, label %vector.body, !llvm.loop !35 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %80, %if.end.r_exit.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %36, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.r_exit.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %36, %conv.i.i.us - %sext47.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom7.i.i.us = ashr exact i64 %sext47.i.i.us, 32 - %arrayidx8.i.i.us = getelementptr inbounds float, float* %24, i64 %idxprom7.i.i.us - %arrayidx17.i.i.us = getelementptr inbounds float, float* %20, i64 %idxprom7.i.i.us - %68 = sext i32 %mul.i.i.us to i64 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i2.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %69 = add nsw i64 %indvars.iv.next.i.i2.us, %68 - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %69 - %70 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %arrayidx5.i.i.us = getelementptr inbounds float, float* %16, i64 %indvars.iv.next.i.i2.us - %71 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %72 = load float, float* %arrayidx8.i.i.us, align 4, !tbaa !12 - %73 = tail call float @llvm.fmuladd.f32(float %70, float %71, float %72) #2 - store float %73, float* %arrayidx8.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx12.i.i.us = getelementptr inbounds float, float* %12, i64 %69 - %74 = load float, float* %arrayidx12.i.i.us, align 4, !tbaa !12 - %75 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %76 = load float, float* %arrayidx17.i.i.us, align 4, !tbaa !12 - %77 = tail call float @llvm.fmuladd.f32(float %74, float %75, float %76) #2 - store float %77, float* %arrayidx17.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i2.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %for.end.loopexit.i.i.us, label %for.body.i.i.us, !llvm.loop !26 - -for.end.loopexit.i.i.us: ; preds = %for.body.i.i.us - %.lcssa = phi float [ %77, %for.body.i.i.us ] - %78 = load float, float* %arrayidx8.i.i.us, align 4, !tbaa !12 - %mul23.i.i.us = fmul float %32, %.lcssa - %79 = tail call float @llvm.fmuladd.f32(float %28, float %78, float %mul23.i.i.us) #2 - store float %79, float* %arrayidx17.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %for.end.loopexit.i.i.us, %pregion_for_entry.entry.i.i.us - %80 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %80, 256 - br i1 %exitcond.not, label %_pocl_kernel_gesummv_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !28 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ %86, %if.end.r_exit.i.i.1 ], [ 0, %pregion_for_entry.entry.i.i.preheader ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %36, %conv.i.i - br i1 %cmp.i.i, label %for.cond.preheader.for.end_crit_edge.i.i, label %if.end.r_exit.i.i - -for.cond.preheader.for.end_crit_edge.i.i: ; preds = %pregion_for_entry.entry.i.i - %.pre.i.i = shl i64 %add1.i.i.i, 32 - %.pre51.i.i = ashr exact i64 %.pre.i.i, 32 - %arrayidx22.phi.trans.insert.i.i = getelementptr inbounds float, float* %20, i64 %.pre51.i.i - %.pre1.i.i13 = load float, float* %arrayidx22.phi.trans.insert.i.i, align 4, !tbaa !12 - %arrayidx19.i.i = getelementptr inbounds float, float* %24, i64 %.pre51.i.i - %81 = load float, float* %arrayidx19.i.i, align 4, !tbaa !12 - %mul23.i.i = fmul float %32, %.pre1.i.i13 - %82 = tail call float @llvm.fmuladd.f32(float %28, float %81, float %mul23.i.i) #2 - store float %82, float* %arrayidx22.phi.trans.insert.i.i, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %for.cond.preheader.for.end_crit_edge.i.i, %pregion_for_entry.entry.i.i - %83 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %83, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %36, %conv.i.i.1 - br i1 %cmp.i.i.1, label %for.cond.preheader.for.end_crit_edge.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_gesummv_kernel.exit.loopexit: ; preds = %if.end.r_exit.i.i.us - br label %_pocl_kernel_gesummv_kernel.exit - -_pocl_kernel_gesummv_kernel.exit.loopexit24: ; preds = %if.end.r_exit.i.i.1 - br label %_pocl_kernel_gesummv_kernel.exit - -_pocl_kernel_gesummv_kernel.exit.loopexit25: ; preds = %vector.body - br label %_pocl_kernel_gesummv_kernel.exit - -_pocl_kernel_gesummv_kernel.exit: ; preds = %_pocl_kernel_gesummv_kernel.exit.loopexit25, %_pocl_kernel_gesummv_kernel.exit.loopexit24, %_pocl_kernel_gesummv_kernel.exit.loopexit - ret void - -for.cond.preheader.for.end_crit_edge.i.i.1: ; preds = %if.end.r_exit.i.i - %.pre.i.i.1 = shl i64 %add1.i.i.i.1, 32 - %.pre51.i.i.1 = ashr exact i64 %.pre.i.i.1, 32 - %arrayidx22.phi.trans.insert.i.i.1 = getelementptr inbounds float, float* %20, i64 %.pre51.i.i.1 - %.pre1.i.i13.1 = load float, float* %arrayidx22.phi.trans.insert.i.i.1, align 4, !tbaa !12 - %arrayidx19.i.i.1 = getelementptr inbounds float, float* %24, i64 %.pre51.i.i.1 - %84 = load float, float* %arrayidx19.i.i.1, align 4, !tbaa !12 - %mul23.i.i.1 = fmul float %32, %.pre1.i.i13.1 - %85 = tail call float @llvm.fmuladd.f32(float %28, float %84, float %mul23.i.i.1) #2 - store float %85, float* %arrayidx22.phi.trans.insert.i.i.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %for.cond.preheader.for.end_crit_edge.i.i.1, %if.end.r_exit.i.i - %86 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond5.not.1 = icmp eq i64 %86, 256 - br i1 %exitcond5.not.1, label %_pocl_kernel_gesummv_kernel.exit.loopexit24, label %pregion_for_entry.entry.i.i, !llvm.loop !36 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_gesummv_kernel_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to float** - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 4 - %18 = bitcast i8** %17 to float** - %19 = load float*, float** %18, align 8 - %20 = getelementptr i8*, i8** %0, i64 5 - %21 = bitcast i8** %20 to float** - %22 = load float*, float** %21, align 8 - %23 = load float, float* %22, align 4 - %24 = getelementptr i8*, i8** %0, i64 6 - %25 = bitcast i8** %24 to float** - %26 = load float*, float** %25, align 8 - %27 = load float, float* %26, align 4 - %28 = getelementptr i8*, i8** %0, i64 7 - %29 = bitcast i8** %28 to i32** - %30 = load i32*, i32** %29, align 8 - %31 = load i32, i32* %30, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp248.i.i = icmp sgt i32 %31, 0 - %wide.trip.count.i.i = zext i32 %31 to i64 - br i1 %cmp248.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %5 - %32 = trunc i64 %2 to i32 - %33 = shl i32 %32, 8 - %34 = sext i32 %33 to i64 - %scevgep = getelementptr float, float* %16, i64 %34 - %35 = add nsw i64 %34, 256 - %scevgep9 = getelementptr float, float* %16, i64 %35 - %scevgep11 = getelementptr float, float* %19, i64 %34 - %scevgep13 = getelementptr float, float* %19, i64 %35 - %bound0 = icmp ult float* %scevgep, %scevgep13 - %bound1 = icmp ult float* %scevgep11, %scevgep9 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck - br label %pregion_for_entry.entry.i.i - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert15 = insertelement <8 x i32> undef, i32 %31, i32 0 - %broadcast.splat16 = shufflevector <8 x i32> %broadcast.splatinsert15, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <8 x float> undef, float %27, i32 0 - %broadcast.splat19 = shufflevector <8 x float> %broadcast.splatinsert18, <8 x float> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert20 = insertelement <8 x float> undef, float %23, i32 0 - %broadcast.splat21 = shufflevector <8 x float> %broadcast.splatinsert20, <8 x float> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %36 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %37 = trunc <8 x i64> %36 to <8 x i32> - %38 = icmp sgt <8 x i32> %broadcast.splat16, %37 - %39 = extractelement <8 x i64> %36, i32 0 - %40 = shl i64 %39, 32 - %41 = ashr exact i64 %40, 32 - %42 = getelementptr inbounds float, float* %16, i64 %41 - %43 = bitcast float* %42 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %43, i32 4, <8 x i1> %38, <8 x float> undef), !tbaa !12, !alias.scope !37, !noalias !40 - %44 = getelementptr inbounds float, float* %19, i64 %41 - %45 = bitcast float* %44 to <8 x float>* - %wide.masked.load17 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %45, i32 4, <8 x i1> %38, <8 x float> undef), !tbaa !12, !alias.scope !40 - %46 = fmul <8 x float> %broadcast.splat19, %wide.masked.load - %47 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %broadcast.splat21, <8 x float> %wide.masked.load17, <8 x float> %46) - %48 = bitcast float* %42 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %47, <8 x float>* %48, i32 4, <8 x i1> %38), !tbaa !12, !alias.scope !37, !noalias !40, !llvm.access.group !21 - %vec.ind.next = add <8 x i64> %vec.ind, - %49 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %50 = trunc <8 x i64> %49 to <8 x i32> - %51 = icmp sgt <8 x i32> %broadcast.splat16, %50 - %52 = extractelement <8 x i64> %49, i32 0 - %53 = shl i64 %52, 32 - %54 = ashr exact i64 %53, 32 - %55 = getelementptr inbounds float, float* %16, i64 %54 - %56 = bitcast float* %55 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %56, i32 4, <8 x i1> %51, <8 x float> undef), !tbaa !12, !alias.scope !37, !noalias !40 - %57 = getelementptr inbounds float, float* %19, i64 %54 - %58 = bitcast float* %57 to <8 x float>* - %wide.masked.load17.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %58, i32 4, <8 x i1> %51, <8 x float> undef), !tbaa !12, !alias.scope !40 - %59 = fmul <8 x float> %broadcast.splat19, %wide.masked.load.1 - %60 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %broadcast.splat21, <8 x float> %wide.masked.load17.1, <8 x float> %59) - %61 = bitcast float* %55 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %60, <8 x float>* %61, i32 4, <8 x i1> %51), !tbaa !12, !alias.scope !37, !noalias !40, !llvm.access.group !21 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %62 = icmp eq i64 %index.next.1, 256 - br i1 %62, label %_pocl_kernel_gesummv_kernel.exit.loopexit25, label %vector.body, !llvm.loop !42 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %75, %if.end.r_exit.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %31, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.r_exit.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %31, %conv.i.i.us - %sext47.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom7.i.i.us = ashr exact i64 %sext47.i.i.us, 32 - %arrayidx8.i.i.us = getelementptr inbounds float, float* %19, i64 %idxprom7.i.i.us - %arrayidx17.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom7.i.i.us - %63 = sext i32 %mul.i.i.us to i64 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i2.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %64 = add nsw i64 %indvars.iv.next.i.i2.us, %63 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %64 - %65 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %arrayidx5.i.i.us = getelementptr inbounds float, float* %13, i64 %indvars.iv.next.i.i2.us - %66 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %67 = load float, float* %arrayidx8.i.i.us, align 4, !tbaa !12 - %68 = tail call float @llvm.fmuladd.f32(float %65, float %66, float %67) #2 - store float %68, float* %arrayidx8.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - %arrayidx12.i.i.us = getelementptr inbounds float, float* %10, i64 %64 - %69 = load float, float* %arrayidx12.i.i.us, align 4, !tbaa !12 - %70 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %71 = load float, float* %arrayidx17.i.i.us, align 4, !tbaa !12 - %72 = tail call float @llvm.fmuladd.f32(float %69, float %70, float %71) #2 - store float %72, float* %arrayidx17.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i2.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %for.end.loopexit.i.i.us, label %for.body.i.i.us, !llvm.loop !26 - -for.end.loopexit.i.i.us: ; preds = %for.body.i.i.us - %.lcssa = phi float [ %72, %for.body.i.i.us ] - %73 = load float, float* %arrayidx8.i.i.us, align 4, !tbaa !12 - %mul23.i.i.us = fmul float %27, %.lcssa - %74 = tail call float @llvm.fmuladd.f32(float %23, float %73, float %mul23.i.i.us) #2 - store float %74, float* %arrayidx17.i.i.us, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %for.end.loopexit.i.i.us, %pregion_for_entry.entry.i.i.us - %75 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %75, 256 - br i1 %exitcond.not, label %_pocl_kernel_gesummv_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !28 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ %81, %if.end.r_exit.i.i.1 ], [ 0, %pregion_for_entry.entry.i.i.preheader ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %31, %conv.i.i - br i1 %cmp.i.i, label %for.cond.preheader.for.end_crit_edge.i.i, label %if.end.r_exit.i.i - -for.cond.preheader.for.end_crit_edge.i.i: ; preds = %pregion_for_entry.entry.i.i - %.pre.i.i = shl i64 %add1.i.i.i, 32 - %.pre51.i.i = ashr exact i64 %.pre.i.i, 32 - %arrayidx22.phi.trans.insert.i.i = getelementptr inbounds float, float* %16, i64 %.pre51.i.i - %.pre1.i.i13 = load float, float* %arrayidx22.phi.trans.insert.i.i, align 4, !tbaa !12 - %arrayidx19.i.i = getelementptr inbounds float, float* %19, i64 %.pre51.i.i - %76 = load float, float* %arrayidx19.i.i, align 4, !tbaa !12 - %mul23.i.i = fmul float %27, %.pre1.i.i13 - %77 = tail call float @llvm.fmuladd.f32(float %23, float %76, float %mul23.i.i) #2 - store float %77, float* %arrayidx22.phi.trans.insert.i.i, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %for.cond.preheader.for.end_crit_edge.i.i, %pregion_for_entry.entry.i.i - %78 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %78, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %31, %conv.i.i.1 - br i1 %cmp.i.i.1, label %for.cond.preheader.for.end_crit_edge.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_gesummv_kernel.exit.loopexit: ; preds = %if.end.r_exit.i.i.us - br label %_pocl_kernel_gesummv_kernel.exit - -_pocl_kernel_gesummv_kernel.exit.loopexit24: ; preds = %if.end.r_exit.i.i.1 - br label %_pocl_kernel_gesummv_kernel.exit - -_pocl_kernel_gesummv_kernel.exit.loopexit25: ; preds = %vector.body - br label %_pocl_kernel_gesummv_kernel.exit - -_pocl_kernel_gesummv_kernel.exit: ; preds = %_pocl_kernel_gesummv_kernel.exit.loopexit25, %_pocl_kernel_gesummv_kernel.exit.loopexit24, %_pocl_kernel_gesummv_kernel.exit.loopexit - ret void - -for.cond.preheader.for.end_crit_edge.i.i.1: ; preds = %if.end.r_exit.i.i - %.pre.i.i.1 = shl i64 %add1.i.i.i.1, 32 - %.pre51.i.i.1 = ashr exact i64 %.pre.i.i.1, 32 - %arrayidx22.phi.trans.insert.i.i.1 = getelementptr inbounds float, float* %16, i64 %.pre51.i.i.1 - %.pre1.i.i13.1 = load float, float* %arrayidx22.phi.trans.insert.i.i.1, align 4, !tbaa !12 - %arrayidx19.i.i.1 = getelementptr inbounds float, float* %19, i64 %.pre51.i.i.1 - %79 = load float, float* %arrayidx19.i.i.1, align 4, !tbaa !12 - %mul23.i.i.1 = fmul float %27, %.pre1.i.i13.1 - %80 = tail call float @llvm.fmuladd.f32(float %23, float %79, float %mul23.i.i.1) #2 - store float %80, float* %arrayidx22.phi.trans.insert.i.i.1, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %for.cond.preheader.for.end_crit_edge.i.i.1, %if.end.r_exit.i.i - %81 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond5.not.1 = icmp eq i64 %81, 256 - br i1 %exitcond5.not.1, label %_pocl_kernel_gesummv_kernel.exit.loopexit24, label %pregion_for_entry.entry.i.i, !llvm.loop !43 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) #0 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #4 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } -attributes #3 = { argmemonly nounwind readonly willreturn } -attributes #4 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE", !"DATA_TYPE", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"float*", !"float*", !"float", !"float", !"int"} -!9 = !{!"", !"", !"", !"", !"", !"", !"", !""} -!10 = !{!"a", !"b", !"x", !"y", !"tmp", !"alpha", !"beta", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20} -!20 = distinct !{!20, !18} -!21 = !{!22} -!22 = distinct !{} -!23 = distinct !{!23, !24, !25} -!24 = !{!"llvm.loop.parallel_accesses", !22} -!25 = !{!"llvm.loop.isvectorized", i32 1} -!26 = distinct !{!26, !27} -!27 = !{!"llvm.loop.unroll.disable"} -!28 = distinct !{!28, !24} -!29 = distinct !{!29, !24, !25} -!30 = !{!31} -!31 = distinct !{!31, !32} -!32 = distinct !{!32, !"LVerDomain"} -!33 = !{!34} -!34 = distinct !{!34, !32} -!35 = distinct !{!35, !24, !25} -!36 = distinct !{!36, !24, !25} -!37 = !{!38} -!38 = distinct !{!38, !39} -!39 = distinct !{!39, !"LVerDomain"} -!40 = !{!41} -!41 = distinct !{!41, !39} -!42 = distinct !{!42, !24, !25} -!43 = distinct !{!43, !24, !25} diff --git a/pocl_irs/gramschmidt_kernel1.ll b/pocl_irs/gramschmidt_kernel1.ll deleted file mode 100644 index 332be82..0000000 --- a/pocl_irs/gramschmidt_kernel1.ll +++ /dev/null @@ -1,445 +0,0 @@ -; ModuleID = './KJ/PLAOGDMDEAHKMIGEICILPNJNPJGMEGIKFDLHM/gramschmidt_kernel1/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.sqrt.f32(float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_gramschmidt_kernel1(float* nocapture readonly %0, float* nocapture %1, float* nocapture readnone %2, i32 %3, i32 %4, i32 %5, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %6, i64 %7, i64 %8, i64 %9) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %7, 8 - %cmp227.i = icmp sgt i32 %4, 0 - %11 = sext i32 %5 to i64 - %12 = sext i32 %3 to i64 - %wide.trip.count.i = zext i32 %4 to i64 - %mul10.i = mul nsw i32 %5, %3 - %add11.i = add nsw i32 %mul10.i, %3 - %idxprom12.i = sext i32 %add11.i to i64 - %arrayidx13.i = getelementptr inbounds float, float* %1, i64 %idxprom12.i - br i1 %cmp227.i, label %pregion_for_entry.entry.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.us.preheader: ; preds = %10 - br label %pregion_for_entry.entry.i.us - -vector.ph: ; preds = %10 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %62, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %62 ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %62 ] - %13 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %14 = trunc <8 x i64> %13 to <8 x i32> - %15 = icmp eq <8 x i32> %14, zeroinitializer - %16 = extractelement <8 x i1> %15, i32 0 - %17 = extractelement <8 x i1> %15, i32 1 - %18 = or i1 %16, %17 - %19 = extractelement <8 x i1> %15, i32 2 - %20 = or i1 %18, %19 - %21 = extractelement <8 x i1> %15, i32 3 - %22 = or i1 %20, %21 - %23 = extractelement <8 x i1> %15, i32 4 - %24 = or i1 %22, %23 - %25 = extractelement <8 x i1> %15, i32 5 - %26 = or i1 %24, %25 - %27 = extractelement <8 x i1> %15, i32 6 - %28 = or i1 %26, %27 - %29 = extractelement <8 x i1> %15, i32 7 - %30 = or i1 %28, %29 - %vec.ind.next = add <8 x i64> %vec.ind, - %31 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %32 = trunc <8 x i64> %31 to <8 x i32> - %33 = icmp eq <8 x i32> %32, zeroinitializer - %34 = extractelement <8 x i1> %33, i32 0 - %35 = extractelement <8 x i1> %33, i32 1 - %36 = or i1 %34, %35 - %37 = extractelement <8 x i1> %33, i32 2 - %38 = or i1 %36, %37 - %39 = extractelement <8 x i1> %33, i32 3 - %40 = or i1 %38, %39 - %41 = extractelement <8 x i1> %33, i32 4 - %42 = or i1 %40, %41 - %43 = extractelement <8 x i1> %33, i32 5 - %44 = or i1 %42, %43 - %45 = extractelement <8 x i1> %33, i32 6 - %46 = or i1 %44, %45 - %47 = extractelement <8 x i1> %33, i32 7 - %48 = or i1 %46, %47 - %49 = xor i1 %30, true - %50 = xor i1 %48, true - %51 = xor i1 %49, true - %52 = xor i1 %50, true - %53 = or i1 %51, %52 - br i1 %53, label %61, label %62 - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us.1, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %60, %if.end.r_exit.i.us.1 ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp eq i32 %conv.i.us, 0 - br i1 %cmp.i.us, label %for.body.i.us.preheader, label %if.end.r_exit.i.us.1 - -for.body.i.us.preheader: ; preds = %pregion_for_entry.entry.i.us - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %for.body.i.us.preheader - %54 = phi float [ %58, %for.body.i.us ], [ 0.000000e+00, %for.body.i.us.preheader ] - %indvars.iv.next.i1.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %for.body.i.us.preheader ] - %55 = mul nsw i64 %indvars.iv.next.i1.us, %11 - %56 = add nsw i64 %55, %12 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %56 - %57 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %58 = tail call float @llvm.fmuladd.f32(float %57, float %57, float %54) #2 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i1.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %for.end.loopexit.i.us, label %for.body.i.us, !llvm.loop !16 - -for.end.loopexit.i.us: ; preds = %for.body.i.us - %.lcssa = phi float [ %58, %for.body.i.us ] - %59 = tail call float @llvm.sqrt.f32(float %.lcssa) #2 - store float %59, float* %arrayidx13.i, align 4, !tbaa !12, !llvm.access.group !18 - br label %if.end.r_exit.i.us.1 - -gramschmidt_kernel1.exit.loopexit: ; preds = %if.end.r_exit.i.us.1 - br label %gramschmidt_kernel1.exit - -gramschmidt_kernel1.exit.loopexit23: ; preds = %62 - br label %gramschmidt_kernel1.exit - -gramschmidt_kernel1.exit: ; preds = %gramschmidt_kernel1.exit.loopexit23, %gramschmidt_kernel1.exit.loopexit - ret void - -if.end.r_exit.i.us.1: ; preds = %for.end.loopexit.i.us, %pregion_for_entry.entry.i.us - %60 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %60, 256 - br i1 %exitcond.not.1, label %gramschmidt_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !20 - -61: ; preds = %vector.body - store float 0.000000e+00, float* %arrayidx13.i, align 4, !tbaa !12 - br label %62 - -62: ; preds = %61, %vector.body - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %63 = icmp eq i64 %index.next.1, 256 - br i1 %63, label %gramschmidt_kernel1.exit.loopexit23, label %vector.body, !llvm.loop !22 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_gramschmidt_kernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 3 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %17 = getelementptr i8*, i8** %0, i64 4 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 5 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp227.i.i = icmp sgt i32 %20, 0 - %25 = sext i32 %24 to i64 - %26 = sext i32 %16 to i64 - %wide.trip.count.i.i = zext i32 %20 to i64 - %mul10.i.i = mul nsw i32 %24, %16 - %add11.i.i = add nsw i32 %mul10.i.i, %16 - %idxprom12.i.i = sext i32 %add11.i.i to i64 - %arrayidx13.i.i = getelementptr inbounds float, float* %12, i64 %idxprom12.i.i - br i1 %cmp227.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -vector.ph: ; preds = %5 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %76, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %76 ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %76 ] - %27 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %28 = trunc <8 x i64> %27 to <8 x i32> - %29 = icmp eq <8 x i32> %28, zeroinitializer - %30 = extractelement <8 x i1> %29, i32 0 - %31 = extractelement <8 x i1> %29, i32 1 - %32 = or i1 %30, %31 - %33 = extractelement <8 x i1> %29, i32 2 - %34 = or i1 %32, %33 - %35 = extractelement <8 x i1> %29, i32 3 - %36 = or i1 %34, %35 - %37 = extractelement <8 x i1> %29, i32 4 - %38 = or i1 %36, %37 - %39 = extractelement <8 x i1> %29, i32 5 - %40 = or i1 %38, %39 - %41 = extractelement <8 x i1> %29, i32 6 - %42 = or i1 %40, %41 - %43 = extractelement <8 x i1> %29, i32 7 - %44 = or i1 %42, %43 - %vec.ind.next = add <8 x i64> %vec.ind, - %45 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %46 = trunc <8 x i64> %45 to <8 x i32> - %47 = icmp eq <8 x i32> %46, zeroinitializer - %48 = extractelement <8 x i1> %47, i32 0 - %49 = extractelement <8 x i1> %47, i32 1 - %50 = or i1 %48, %49 - %51 = extractelement <8 x i1> %47, i32 2 - %52 = or i1 %50, %51 - %53 = extractelement <8 x i1> %47, i32 3 - %54 = or i1 %52, %53 - %55 = extractelement <8 x i1> %47, i32 4 - %56 = or i1 %54, %55 - %57 = extractelement <8 x i1> %47, i32 5 - %58 = or i1 %56, %57 - %59 = extractelement <8 x i1> %47, i32 6 - %60 = or i1 %58, %59 - %61 = extractelement <8 x i1> %47, i32 7 - %62 = or i1 %60, %61 - %63 = xor i1 %44, true - %64 = xor i1 %62, true - %65 = xor i1 %63, true - %66 = xor i1 %64, true - %67 = or i1 %65, %66 - br i1 %67, label %75, label %76 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %74, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp eq i32 %conv.i.i.us, 0 - br i1 %cmp.i.i.us, label %for.body.i.i.us.preheader, label %if.end.r_exit.i.i.us.1 - -for.body.i.i.us.preheader: ; preds = %pregion_for_entry.entry.i.i.us - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.i.i.us.preheader - %68 = phi float [ %72, %for.body.i.i.us ], [ 0.000000e+00, %for.body.i.i.us.preheader ] - %indvars.iv.next.i.i1.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.i.i.us.preheader ] - %69 = mul nsw i64 %indvars.iv.next.i.i1.us, %25 - %70 = add nsw i64 %69, %26 - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %70 - %71 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %72 = tail call float @llvm.fmuladd.f32(float %71, float %71, float %68) #2 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i1.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %for.end.loopexit.i.i.us, label %for.body.i.i.us, !llvm.loop !16 - -for.end.loopexit.i.i.us: ; preds = %for.body.i.i.us - %.lcssa = phi float [ %72, %for.body.i.i.us ] - %73 = tail call float @llvm.sqrt.f32(float %.lcssa) #2 - store float %73, float* %arrayidx13.i.i, align 4, !tbaa !12, !llvm.access.group !18 - br label %if.end.r_exit.i.i.us.1 - -_pocl_kernel_gramschmidt_kernel1.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_gramschmidt_kernel1.exit - -_pocl_kernel_gramschmidt_kernel1.exit.loopexit23: ; preds = %76 - br label %_pocl_kernel_gramschmidt_kernel1.exit - -_pocl_kernel_gramschmidt_kernel1.exit: ; preds = %_pocl_kernel_gramschmidt_kernel1.exit.loopexit23, %_pocl_kernel_gramschmidt_kernel1.exit.loopexit - ret void - -if.end.r_exit.i.i.us.1: ; preds = %for.end.loopexit.i.i.us, %pregion_for_entry.entry.i.i.us - %74 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %74, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_gramschmidt_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 - -75: ; preds = %vector.body - store float 0.000000e+00, float* %arrayidx13.i.i, align 4, !tbaa !12 - br label %76 - -76: ; preds = %75, %vector.body - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %77 = icmp eq i64 %index.next.1, 256 - br i1 %77, label %_pocl_kernel_gramschmidt_kernel1.exit.loopexit23, label %vector.body, !llvm.loop !24 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_gramschmidt_kernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 3 - %12 = bitcast i8** %11 to i32** - %13 = load i32*, i32** %12, align 8 - %14 = load i32, i32* %13, align 4 - %15 = getelementptr i8*, i8** %0, i64 4 - %16 = bitcast i8** %15 to i32** - %17 = load i32*, i32** %16, align 8 - %18 = load i32, i32* %17, align 4 - %19 = getelementptr i8*, i8** %0, i64 5 - %20 = bitcast i8** %19 to i32** - %21 = load i32*, i32** %20, align 8 - %22 = load i32, i32* %21, align 4 - %mul.i.i.i = shl i64 %2, 8 - %cmp227.i.i = icmp sgt i32 %18, 0 - %23 = sext i32 %22 to i64 - %24 = sext i32 %14 to i64 - %wide.trip.count.i.i = zext i32 %18 to i64 - %mul10.i.i = mul nsw i32 %22, %14 - %add11.i.i = add nsw i32 %mul10.i.i, %14 - %idxprom12.i.i = sext i32 %add11.i.i to i64 - %arrayidx13.i.i = getelementptr inbounds float, float* %10, i64 %idxprom12.i.i - br i1 %cmp227.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -vector.ph: ; preds = %5 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %74, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %74 ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %74 ] - %25 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %26 = trunc <8 x i64> %25 to <8 x i32> - %27 = icmp eq <8 x i32> %26, zeroinitializer - %28 = extractelement <8 x i1> %27, i32 0 - %29 = extractelement <8 x i1> %27, i32 1 - %30 = or i1 %28, %29 - %31 = extractelement <8 x i1> %27, i32 2 - %32 = or i1 %30, %31 - %33 = extractelement <8 x i1> %27, i32 3 - %34 = or i1 %32, %33 - %35 = extractelement <8 x i1> %27, i32 4 - %36 = or i1 %34, %35 - %37 = extractelement <8 x i1> %27, i32 5 - %38 = or i1 %36, %37 - %39 = extractelement <8 x i1> %27, i32 6 - %40 = or i1 %38, %39 - %41 = extractelement <8 x i1> %27, i32 7 - %42 = or i1 %40, %41 - %vec.ind.next = add <8 x i64> %vec.ind, - %43 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %44 = trunc <8 x i64> %43 to <8 x i32> - %45 = icmp eq <8 x i32> %44, zeroinitializer - %46 = extractelement <8 x i1> %45, i32 0 - %47 = extractelement <8 x i1> %45, i32 1 - %48 = or i1 %46, %47 - %49 = extractelement <8 x i1> %45, i32 2 - %50 = or i1 %48, %49 - %51 = extractelement <8 x i1> %45, i32 3 - %52 = or i1 %50, %51 - %53 = extractelement <8 x i1> %45, i32 4 - %54 = or i1 %52, %53 - %55 = extractelement <8 x i1> %45, i32 5 - %56 = or i1 %54, %55 - %57 = extractelement <8 x i1> %45, i32 6 - %58 = or i1 %56, %57 - %59 = extractelement <8 x i1> %45, i32 7 - %60 = or i1 %58, %59 - %61 = xor i1 %42, true - %62 = xor i1 %60, true - %63 = xor i1 %61, true - %64 = xor i1 %62, true - %65 = or i1 %63, %64 - br i1 %65, label %73, label %74 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %72, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp eq i32 %conv.i.i.us, 0 - br i1 %cmp.i.i.us, label %for.body.i.i.us.preheader, label %if.end.r_exit.i.i.us.1 - -for.body.i.i.us.preheader: ; preds = %pregion_for_entry.entry.i.i.us - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.i.i.us.preheader - %66 = phi float [ %70, %for.body.i.i.us ], [ 0.000000e+00, %for.body.i.i.us.preheader ] - %indvars.iv.next.i.i1.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.i.i.us.preheader ] - %67 = mul nsw i64 %indvars.iv.next.i.i1.us, %23 - %68 = add nsw i64 %67, %24 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %68 - %69 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %70 = tail call float @llvm.fmuladd.f32(float %69, float %69, float %66) #2 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i1.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %for.end.loopexit.i.i.us, label %for.body.i.i.us, !llvm.loop !16 - -for.end.loopexit.i.i.us: ; preds = %for.body.i.i.us - %.lcssa = phi float [ %70, %for.body.i.i.us ] - %71 = tail call float @llvm.sqrt.f32(float %.lcssa) #2 - store float %71, float* %arrayidx13.i.i, align 4, !tbaa !12, !llvm.access.group !18 - br label %if.end.r_exit.i.i.us.1 - -_pocl_kernel_gramschmidt_kernel1.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_gramschmidt_kernel1.exit - -_pocl_kernel_gramschmidt_kernel1.exit.loopexit23: ; preds = %74 - br label %_pocl_kernel_gramschmidt_kernel1.exit - -_pocl_kernel_gramschmidt_kernel1.exit: ; preds = %_pocl_kernel_gramschmidt_kernel1.exit.loopexit23, %_pocl_kernel_gramschmidt_kernel1.exit.loopexit - ret void - -if.end.r_exit.i.i.us.1: ; preds = %for.end.loopexit.i.i.us, %pregion_for_entry.entry.i.i.us - %72 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %72, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_gramschmidt_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 - -73: ; preds = %vector.body - store float 0.000000e+00, float* %arrayidx13.i.i, align 4, !tbaa !12 - br label %74 - -74: ; preds = %73, %vector.body - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %75 = icmp eq i64 %index.next.1, 256 - br i1 %75, label %_pocl_kernel_gramschmidt_kernel1.exit.loopexit23, label %vector.body, !llvm.loop !25 -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !""} -!10 = !{!"a", !"r", !"q", !"k", !"ni", !"nj"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = distinct !{!16, !17} -!17 = !{!"llvm.loop.unroll.disable"} -!18 = !{!19} -!19 = distinct !{} -!20 = distinct !{!20, !21} -!21 = !{!"llvm.loop.parallel_accesses", !19} -!22 = distinct !{!22, !21, !23} -!23 = !{!"llvm.loop.isvectorized", i32 1} -!24 = distinct !{!24, !21, !23} -!25 = distinct !{!25, !21, !23} diff --git a/pocl_irs/gramschmidt_kernel2.ll b/pocl_irs/gramschmidt_kernel2.ll deleted file mode 100644 index 9314ce6..0000000 --- a/pocl_irs/gramschmidt_kernel2.ll +++ /dev/null @@ -1,545 +0,0 @@ -; ModuleID = './KJ/PLAOGDMDEAHKMIGEICILPNJNPJGMEGIKFDLHM/gramschmidt_kernel2/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_gramschmidt_kernel2(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i32 %3, i32 %4, i32 %5, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %6, i64 %7, i64 %8, i64 %9) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { -vector.scevcheck: - %mul.i.i = shl i64 %7, 8 - %mul2.i = mul nsw i32 %5, %3 - %add3.i = add nsw i32 %mul2.i, %3 - %idxprom4.i = sext i32 %add3.i to i64 - %arrayidx5.i = getelementptr inbounds float, float* %1, i64 %idxprom4.i - %ident.check = icmp ne i32 %5, 1 - %10 = trunc i64 %7 to i32 - %11 = shl i32 %10, 8 - %12 = add i32 %11, %3 - %13 = icmp sgt i32 %12, 2147483392 - %14 = or i1 %ident.check, %13 - br i1 %14, label %pregion_for_entry.entry.i.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i - -vector.memcheck: ; preds = %vector.scevcheck - %scevgep = getelementptr float, float* %1, i64 %idxprom4.i - %scevgep1 = bitcast float* %scevgep to i8* - %uglygep = getelementptr i8, i8* %scevgep1, i64 1 - %15 = trunc i64 %7 to i32 - %16 = shl i32 %15, 8 - %17 = add i32 %16, %3 - %18 = sext i32 %17 to i64 - %scevgep2 = getelementptr float, float* %2, i64 %18 - %scevgep23 = bitcast float* %scevgep2 to i8* - %19 = add nsw i64 %18, 256 - %scevgep4 = getelementptr float, float* %2, i64 %19 - %scevgep6 = getelementptr float, float* %0, i64 %18 - %scevgep8 = getelementptr float, float* %0, i64 %19 - %bound0 = icmp ult float* %arrayidx5.i, %scevgep4 - %bound1 = icmp ugt i8* %uglygep, %scevgep23 - %found.conflict = and i1 %bound0, %bound1 - %bound010 = icmp ult float* %scevgep2, %scevgep8 - %bound111 = icmp ult float* %scevgep6, %scevgep4 - %found.conflict12 = and i1 %bound010, %bound111 - %conflict.rdx = or i1 %found.conflict, %found.conflict12 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert13 = insertelement <8 x i32> undef, i32 %4, i32 0 - %broadcast.splat14 = shufflevector <8 x i32> %broadcast.splatinsert13, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert15 = insertelement <8 x float*> undef, float* %arrayidx5.i, i32 0 - %broadcast.splat16 = shufflevector <8 x float*> %broadcast.splatinsert15, <8 x float*> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %20 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %21 = trunc <8 x i64> %20 to <8 x i32> - %22 = icmp sgt <8 x i32> %broadcast.splat14, %21 - %23 = extractelement <8 x i32> %21, i32 0 - %24 = mul nsw i32 %23, %5 - %25 = add nsw i32 %24, %3 - %26 = sext i32 %25 to i64 - %27 = getelementptr inbounds float, float* %0, i64 %26 - %28 = bitcast float* %27 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %28, i32 4, <8 x i1> %22, <8 x float> undef), !tbaa !12, !alias.scope !16 - %wide.masked.gather = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat16, i32 4, <8 x i1> %22, <8 x float> undef), !tbaa !12, !alias.scope !19, !noalias !21 - %29 = fdiv <8 x float> %wide.masked.load, %wide.masked.gather, !fpmath !23 - %30 = getelementptr inbounds float, float* %2, i64 %26 - %31 = bitcast float* %30 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %29, <8 x float>* %31, i32 4, <8 x i1> %22), !tbaa !12, !alias.scope !21, !noalias !16, !llvm.access.group !24 - %vec.ind.next = add <8 x i64> %vec.ind, - %32 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %33 = trunc <8 x i64> %32 to <8 x i32> - %34 = icmp sgt <8 x i32> %broadcast.splat14, %33 - %35 = extractelement <8 x i32> %33, i32 0 - %36 = mul nsw i32 %35, %5 - %37 = add nsw i32 %36, %3 - %38 = sext i32 %37 to i64 - %39 = getelementptr inbounds float, float* %0, i64 %38 - %40 = bitcast float* %39 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %40, i32 4, <8 x i1> %34, <8 x float> undef), !tbaa !12, !alias.scope !16 - %wide.masked.gather.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat16, i32 4, <8 x i1> %34, <8 x float> undef), !tbaa !12, !alias.scope !19, !noalias !21 - %41 = fdiv <8 x float> %wide.masked.load.1, %wide.masked.gather.1, !fpmath !23 - %42 = getelementptr inbounds float, float* %2, i64 %38 - %43 = bitcast float* %42 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %41, <8 x float>* %43, i32 4, <8 x i1> %34), !tbaa !12, !alias.scope !21, !noalias !16, !llvm.access.group !24 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %44 = icmp eq i64 %index.next.1, 256 - br i1 %44, label %gramschmidt_kernel2.exit.loopexit18, label %vector.body, !llvm.loop !26 - -pregion_for_entry.entry.i: ; preds = %if.end.r_exit.i.1, %pregion_for_entry.entry.i.preheader - %_local_id_x.0 = phi i64 [ 0, %pregion_for_entry.entry.i.preheader ], [ %50, %if.end.r_exit.i.1 ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %conv.i = trunc i64 %add1.i.i to i32 - %cmp.i = icmp slt i32 %conv.i, %4 - br i1 %cmp.i, label %if.then.i, label %if.end.r_exit.i - -if.then.i: ; preds = %pregion_for_entry.entry.i - %mul.i = mul nsw i32 %conv.i, %5 - %add.i = add nsw i32 %mul.i, %3 - %idxprom.i = sext i32 %add.i to i64 - %arrayidx.i = getelementptr inbounds float, float* %0, i64 %idxprom.i - %45 = load float, float* %arrayidx.i, align 4, !tbaa !12 - %46 = load float, float* %arrayidx5.i, align 4, !tbaa !12 - %div.i = fdiv float %45, %46, !fpmath !23 - %arrayidx9.i = getelementptr inbounds float, float* %2, i64 %idxprom.i - store float %div.i, float* %arrayidx9.i, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i - -if.end.r_exit.i: ; preds = %if.then.i, %pregion_for_entry.entry.i - %47 = or i64 %_local_id_x.0, 1 - %add1.i.i.1 = add nuw nsw i64 %47, %mul.i.i - %conv.i.1 = trunc i64 %add1.i.i.1 to i32 - %cmp.i.1 = icmp slt i32 %conv.i.1, %4 - br i1 %cmp.i.1, label %if.then.i.1, label %if.end.r_exit.i.1 - -gramschmidt_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.1 - br label %gramschmidt_kernel2.exit - -gramschmidt_kernel2.exit.loopexit18: ; preds = %vector.body - br label %gramschmidt_kernel2.exit - -gramschmidt_kernel2.exit: ; preds = %gramschmidt_kernel2.exit.loopexit18, %gramschmidt_kernel2.exit.loopexit - ret void - -if.then.i.1: ; preds = %if.end.r_exit.i - %mul.i.1 = mul nsw i32 %conv.i.1, %5 - %add.i.1 = add nsw i32 %mul.i.1, %3 - %idxprom.i.1 = sext i32 %add.i.1 to i64 - %arrayidx.i.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.1 - %48 = load float, float* %arrayidx.i.1, align 4, !tbaa !12 - %49 = load float, float* %arrayidx5.i, align 4, !tbaa !12 - %div.i.1 = fdiv float %48, %49, !fpmath !23 - %arrayidx9.i.1 = getelementptr inbounds float, float* %2, i64 %idxprom.i.1 - store float %div.i.1, float* %arrayidx9.i.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.1 - -if.end.r_exit.i.1: ; preds = %if.then.i.1, %if.end.r_exit.i - %50 = add nuw nsw i64 %_local_id_x.0, 2 - %exitcond.not.1 = icmp eq i64 %50, 256 - br i1 %exitcond.not.1, label %gramschmidt_kernel2.exit.loopexit, label %pregion_for_entry.entry.i, !llvm.loop !29 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_gramschmidt_kernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -vector.scevcheck: - %5 = bitcast i8** %0 to float*** - %6 = load float**, float*** %5, align 8 - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float*** - %10 = load float**, float*** %9, align 8 - %11 = load float*, float** %10, align 8 - %12 = getelementptr i8*, i8** %0, i64 2 - %13 = bitcast i8** %12 to float*** - %14 = load float**, float*** %13, align 8 - %15 = load float*, float** %14, align 8 - %16 = getelementptr i8*, i8** %0, i64 3 - %17 = bitcast i8** %16 to i32** - %18 = load i32*, i32** %17, align 8 - %19 = load i32, i32* %18, align 4 - %20 = getelementptr i8*, i8** %0, i64 4 - %21 = bitcast i8** %20 to i32** - %22 = load i32*, i32** %21, align 8 - %23 = load i32, i32* %22, align 4 - %24 = getelementptr i8*, i8** %0, i64 5 - %25 = bitcast i8** %24 to i32** - %26 = load i32*, i32** %25, align 8 - %27 = load i32, i32* %26, align 4 - %mul.i.i.i = shl i64 %2, 8 - %mul2.i.i = mul nsw i32 %27, %19 - %add3.i.i = add nsw i32 %mul2.i.i, %19 - %idxprom4.i.i = sext i32 %add3.i.i to i64 - %arrayidx5.i.i = getelementptr inbounds float, float* %11, i64 %idxprom4.i.i - %ident.check = icmp ne i32 %27, 1 - %28 = trunc i64 %2 to i32 - %29 = shl i32 %28, 8 - %30 = add i32 %19, %29 - %31 = icmp sgt i32 %30, 2147483392 - %32 = or i1 %ident.check, %31 - br i1 %32, label %pregion_for_entry.entry.i.i.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i - -vector.memcheck: ; preds = %vector.scevcheck - %scevgep = getelementptr float, float* %11, i64 %idxprom4.i.i - %scevgep1 = bitcast float* %scevgep to i8* - %uglygep = getelementptr i8, i8* %scevgep1, i64 1 - %33 = trunc i64 %2 to i32 - %34 = shl i32 %33, 8 - %35 = add i32 %19, %34 - %36 = sext i32 %35 to i64 - %scevgep2 = getelementptr float, float* %15, i64 %36 - %scevgep23 = bitcast float* %scevgep2 to i8* - %37 = add nsw i64 %36, 256 - %scevgep4 = getelementptr float, float* %15, i64 %37 - %scevgep6 = getelementptr float, float* %7, i64 %36 - %scevgep8 = getelementptr float, float* %7, i64 %37 - %bound0 = icmp ult float* %arrayidx5.i.i, %scevgep4 - %bound1 = icmp ugt i8* %uglygep, %scevgep23 - %found.conflict = and i1 %bound0, %bound1 - %bound010 = icmp ult float* %scevgep2, %scevgep8 - %bound111 = icmp ult float* %scevgep6, %scevgep4 - %found.conflict12 = and i1 %bound010, %bound111 - %conflict.rdx = or i1 %found.conflict, %found.conflict12 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert13 = insertelement <8 x i32> undef, i32 %23, i32 0 - %broadcast.splat14 = shufflevector <8 x i32> %broadcast.splatinsert13, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert15 = insertelement <8 x float*> undef, float* %arrayidx5.i.i, i32 0 - %broadcast.splat16 = shufflevector <8 x float*> %broadcast.splatinsert15, <8 x float*> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %38 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %39 = trunc <8 x i64> %38 to <8 x i32> - %40 = icmp sgt <8 x i32> %broadcast.splat14, %39 - %41 = extractelement <8 x i32> %39, i32 0 - %42 = mul nsw i32 %27, %41 - %43 = add nsw i32 %42, %19 - %44 = sext i32 %43 to i64 - %45 = getelementptr inbounds float, float* %7, i64 %44 - %46 = bitcast float* %45 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %46, i32 4, <8 x i1> %40, <8 x float> undef), !tbaa !12, !alias.scope !30 - %wide.masked.gather = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat16, i32 4, <8 x i1> %40, <8 x float> undef), !tbaa !12, !alias.scope !33, !noalias !35 - %47 = fdiv <8 x float> %wide.masked.load, %wide.masked.gather, !fpmath !23 - %48 = getelementptr inbounds float, float* %15, i64 %44 - %49 = bitcast float* %48 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %47, <8 x float>* %49, i32 4, <8 x i1> %40), !tbaa !12, !alias.scope !35, !noalias !30, !llvm.access.group !24 - %vec.ind.next = add <8 x i64> %vec.ind, - %50 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %51 = trunc <8 x i64> %50 to <8 x i32> - %52 = icmp sgt <8 x i32> %broadcast.splat14, %51 - %53 = extractelement <8 x i32> %51, i32 0 - %54 = mul nsw i32 %27, %53 - %55 = add nsw i32 %54, %19 - %56 = sext i32 %55 to i64 - %57 = getelementptr inbounds float, float* %7, i64 %56 - %58 = bitcast float* %57 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %58, i32 4, <8 x i1> %52, <8 x float> undef), !tbaa !12, !alias.scope !30 - %wide.masked.gather.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat16, i32 4, <8 x i1> %52, <8 x float> undef), !tbaa !12, !alias.scope !33, !noalias !35 - %59 = fdiv <8 x float> %wide.masked.load.1, %wide.masked.gather.1, !fpmath !23 - %60 = getelementptr inbounds float, float* %15, i64 %56 - %61 = bitcast float* %60 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %59, <8 x float>* %61, i32 4, <8 x i1> %52), !tbaa !12, !alias.scope !35, !noalias !30, !llvm.access.group !24 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %62 = icmp eq i64 %index.next.1, 256 - br i1 %62, label %_pocl_kernel_gramschmidt_kernel2.exit.loopexit18, label %vector.body, !llvm.loop !37 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ 0, %pregion_for_entry.entry.i.i.preheader ], [ %68, %if.end.r_exit.i.i.1 ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %23, %conv.i.i - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %mul.i.i = mul nsw i32 %27, %conv.i.i - %add.i.i = add nsw i32 %mul.i.i, %19 - %idxprom.i.i = sext i32 %add.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %7, i64 %idxprom.i.i - %63 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %64 = load float, float* %arrayidx5.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %63, %64, !fpmath !23 - %arrayidx9.i.i = getelementptr inbounds float, float* %15, i64 %idxprom.i.i - store float %div.i.i, float* %arrayidx9.i.i, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %65 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %65, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %23, %conv.i.i.1 - br i1 %cmp.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_gramschmidt_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.i.1 - br label %_pocl_kernel_gramschmidt_kernel2.exit - -_pocl_kernel_gramschmidt_kernel2.exit.loopexit18: ; preds = %vector.body - br label %_pocl_kernel_gramschmidt_kernel2.exit - -_pocl_kernel_gramschmidt_kernel2.exit: ; preds = %_pocl_kernel_gramschmidt_kernel2.exit.loopexit18, %_pocl_kernel_gramschmidt_kernel2.exit.loopexit - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %mul.i.i.1 = mul nsw i32 %27, %conv.i.i.1 - %add.i.i.1 = add nsw i32 %mul.i.i.1, %19 - %idxprom.i.i.1 = sext i32 %add.i.i.1 to i64 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.1 - %66 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %67 = load float, float* %arrayidx5.i.i, align 4, !tbaa !12 - %div.i.i.1 = fdiv float %66, %67, !fpmath !23 - %arrayidx9.i.i.1 = getelementptr inbounds float, float* %15, i64 %idxprom.i.i.1 - store float %div.i.i.1, float* %arrayidx9.i.i.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %68 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond.not.1 = icmp eq i64 %68, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_gramschmidt_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i, !llvm.loop !38 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_gramschmidt_kernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -vector.scevcheck: - %5 = bitcast i8** %0 to float** - %6 = load float*, float** %5, align 8 - %7 = getelementptr i8*, i8** %0, i64 1 - %8 = bitcast i8** %7 to float** - %9 = load float*, float** %8, align 8 - %10 = getelementptr i8*, i8** %0, i64 2 - %11 = bitcast i8** %10 to float** - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 3 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %17 = getelementptr i8*, i8** %0, i64 4 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 5 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %mul.i.i.i = shl i64 %2, 8 - %mul2.i.i = mul nsw i32 %24, %16 - %add3.i.i = add nsw i32 %mul2.i.i, %16 - %idxprom4.i.i = sext i32 %add3.i.i to i64 - %arrayidx5.i.i = getelementptr inbounds float, float* %9, i64 %idxprom4.i.i - %ident.check = icmp ne i32 %24, 1 - %25 = trunc i64 %2 to i32 - %26 = shl i32 %25, 8 - %27 = add i32 %16, %26 - %28 = icmp sgt i32 %27, 2147483392 - %29 = or i1 %ident.check, %28 - br i1 %29, label %pregion_for_entry.entry.i.i.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i - -vector.memcheck: ; preds = %vector.scevcheck - %scevgep = getelementptr float, float* %9, i64 %idxprom4.i.i - %scevgep1 = bitcast float* %scevgep to i8* - %uglygep = getelementptr i8, i8* %scevgep1, i64 1 - %30 = trunc i64 %2 to i32 - %31 = shl i32 %30, 8 - %32 = add i32 %16, %31 - %33 = sext i32 %32 to i64 - %scevgep2 = getelementptr float, float* %12, i64 %33 - %scevgep23 = bitcast float* %scevgep2 to i8* - %34 = add nsw i64 %33, 256 - %scevgep4 = getelementptr float, float* %12, i64 %34 - %scevgep6 = getelementptr float, float* %6, i64 %33 - %scevgep8 = getelementptr float, float* %6, i64 %34 - %bound0 = icmp ult float* %arrayidx5.i.i, %scevgep4 - %bound1 = icmp ugt i8* %uglygep, %scevgep23 - %found.conflict = and i1 %bound0, %bound1 - %bound010 = icmp ult float* %scevgep2, %scevgep8 - %bound111 = icmp ult float* %scevgep6, %scevgep4 - %found.conflict12 = and i1 %bound010, %bound111 - %conflict.rdx = or i1 %found.conflict, %found.conflict12 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert13 = insertelement <8 x i32> undef, i32 %20, i32 0 - %broadcast.splat14 = shufflevector <8 x i32> %broadcast.splatinsert13, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert15 = insertelement <8 x float*> undef, float* %arrayidx5.i.i, i32 0 - %broadcast.splat16 = shufflevector <8 x float*> %broadcast.splatinsert15, <8 x float*> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.1, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.1, %vector.body ] - %35 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %36 = trunc <8 x i64> %35 to <8 x i32> - %37 = icmp sgt <8 x i32> %broadcast.splat14, %36 - %38 = extractelement <8 x i32> %36, i32 0 - %39 = mul nsw i32 %24, %38 - %40 = add nsw i32 %39, %16 - %41 = sext i32 %40 to i64 - %42 = getelementptr inbounds float, float* %6, i64 %41 - %43 = bitcast float* %42 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %43, i32 4, <8 x i1> %37, <8 x float> undef), !tbaa !12, !alias.scope !39 - %wide.masked.gather = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat16, i32 4, <8 x i1> %37, <8 x float> undef), !tbaa !12, !alias.scope !42, !noalias !44 - %44 = fdiv <8 x float> %wide.masked.load, %wide.masked.gather, !fpmath !23 - %45 = getelementptr inbounds float, float* %12, i64 %41 - %46 = bitcast float* %45 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %44, <8 x float>* %46, i32 4, <8 x i1> %37), !tbaa !12, !alias.scope !44, !noalias !39, !llvm.access.group !24 - %vec.ind.next = add <8 x i64> %vec.ind, - %47 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %48 = trunc <8 x i64> %47 to <8 x i32> - %49 = icmp sgt <8 x i32> %broadcast.splat14, %48 - %50 = extractelement <8 x i32> %48, i32 0 - %51 = mul nsw i32 %24, %50 - %52 = add nsw i32 %51, %16 - %53 = sext i32 %52 to i64 - %54 = getelementptr inbounds float, float* %6, i64 %53 - %55 = bitcast float* %54 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %55, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !39 - %wide.masked.gather.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat16, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !42, !noalias !44 - %56 = fdiv <8 x float> %wide.masked.load.1, %wide.masked.gather.1, !fpmath !23 - %57 = getelementptr inbounds float, float* %12, i64 %53 - %58 = bitcast float* %57 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %56, <8 x float>* %58, i32 4, <8 x i1> %49), !tbaa !12, !alias.scope !44, !noalias !39, !llvm.access.group !24 - %index.next.1 = add nuw nsw i64 %index, 16 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %59 = icmp eq i64 %index.next.1, 256 - br i1 %59, label %_pocl_kernel_gramschmidt_kernel2.exit.loopexit18, label %vector.body, !llvm.loop !46 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ 0, %pregion_for_entry.entry.i.i.preheader ], [ %65, %if.end.r_exit.i.i.1 ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %20, %conv.i.i - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %mul.i.i = mul nsw i32 %24, %conv.i.i - %add.i.i = add nsw i32 %mul.i.i, %16 - %idxprom.i.i = sext i32 %add.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %6, i64 %idxprom.i.i - %60 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %61 = load float, float* %arrayidx5.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %60, %61, !fpmath !23 - %arrayidx9.i.i = getelementptr inbounds float, float* %12, i64 %idxprom.i.i - store float %div.i.i, float* %arrayidx9.i.i, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %62 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %62, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %20, %conv.i.i.1 - br i1 %cmp.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_gramschmidt_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.i.1 - br label %_pocl_kernel_gramschmidt_kernel2.exit - -_pocl_kernel_gramschmidt_kernel2.exit.loopexit18: ; preds = %vector.body - br label %_pocl_kernel_gramschmidt_kernel2.exit - -_pocl_kernel_gramschmidt_kernel2.exit: ; preds = %_pocl_kernel_gramschmidt_kernel2.exit.loopexit18, %_pocl_kernel_gramschmidt_kernel2.exit.loopexit - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %mul.i.i.1 = mul nsw i32 %24, %conv.i.i.1 - %add.i.i.1 = add nsw i32 %mul.i.i.1, %16 - %idxprom.i.i.1 = sext i32 %add.i.i.1 to i64 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.1 - %63 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %64 = load float, float* %arrayidx5.i.i, align 4, !tbaa !12 - %div.i.i.1 = fdiv float %63, %64, !fpmath !23 - %arrayidx9.i.i.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.1 - store float %div.i.i.1, float* %arrayidx9.i.i.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %65 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond.not.1 = icmp eq i64 %65, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_gramschmidt_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i, !llvm.loop !47 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #2 - -; Function Attrs: nounwind readonly willreturn -declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #4 - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nofree norecurse nounwind } -attributes #2 = { argmemonly nounwind readonly willreturn } -attributes #3 = { nounwind readonly willreturn } -attributes #4 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !""} -!10 = !{!"a", !"r", !"q", !"k", !"ni", !"nj"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20} -!20 = distinct !{!20, !18} -!21 = !{!22} -!22 = distinct !{!22, !18} -!23 = !{float 2.500000e+00} -!24 = !{!25} -!25 = distinct !{} -!26 = distinct !{!26, !27, !28} -!27 = !{!"llvm.loop.parallel_accesses", !25} -!28 = !{!"llvm.loop.isvectorized", i32 1} -!29 = distinct !{!29, !27, !28} -!30 = !{!31} -!31 = distinct !{!31, !32} -!32 = distinct !{!32, !"LVerDomain"} -!33 = !{!34} -!34 = distinct !{!34, !32} -!35 = !{!36} -!36 = distinct !{!36, !32} -!37 = distinct !{!37, !27, !28} -!38 = distinct !{!38, !27, !28} -!39 = !{!40} -!40 = distinct !{!40, !41} -!41 = distinct !{!41, !"LVerDomain"} -!42 = !{!43} -!43 = distinct !{!43, !41} -!44 = !{!45} -!45 = distinct !{!45, !41} -!46 = distinct !{!46, !27, !28} -!47 = distinct !{!47, !27, !28} diff --git a/pocl_irs/gramschmidt_kernel3.ll b/pocl_irs/gramschmidt_kernel3.ll deleted file mode 100644 index bf5933e..0000000 --- a/pocl_irs/gramschmidt_kernel3.ll +++ /dev/null @@ -1,590 +0,0 @@ -; ModuleID = './KJ/PLAOGDMDEAHKMIGEICILPNJNPJGMEGIKFDLHM/gramschmidt_kernel3/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_gramschmidt_kernel3(float* nocapture %0, float* nocapture %1, float* nocapture readonly %2, i32 %3, i32 %4, i32 %5, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %6, i64 %7, i64 %8, i64 %9) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %7, 8 - %add.i = add nsw i32 %3, 1 - %mul.i = mul nsw i32 %5, %3 - %cmp574.i = icmp sgt i32 %4, 0 - %11 = sext i32 %5 to i64 - %12 = sext i32 %3 to i64 - %wide.trip.count84.i = zext i32 %4 to i64 - %arrayidx2786.i = getelementptr inbounds float, float* %2, i64 %12 - %exitcond.not89.i = icmp eq i32 %4, 1 - %13 = add nsw i64 %wide.trip.count84.i, -1 - %min.iters.check.i = icmp ugt i64 %13, 15 - %ident.check.not.i = icmp eq i32 %5, 1 - %or.cond.i = and i1 %ident.check.not.i, %min.iters.check.i - %14 = add nsw i64 %12, 1 - %scevgep6.i = getelementptr float, float* %2, i64 %14 - %15 = add nsw i64 %wide.trip.count84.i, %12 - %scevgep8.i = getelementptr float, float* %2, i64 %15 - %n.vec.i = and i64 %13, -16 - %ind.end.i = or i64 %n.vec.i, 1 - %cmp.n.i = icmp eq i64 %13, %n.vec.i - br label %pregion_for_entry.entry.i - -pregion_for_entry.entry.i: ; preds = %if.end.i, %10 - %_local_id_x.0 = phi i64 [ 0, %10 ], [ %55, %if.end.i ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %16 = trunc i64 %add1.i.i to i32 - %conv2.i = add i32 %add.i, %16 - %cmp.i = icmp slt i32 %conv2.i, %5 - br i1 %cmp.i, label %if.then.i, label %if.end.i - -if.then.i: ; preds = %pregion_for_entry.entry.i - %add4.i = add nsw i32 %conv2.i, %mul.i - %idxprom.i = sext i32 %add4.i to i64 - %arrayidx.i = getelementptr float, float* %1, i64 %idxprom.i - store float 0.000000e+00, float* %arrayidx.i, align 4, !tbaa !12, !llvm.access.group !16 - br i1 %cmp574.i, label %for.body.preheader.i, label %if.end.i - -for.body.preheader.i: ; preds = %if.then.i - %17 = sext i32 %conv2.i to i64 - br label %for.body.i - -for.body23.preheader.i: ; preds = %for.body.i - %.lcssa = phi float [ %48, %for.body.i ] - %18 = load float, float* %arrayidx2786.i, align 4, !tbaa !12 - %arrayidx3687.i = getelementptr inbounds float, float* %0, i64 %17 - %19 = load float, float* %arrayidx3687.i, align 4, !tbaa !12 - %neg88.i = fneg float %18 - %20 = tail call float @llvm.fmuladd.f32(float %neg88.i, float %.lcssa, float %19) #2 - store float %20, float* %arrayidx3687.i, align 4, !tbaa !12, !llvm.access.group !16 - br i1 %exitcond.not89.i, label %if.end.i, label %for.body23.for.body23_crit_edge.preheader.i, !llvm.loop !18 - -for.body23.for.body23_crit_edge.preheader.i: ; preds = %for.body23.preheader.i - br i1 %or.cond.i, label %vector.memcheck.i, label %for.body23.for.body23_crit_edge.i.preheader - -for.body23.for.body23_crit_edge.i.preheader: ; preds = %middle.block.i, %vector.memcheck.i, %for.body23.for.body23_crit_edge.preheader.i - %indvars.iv.next.i9.ph = phi i64 [ 1, %for.body23.for.body23_crit_edge.preheader.i ], [ 1, %vector.memcheck.i ], [ %ind.end.i, %middle.block.i ] - br label %for.body23.for.body23_crit_edge.i - -vector.memcheck.i: ; preds = %for.body23.for.body23_crit_edge.preheader.i - %21 = add nsw i64 %17, 1 - %scevgep.i = getelementptr float, float* %0, i64 %21 - %scevgep1.i = bitcast float* %scevgep.i to i8* - %22 = add nsw i64 %17, %wide.trip.count84.i - %scevgep2.i = getelementptr float, float* %0, i64 %22 - %scevgep45.i = bitcast float* %arrayidx.i to i8* - %uglygep.i = getelementptr i8, i8* %scevgep45.i, i64 1 - %bound0.i = icmp ugt i8* %uglygep.i, %scevgep1.i - %bound1.i = icmp ult float* %arrayidx.i, %scevgep2.i - %found.conflict.i = and i1 %bound1.i, %bound0.i - %bound010.i = icmp ult float* %scevgep.i, %scevgep8.i - %bound111.i = icmp ult float* %scevgep6.i, %scevgep2.i - %found.conflict12.i = and i1 %bound010.i, %bound111.i - %conflict.rdx.i = or i1 %found.conflict12.i, %found.conflict.i - br i1 %conflict.rdx.i, label %for.body23.for.body23_crit_edge.i.preheader, label %vector.ph.i - -vector.ph.i: ; preds = %vector.memcheck.i - %23 = load float, float* %arrayidx.i, align 4, !tbaa !12, !alias.scope !20 - %24 = insertelement <8 x float> undef, float %23, i32 0 - %25 = shufflevector <8 x float> %24, <8 x float> undef, <8 x i32> zeroinitializer - br label %vector.body.i - -vector.body.i: ; preds = %vector.body.i, %vector.ph.i - %index.next.i7 = phi i64 [ %index.next.i, %vector.body.i ], [ 0, %vector.ph.i ] - %offset.idx.i = or i64 %index.next.i7, 1 - %26 = mul nsw i64 %offset.idx.i, %11 - %27 = add nsw i64 %26, %12 - %28 = getelementptr inbounds float, float* %2, i64 %27 - %29 = bitcast float* %28 to <8 x float>* - %wide.load.i = load <8 x float>, <8 x float>* %29, align 4, !tbaa !12, !alias.scope !23 - %30 = getelementptr inbounds float, float* %28, i64 8 - %31 = bitcast float* %30 to <8 x float>* - %wide.load13.i = load <8 x float>, <8 x float>* %31, align 4, !tbaa !12, !alias.scope !23 - %32 = add nsw i64 %26, %17 - %33 = getelementptr inbounds float, float* %0, i64 %32 - %34 = bitcast float* %33 to <8 x float>* - %wide.load14.i = load <8 x float>, <8 x float>* %34, align 4, !tbaa !12, !alias.scope !25, !noalias !27 - %35 = getelementptr inbounds float, float* %33, i64 8 - %36 = bitcast float* %35 to <8 x float>* - %wide.load15.i = load <8 x float>, <8 x float>* %36, align 4, !tbaa !12, !alias.scope !25, !noalias !27 - %37 = fneg <8 x float> %wide.load.i - %38 = fneg <8 x float> %wide.load13.i - %39 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %37, <8 x float> %25, <8 x float> %wide.load14.i) #2 - %40 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %38, <8 x float> %25, <8 x float> %wide.load15.i) #2 - store <8 x float> %39, <8 x float>* %34, align 4, !tbaa !12, !alias.scope !25, !noalias !27, !llvm.access.group !16 - store <8 x float> %40, <8 x float>* %36, align 4, !tbaa !12, !alias.scope !25, !noalias !27, !llvm.access.group !16 - %index.next.i = add i64 %index.next.i7, 16 - %41 = icmp eq i64 %index.next.i, %n.vec.i - br i1 %41, label %middle.block.i, label %vector.body.i, !llvm.loop !28 - -middle.block.i: ; preds = %vector.body.i - br i1 %cmp.n.i, label %if.end.i, label %for.body23.for.body23_crit_edge.i.preheader - -for.body.i: ; preds = %for.body.i, %for.body.preheader.i - %indvars.iv.next80.i5 = phi i64 [ %indvars.iv.next80.i, %for.body.i ], [ 0, %for.body.preheader.i ] - %42 = phi float [ %48, %for.body.i ], [ 0.000000e+00, %for.body.preheader.i ] - %43 = mul nsw i64 %indvars.iv.next80.i5, %11 - %44 = add nsw i64 %43, %12 - %arrayidx10.i = getelementptr inbounds float, float* %2, i64 %44 - %45 = load float, float* %arrayidx10.i, align 4, !tbaa !12 - %46 = add nsw i64 %43, %17 - %arrayidx14.i = getelementptr inbounds float, float* %0, i64 %46 - %47 = load float, float* %arrayidx14.i, align 4, !tbaa !12 - %48 = tail call float @llvm.fmuladd.f32(float %45, float %47, float %42) #2 - store float %48, float* %arrayidx.i, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next80.i = add nuw nsw i64 %indvars.iv.next80.i5, 1 - %exitcond85.not.i = icmp eq i64 %indvars.iv.next80.i, %wide.trip.count84.i - br i1 %exitcond85.not.i, label %for.body23.preheader.i, label %for.body.i, !llvm.loop !30 - -for.body23.for.body23_crit_edge.i: ; preds = %for.body23.for.body23_crit_edge.i, %for.body23.for.body23_crit_edge.i.preheader - %indvars.iv.next.i9 = phi i64 [ %indvars.iv.next.i, %for.body23.for.body23_crit_edge.i ], [ %indvars.iv.next.i9.ph, %for.body23.for.body23_crit_edge.i.preheader ] - %.pre.i = load float, float* %arrayidx.i, align 4, !tbaa !12 - %49 = mul nsw i64 %indvars.iv.next.i9, %11 - %50 = add nsw i64 %49, %12 - %arrayidx27.i = getelementptr inbounds float, float* %2, i64 %50 - %51 = load float, float* %arrayidx27.i, align 4, !tbaa !12 - %52 = add nsw i64 %49, %17 - %arrayidx36.i = getelementptr inbounds float, float* %0, i64 %52 - %53 = load float, float* %arrayidx36.i, align 4, !tbaa !12 - %neg.i = fneg float %51 - %54 = tail call float @llvm.fmuladd.f32(float %neg.i, float %.pre.i, float %53) #2 - store float %54, float* %arrayidx36.i, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i = add nuw nsw i64 %indvars.iv.next.i9, 1 - %exitcond.not.i = icmp eq i64 %indvars.iv.next.i, %wide.trip.count84.i - br i1 %exitcond.not.i, label %if.end.i.loopexit, label %for.body23.for.body23_crit_edge.i, !llvm.loop !31 - -if.end.i.loopexit: ; preds = %for.body23.for.body23_crit_edge.i - br label %if.end.i - -if.end.i: ; preds = %if.end.i.loopexit, %middle.block.i, %for.body23.preheader.i, %if.then.i, %pregion_for_entry.entry.i - %55 = add nuw nsw i64 %_local_id_x.0, 1 - %exitcond.not = icmp eq i64 %55, 256 - br i1 %exitcond.not, label %gramschmidt_kernel3.exit, label %pregion_for_entry.entry.i, !llvm.loop !32 - -gramschmidt_kernel3.exit: ; preds = %if.end.i - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_gramschmidt_kernel3_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 5 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %mul.i.i.i = shl i64 %2, 8 - %add.i.i = add nsw i32 %20, 1 - %mul.i.i = mul nsw i32 %28, %20 - %cmp574.i.i = icmp sgt i32 %24, 0 - %29 = sext i32 %28 to i64 - %30 = sext i32 %20 to i64 - %wide.trip.count84.i.i = zext i32 %24 to i64 - %arrayidx2786.i.i = getelementptr inbounds float, float* %16, i64 %30 - %exitcond.not89.i.i = icmp eq i32 %24, 1 - %31 = add nsw i64 %wide.trip.count84.i.i, -1 - %min.iters.check.i.i = icmp ugt i64 %31, 15 - %ident.check.not.i.i = icmp eq i32 %28, 1 - %or.cond.i.i = and i1 %min.iters.check.i.i, %ident.check.not.i.i - %32 = add nsw i64 %30, 1 - %scevgep6.i.i = getelementptr float, float* %16, i64 %32 - %33 = add nsw i64 %wide.trip.count84.i.i, %30 - %scevgep8.i.i = getelementptr float, float* %16, i64 %33 - %n.vec.i.i = and i64 %31, -16 - %ind.end.i.i = or i64 %n.vec.i.i, 1 - %cmp.n.i.i = icmp eq i64 %31, %n.vec.i.i - br label %pregion_for_entry.entry.i.i - -pregion_for_entry.entry.i.i: ; preds = %if.end.i.i, %5 - %_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %73, %if.end.i.i ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %34 = trunc i64 %add1.i.i.i to i32 - %conv2.i.i = add i32 %add.i.i, %34 - %cmp.i.i = icmp slt i32 %conv2.i.i, %28 - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %add4.i.i = add nsw i32 %conv2.i.i, %mul.i.i - %idxprom.i.i = sext i32 %add4.i.i to i64 - %arrayidx.i.i = getelementptr float, float* %12, i64 %idxprom.i.i - store float 0.000000e+00, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !16 - br i1 %cmp574.i.i, label %for.body.preheader.i.i, label %if.end.i.i - -for.body.preheader.i.i: ; preds = %if.then.i.i - %35 = sext i32 %conv2.i.i to i64 - br label %for.body.i.i - -for.body23.preheader.i.i: ; preds = %for.body.i.i - %.lcssa = phi float [ %66, %for.body.i.i ] - %36 = load float, float* %arrayidx2786.i.i, align 4, !tbaa !12 - %arrayidx3687.i.i = getelementptr inbounds float, float* %8, i64 %35 - %37 = load float, float* %arrayidx3687.i.i, align 4, !tbaa !12 - %neg88.i.i = fneg float %36 - %38 = tail call float @llvm.fmuladd.f32(float %neg88.i.i, float %.lcssa, float %37) #2 - store float %38, float* %arrayidx3687.i.i, align 4, !tbaa !12, !llvm.access.group !16 - br i1 %exitcond.not89.i.i, label %if.end.i.i, label %for.body23.for.body23_crit_edge.preheader.i.i, !llvm.loop !18 - -for.body23.for.body23_crit_edge.preheader.i.i: ; preds = %for.body23.preheader.i.i - br i1 %or.cond.i.i, label %vector.memcheck.i.i, label %for.body23.for.body23_crit_edge.i.i.preheader - -for.body23.for.body23_crit_edge.i.i.preheader: ; preds = %middle.block.i.i, %vector.memcheck.i.i, %for.body23.for.body23_crit_edge.preheader.i.i - %indvars.iv.next.i.i9.ph = phi i64 [ 1, %for.body23.for.body23_crit_edge.preheader.i.i ], [ 1, %vector.memcheck.i.i ], [ %ind.end.i.i, %middle.block.i.i ] - br label %for.body23.for.body23_crit_edge.i.i - -vector.memcheck.i.i: ; preds = %for.body23.for.body23_crit_edge.preheader.i.i - %39 = add nsw i64 %35, 1 - %scevgep.i.i = getelementptr float, float* %8, i64 %39 - %scevgep1.i.i = bitcast float* %scevgep.i.i to i8* - %40 = add nsw i64 %35, %wide.trip.count84.i.i - %scevgep2.i.i = getelementptr float, float* %8, i64 %40 - %scevgep45.i.i = bitcast float* %arrayidx.i.i to i8* - %uglygep.i.i = getelementptr i8, i8* %scevgep45.i.i, i64 1 - %bound0.i.i = icmp ugt i8* %uglygep.i.i, %scevgep1.i.i - %bound1.i.i = icmp ult float* %arrayidx.i.i, %scevgep2.i.i - %found.conflict.i.i = and i1 %bound1.i.i, %bound0.i.i - %bound010.i.i = icmp ult float* %scevgep.i.i, %scevgep8.i.i - %bound111.i.i = icmp ult float* %scevgep6.i.i, %scevgep2.i.i - %found.conflict12.i.i = and i1 %bound010.i.i, %bound111.i.i - %conflict.rdx.i.i = or i1 %found.conflict12.i.i, %found.conflict.i.i - br i1 %conflict.rdx.i.i, label %for.body23.for.body23_crit_edge.i.i.preheader, label %vector.ph.i.i - -vector.ph.i.i: ; preds = %vector.memcheck.i.i - %41 = load float, float* %arrayidx.i.i, align 4, !tbaa !12, !alias.scope !34 - %42 = insertelement <8 x float> undef, float %41, i32 0 - %43 = shufflevector <8 x float> %42, <8 x float> undef, <8 x i32> zeroinitializer - br label %vector.body.i.i - -vector.body.i.i: ; preds = %vector.body.i.i, %vector.ph.i.i - %index.next.i.i7 = phi i64 [ %index.next.i.i, %vector.body.i.i ], [ 0, %vector.ph.i.i ] - %offset.idx.i.i = or i64 %index.next.i.i7, 1 - %44 = mul nsw i64 %offset.idx.i.i, %29 - %45 = add nsw i64 %44, %30 - %46 = getelementptr inbounds float, float* %16, i64 %45 - %47 = bitcast float* %46 to <8 x float>* - %wide.load.i.i = load <8 x float>, <8 x float>* %47, align 4, !tbaa !12, !alias.scope !37 - %48 = getelementptr inbounds float, float* %46, i64 8 - %49 = bitcast float* %48 to <8 x float>* - %wide.load13.i.i = load <8 x float>, <8 x float>* %49, align 4, !tbaa !12, !alias.scope !37 - %50 = add nsw i64 %44, %35 - %51 = getelementptr inbounds float, float* %8, i64 %50 - %52 = bitcast float* %51 to <8 x float>* - %wide.load14.i.i = load <8 x float>, <8 x float>* %52, align 4, !tbaa !12, !alias.scope !39, !noalias !41 - %53 = getelementptr inbounds float, float* %51, i64 8 - %54 = bitcast float* %53 to <8 x float>* - %wide.load15.i.i = load <8 x float>, <8 x float>* %54, align 4, !tbaa !12, !alias.scope !39, !noalias !41 - %55 = fneg <8 x float> %wide.load.i.i - %56 = fneg <8 x float> %wide.load13.i.i - %57 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %55, <8 x float> %43, <8 x float> %wide.load14.i.i) #2 - %58 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %56, <8 x float> %43, <8 x float> %wide.load15.i.i) #2 - store <8 x float> %57, <8 x float>* %52, align 4, !tbaa !12, !alias.scope !39, !noalias !41, !llvm.access.group !16 - store <8 x float> %58, <8 x float>* %54, align 4, !tbaa !12, !alias.scope !39, !noalias !41, !llvm.access.group !16 - %index.next.i.i = add i64 %index.next.i.i7, 16 - %59 = icmp eq i64 %index.next.i.i, %n.vec.i.i - br i1 %59, label %middle.block.i.i, label %vector.body.i.i, !llvm.loop !28 - -middle.block.i.i: ; preds = %vector.body.i.i - br i1 %cmp.n.i.i, label %if.end.i.i, label %for.body23.for.body23_crit_edge.i.i.preheader - -for.body.i.i: ; preds = %for.body.i.i, %for.body.preheader.i.i - %indvars.iv.next80.i.i5 = phi i64 [ %indvars.iv.next80.i.i, %for.body.i.i ], [ 0, %for.body.preheader.i.i ] - %60 = phi float [ %66, %for.body.i.i ], [ 0.000000e+00, %for.body.preheader.i.i ] - %61 = mul nsw i64 %indvars.iv.next80.i.i5, %29 - %62 = add nsw i64 %61, %30 - %arrayidx10.i.i = getelementptr inbounds float, float* %16, i64 %62 - %63 = load float, float* %arrayidx10.i.i, align 4, !tbaa !12 - %64 = add nsw i64 %61, %35 - %arrayidx14.i.i = getelementptr inbounds float, float* %8, i64 %64 - %65 = load float, float* %arrayidx14.i.i, align 4, !tbaa !12 - %66 = tail call float @llvm.fmuladd.f32(float %63, float %65, float %60) #2 - store float %66, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next80.i.i = add nuw nsw i64 %indvars.iv.next80.i.i5, 1 - %exitcond85.not.i.i = icmp eq i64 %indvars.iv.next80.i.i, %wide.trip.count84.i.i - br i1 %exitcond85.not.i.i, label %for.body23.preheader.i.i, label %for.body.i.i, !llvm.loop !30 - -for.body23.for.body23_crit_edge.i.i: ; preds = %for.body23.for.body23_crit_edge.i.i, %for.body23.for.body23_crit_edge.i.i.preheader - %indvars.iv.next.i.i9 = phi i64 [ %indvars.iv.next.i.i, %for.body23.for.body23_crit_edge.i.i ], [ %indvars.iv.next.i.i9.ph, %for.body23.for.body23_crit_edge.i.i.preheader ] - %.pre.i.i = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %67 = mul nsw i64 %indvars.iv.next.i.i9, %29 - %68 = add nsw i64 %67, %30 - %arrayidx27.i.i = getelementptr inbounds float, float* %16, i64 %68 - %69 = load float, float* %arrayidx27.i.i, align 4, !tbaa !12 - %70 = add nsw i64 %67, %35 - %arrayidx36.i.i = getelementptr inbounds float, float* %8, i64 %70 - %71 = load float, float* %arrayidx36.i.i, align 4, !tbaa !12 - %neg.i.i = fneg float %69 - %72 = tail call float @llvm.fmuladd.f32(float %neg.i.i, float %.pre.i.i, float %71) #2 - store float %72, float* %arrayidx36.i.i, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i = add nuw nsw i64 %indvars.iv.next.i.i9, 1 - %exitcond.not.i.i = icmp eq i64 %indvars.iv.next.i.i, %wide.trip.count84.i.i - br i1 %exitcond.not.i.i, label %if.end.i.i.loopexit, label %for.body23.for.body23_crit_edge.i.i, !llvm.loop !31 - -if.end.i.i.loopexit: ; preds = %for.body23.for.body23_crit_edge.i.i - br label %if.end.i.i - -if.end.i.i: ; preds = %if.end.i.i.loopexit, %middle.block.i.i, %for.body23.preheader.i.i, %if.then.i.i, %pregion_for_entry.entry.i.i - %73 = add nuw nsw i64 %_local_id_x.i.0, 1 - %exitcond.not = icmp eq i64 %73, 256 - br i1 %exitcond.not, label %_pocl_kernel_gramschmidt_kernel3.exit, label %pregion_for_entry.entry.i.i, !llvm.loop !32 - -_pocl_kernel_gramschmidt_kernel3.exit: ; preds = %if.end.i.i - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_gramschmidt_kernel3_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to i32** - %20 = load i32*, i32** %19, align 8 - %21 = load i32, i32* %20, align 4 - %22 = getelementptr i8*, i8** %0, i64 5 - %23 = bitcast i8** %22 to i32** - %24 = load i32*, i32** %23, align 8 - %25 = load i32, i32* %24, align 4 - %mul.i.i.i = shl i64 %2, 8 - %add.i.i = add nsw i32 %17, 1 - %mul.i.i = mul nsw i32 %25, %17 - %cmp574.i.i = icmp sgt i32 %21, 0 - %26 = sext i32 %25 to i64 - %27 = sext i32 %17 to i64 - %wide.trip.count84.i.i = zext i32 %21 to i64 - %arrayidx2786.i.i = getelementptr inbounds float, float* %13, i64 %27 - %exitcond.not89.i.i = icmp eq i32 %21, 1 - %28 = add nsw i64 %wide.trip.count84.i.i, -1 - %min.iters.check.i.i = icmp ugt i64 %28, 15 - %ident.check.not.i.i = icmp eq i32 %25, 1 - %or.cond.i.i = and i1 %min.iters.check.i.i, %ident.check.not.i.i - %29 = add nsw i64 %27, 1 - %scevgep6.i.i = getelementptr float, float* %13, i64 %29 - %30 = add nsw i64 %wide.trip.count84.i.i, %27 - %scevgep8.i.i = getelementptr float, float* %13, i64 %30 - %n.vec.i.i = and i64 %28, -16 - %ind.end.i.i = or i64 %n.vec.i.i, 1 - %cmp.n.i.i = icmp eq i64 %28, %n.vec.i.i - br label %pregion_for_entry.entry.i.i - -pregion_for_entry.entry.i.i: ; preds = %if.end.i.i, %5 - %_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %70, %if.end.i.i ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %31 = trunc i64 %add1.i.i.i to i32 - %conv2.i.i = add i32 %add.i.i, %31 - %cmp.i.i = icmp slt i32 %conv2.i.i, %25 - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %add4.i.i = add nsw i32 %conv2.i.i, %mul.i.i - %idxprom.i.i = sext i32 %add4.i.i to i64 - %arrayidx.i.i = getelementptr float, float* %10, i64 %idxprom.i.i - store float 0.000000e+00, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !16 - br i1 %cmp574.i.i, label %for.body.preheader.i.i, label %if.end.i.i - -for.body.preheader.i.i: ; preds = %if.then.i.i - %32 = sext i32 %conv2.i.i to i64 - br label %for.body.i.i - -for.body23.preheader.i.i: ; preds = %for.body.i.i - %.lcssa = phi float [ %63, %for.body.i.i ] - %33 = load float, float* %arrayidx2786.i.i, align 4, !tbaa !12 - %arrayidx3687.i.i = getelementptr inbounds float, float* %7, i64 %32 - %34 = load float, float* %arrayidx3687.i.i, align 4, !tbaa !12 - %neg88.i.i = fneg float %33 - %35 = tail call float @llvm.fmuladd.f32(float %neg88.i.i, float %.lcssa, float %34) #2 - store float %35, float* %arrayidx3687.i.i, align 4, !tbaa !12, !llvm.access.group !16 - br i1 %exitcond.not89.i.i, label %if.end.i.i, label %for.body23.for.body23_crit_edge.preheader.i.i, !llvm.loop !18 - -for.body23.for.body23_crit_edge.preheader.i.i: ; preds = %for.body23.preheader.i.i - br i1 %or.cond.i.i, label %vector.memcheck.i.i, label %for.body23.for.body23_crit_edge.i.i.preheader - -for.body23.for.body23_crit_edge.i.i.preheader: ; preds = %middle.block.i.i, %vector.memcheck.i.i, %for.body23.for.body23_crit_edge.preheader.i.i - %indvars.iv.next.i.i9.ph = phi i64 [ 1, %for.body23.for.body23_crit_edge.preheader.i.i ], [ 1, %vector.memcheck.i.i ], [ %ind.end.i.i, %middle.block.i.i ] - br label %for.body23.for.body23_crit_edge.i.i - -vector.memcheck.i.i: ; preds = %for.body23.for.body23_crit_edge.preheader.i.i - %36 = add nsw i64 %32, 1 - %scevgep.i.i = getelementptr float, float* %7, i64 %36 - %scevgep1.i.i = bitcast float* %scevgep.i.i to i8* - %37 = add nsw i64 %32, %wide.trip.count84.i.i - %scevgep2.i.i = getelementptr float, float* %7, i64 %37 - %scevgep45.i.i = bitcast float* %arrayidx.i.i to i8* - %uglygep.i.i = getelementptr i8, i8* %scevgep45.i.i, i64 1 - %bound0.i.i = icmp ugt i8* %uglygep.i.i, %scevgep1.i.i - %bound1.i.i = icmp ult float* %arrayidx.i.i, %scevgep2.i.i - %found.conflict.i.i = and i1 %bound1.i.i, %bound0.i.i - %bound010.i.i = icmp ult float* %scevgep.i.i, %scevgep8.i.i - %bound111.i.i = icmp ult float* %scevgep6.i.i, %scevgep2.i.i - %found.conflict12.i.i = and i1 %bound010.i.i, %bound111.i.i - %conflict.rdx.i.i = or i1 %found.conflict12.i.i, %found.conflict.i.i - br i1 %conflict.rdx.i.i, label %for.body23.for.body23_crit_edge.i.i.preheader, label %vector.ph.i.i - -vector.ph.i.i: ; preds = %vector.memcheck.i.i - %38 = load float, float* %arrayidx.i.i, align 4, !tbaa !12, !alias.scope !42 - %39 = insertelement <8 x float> undef, float %38, i32 0 - %40 = shufflevector <8 x float> %39, <8 x float> undef, <8 x i32> zeroinitializer - br label %vector.body.i.i - -vector.body.i.i: ; preds = %vector.body.i.i, %vector.ph.i.i - %index.next.i.i7 = phi i64 [ %index.next.i.i, %vector.body.i.i ], [ 0, %vector.ph.i.i ] - %offset.idx.i.i = or i64 %index.next.i.i7, 1 - %41 = mul nsw i64 %offset.idx.i.i, %26 - %42 = add nsw i64 %41, %27 - %43 = getelementptr inbounds float, float* %13, i64 %42 - %44 = bitcast float* %43 to <8 x float>* - %wide.load.i.i = load <8 x float>, <8 x float>* %44, align 4, !tbaa !12, !alias.scope !45 - %45 = getelementptr inbounds float, float* %43, i64 8 - %46 = bitcast float* %45 to <8 x float>* - %wide.load13.i.i = load <8 x float>, <8 x float>* %46, align 4, !tbaa !12, !alias.scope !45 - %47 = add nsw i64 %41, %32 - %48 = getelementptr inbounds float, float* %7, i64 %47 - %49 = bitcast float* %48 to <8 x float>* - %wide.load14.i.i = load <8 x float>, <8 x float>* %49, align 4, !tbaa !12, !alias.scope !47, !noalias !49 - %50 = getelementptr inbounds float, float* %48, i64 8 - %51 = bitcast float* %50 to <8 x float>* - %wide.load15.i.i = load <8 x float>, <8 x float>* %51, align 4, !tbaa !12, !alias.scope !47, !noalias !49 - %52 = fneg <8 x float> %wide.load.i.i - %53 = fneg <8 x float> %wide.load13.i.i - %54 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %52, <8 x float> %40, <8 x float> %wide.load14.i.i) #2 - %55 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %53, <8 x float> %40, <8 x float> %wide.load15.i.i) #2 - store <8 x float> %54, <8 x float>* %49, align 4, !tbaa !12, !alias.scope !47, !noalias !49, !llvm.access.group !16 - store <8 x float> %55, <8 x float>* %51, align 4, !tbaa !12, !alias.scope !47, !noalias !49, !llvm.access.group !16 - %index.next.i.i = add i64 %index.next.i.i7, 16 - %56 = icmp eq i64 %index.next.i.i, %n.vec.i.i - br i1 %56, label %middle.block.i.i, label %vector.body.i.i, !llvm.loop !28 - -middle.block.i.i: ; preds = %vector.body.i.i - br i1 %cmp.n.i.i, label %if.end.i.i, label %for.body23.for.body23_crit_edge.i.i.preheader - -for.body.i.i: ; preds = %for.body.i.i, %for.body.preheader.i.i - %indvars.iv.next80.i.i5 = phi i64 [ %indvars.iv.next80.i.i, %for.body.i.i ], [ 0, %for.body.preheader.i.i ] - %57 = phi float [ %63, %for.body.i.i ], [ 0.000000e+00, %for.body.preheader.i.i ] - %58 = mul nsw i64 %indvars.iv.next80.i.i5, %26 - %59 = add nsw i64 %58, %27 - %arrayidx10.i.i = getelementptr inbounds float, float* %13, i64 %59 - %60 = load float, float* %arrayidx10.i.i, align 4, !tbaa !12 - %61 = add nsw i64 %58, %32 - %arrayidx14.i.i = getelementptr inbounds float, float* %7, i64 %61 - %62 = load float, float* %arrayidx14.i.i, align 4, !tbaa !12 - %63 = tail call float @llvm.fmuladd.f32(float %60, float %62, float %57) #2 - store float %63, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next80.i.i = add nuw nsw i64 %indvars.iv.next80.i.i5, 1 - %exitcond85.not.i.i = icmp eq i64 %indvars.iv.next80.i.i, %wide.trip.count84.i.i - br i1 %exitcond85.not.i.i, label %for.body23.preheader.i.i, label %for.body.i.i, !llvm.loop !30 - -for.body23.for.body23_crit_edge.i.i: ; preds = %for.body23.for.body23_crit_edge.i.i, %for.body23.for.body23_crit_edge.i.i.preheader - %indvars.iv.next.i.i9 = phi i64 [ %indvars.iv.next.i.i, %for.body23.for.body23_crit_edge.i.i ], [ %indvars.iv.next.i.i9.ph, %for.body23.for.body23_crit_edge.i.i.preheader ] - %.pre.i.i = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %64 = mul nsw i64 %indvars.iv.next.i.i9, %26 - %65 = add nsw i64 %64, %27 - %arrayidx27.i.i = getelementptr inbounds float, float* %13, i64 %65 - %66 = load float, float* %arrayidx27.i.i, align 4, !tbaa !12 - %67 = add nsw i64 %64, %32 - %arrayidx36.i.i = getelementptr inbounds float, float* %7, i64 %67 - %68 = load float, float* %arrayidx36.i.i, align 4, !tbaa !12 - %neg.i.i = fneg float %66 - %69 = tail call float @llvm.fmuladd.f32(float %neg.i.i, float %.pre.i.i, float %68) #2 - store float %69, float* %arrayidx36.i.i, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i = add nuw nsw i64 %indvars.iv.next.i.i9, 1 - %exitcond.not.i.i = icmp eq i64 %indvars.iv.next.i.i, %wide.trip.count84.i.i - br i1 %exitcond.not.i.i, label %if.end.i.i.loopexit, label %for.body23.for.body23_crit_edge.i.i, !llvm.loop !31 - -if.end.i.i.loopexit: ; preds = %for.body23.for.body23_crit_edge.i.i - br label %if.end.i.i - -if.end.i.i: ; preds = %if.end.i.i.loopexit, %middle.block.i.i, %for.body23.preheader.i.i, %if.then.i.i, %pregion_for_entry.entry.i.i - %70 = add nuw nsw i64 %_local_id_x.i.0, 1 - %exitcond.not = icmp eq i64 %70, 256 - br i1 %exitcond.not, label %_pocl_kernel_gramschmidt_kernel3.exit, label %pregion_for_entry.entry.i.i, !llvm.loop !32 - -_pocl_kernel_gramschmidt_kernel3.exit: ; preds = %if.end.i.i - ret void -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !""} -!10 = !{!"a", !"r", !"q", !"k", !"ni", !"nj"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = !{!21} -!21 = distinct !{!21, !22} -!22 = distinct !{!22, !"LVerDomain"} -!23 = !{!24} -!24 = distinct !{!24, !22} -!25 = !{!26} -!26 = distinct !{!26, !22} -!27 = !{!21, !24} -!28 = distinct !{!28, !19, !29} -!29 = !{!"llvm.loop.isvectorized", i32 1} -!30 = distinct !{!30, !19} -!31 = distinct !{!31, !19, !29} -!32 = distinct !{!32, !33} -!33 = !{!"llvm.loop.parallel_accesses", !17} -!34 = !{!35} -!35 = distinct !{!35, !36} -!36 = distinct !{!36, !"LVerDomain"} -!37 = !{!38} -!38 = distinct !{!38, !36} -!39 = !{!40} -!40 = distinct !{!40, !36} -!41 = !{!35, !38} -!42 = !{!43} -!43 = distinct !{!43, !44} -!44 = distinct !{!44, !"LVerDomain"} -!45 = !{!46} -!46 = distinct !{!46, !44} -!47 = !{!48} -!48 = distinct !{!48, !44} -!49 = !{!43, !46} diff --git a/pocl_irs/jacobi1D_kernel1.ll b/pocl_irs/jacobi1D_kernel1.ll deleted file mode 100644 index b145647..0000000 --- a/pocl_irs/jacobi1D_kernel1.ll +++ /dev/null @@ -1,272 +0,0 @@ -; ModuleID = './BM/NHBDLHALCILAIHDELFFBPNKBMKDJOKFCBHJHI/runJacobi1D_kernel1/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_runJacobi1D_kernel1(float* nocapture readonly %0, float* nocapture %1, i32 %2, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %3, i64 %4, i64 %5, i64 %6) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %4, 8 - %sub.i = add nsw i32 %2, -1 - br label %pregion_for_entry.entry.i - -pregion_for_entry.entry.i: ; preds = %if.end.r_exit.i.1, %7 - %_local_id_x.0 = phi i64 [ 0, %7 ], [ %17, %if.end.r_exit.i.1 ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %conv.i = trunc i64 %add1.i.i to i32 - %cmp.i = icmp sgt i32 %conv.i, 0 - %cmp2.i = icmp sgt i32 %sub.i, %conv.i - %or.cond.i = and i1 %cmp.i, %cmp2.i - br i1 %or.cond.i, label %if.then.i, label %if.end.r_exit.i - -if.then.i: ; preds = %pregion_for_entry.entry.i - %sub4.i = add i64 %add1.i.i, 4294967295 - %8 = and i64 %sub4.i, 4294967295 - %arrayidx.i = getelementptr inbounds float, float* %0, i64 %8 - %9 = load float, float* %arrayidx.i, align 4, !tbaa !12 - %sext.i = shl i64 %add1.i.i, 32 - %idxprom5.i = ashr exact i64 %sext.i, 32 - %arrayidx6.i = getelementptr inbounds float, float* %0, i64 %idxprom5.i - %10 = load float, float* %arrayidx6.i, align 4, !tbaa !12 - %add.i = fadd float %9, %10 - %sext21.i = ashr exact i64 %sext.i, 32 - %idxprom8.i = or i64 %sext21.i, 1 - %arrayidx9.i = getelementptr inbounds float, float* %0, i64 %idxprom8.i - %11 = load float, float* %arrayidx9.i, align 4, !tbaa !12 - %add10.i = fadd float %add.i, %11 - %mul.i = fmul float %add10.i, 0x3FD5554760000000 - %arrayidx12.i = getelementptr inbounds float, float* %1, i64 %idxprom5.i - store float %mul.i, float* %arrayidx12.i, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i - -if.end.r_exit.i: ; preds = %if.then.i, %pregion_for_entry.entry.i - %12 = or i64 %_local_id_x.0, 1 - %add1.i.i.1 = add nuw nsw i64 %12, %mul.i.i - %conv.i.1 = trunc i64 %add1.i.i.1 to i32 - %cmp.i.1 = icmp sgt i32 %conv.i.1, 0 - %cmp2.i.1 = icmp sgt i32 %sub.i, %conv.i.1 - %or.cond.i.1 = and i1 %cmp.i.1, %cmp2.i.1 - br i1 %or.cond.i.1, label %if.then.i.1, label %if.end.r_exit.i.1 - -runJacobi1D_kernel1.exit: ; preds = %if.end.r_exit.i.1 - ret void - -if.then.i.1: ; preds = %if.end.r_exit.i - %sub4.i.1 = add i64 %add1.i.i.1, 4294967295 - %13 = and i64 %sub4.i.1, 4294967294 - %arrayidx.i.1 = getelementptr inbounds float, float* %0, i64 %13 - %14 = load float, float* %arrayidx.i.1, align 4, !tbaa !12 - %sext.i.1 = shl i64 %add1.i.i.1, 32 - %idxprom5.i.1 = ashr exact i64 %sext.i.1, 32 - %arrayidx6.i.1 = getelementptr inbounds float, float* %0, i64 %idxprom5.i.1 - %15 = load float, float* %arrayidx6.i.1, align 4, !tbaa !12 - %add.i.1 = fadd float %14, %15 - %sext21.i.1 = add i64 %sext.i.1, 4294967296 - %idxprom8.i.1 = ashr exact i64 %sext21.i.1, 32 - %arrayidx9.i.1 = getelementptr inbounds float, float* %0, i64 %idxprom8.i.1 - %16 = load float, float* %arrayidx9.i.1, align 4, !tbaa !12 - %add10.i.1 = fadd float %add.i.1, %16 - %mul.i.1 = fmul float %add10.i.1, 0x3FD5554760000000 - %arrayidx12.i.1 = getelementptr inbounds float, float* %1, i64 %idxprom5.i.1 - store float %mul.i.1, float* %arrayidx12.i.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.1 - -if.end.r_exit.i.1: ; preds = %if.then.i.1, %if.end.r_exit.i - %17 = add nuw nsw i64 %_local_id_x.0, 2 - %exitcond.not.1 = icmp eq i64 %17, 256 - br i1 %exitcond.not.1, label %runJacobi1D_kernel1.exit, label %pregion_for_entry.entry.i, !llvm.loop !18 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_runJacobi1D_kernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = add nsw i32 %16, -1 - br label %pregion_for_entry.entry.i.i - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %5 - %_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %26, %if.end.r_exit.i.i.1 ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %conv.i.i, 0 - %cmp2.i.i = icmp sgt i32 %sub.i.i, %conv.i.i - %or.cond.i.i = and i1 %cmp.i.i, %cmp2.i.i - br i1 %or.cond.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %sub4.i.i = add i64 %add1.i.i.i, 4294967295 - %17 = and i64 %sub4.i.i, 4294967295 - %arrayidx.i.i = getelementptr inbounds float, float* %8, i64 %17 - %18 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %sext.i.i = shl i64 %add1.i.i.i, 32 - %idxprom5.i.i = ashr exact i64 %sext.i.i, 32 - %arrayidx6.i.i = getelementptr inbounds float, float* %8, i64 %idxprom5.i.i - %19 = load float, float* %arrayidx6.i.i, align 4, !tbaa !12 - %add.i.i = fadd float %18, %19 - %sext21.i.i = ashr exact i64 %sext.i.i, 32 - %idxprom8.i.i = or i64 %sext21.i.i, 1 - %arrayidx9.i.i = getelementptr inbounds float, float* %8, i64 %idxprom8.i.i - %20 = load float, float* %arrayidx9.i.i, align 4, !tbaa !12 - %add10.i.i = fadd float %add.i.i, %20 - %mul.i.i = fmul float %add10.i.i, 0x3FD5554760000000 - %arrayidx12.i.i = getelementptr inbounds float, float* %12, i64 %idxprom5.i.i - store float %mul.i.i, float* %arrayidx12.i.i, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %21 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %21, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %conv.i.i.1, 0 - %cmp2.i.i.1 = icmp sgt i32 %sub.i.i, %conv.i.i.1 - %or.cond.i.i.1 = and i1 %cmp.i.i.1, %cmp2.i.i.1 - br i1 %or.cond.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_runJacobi1D_kernel1.exit: ; preds = %if.end.r_exit.i.i.1 - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %sub4.i.i.1 = add i64 %add1.i.i.i.1, 4294967295 - %22 = and i64 %sub4.i.i.1, 4294967294 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %8, i64 %22 - %23 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %sext.i.i.1 = shl i64 %add1.i.i.i.1, 32 - %idxprom5.i.i.1 = ashr exact i64 %sext.i.i.1, 32 - %arrayidx6.i.i.1 = getelementptr inbounds float, float* %8, i64 %idxprom5.i.i.1 - %24 = load float, float* %arrayidx6.i.i.1, align 4, !tbaa !12 - %add.i.i.1 = fadd float %23, %24 - %sext21.i.i.1 = add i64 %sext.i.i.1, 4294967296 - %idxprom8.i.i.1 = ashr exact i64 %sext21.i.i.1, 32 - %arrayidx9.i.i.1 = getelementptr inbounds float, float* %8, i64 %idxprom8.i.i.1 - %25 = load float, float* %arrayidx9.i.i.1, align 4, !tbaa !12 - %add10.i.i.1 = fadd float %add.i.i.1, %25 - %mul.i.i.1 = fmul float %add10.i.i.1, 0x3FD5554760000000 - %arrayidx12.i.i.1 = getelementptr inbounds float, float* %12, i64 %idxprom5.i.i.1 - store float %mul.i.i.1, float* %arrayidx12.i.i.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %26 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond.not.1 = icmp eq i64 %26, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_runJacobi1D_kernel1.exit, label %pregion_for_entry.entry.i.i, !llvm.loop !18 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_runJacobi1D_kernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to i32** - %13 = load i32*, i32** %12, align 8 - %14 = load i32, i32* %13, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = add nsw i32 %14, -1 - br label %pregion_for_entry.entry.i.i - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %5 - %_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %24, %if.end.r_exit.i.i.1 ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %conv.i.i, 0 - %cmp2.i.i = icmp sgt i32 %sub.i.i, %conv.i.i - %or.cond.i.i = and i1 %cmp.i.i, %cmp2.i.i - br i1 %or.cond.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %sub4.i.i = add i64 %add1.i.i.i, 4294967295 - %15 = and i64 %sub4.i.i, 4294967295 - %arrayidx.i.i = getelementptr inbounds float, float* %7, i64 %15 - %16 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %sext.i.i = shl i64 %add1.i.i.i, 32 - %idxprom5.i.i = ashr exact i64 %sext.i.i, 32 - %arrayidx6.i.i = getelementptr inbounds float, float* %7, i64 %idxprom5.i.i - %17 = load float, float* %arrayidx6.i.i, align 4, !tbaa !12 - %add.i.i = fadd float %16, %17 - %sext21.i.i = ashr exact i64 %sext.i.i, 32 - %idxprom8.i.i = or i64 %sext21.i.i, 1 - %arrayidx9.i.i = getelementptr inbounds float, float* %7, i64 %idxprom8.i.i - %18 = load float, float* %arrayidx9.i.i, align 4, !tbaa !12 - %add10.i.i = fadd float %add.i.i, %18 - %mul.i.i = fmul float %add10.i.i, 0x3FD5554760000000 - %arrayidx12.i.i = getelementptr inbounds float, float* %10, i64 %idxprom5.i.i - store float %mul.i.i, float* %arrayidx12.i.i, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %19 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %19, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %conv.i.i.1, 0 - %cmp2.i.i.1 = icmp sgt i32 %sub.i.i, %conv.i.i.1 - %or.cond.i.i.1 = and i1 %cmp.i.i.1, %cmp2.i.i.1 - br i1 %or.cond.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_runJacobi1D_kernel1.exit: ; preds = %if.end.r_exit.i.i.1 - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %sub4.i.i.1 = add i64 %add1.i.i.i.1, 4294967295 - %20 = and i64 %sub4.i.i.1, 4294967294 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %7, i64 %20 - %21 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %sext.i.i.1 = shl i64 %add1.i.i.i.1, 32 - %idxprom5.i.i.1 = ashr exact i64 %sext.i.i.1, 32 - %arrayidx6.i.i.1 = getelementptr inbounds float, float* %7, i64 %idxprom5.i.i.1 - %22 = load float, float* %arrayidx6.i.i.1, align 4, !tbaa !12 - %add.i.i.1 = fadd float %21, %22 - %sext21.i.i.1 = add i64 %sext.i.i.1, 4294967296 - %idxprom8.i.i.1 = ashr exact i64 %sext21.i.i.1, 32 - %arrayidx9.i.i.1 = getelementptr inbounds float, float* %7, i64 %idxprom8.i.i.1 - %23 = load float, float* %arrayidx9.i.i.1, align 4, !tbaa !12 - %add10.i.i.1 = fadd float %add.i.i.1, %23 - %mul.i.i.1 = fmul float %add10.i.i.1, 0x3FD5554760000000 - %arrayidx12.i.i.1 = getelementptr inbounds float, float* %10, i64 %idxprom5.i.i.1 - store float %mul.i.i.1, float* %arrayidx12.i.i.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %24 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond.not.1 = icmp eq i64 %24, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_runJacobi1D_kernel1.exit, label %pregion_for_entry.entry.i.i, !llvm.loop !18 -} - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nofree norecurse nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"float*", !"float*", !"int"} -!9 = !{!"", !"", !""} -!10 = !{!"A", !"B", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/jacobi1D_kernel2.ll b/pocl_irs/jacobi1D_kernel2.ll deleted file mode 100644 index 1d93fab..0000000 --- a/pocl_irs/jacobi1D_kernel2.ll +++ /dev/null @@ -1,652 +0,0 @@ -; ModuleID = './BM/NHBDLHALCILAIHDELFFBPNKBMKDJOKFCBHJHI/runJacobi1D_kernel2/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_runJacobi1D_kernel2(float* nocapture %0, float* nocapture readonly %1, i32 %2, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %3, i64 %4, i64 %5, i64 %6) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { -vector.memcheck: - %mul.i.i = shl i64 %4, 8 - %sub.i = add nsw i32 %2, -1 - %7 = trunc i64 %4 to i32 - %8 = shl i32 %7, 8 - %9 = sext i32 %8 to i64 - %scevgep = getelementptr float, float* %0, i64 %9 - %10 = add nsw i64 %9, 256 - %scevgep2 = getelementptr float, float* %0, i64 %10 - %scevgep4 = getelementptr float, float* %1, i64 %9 - %scevgep6 = getelementptr float, float* %1, i64 %10 - %bound0 = icmp ult float* %scevgep, %scevgep6 - %bound1 = icmp ult float* %scevgep4, %scevgep2 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.preheader, label %vector.ph - -pregion_for_entry.entry.i.preheader: ; preds = %vector.memcheck - br label %pregion_for_entry.entry.i - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert8 = insertelement <8 x i32> undef, i32 %sub.i, i32 0 - %broadcast.splat9 = shufflevector <8 x i32> %broadcast.splatinsert8, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.3, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.3, %vector.body ] - %11 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %12 = trunc <8 x i64> %11 to <8 x i32> - %13 = icmp sgt <8 x i32> %12, zeroinitializer - %14 = icmp sgt <8 x i32> %broadcast.splat9, %12 - %15 = and <8 x i1> %13, %14 - %16 = extractelement <8 x i64> %11, i32 0 - %17 = shl i64 %16, 32 - %18 = ashr exact i64 %17, 32 - %19 = getelementptr inbounds float, float* %1, i64 %18 - %20 = bitcast float* %19 to <8 x i32>* - %wide.masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %20, i32 4, <8 x i1> %15, <8 x i32> undef), !tbaa !12, !alias.scope !16 - %21 = getelementptr inbounds float, float* %0, i64 %18 - %22 = bitcast float* %21 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load, <8 x i32>* %22, i32 4, <8 x i1> %15), !tbaa !12, !alias.scope !19, !noalias !16, !llvm.access.group !21 - %vec.ind.next = add <8 x i64> %vec.ind, - %23 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %24 = trunc <8 x i64> %23 to <8 x i32> - %25 = icmp sgt <8 x i32> %24, zeroinitializer - %26 = icmp sgt <8 x i32> %broadcast.splat9, %24 - %27 = and <8 x i1> %25, %26 - %28 = extractelement <8 x i64> %23, i32 0 - %29 = shl i64 %28, 32 - %30 = ashr exact i64 %29, 32 - %31 = getelementptr inbounds float, float* %1, i64 %30 - %32 = bitcast float* %31 to <8 x i32>* - %wide.masked.load.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %32, i32 4, <8 x i1> %27, <8 x i32> undef), !tbaa !12, !alias.scope !16 - %33 = getelementptr inbounds float, float* %0, i64 %30 - %34 = bitcast float* %33 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.1, <8 x i32>* %34, i32 4, <8 x i1> %27), !tbaa !12, !alias.scope !19, !noalias !16, !llvm.access.group !21 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %35 = add nuw nsw <8 x i64> %vec.ind.next.1, %broadcast.splat - %36 = trunc <8 x i64> %35 to <8 x i32> - %37 = icmp sgt <8 x i32> %36, zeroinitializer - %38 = icmp sgt <8 x i32> %broadcast.splat9, %36 - %39 = and <8 x i1> %37, %38 - %40 = extractelement <8 x i64> %35, i32 0 - %41 = shl i64 %40, 32 - %42 = ashr exact i64 %41, 32 - %43 = getelementptr inbounds float, float* %1, i64 %42 - %44 = bitcast float* %43 to <8 x i32>* - %wide.masked.load.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %44, i32 4, <8 x i1> %39, <8 x i32> undef), !tbaa !12, !alias.scope !16 - %45 = getelementptr inbounds float, float* %0, i64 %42 - %46 = bitcast float* %45 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.2, <8 x i32>* %46, i32 4, <8 x i1> %39), !tbaa !12, !alias.scope !19, !noalias !16, !llvm.access.group !21 - %vec.ind.next.2 = add <8 x i64> %vec.ind, - %47 = add nuw nsw <8 x i64> %vec.ind.next.2, %broadcast.splat - %48 = trunc <8 x i64> %47 to <8 x i32> - %49 = icmp sgt <8 x i32> %48, zeroinitializer - %50 = icmp sgt <8 x i32> %broadcast.splat9, %48 - %51 = and <8 x i1> %49, %50 - %52 = extractelement <8 x i64> %47, i32 0 - %53 = shl i64 %52, 32 - %54 = ashr exact i64 %53, 32 - %55 = getelementptr inbounds float, float* %1, i64 %54 - %56 = bitcast float* %55 to <8 x i32>* - %wide.masked.load.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %56, i32 4, <8 x i1> %51, <8 x i32> undef), !tbaa !12, !alias.scope !16 - %57 = getelementptr inbounds float, float* %0, i64 %54 - %58 = bitcast float* %57 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.3, <8 x i32>* %58, i32 4, <8 x i1> %51), !tbaa !12, !alias.scope !19, !noalias !16, !llvm.access.group !21 - %index.next.3 = add nuw nsw i64 %index, 32 - %vec.ind.next.3 = add <8 x i64> %vec.ind, - %59 = icmp eq i64 %index.next.3, 256 - br i1 %59, label %runJacobi1D_kernel2.exit.loopexit11, label %vector.body, !llvm.loop !23 - -pregion_for_entry.entry.i: ; preds = %if.end.r_exit.i.3, %pregion_for_entry.entry.i.preheader - %_local_id_x.0 = phi i64 [ %75, %if.end.r_exit.i.3 ], [ 0, %pregion_for_entry.entry.i.preheader ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %conv.i = trunc i64 %add1.i.i to i32 - %cmp.i = icmp sgt i32 %conv.i, 0 - %cmp2.i = icmp sgt i32 %sub.i, %conv.i - %or.cond.i = and i1 %cmp.i, %cmp2.i - br i1 %or.cond.i, label %if.then.i, label %if.end.r_exit.i - -if.then.i: ; preds = %pregion_for_entry.entry.i - %sext.i = shl i64 %add1.i.i, 32 - %idxprom.i = ashr exact i64 %sext.i, 32 - %arrayidx.i = getelementptr inbounds float, float* %1, i64 %idxprom.i - %60 = bitcast float* %arrayidx.i to i32* - %61 = load i32, i32* %60, align 4, !tbaa !12 - %arrayidx5.i = getelementptr inbounds float, float* %0, i64 %idxprom.i - %62 = bitcast float* %arrayidx5.i to i32* - store i32 %61, i32* %62, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i - -if.end.r_exit.i: ; preds = %if.then.i, %pregion_for_entry.entry.i - %63 = or i64 %_local_id_x.0, 1 - %add1.i.i.1 = add nuw nsw i64 %63, %mul.i.i - %conv.i.1 = trunc i64 %add1.i.i.1 to i32 - %cmp.i.1 = icmp sgt i32 %conv.i.1, 0 - %cmp2.i.1 = icmp sgt i32 %sub.i, %conv.i.1 - %or.cond.i.1 = and i1 %cmp.i.1, %cmp2.i.1 - br i1 %or.cond.i.1, label %if.then.i.1, label %if.end.r_exit.i.1 - -runJacobi1D_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.3 - br label %runJacobi1D_kernel2.exit - -runJacobi1D_kernel2.exit.loopexit11: ; preds = %vector.body - br label %runJacobi1D_kernel2.exit - -runJacobi1D_kernel2.exit: ; preds = %runJacobi1D_kernel2.exit.loopexit11, %runJacobi1D_kernel2.exit.loopexit - ret void - -if.then.i.1: ; preds = %if.end.r_exit.i - %sext.i.1 = shl i64 %add1.i.i.1, 32 - %idxprom.i.1 = ashr exact i64 %sext.i.1, 32 - %arrayidx.i.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.1 - %64 = bitcast float* %arrayidx.i.1 to i32* - %65 = load i32, i32* %64, align 4, !tbaa !12 - %arrayidx5.i.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.1 - %66 = bitcast float* %arrayidx5.i.1 to i32* - store i32 %65, i32* %66, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.1 - -if.end.r_exit.i.1: ; preds = %if.then.i.1, %if.end.r_exit.i - %67 = or i64 %_local_id_x.0, 2 - %add1.i.i.2 = add nuw nsw i64 %67, %mul.i.i - %conv.i.2 = trunc i64 %add1.i.i.2 to i32 - %cmp.i.2 = icmp sgt i32 %conv.i.2, 0 - %cmp2.i.2 = icmp sgt i32 %sub.i, %conv.i.2 - %or.cond.i.2 = and i1 %cmp.i.2, %cmp2.i.2 - br i1 %or.cond.i.2, label %if.then.i.2, label %if.end.r_exit.i.2 - -if.then.i.2: ; preds = %if.end.r_exit.i.1 - %sext.i.2 = shl i64 %add1.i.i.2, 32 - %idxprom.i.2 = ashr exact i64 %sext.i.2, 32 - %arrayidx.i.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.2 - %68 = bitcast float* %arrayidx.i.2 to i32* - %69 = load i32, i32* %68, align 4, !tbaa !12 - %arrayidx5.i.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.2 - %70 = bitcast float* %arrayidx5.i.2 to i32* - store i32 %69, i32* %70, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.2 - -if.end.r_exit.i.2: ; preds = %if.then.i.2, %if.end.r_exit.i.1 - %71 = or i64 %_local_id_x.0, 3 - %add1.i.i.3 = add nuw nsw i64 %71, %mul.i.i - %conv.i.3 = trunc i64 %add1.i.i.3 to i32 - %cmp.i.3 = icmp sgt i32 %conv.i.3, 0 - %cmp2.i.3 = icmp sgt i32 %sub.i, %conv.i.3 - %or.cond.i.3 = and i1 %cmp.i.3, %cmp2.i.3 - br i1 %or.cond.i.3, label %if.then.i.3, label %if.end.r_exit.i.3 - -if.then.i.3: ; preds = %if.end.r_exit.i.2 - %sext.i.3 = shl i64 %add1.i.i.3, 32 - %idxprom.i.3 = ashr exact i64 %sext.i.3, 32 - %arrayidx.i.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.3 - %72 = bitcast float* %arrayidx.i.3 to i32* - %73 = load i32, i32* %72, align 4, !tbaa !12 - %arrayidx5.i.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.3 - %74 = bitcast float* %arrayidx5.i.3 to i32* - store i32 %73, i32* %74, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.3 - -if.end.r_exit.i.3: ; preds = %if.then.i.3, %if.end.r_exit.i.2 - %75 = add nuw nsw i64 %_local_id_x.0, 4 - %exitcond.not.3 = icmp eq i64 %75, 256 - br i1 %exitcond.not.3, label %runJacobi1D_kernel2.exit.loopexit, label %pregion_for_entry.entry.i, !llvm.loop !26 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_runJacobi1D_kernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -vector.memcheck: - %5 = bitcast i8** %0 to float*** - %6 = load float**, float*** %5, align 8 - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float*** - %10 = load float**, float*** %9, align 8 - %11 = load float*, float** %10, align 8 - %12 = getelementptr i8*, i8** %0, i64 2 - %13 = bitcast i8** %12 to i32** - %14 = load i32*, i32** %13, align 8 - %15 = load i32, i32* %14, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = add nsw i32 %15, -1 - %16 = trunc i64 %2 to i32 - %17 = shl i32 %16, 8 - %18 = sext i32 %17 to i64 - %scevgep = getelementptr float, float* %7, i64 %18 - %19 = add nsw i64 %18, 256 - %scevgep2 = getelementptr float, float* %7, i64 %19 - %scevgep4 = getelementptr float, float* %11, i64 %18 - %scevgep6 = getelementptr float, float* %11, i64 %19 - %bound0 = icmp ult float* %scevgep, %scevgep6 - %bound1 = icmp ult float* %scevgep4, %scevgep2 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck - br label %pregion_for_entry.entry.i.i - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert8 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat9 = shufflevector <8 x i32> %broadcast.splatinsert8, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.3, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.3, %vector.body ] - %20 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %21 = trunc <8 x i64> %20 to <8 x i32> - %22 = icmp sgt <8 x i32> %21, zeroinitializer - %23 = icmp sgt <8 x i32> %broadcast.splat9, %21 - %24 = and <8 x i1> %22, %23 - %25 = extractelement <8 x i64> %20, i32 0 - %26 = shl i64 %25, 32 - %27 = ashr exact i64 %26, 32 - %28 = getelementptr inbounds float, float* %11, i64 %27 - %29 = bitcast float* %28 to <8 x i32>* - %wide.masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %29, i32 4, <8 x i1> %24, <8 x i32> undef), !tbaa !12, !alias.scope !27 - %30 = getelementptr inbounds float, float* %7, i64 %27 - %31 = bitcast float* %30 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load, <8 x i32>* %31, i32 4, <8 x i1> %24), !tbaa !12, !alias.scope !30, !noalias !27, !llvm.access.group !21 - %vec.ind.next = add <8 x i64> %vec.ind, - %32 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %33 = trunc <8 x i64> %32 to <8 x i32> - %34 = icmp sgt <8 x i32> %33, zeroinitializer - %35 = icmp sgt <8 x i32> %broadcast.splat9, %33 - %36 = and <8 x i1> %34, %35 - %37 = extractelement <8 x i64> %32, i32 0 - %38 = shl i64 %37, 32 - %39 = ashr exact i64 %38, 32 - %40 = getelementptr inbounds float, float* %11, i64 %39 - %41 = bitcast float* %40 to <8 x i32>* - %wide.masked.load.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %41, i32 4, <8 x i1> %36, <8 x i32> undef), !tbaa !12, !alias.scope !27 - %42 = getelementptr inbounds float, float* %7, i64 %39 - %43 = bitcast float* %42 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.1, <8 x i32>* %43, i32 4, <8 x i1> %36), !tbaa !12, !alias.scope !30, !noalias !27, !llvm.access.group !21 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %44 = add nuw nsw <8 x i64> %vec.ind.next.1, %broadcast.splat - %45 = trunc <8 x i64> %44 to <8 x i32> - %46 = icmp sgt <8 x i32> %45, zeroinitializer - %47 = icmp sgt <8 x i32> %broadcast.splat9, %45 - %48 = and <8 x i1> %46, %47 - %49 = extractelement <8 x i64> %44, i32 0 - %50 = shl i64 %49, 32 - %51 = ashr exact i64 %50, 32 - %52 = getelementptr inbounds float, float* %11, i64 %51 - %53 = bitcast float* %52 to <8 x i32>* - %wide.masked.load.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %53, i32 4, <8 x i1> %48, <8 x i32> undef), !tbaa !12, !alias.scope !27 - %54 = getelementptr inbounds float, float* %7, i64 %51 - %55 = bitcast float* %54 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.2, <8 x i32>* %55, i32 4, <8 x i1> %48), !tbaa !12, !alias.scope !30, !noalias !27, !llvm.access.group !21 - %vec.ind.next.2 = add <8 x i64> %vec.ind, - %56 = add nuw nsw <8 x i64> %vec.ind.next.2, %broadcast.splat - %57 = trunc <8 x i64> %56 to <8 x i32> - %58 = icmp sgt <8 x i32> %57, zeroinitializer - %59 = icmp sgt <8 x i32> %broadcast.splat9, %57 - %60 = and <8 x i1> %58, %59 - %61 = extractelement <8 x i64> %56, i32 0 - %62 = shl i64 %61, 32 - %63 = ashr exact i64 %62, 32 - %64 = getelementptr inbounds float, float* %11, i64 %63 - %65 = bitcast float* %64 to <8 x i32>* - %wide.masked.load.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %65, i32 4, <8 x i1> %60, <8 x i32> undef), !tbaa !12, !alias.scope !27 - %66 = getelementptr inbounds float, float* %7, i64 %63 - %67 = bitcast float* %66 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.3, <8 x i32>* %67, i32 4, <8 x i1> %60), !tbaa !12, !alias.scope !30, !noalias !27, !llvm.access.group !21 - %index.next.3 = add nuw nsw i64 %index, 32 - %vec.ind.next.3 = add <8 x i64> %vec.ind, - %68 = icmp eq i64 %index.next.3, 256 - br i1 %68, label %_pocl_kernel_runJacobi1D_kernel2.exit.loopexit11, label %vector.body, !llvm.loop !32 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.3, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ %84, %if.end.r_exit.i.i.3 ], [ 0, %pregion_for_entry.entry.i.i.preheader ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %conv.i.i, 0 - %cmp2.i.i = icmp sgt i32 %sub.i.i, %conv.i.i - %or.cond.i.i = and i1 %cmp.i.i, %cmp2.i.i - br i1 %or.cond.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %sext.i.i = shl i64 %add1.i.i.i, 32 - %idxprom.i.i = ashr exact i64 %sext.i.i, 32 - %arrayidx.i.i = getelementptr inbounds float, float* %11, i64 %idxprom.i.i - %69 = bitcast float* %arrayidx.i.i to i32* - %70 = load i32, i32* %69, align 4, !tbaa !12 - %arrayidx5.i.i = getelementptr inbounds float, float* %7, i64 %idxprom.i.i - %71 = bitcast float* %arrayidx5.i.i to i32* - store i32 %70, i32* %71, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %72 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %72, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %conv.i.i.1, 0 - %cmp2.i.i.1 = icmp sgt i32 %sub.i.i, %conv.i.i.1 - %or.cond.i.i.1 = and i1 %cmp.i.i.1, %cmp2.i.i.1 - br i1 %or.cond.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_runJacobi1D_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.i.3 - br label %_pocl_kernel_runJacobi1D_kernel2.exit - -_pocl_kernel_runJacobi1D_kernel2.exit.loopexit11: ; preds = %vector.body - br label %_pocl_kernel_runJacobi1D_kernel2.exit - -_pocl_kernel_runJacobi1D_kernel2.exit: ; preds = %_pocl_kernel_runJacobi1D_kernel2.exit.loopexit11, %_pocl_kernel_runJacobi1D_kernel2.exit.loopexit - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %sext.i.i.1 = shl i64 %add1.i.i.i.1, 32 - %idxprom.i.i.1 = ashr exact i64 %sext.i.i.1, 32 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.1 - %73 = bitcast float* %arrayidx.i.i.1 to i32* - %74 = load i32, i32* %73, align 4, !tbaa !12 - %arrayidx5.i.i.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.1 - %75 = bitcast float* %arrayidx5.i.i.1 to i32* - store i32 %74, i32* %75, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %76 = or i64 %_local_id_x.i.0, 2 - %add1.i.i.i.2 = add nuw nsw i64 %76, %mul.i.i.i - %conv.i.i.2 = trunc i64 %add1.i.i.i.2 to i32 - %cmp.i.i.2 = icmp sgt i32 %conv.i.i.2, 0 - %cmp2.i.i.2 = icmp sgt i32 %sub.i.i, %conv.i.i.2 - %or.cond.i.i.2 = and i1 %cmp.i.i.2, %cmp2.i.i.2 - br i1 %or.cond.i.i.2, label %if.then.i.i.2, label %if.end.r_exit.i.i.2 - -if.then.i.i.2: ; preds = %if.end.r_exit.i.i.1 - %sext.i.i.2 = shl i64 %add1.i.i.i.2, 32 - %idxprom.i.i.2 = ashr exact i64 %sext.i.i.2, 32 - %arrayidx.i.i.2 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.2 - %77 = bitcast float* %arrayidx.i.i.2 to i32* - %78 = load i32, i32* %77, align 4, !tbaa !12 - %arrayidx5.i.i.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.2 - %79 = bitcast float* %arrayidx5.i.i.2 to i32* - store i32 %78, i32* %79, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.2 - -if.end.r_exit.i.i.2: ; preds = %if.then.i.i.2, %if.end.r_exit.i.i.1 - %80 = or i64 %_local_id_x.i.0, 3 - %add1.i.i.i.3 = add nuw nsw i64 %80, %mul.i.i.i - %conv.i.i.3 = trunc i64 %add1.i.i.i.3 to i32 - %cmp.i.i.3 = icmp sgt i32 %conv.i.i.3, 0 - %cmp2.i.i.3 = icmp sgt i32 %sub.i.i, %conv.i.i.3 - %or.cond.i.i.3 = and i1 %cmp.i.i.3, %cmp2.i.i.3 - br i1 %or.cond.i.i.3, label %if.then.i.i.3, label %if.end.r_exit.i.i.3 - -if.then.i.i.3: ; preds = %if.end.r_exit.i.i.2 - %sext.i.i.3 = shl i64 %add1.i.i.i.3, 32 - %idxprom.i.i.3 = ashr exact i64 %sext.i.i.3, 32 - %arrayidx.i.i.3 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.3 - %81 = bitcast float* %arrayidx.i.i.3 to i32* - %82 = load i32, i32* %81, align 4, !tbaa !12 - %arrayidx5.i.i.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.3 - %83 = bitcast float* %arrayidx5.i.i.3 to i32* - store i32 %82, i32* %83, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.3 - -if.end.r_exit.i.i.3: ; preds = %if.then.i.i.3, %if.end.r_exit.i.i.2 - %84 = add nuw nsw i64 %_local_id_x.i.0, 4 - %exitcond.not.3 = icmp eq i64 %84, 256 - br i1 %exitcond.not.3, label %_pocl_kernel_runJacobi1D_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i, !llvm.loop !33 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_runJacobi1D_kernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -vector.memcheck: - %5 = bitcast i8** %0 to float** - %6 = load float*, float** %5, align 8 - %7 = getelementptr i8*, i8** %0, i64 1 - %8 = bitcast i8** %7 to float** - %9 = load float*, float** %8, align 8 - %10 = getelementptr i8*, i8** %0, i64 2 - %11 = bitcast i8** %10 to i32** - %12 = load i32*, i32** %11, align 8 - %13 = load i32, i32* %12, align 4 - %mul.i.i.i = shl i64 %2, 8 - %sub.i.i = add nsw i32 %13, -1 - %14 = trunc i64 %2 to i32 - %15 = shl i32 %14, 8 - %16 = sext i32 %15 to i64 - %scevgep = getelementptr float, float* %6, i64 %16 - %17 = add nsw i64 %16, 256 - %scevgep2 = getelementptr float, float* %6, i64 %17 - %scevgep4 = getelementptr float, float* %9, i64 %16 - %scevgep6 = getelementptr float, float* %9, i64 %17 - %bound0 = icmp ult float* %scevgep, %scevgep6 - %bound1 = icmp ult float* %scevgep4, %scevgep2 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.preheader: ; preds = %vector.memcheck - br label %pregion_for_entry.entry.i.i - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert8 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat9 = shufflevector <8 x i32> %broadcast.splatinsert8, <8 x i32> undef, <8 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next.3, %vector.body ] - %vec.ind = phi <8 x i64> [ , %vector.ph ], [ %vec.ind.next.3, %vector.body ] - %18 = add nuw nsw <8 x i64> %vec.ind, %broadcast.splat - %19 = trunc <8 x i64> %18 to <8 x i32> - %20 = icmp sgt <8 x i32> %19, zeroinitializer - %21 = icmp sgt <8 x i32> %broadcast.splat9, %19 - %22 = and <8 x i1> %20, %21 - %23 = extractelement <8 x i64> %18, i32 0 - %24 = shl i64 %23, 32 - %25 = ashr exact i64 %24, 32 - %26 = getelementptr inbounds float, float* %9, i64 %25 - %27 = bitcast float* %26 to <8 x i32>* - %wide.masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %27, i32 4, <8 x i1> %22, <8 x i32> undef), !tbaa !12, !alias.scope !34 - %28 = getelementptr inbounds float, float* %6, i64 %25 - %29 = bitcast float* %28 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load, <8 x i32>* %29, i32 4, <8 x i1> %22), !tbaa !12, !alias.scope !37, !noalias !34, !llvm.access.group !21 - %vec.ind.next = add <8 x i64> %vec.ind, - %30 = add nuw nsw <8 x i64> %vec.ind.next, %broadcast.splat - %31 = trunc <8 x i64> %30 to <8 x i32> - %32 = icmp sgt <8 x i32> %31, zeroinitializer - %33 = icmp sgt <8 x i32> %broadcast.splat9, %31 - %34 = and <8 x i1> %32, %33 - %35 = extractelement <8 x i64> %30, i32 0 - %36 = shl i64 %35, 32 - %37 = ashr exact i64 %36, 32 - %38 = getelementptr inbounds float, float* %9, i64 %37 - %39 = bitcast float* %38 to <8 x i32>* - %wide.masked.load.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %39, i32 4, <8 x i1> %34, <8 x i32> undef), !tbaa !12, !alias.scope !34 - %40 = getelementptr inbounds float, float* %6, i64 %37 - %41 = bitcast float* %40 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.1, <8 x i32>* %41, i32 4, <8 x i1> %34), !tbaa !12, !alias.scope !37, !noalias !34, !llvm.access.group !21 - %vec.ind.next.1 = add <8 x i64> %vec.ind, - %42 = add nuw nsw <8 x i64> %vec.ind.next.1, %broadcast.splat - %43 = trunc <8 x i64> %42 to <8 x i32> - %44 = icmp sgt <8 x i32> %43, zeroinitializer - %45 = icmp sgt <8 x i32> %broadcast.splat9, %43 - %46 = and <8 x i1> %44, %45 - %47 = extractelement <8 x i64> %42, i32 0 - %48 = shl i64 %47, 32 - %49 = ashr exact i64 %48, 32 - %50 = getelementptr inbounds float, float* %9, i64 %49 - %51 = bitcast float* %50 to <8 x i32>* - %wide.masked.load.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %51, i32 4, <8 x i1> %46, <8 x i32> undef), !tbaa !12, !alias.scope !34 - %52 = getelementptr inbounds float, float* %6, i64 %49 - %53 = bitcast float* %52 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.2, <8 x i32>* %53, i32 4, <8 x i1> %46), !tbaa !12, !alias.scope !37, !noalias !34, !llvm.access.group !21 - %vec.ind.next.2 = add <8 x i64> %vec.ind, - %54 = add nuw nsw <8 x i64> %vec.ind.next.2, %broadcast.splat - %55 = trunc <8 x i64> %54 to <8 x i32> - %56 = icmp sgt <8 x i32> %55, zeroinitializer - %57 = icmp sgt <8 x i32> %broadcast.splat9, %55 - %58 = and <8 x i1> %56, %57 - %59 = extractelement <8 x i64> %54, i32 0 - %60 = shl i64 %59, 32 - %61 = ashr exact i64 %60, 32 - %62 = getelementptr inbounds float, float* %9, i64 %61 - %63 = bitcast float* %62 to <8 x i32>* - %wide.masked.load.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %63, i32 4, <8 x i1> %58, <8 x i32> undef), !tbaa !12, !alias.scope !34 - %64 = getelementptr inbounds float, float* %6, i64 %61 - %65 = bitcast float* %64 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.3, <8 x i32>* %65, i32 4, <8 x i1> %58), !tbaa !12, !alias.scope !37, !noalias !34, !llvm.access.group !21 - %index.next.3 = add nuw nsw i64 %index, 32 - %vec.ind.next.3 = add <8 x i64> %vec.ind, - %66 = icmp eq i64 %index.next.3, 256 - br i1 %66, label %_pocl_kernel_runJacobi1D_kernel2.exit.loopexit11, label %vector.body, !llvm.loop !39 - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.3, %pregion_for_entry.entry.i.i.preheader - %_local_id_x.i.0 = phi i64 [ %82, %if.end.r_exit.i.i.3 ], [ 0, %pregion_for_entry.entry.i.i.preheader ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %conv.i.i = trunc i64 %add1.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %conv.i.i, 0 - %cmp2.i.i = icmp sgt i32 %sub.i.i, %conv.i.i - %or.cond.i.i = and i1 %cmp.i.i, %cmp2.i.i - br i1 %or.cond.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %sext.i.i = shl i64 %add1.i.i.i, 32 - %idxprom.i.i = ashr exact i64 %sext.i.i, 32 - %arrayidx.i.i = getelementptr inbounds float, float* %9, i64 %idxprom.i.i - %67 = bitcast float* %arrayidx.i.i to i32* - %68 = load i32, i32* %67, align 4, !tbaa !12 - %arrayidx5.i.i = getelementptr inbounds float, float* %6, i64 %idxprom.i.i - %69 = bitcast float* %arrayidx5.i.i to i32* - store i32 %68, i32* %69, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %70 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %70, %mul.i.i.i - %conv.i.i.1 = trunc i64 %add1.i.i.i.1 to i32 - %cmp.i.i.1 = icmp sgt i32 %conv.i.i.1, 0 - %cmp2.i.i.1 = icmp sgt i32 %sub.i.i, %conv.i.i.1 - %or.cond.i.i.1 = and i1 %cmp.i.i.1, %cmp2.i.i.1 - br i1 %or.cond.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_runJacobi1D_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.i.3 - br label %_pocl_kernel_runJacobi1D_kernel2.exit - -_pocl_kernel_runJacobi1D_kernel2.exit.loopexit11: ; preds = %vector.body - br label %_pocl_kernel_runJacobi1D_kernel2.exit - -_pocl_kernel_runJacobi1D_kernel2.exit: ; preds = %_pocl_kernel_runJacobi1D_kernel2.exit.loopexit11, %_pocl_kernel_runJacobi1D_kernel2.exit.loopexit - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %sext.i.i.1 = shl i64 %add1.i.i.i.1, 32 - %idxprom.i.i.1 = ashr exact i64 %sext.i.i.1, 32 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.1 - %71 = bitcast float* %arrayidx.i.i.1 to i32* - %72 = load i32, i32* %71, align 4, !tbaa !12 - %arrayidx5.i.i.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.1 - %73 = bitcast float* %arrayidx5.i.i.1 to i32* - store i32 %72, i32* %73, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %74 = or i64 %_local_id_x.i.0, 2 - %add1.i.i.i.2 = add nuw nsw i64 %74, %mul.i.i.i - %conv.i.i.2 = trunc i64 %add1.i.i.i.2 to i32 - %cmp.i.i.2 = icmp sgt i32 %conv.i.i.2, 0 - %cmp2.i.i.2 = icmp sgt i32 %sub.i.i, %conv.i.i.2 - %or.cond.i.i.2 = and i1 %cmp.i.i.2, %cmp2.i.i.2 - br i1 %or.cond.i.i.2, label %if.then.i.i.2, label %if.end.r_exit.i.i.2 - -if.then.i.i.2: ; preds = %if.end.r_exit.i.i.1 - %sext.i.i.2 = shl i64 %add1.i.i.i.2, 32 - %idxprom.i.i.2 = ashr exact i64 %sext.i.i.2, 32 - %arrayidx.i.i.2 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.2 - %75 = bitcast float* %arrayidx.i.i.2 to i32* - %76 = load i32, i32* %75, align 4, !tbaa !12 - %arrayidx5.i.i.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.2 - %77 = bitcast float* %arrayidx5.i.i.2 to i32* - store i32 %76, i32* %77, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.2 - -if.end.r_exit.i.i.2: ; preds = %if.then.i.i.2, %if.end.r_exit.i.i.1 - %78 = or i64 %_local_id_x.i.0, 3 - %add1.i.i.i.3 = add nuw nsw i64 %78, %mul.i.i.i - %conv.i.i.3 = trunc i64 %add1.i.i.i.3 to i32 - %cmp.i.i.3 = icmp sgt i32 %conv.i.i.3, 0 - %cmp2.i.i.3 = icmp sgt i32 %sub.i.i, %conv.i.i.3 - %or.cond.i.i.3 = and i1 %cmp.i.i.3, %cmp2.i.i.3 - br i1 %or.cond.i.i.3, label %if.then.i.i.3, label %if.end.r_exit.i.i.3 - -if.then.i.i.3: ; preds = %if.end.r_exit.i.i.2 - %sext.i.i.3 = shl i64 %add1.i.i.i.3, 32 - %idxprom.i.i.3 = ashr exact i64 %sext.i.i.3, 32 - %arrayidx.i.i.3 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.3 - %79 = bitcast float* %arrayidx.i.i.3 to i32* - %80 = load i32, i32* %79, align 4, !tbaa !12 - %arrayidx5.i.i.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.3 - %81 = bitcast float* %arrayidx5.i.i.3 to i32* - store i32 %80, i32* %81, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.r_exit.i.i.3 - -if.end.r_exit.i.i.3: ; preds = %if.then.i.i.3, %if.end.r_exit.i.i.2 - %82 = add nuw nsw i64 %_local_id_x.i.0, 4 - %exitcond.not.3 = icmp eq i64 %82, 256 - br i1 %exitcond.not.3, label %_pocl_kernel_runJacobi1D_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i, !llvm.loop !40 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32 immarg, <8 x i1>, <8 x i32>) #2 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32 immarg, <8 x i1>) #3 - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nofree norecurse nounwind } -attributes #2 = { argmemonly nounwind readonly willreturn } -attributes #3 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"float*", !"float*", !"int"} -!9 = !{!"", !"", !""} -!10 = !{!"A", !"B", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20} -!20 = distinct !{!20, !18} -!21 = !{!22} -!22 = distinct !{} -!23 = distinct !{!23, !24, !25} -!24 = !{!"llvm.loop.parallel_accesses", !22} -!25 = !{!"llvm.loop.isvectorized", i32 1} -!26 = distinct !{!26, !24, !25} -!27 = !{!28} -!28 = distinct !{!28, !29} -!29 = distinct !{!29, !"LVerDomain"} -!30 = !{!31} -!31 = distinct !{!31, !29} -!32 = distinct !{!32, !24, !25} -!33 = distinct !{!33, !24, !25} -!34 = !{!35} -!35 = distinct !{!35, !36} -!36 = distinct !{!36, !"LVerDomain"} -!37 = !{!38} -!38 = distinct !{!38, !36} -!39 = distinct !{!39, !24, !25} -!40 = distinct !{!40, !24, !25} diff --git a/pocl_irs/jacobi2D_kernel1.ll b/pocl_irs/jacobi2D_kernel1.ll deleted file mode 100644 index ac1f03e..0000000 --- a/pocl_irs/jacobi2D_kernel1.ll +++ /dev/null @@ -1,1177 +0,0 @@ -; ModuleID = './PK/NHAONNAKOLMMJBLNPENBBBCNNPFLDHLMKECDA/runJacobi2D_kernel1/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_runJacobi2D_kernel1(float* nocapture readonly %0, float* nocapture %1, i32 %2, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %3, i64 %4, i64 %5, i64 %6) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul3.i.i = shl i64 %5, 3 - %mul.i.i = shl i64 %4, 5 - %sub.i = add nsw i32 %2, -1 - %8 = trunc i64 %5 to i32 - %9 = mul i32 %8, %2 - %10 = shl i32 %9, 3 - %11 = trunc i64 %4 to i32 - %12 = shl i32 %11, 5 - %13 = add i32 %10, %12 - %14 = zext i32 %2 to i64 - %15 = add i32 %13, -8 - %16 = or i32 %15, 7 - %17 = or i32 %13, 1 - %18 = shl i32 %8, 3 - %19 = or i32 %18, 1 - %20 = mul i32 %19, %2 - %21 = add i32 %20, %12 - %22 = add i32 %18, -1 - %23 = mul i32 %22, %2 - %24 = add i32 %23, %12 - %25 = trunc i64 %5 to i32 - %26 = mul i32 %25, %2 - %27 = shl i32 %26, 3 - %28 = trunc i64 %4 to i32 - %29 = shl i32 %28, 5 - %30 = add i32 %27, %29 - %31 = zext i32 %2 to i64 - %scevgep17 = getelementptr float, float* %1, i64 32 - %32 = shl i32 %25, 3 - %33 = add i32 %32, -1 - %34 = mul i32 %33, %2 - %35 = add i32 %34, %29 - %scevgep22 = getelementptr float, float* %0, i64 32 - %36 = or i32 %32, 1 - %37 = mul i32 %36, %2 - %38 = add i32 %37, %29 - %scevgep27 = getelementptr float, float* %0, i64 32 - %39 = or i32 %30, 1 - %40 = zext i32 %39 to i64 - %scevgep32 = getelementptr float, float* %0, i64 32 - %41 = add i32 %27, %29 - %42 = add i32 %41, -8 - %43 = or i32 %42, 7 - %scevgep37 = getelementptr float, float* %0, i64 32 - %scevgep42 = getelementptr float, float* %0, i64 32 - %bound056 = icmp ugt float* %scevgep42, %1 - %bound157 = icmp ugt float* %scevgep17, %0 - %found.conflict58 = and i1 %bound056, %bound157 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert60 = insertelement <8 x i32> undef, i32 %sub.i, i32 0 - %broadcast.splat61 = shufflevector <8 x i32> %broadcast.splatinsert60, <8 x i32> undef, <8 x i32> zeroinitializer - %44 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %45 = or <8 x i32> %44, - %46 = icmp sgt <8 x i32> %45, zeroinitializer - %47 = icmp sgt <8 x i32> %broadcast.splat61, %45 - %48 = and <8 x i1> %47, %46 - %49 = extractelement <8 x i32> %45, i32 0 - %50 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %51 = or <8 x i32> %50, - %52 = icmp sgt <8 x i32> %51, zeroinitializer - %53 = icmp sgt <8 x i32> %broadcast.splat61, %51 - %54 = and <8 x i1> %53, %52 - %55 = extractelement <8 x i32> %51, i32 0 - %56 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %57 = or <8 x i32> %56, - %58 = icmp sgt <8 x i32> %57, zeroinitializer - %59 = icmp sgt <8 x i32> %broadcast.splat61, %57 - %60 = and <8 x i1> %59, %58 - %61 = extractelement <8 x i32> %57, i32 0 - %62 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %63 = or <8 x i32> %62, - %64 = icmp sgt <8 x i32> %63, zeroinitializer - %65 = icmp sgt <8 x i32> %broadcast.splat61, %63 - %66 = and <8 x i1> %65, %64 - %67 = extractelement <8 x i32> %63, i32 0 - br label %pregion_for_entry.pregion_for_init.i - -pregion_for_entry.pregion_for_init.i: ; preds = %pregion_for_end.i, %7 - %_local_id_y.0 = phi i64 [ 0, %7 ], [ %217, %pregion_for_end.i ] - %68 = mul i64 %_local_id_y.0, %31 - %69 = trunc i64 %68 to i32 - %70 = add i32 %30, %69 - %71 = sext i32 %70 to i64 - %scevgep = getelementptr float, float* %1, i64 %71 - %scevgep18 = getelementptr float, float* %scevgep17, i64 %71 - %72 = trunc i64 %68 to i32 - %73 = add i32 %35, %72 - %74 = sext i32 %73 to i64 - %scevgep20 = getelementptr float, float* %0, i64 %74 - %scevgep23 = getelementptr float, float* %scevgep22, i64 %74 - %75 = trunc i64 %68 to i32 - %76 = add i32 %38, %75 - %77 = sext i32 %76 to i64 - %scevgep25 = getelementptr float, float* %0, i64 %77 - %scevgep28 = getelementptr float, float* %scevgep27, i64 %77 - %78 = add i64 %68, %40 - %sext = shl i64 %78, 32 - %79 = ashr exact i64 %sext, 32 - %scevgep30 = getelementptr float, float* %0, i64 %79 - %scevgep33 = getelementptr float, float* %scevgep32, i64 %79 - %80 = trunc i64 %68 to i32 - %81 = add i32 %43, %80 - %82 = sext i32 %81 to i64 - %scevgep35 = getelementptr float, float* %0, i64 %82 - %scevgep38 = getelementptr float, float* %scevgep37, i64 %82 - %83 = mul i64 %_local_id_y.0, %14 - %add6.i.i = add nuw nsw i64 %_local_id_y.0, %mul3.i.i - %conv.i = trunc i64 %add6.i.i to i32 - %cmp.i = icmp sgt i32 %conv.i, 0 - %mul.i = mul nsw i32 %conv.i, %2 - %add25.i = add nuw nsw i32 %conv.i, 1 - %mul26.i = mul nsw i32 %add25.i, %2 - %sub31.i = add nsw i32 %conv.i, -1 - %mul32.i = mul nsw i32 %sub31.i, %2 - %cmp4.i = icmp sgt i32 %sub.i, %conv.i - %or.cond = and i1 %cmp.i, %cmp4.i - br i1 %or.cond, label %vector.scevcheck, label %pregion_for_end.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i - %84 = trunc i64 %83 to i32 - %85 = add i32 %24, %84 - %86 = trunc i64 %83 to i32 - %87 = add i32 %21, %86 - %88 = trunc i64 %83 to i32 - %89 = add i32 %17, %88 - %90 = trunc i64 %83 to i32 - %91 = add i32 %16, %90 - %92 = trunc i64 %83 to i32 - %93 = add i32 %13, %92 - %94 = icmp sgt i32 %93, 2147483616 - %95 = icmp sgt i32 %91, 2147483616 - %96 = or i1 %94, %95 - %97 = icmp sgt i32 %89, 2147483616 - %98 = or i1 %96, %97 - %99 = icmp sgt i32 %87, 2147483616 - %100 = or i1 %98, %99 - %101 = icmp sgt i32 %85, 2147483616 - %102 = or i1 %100, %101 - br i1 %102, label %pregion_for_entry.entry.i.us.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.us.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.us.us - -vector.memcheck: ; preds = %vector.scevcheck - %bound0 = icmp ult float* %scevgep, %scevgep23 - %bound1 = icmp ult float* %scevgep20, %scevgep18 - %found.conflict = and i1 %bound0, %bound1 - %bound045 = icmp ult float* %scevgep, %scevgep28 - %bound146 = icmp ult float* %scevgep25, %scevgep18 - %found.conflict47 = and i1 %bound045, %bound146 - %conflict.rdx = or i1 %found.conflict, %found.conflict47 - %bound048 = icmp ult float* %scevgep, %scevgep33 - %bound149 = icmp ult float* %scevgep30, %scevgep18 - %found.conflict50 = and i1 %bound048, %bound149 - %conflict.rdx51 = or i1 %conflict.rdx, %found.conflict50 - %bound052 = icmp ult float* %scevgep, %scevgep38 - %bound153 = icmp ult float* %scevgep35, %scevgep18 - %found.conflict54 = and i1 %bound052, %bound153 - %conflict.rdx55 = or i1 %conflict.rdx51, %found.conflict54 - %conflict.rdx59 = or i1 %conflict.rdx55, %found.conflict58 - br i1 %conflict.rdx59, label %pregion_for_entry.entry.i.us.us.preheader, label %vector.body - -vector.body: ; preds = %vector.memcheck - %103 = add i32 %mul.i, %49 - %104 = sext i32 %103 to i64 - %105 = getelementptr inbounds float, float* %0, i64 %104 - %106 = bitcast float* %105 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %106, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !16 - %107 = add i32 %103, -1 - %108 = sext i32 %107 to i64 - %109 = getelementptr inbounds float, float* %0, i64 %108 - %110 = bitcast float* %109 to <8 x float>* - %wide.masked.load62 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %110, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !19 - %111 = fadd <8 x float> %wide.masked.load, %wide.masked.load62 - %112 = add i32 %103, 1 - %113 = sext i32 %112 to i64 - %114 = getelementptr inbounds float, float* %0, i64 %113 - %115 = bitcast float* %114 to <8 x float>* - %wide.masked.load63 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %115, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !21 - %116 = fadd <8 x float> %111, %wide.masked.load63 - %117 = add nsw i32 %mul26.i, %49 - %118 = sext i32 %117 to i64 - %119 = getelementptr inbounds float, float* %0, i64 %118 - %120 = bitcast float* %119 to <8 x float>* - %wide.masked.load64 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %120, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !23 - %121 = fadd <8 x float> %116, %wide.masked.load64 - %122 = add nsw i32 %mul32.i, %49 - %123 = sext i32 %122 to i64 - %124 = getelementptr inbounds float, float* %0, i64 %123 - %125 = bitcast float* %124 to <8 x float>* - %wide.masked.load65 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %125, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !25 - %126 = fadd <8 x float> %121, %wide.masked.load65 - %127 = fmul <8 x float> %126, - %128 = getelementptr inbounds float, float* %1, i64 %104 - %129 = bitcast float* %128 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %127, <8 x float>* %129, i32 4, <8 x i1> %48), !tbaa !12, !alias.scope !27, !noalias !29, !llvm.access.group !30 - %130 = add i32 %mul.i, %55 - %131 = sext i32 %130 to i64 - %132 = getelementptr inbounds float, float* %0, i64 %131 - %133 = bitcast float* %132 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %133, i32 4, <8 x i1> %54, <8 x float> undef), !tbaa !12, !alias.scope !16 - %134 = add i32 %130, -1 - %135 = sext i32 %134 to i64 - %136 = getelementptr inbounds float, float* %0, i64 %135 - %137 = bitcast float* %136 to <8 x float>* - %wide.masked.load62.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %137, i32 4, <8 x i1> %54, <8 x float> undef), !tbaa !12, !alias.scope !19 - %138 = fadd <8 x float> %wide.masked.load.1, %wide.masked.load62.1 - %139 = add i32 %130, 1 - %140 = sext i32 %139 to i64 - %141 = getelementptr inbounds float, float* %0, i64 %140 - %142 = bitcast float* %141 to <8 x float>* - %wide.masked.load63.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %142, i32 4, <8 x i1> %54, <8 x float> undef), !tbaa !12, !alias.scope !21 - %143 = fadd <8 x float> %138, %wide.masked.load63.1 - %144 = add nsw i32 %mul26.i, %55 - %145 = sext i32 %144 to i64 - %146 = getelementptr inbounds float, float* %0, i64 %145 - %147 = bitcast float* %146 to <8 x float>* - %wide.masked.load64.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %147, i32 4, <8 x i1> %54, <8 x float> undef), !tbaa !12, !alias.scope !23 - %148 = fadd <8 x float> %143, %wide.masked.load64.1 - %149 = add nsw i32 %mul32.i, %55 - %150 = sext i32 %149 to i64 - %151 = getelementptr inbounds float, float* %0, i64 %150 - %152 = bitcast float* %151 to <8 x float>* - %wide.masked.load65.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %152, i32 4, <8 x i1> %54, <8 x float> undef), !tbaa !12, !alias.scope !25 - %153 = fadd <8 x float> %148, %wide.masked.load65.1 - %154 = fmul <8 x float> %153, - %155 = getelementptr inbounds float, float* %1, i64 %131 - %156 = bitcast float* %155 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %154, <8 x float>* %156, i32 4, <8 x i1> %54), !tbaa !12, !alias.scope !27, !noalias !29, !llvm.access.group !30 - %157 = add i32 %mul.i, %61 - %158 = sext i32 %157 to i64 - %159 = getelementptr inbounds float, float* %0, i64 %158 - %160 = bitcast float* %159 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %160, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !16 - %161 = add i32 %157, -1 - %162 = sext i32 %161 to i64 - %163 = getelementptr inbounds float, float* %0, i64 %162 - %164 = bitcast float* %163 to <8 x float>* - %wide.masked.load62.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %164, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !19 - %165 = fadd <8 x float> %wide.masked.load.2, %wide.masked.load62.2 - %166 = add i32 %157, 1 - %167 = sext i32 %166 to i64 - %168 = getelementptr inbounds float, float* %0, i64 %167 - %169 = bitcast float* %168 to <8 x float>* - %wide.masked.load63.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %169, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !21 - %170 = fadd <8 x float> %165, %wide.masked.load63.2 - %171 = add nsw i32 %mul26.i, %61 - %172 = sext i32 %171 to i64 - %173 = getelementptr inbounds float, float* %0, i64 %172 - %174 = bitcast float* %173 to <8 x float>* - %wide.masked.load64.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %174, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !23 - %175 = fadd <8 x float> %170, %wide.masked.load64.2 - %176 = add nsw i32 %mul32.i, %61 - %177 = sext i32 %176 to i64 - %178 = getelementptr inbounds float, float* %0, i64 %177 - %179 = bitcast float* %178 to <8 x float>* - %wide.masked.load65.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %179, i32 4, <8 x i1> %60, <8 x float> undef), !tbaa !12, !alias.scope !25 - %180 = fadd <8 x float> %175, %wide.masked.load65.2 - %181 = fmul <8 x float> %180, - %182 = getelementptr inbounds float, float* %1, i64 %158 - %183 = bitcast float* %182 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %181, <8 x float>* %183, i32 4, <8 x i1> %60), !tbaa !12, !alias.scope !27, !noalias !29, !llvm.access.group !30 - %184 = add i32 %mul.i, %67 - %185 = sext i32 %184 to i64 - %186 = getelementptr inbounds float, float* %0, i64 %185 - %187 = bitcast float* %186 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %187, i32 4, <8 x i1> %66, <8 x float> undef), !tbaa !12, !alias.scope !16 - %188 = add i32 %184, -1 - %189 = sext i32 %188 to i64 - %190 = getelementptr inbounds float, float* %0, i64 %189 - %191 = bitcast float* %190 to <8 x float>* - %wide.masked.load62.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %191, i32 4, <8 x i1> %66, <8 x float> undef), !tbaa !12, !alias.scope !19 - %192 = fadd <8 x float> %wide.masked.load.3, %wide.masked.load62.3 - %193 = add i32 %184, 1 - %194 = sext i32 %193 to i64 - %195 = getelementptr inbounds float, float* %0, i64 %194 - %196 = bitcast float* %195 to <8 x float>* - %wide.masked.load63.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %196, i32 4, <8 x i1> %66, <8 x float> undef), !tbaa !12, !alias.scope !21 - %197 = fadd <8 x float> %192, %wide.masked.load63.3 - %198 = add nsw i32 %mul26.i, %67 - %199 = sext i32 %198 to i64 - %200 = getelementptr inbounds float, float* %0, i64 %199 - %201 = bitcast float* %200 to <8 x float>* - %wide.masked.load64.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %201, i32 4, <8 x i1> %66, <8 x float> undef), !tbaa !12, !alias.scope !23 - %202 = fadd <8 x float> %197, %wide.masked.load64.3 - %203 = add nsw i32 %mul32.i, %67 - %204 = sext i32 %203 to i64 - %205 = getelementptr inbounds float, float* %0, i64 %204 - %206 = bitcast float* %205 to <8 x float>* - %wide.masked.load65.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %206, i32 4, <8 x i1> %66, <8 x float> undef), !tbaa !12, !alias.scope !25 - %207 = fadd <8 x float> %202, %wide.masked.load65.3 - %208 = fmul <8 x float> %207, - %209 = getelementptr inbounds float, float* %1, i64 %185 - %210 = bitcast float* %209 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %208, <8 x float>* %210, i32 4, <8 x i1> %66), !tbaa !12, !alias.scope !27, !noalias !29, !llvm.access.group !30 - br label %pregion_for_end.i - -pregion_for_entry.entry.i.us.us: ; preds = %if.end.i.us.us, %pregion_for_entry.entry.i.us.us.preheader - %_local_id_x.0.us.us = phi i64 [ %216, %if.end.i.us.us ], [ 0, %pregion_for_entry.entry.i.us.us.preheader ] - %add1.i.i.us.us = add nuw nsw i64 %_local_id_x.0.us.us, %mul.i.i - %conv2.i.us.us = trunc i64 %add1.i.i.us.us to i32 - %cmp7.i.us.us = icmp sgt i32 %conv2.i.us.us, 0 - %cmp11.i.us.us = icmp sgt i32 %sub.i, %conv2.i.us.us - %or.cond69.i.us.us = and i1 %cmp11.i.us.us, %cmp7.i.us.us - br i1 %or.cond69.i.us.us, label %if.then.i.us.us, label %if.end.i.us.us - -if.then.i.us.us: ; preds = %pregion_for_entry.entry.i.us.us - %add.i.us.us = add i32 %mul.i, %conv2.i.us.us - %idxprom.i.us.us = sext i32 %add.i.us.us to i64 - %arrayidx.i.us.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us - %211 = load float, float* %arrayidx.i.us.us, align 4, !tbaa !12 - %add15.i.us.us = add i32 %add.i.us.us, -1 - %idxprom16.i.us.us = sext i32 %add15.i.us.us to i64 - %arrayidx17.i.us.us = getelementptr inbounds float, float* %0, i64 %idxprom16.i.us.us - %212 = load float, float* %arrayidx17.i.us.us, align 4, !tbaa !12 - %add18.i.us.us = fadd float %211, %212 - %add21.i.us.us = add i32 %add.i.us.us, 1 - %idxprom22.i.us.us = sext i32 %add21.i.us.us to i64 - %arrayidx23.i.us.us = getelementptr inbounds float, float* %0, i64 %idxprom22.i.us.us - %213 = load float, float* %arrayidx23.i.us.us, align 4, !tbaa !12 - %add24.i.us.us = fadd float %add18.i.us.us, %213 - %add27.i.us.us = add nsw i32 %mul26.i, %conv2.i.us.us - %idxprom28.i.us.us = sext i32 %add27.i.us.us to i64 - %arrayidx29.i.us.us = getelementptr inbounds float, float* %0, i64 %idxprom28.i.us.us - %214 = load float, float* %arrayidx29.i.us.us, align 4, !tbaa !12 - %add30.i.us.us = fadd float %add24.i.us.us, %214 - %add33.i.us.us = add nsw i32 %mul32.i, %conv2.i.us.us - %idxprom34.i.us.us = sext i32 %add33.i.us.us to i64 - %arrayidx35.i.us.us = getelementptr inbounds float, float* %0, i64 %idxprom34.i.us.us - %215 = load float, float* %arrayidx35.i.us.us, align 4, !tbaa !12 - %add36.i.us.us = fadd float %add30.i.us.us, %215 - %mul37.i.us.us = fmul float %add36.i.us.us, 0x3FC99999A0000000 - %arrayidx41.i.us.us = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us - store float %mul37.i.us.us, float* %arrayidx41.i.us.us, align 4, !tbaa !12, !llvm.access.group !30 - br label %if.end.i.us.us - -if.end.i.us.us: ; preds = %if.then.i.us.us, %pregion_for_entry.entry.i.us.us - %216 = add nuw nsw i64 %_local_id_x.0.us.us, 1 - %exitcond.not = icmp eq i64 %216, 32 - br i1 %exitcond.not, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us.us, !llvm.loop !33 - -pregion_for_end.i.loopexit: ; preds = %if.end.i.us.us - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %vector.body, %pregion_for_entry.pregion_for_init.i - %217 = add nuw nsw i64 %_local_id_y.0, 1 - %exitcond3.not = icmp eq i64 %217, 8 - br i1 %exitcond3.not, label %runJacobi2D_kernel1.exit, label %pregion_for_entry.pregion_for_init.i, !llvm.loop !36 - -runJacobi2D_kernel1.exit: ; preds = %pregion_for_end.i - ret void -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_runJacobi2D_kernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %mul3.i.i.i = shl i64 %3, 3 - %mul.i.i.i = shl i64 %2, 5 - %sub.i.i = add nsw i32 %16, -1 - %17 = trunc i64 %3 to i32 - %18 = mul i32 %16, %17 - %19 = shl i32 %18, 3 - %20 = trunc i64 %2 to i32 - %21 = shl i32 %20, 5 - %22 = add i32 %19, %21 - %23 = zext i32 %16 to i64 - %24 = add i32 %22, -8 - %25 = or i32 %24, 7 - %26 = or i32 %22, 1 - %27 = shl i32 %17, 3 - %28 = or i32 %27, 1 - %29 = mul i32 %16, %28 - %30 = add i32 %29, %21 - %31 = add i32 %27, -1 - %32 = mul i32 %16, %31 - %33 = add i32 %32, %21 - %34 = trunc i64 %3 to i32 - %35 = mul i32 %16, %34 - %36 = shl i32 %35, 3 - %37 = trunc i64 %2 to i32 - %38 = shl i32 %37, 5 - %39 = add i32 %36, %38 - %40 = zext i32 %16 to i64 - %scevgep17 = getelementptr float, float* %12, i64 32 - %41 = shl i32 %34, 3 - %42 = add i32 %41, -1 - %43 = mul i32 %16, %42 - %44 = add i32 %43, %38 - %scevgep22 = getelementptr float, float* %8, i64 32 - %45 = or i32 %41, 1 - %46 = mul i32 %16, %45 - %47 = add i32 %46, %38 - %scevgep27 = getelementptr float, float* %8, i64 32 - %48 = or i32 %39, 1 - %49 = zext i32 %48 to i64 - %scevgep32 = getelementptr float, float* %8, i64 32 - %50 = add i32 %36, %38 - %51 = add i32 %50, -8 - %52 = or i32 %51, 7 - %scevgep37 = getelementptr float, float* %8, i64 32 - %scevgep42 = getelementptr float, float* %8, i64 32 - %bound056 = icmp ult float* %12, %scevgep42 - %bound157 = icmp ult float* %8, %scevgep17 - %found.conflict58 = and i1 %bound056, %bound157 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert60 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat61 = shufflevector <8 x i32> %broadcast.splatinsert60, <8 x i32> undef, <8 x i32> zeroinitializer - %53 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %54 = or <8 x i32> %53, - %55 = icmp sgt <8 x i32> %54, zeroinitializer - %56 = icmp sgt <8 x i32> %broadcast.splat61, %54 - %57 = and <8 x i1> %56, %55 - %58 = extractelement <8 x i32> %54, i32 0 - %59 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %60 = or <8 x i32> %59, - %61 = icmp sgt <8 x i32> %60, zeroinitializer - %62 = icmp sgt <8 x i32> %broadcast.splat61, %60 - %63 = and <8 x i1> %62, %61 - %64 = extractelement <8 x i32> %60, i32 0 - %65 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %66 = or <8 x i32> %65, - %67 = icmp sgt <8 x i32> %66, zeroinitializer - %68 = icmp sgt <8 x i32> %broadcast.splat61, %66 - %69 = and <8 x i1> %68, %67 - %70 = extractelement <8 x i32> %66, i32 0 - %71 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %72 = or <8 x i32> %71, - %73 = icmp sgt <8 x i32> %72, zeroinitializer - %74 = icmp sgt <8 x i32> %broadcast.splat61, %72 - %75 = and <8 x i1> %74, %73 - %76 = extractelement <8 x i32> %72, i32 0 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %226, %pregion_for_end.i.i ] - %77 = mul i64 %_local_id_y.i.0, %40 - %78 = trunc i64 %77 to i32 - %79 = add i32 %39, %78 - %80 = sext i32 %79 to i64 - %scevgep = getelementptr float, float* %12, i64 %80 - %scevgep18 = getelementptr float, float* %scevgep17, i64 %80 - %81 = trunc i64 %77 to i32 - %82 = add i32 %44, %81 - %83 = sext i32 %82 to i64 - %scevgep20 = getelementptr float, float* %8, i64 %83 - %scevgep23 = getelementptr float, float* %scevgep22, i64 %83 - %84 = trunc i64 %77 to i32 - %85 = add i32 %47, %84 - %86 = sext i32 %85 to i64 - %scevgep25 = getelementptr float, float* %8, i64 %86 - %scevgep28 = getelementptr float, float* %scevgep27, i64 %86 - %87 = add i64 %77, %49 - %sext = shl i64 %87, 32 - %88 = ashr exact i64 %sext, 32 - %scevgep30 = getelementptr float, float* %8, i64 %88 - %scevgep33 = getelementptr float, float* %scevgep32, i64 %88 - %89 = trunc i64 %77 to i32 - %90 = add i32 %52, %89 - %91 = sext i32 %90 to i64 - %scevgep35 = getelementptr float, float* %8, i64 %91 - %scevgep38 = getelementptr float, float* %scevgep37, i64 %91 - %92 = mul i64 %_local_id_y.i.0, %23 - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %conv.i.i, 0 - %mul.i.i = mul nsw i32 %16, %conv.i.i - %add25.i.i = add nuw nsw i32 %conv.i.i, 1 - %mul26.i.i = mul nsw i32 %add25.i.i, %16 - %sub31.i.i = add nsw i32 %conv.i.i, -1 - %mul32.i.i = mul nsw i32 %sub31.i.i, %16 - %cmp4.i.i = icmp sgt i32 %sub.i.i, %conv.i.i - %or.cond = and i1 %cmp.i.i, %cmp4.i.i - br i1 %or.cond, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %93 = trunc i64 %92 to i32 - %94 = add i32 %33, %93 - %95 = trunc i64 %92 to i32 - %96 = add i32 %30, %95 - %97 = trunc i64 %92 to i32 - %98 = add i32 %26, %97 - %99 = trunc i64 %92 to i32 - %100 = add i32 %25, %99 - %101 = trunc i64 %92 to i32 - %102 = add i32 %22, %101 - %103 = icmp sgt i32 %102, 2147483616 - %104 = icmp sgt i32 %100, 2147483616 - %105 = or i1 %103, %104 - %106 = icmp sgt i32 %98, 2147483616 - %107 = or i1 %105, %106 - %108 = icmp sgt i32 %96, 2147483616 - %109 = or i1 %107, %108 - %110 = icmp sgt i32 %94, 2147483616 - %111 = or i1 %109, %110 - br i1 %111, label %pregion_for_entry.entry.i.i.us.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us.us - -vector.memcheck: ; preds = %vector.scevcheck - %bound0 = icmp ult float* %scevgep, %scevgep23 - %bound1 = icmp ult float* %scevgep20, %scevgep18 - %found.conflict = and i1 %bound0, %bound1 - %bound045 = icmp ult float* %scevgep, %scevgep28 - %bound146 = icmp ult float* %scevgep25, %scevgep18 - %found.conflict47 = and i1 %bound045, %bound146 - %conflict.rdx = or i1 %found.conflict, %found.conflict47 - %bound048 = icmp ult float* %scevgep, %scevgep33 - %bound149 = icmp ult float* %scevgep30, %scevgep18 - %found.conflict50 = and i1 %bound048, %bound149 - %conflict.rdx51 = or i1 %conflict.rdx, %found.conflict50 - %bound052 = icmp ult float* %scevgep, %scevgep38 - %bound153 = icmp ult float* %scevgep35, %scevgep18 - %found.conflict54 = and i1 %bound052, %bound153 - %conflict.rdx55 = or i1 %conflict.rdx51, %found.conflict54 - %conflict.rdx59 = or i1 %conflict.rdx55, %found.conflict58 - br i1 %conflict.rdx59, label %pregion_for_entry.entry.i.i.us.us.preheader, label %vector.body - -vector.body: ; preds = %vector.memcheck - %112 = add i32 %mul.i.i, %58 - %113 = sext i32 %112 to i64 - %114 = getelementptr inbounds float, float* %8, i64 %113 - %115 = bitcast float* %114 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %115, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12, !alias.scope !38 - %116 = add i32 %112, -1 - %117 = sext i32 %116 to i64 - %118 = getelementptr inbounds float, float* %8, i64 %117 - %119 = bitcast float* %118 to <8 x float>* - %wide.masked.load62 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %119, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12, !alias.scope !41 - %120 = fadd <8 x float> %wide.masked.load, %wide.masked.load62 - %121 = add i32 %112, 1 - %122 = sext i32 %121 to i64 - %123 = getelementptr inbounds float, float* %8, i64 %122 - %124 = bitcast float* %123 to <8 x float>* - %wide.masked.load63 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %124, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12, !alias.scope !43 - %125 = fadd <8 x float> %120, %wide.masked.load63 - %126 = add nsw i32 %mul26.i.i, %58 - %127 = sext i32 %126 to i64 - %128 = getelementptr inbounds float, float* %8, i64 %127 - %129 = bitcast float* %128 to <8 x float>* - %wide.masked.load64 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %129, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12, !alias.scope !45 - %130 = fadd <8 x float> %125, %wide.masked.load64 - %131 = add nsw i32 %mul32.i.i, %58 - %132 = sext i32 %131 to i64 - %133 = getelementptr inbounds float, float* %8, i64 %132 - %134 = bitcast float* %133 to <8 x float>* - %wide.masked.load65 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %134, i32 4, <8 x i1> %57, <8 x float> undef), !tbaa !12, !alias.scope !47 - %135 = fadd <8 x float> %130, %wide.masked.load65 - %136 = fmul <8 x float> %135, - %137 = getelementptr inbounds float, float* %12, i64 %113 - %138 = bitcast float* %137 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %136, <8 x float>* %138, i32 4, <8 x i1> %57), !tbaa !12, !alias.scope !49, !noalias !51, !llvm.access.group !30 - %139 = add i32 %mul.i.i, %64 - %140 = sext i32 %139 to i64 - %141 = getelementptr inbounds float, float* %8, i64 %140 - %142 = bitcast float* %141 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %142, i32 4, <8 x i1> %63, <8 x float> undef), !tbaa !12, !alias.scope !38 - %143 = add i32 %139, -1 - %144 = sext i32 %143 to i64 - %145 = getelementptr inbounds float, float* %8, i64 %144 - %146 = bitcast float* %145 to <8 x float>* - %wide.masked.load62.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %146, i32 4, <8 x i1> %63, <8 x float> undef), !tbaa !12, !alias.scope !41 - %147 = fadd <8 x float> %wide.masked.load.1, %wide.masked.load62.1 - %148 = add i32 %139, 1 - %149 = sext i32 %148 to i64 - %150 = getelementptr inbounds float, float* %8, i64 %149 - %151 = bitcast float* %150 to <8 x float>* - %wide.masked.load63.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %151, i32 4, <8 x i1> %63, <8 x float> undef), !tbaa !12, !alias.scope !43 - %152 = fadd <8 x float> %147, %wide.masked.load63.1 - %153 = add nsw i32 %mul26.i.i, %64 - %154 = sext i32 %153 to i64 - %155 = getelementptr inbounds float, float* %8, i64 %154 - %156 = bitcast float* %155 to <8 x float>* - %wide.masked.load64.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %156, i32 4, <8 x i1> %63, <8 x float> undef), !tbaa !12, !alias.scope !45 - %157 = fadd <8 x float> %152, %wide.masked.load64.1 - %158 = add nsw i32 %mul32.i.i, %64 - %159 = sext i32 %158 to i64 - %160 = getelementptr inbounds float, float* %8, i64 %159 - %161 = bitcast float* %160 to <8 x float>* - %wide.masked.load65.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %161, i32 4, <8 x i1> %63, <8 x float> undef), !tbaa !12, !alias.scope !47 - %162 = fadd <8 x float> %157, %wide.masked.load65.1 - %163 = fmul <8 x float> %162, - %164 = getelementptr inbounds float, float* %12, i64 %140 - %165 = bitcast float* %164 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %163, <8 x float>* %165, i32 4, <8 x i1> %63), !tbaa !12, !alias.scope !49, !noalias !51, !llvm.access.group !30 - %166 = add i32 %mul.i.i, %70 - %167 = sext i32 %166 to i64 - %168 = getelementptr inbounds float, float* %8, i64 %167 - %169 = bitcast float* %168 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %169, i32 4, <8 x i1> %69, <8 x float> undef), !tbaa !12, !alias.scope !38 - %170 = add i32 %166, -1 - %171 = sext i32 %170 to i64 - %172 = getelementptr inbounds float, float* %8, i64 %171 - %173 = bitcast float* %172 to <8 x float>* - %wide.masked.load62.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %173, i32 4, <8 x i1> %69, <8 x float> undef), !tbaa !12, !alias.scope !41 - %174 = fadd <8 x float> %wide.masked.load.2, %wide.masked.load62.2 - %175 = add i32 %166, 1 - %176 = sext i32 %175 to i64 - %177 = getelementptr inbounds float, float* %8, i64 %176 - %178 = bitcast float* %177 to <8 x float>* - %wide.masked.load63.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %178, i32 4, <8 x i1> %69, <8 x float> undef), !tbaa !12, !alias.scope !43 - %179 = fadd <8 x float> %174, %wide.masked.load63.2 - %180 = add nsw i32 %mul26.i.i, %70 - %181 = sext i32 %180 to i64 - %182 = getelementptr inbounds float, float* %8, i64 %181 - %183 = bitcast float* %182 to <8 x float>* - %wide.masked.load64.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %183, i32 4, <8 x i1> %69, <8 x float> undef), !tbaa !12, !alias.scope !45 - %184 = fadd <8 x float> %179, %wide.masked.load64.2 - %185 = add nsw i32 %mul32.i.i, %70 - %186 = sext i32 %185 to i64 - %187 = getelementptr inbounds float, float* %8, i64 %186 - %188 = bitcast float* %187 to <8 x float>* - %wide.masked.load65.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %188, i32 4, <8 x i1> %69, <8 x float> undef), !tbaa !12, !alias.scope !47 - %189 = fadd <8 x float> %184, %wide.masked.load65.2 - %190 = fmul <8 x float> %189, - %191 = getelementptr inbounds float, float* %12, i64 %167 - %192 = bitcast float* %191 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %190, <8 x float>* %192, i32 4, <8 x i1> %69), !tbaa !12, !alias.scope !49, !noalias !51, !llvm.access.group !30 - %193 = add i32 %mul.i.i, %76 - %194 = sext i32 %193 to i64 - %195 = getelementptr inbounds float, float* %8, i64 %194 - %196 = bitcast float* %195 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %196, i32 4, <8 x i1> %75, <8 x float> undef), !tbaa !12, !alias.scope !38 - %197 = add i32 %193, -1 - %198 = sext i32 %197 to i64 - %199 = getelementptr inbounds float, float* %8, i64 %198 - %200 = bitcast float* %199 to <8 x float>* - %wide.masked.load62.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %200, i32 4, <8 x i1> %75, <8 x float> undef), !tbaa !12, !alias.scope !41 - %201 = fadd <8 x float> %wide.masked.load.3, %wide.masked.load62.3 - %202 = add i32 %193, 1 - %203 = sext i32 %202 to i64 - %204 = getelementptr inbounds float, float* %8, i64 %203 - %205 = bitcast float* %204 to <8 x float>* - %wide.masked.load63.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %205, i32 4, <8 x i1> %75, <8 x float> undef), !tbaa !12, !alias.scope !43 - %206 = fadd <8 x float> %201, %wide.masked.load63.3 - %207 = add nsw i32 %mul26.i.i, %76 - %208 = sext i32 %207 to i64 - %209 = getelementptr inbounds float, float* %8, i64 %208 - %210 = bitcast float* %209 to <8 x float>* - %wide.masked.load64.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %210, i32 4, <8 x i1> %75, <8 x float> undef), !tbaa !12, !alias.scope !45 - %211 = fadd <8 x float> %206, %wide.masked.load64.3 - %212 = add nsw i32 %mul32.i.i, %76 - %213 = sext i32 %212 to i64 - %214 = getelementptr inbounds float, float* %8, i64 %213 - %215 = bitcast float* %214 to <8 x float>* - %wide.masked.load65.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %215, i32 4, <8 x i1> %75, <8 x float> undef), !tbaa !12, !alias.scope !47 - %216 = fadd <8 x float> %211, %wide.masked.load65.3 - %217 = fmul <8 x float> %216, - %218 = getelementptr inbounds float, float* %12, i64 %194 - %219 = bitcast float* %218 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %217, <8 x float>* %219, i32 4, <8 x i1> %75), !tbaa !12, !alias.scope !49, !noalias !51, !llvm.access.group !30 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %225, %if.end.i.i.us.us ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv2.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp7.i.i.us.us = icmp sgt i32 %conv2.i.i.us.us, 0 - %cmp11.i.i.us.us = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us - %or.cond69.i.i.us.us = and i1 %cmp11.i.i.us.us, %cmp7.i.i.us.us - br i1 %or.cond69.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add i32 %mul.i.i, %conv2.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.us.us - %220 = load float, float* %arrayidx.i.i.us.us, align 4, !tbaa !12 - %add15.i.i.us.us = add i32 %add.i.i.us.us, -1 - %idxprom16.i.i.us.us = sext i32 %add15.i.i.us.us to i64 - %arrayidx17.i.i.us.us = getelementptr inbounds float, float* %8, i64 %idxprom16.i.i.us.us - %221 = load float, float* %arrayidx17.i.i.us.us, align 4, !tbaa !12 - %add18.i.i.us.us = fadd float %220, %221 - %add21.i.i.us.us = add i32 %add.i.i.us.us, 1 - %idxprom22.i.i.us.us = sext i32 %add21.i.i.us.us to i64 - %arrayidx23.i.i.us.us = getelementptr inbounds float, float* %8, i64 %idxprom22.i.i.us.us - %222 = load float, float* %arrayidx23.i.i.us.us, align 4, !tbaa !12 - %add24.i.i.us.us = fadd float %add18.i.i.us.us, %222 - %add27.i.i.us.us = add nsw i32 %mul26.i.i, %conv2.i.i.us.us - %idxprom28.i.i.us.us = sext i32 %add27.i.i.us.us to i64 - %arrayidx29.i.i.us.us = getelementptr inbounds float, float* %8, i64 %idxprom28.i.i.us.us - %223 = load float, float* %arrayidx29.i.i.us.us, align 4, !tbaa !12 - %add30.i.i.us.us = fadd float %add24.i.i.us.us, %223 - %add33.i.i.us.us = add nsw i32 %mul32.i.i, %conv2.i.i.us.us - %idxprom34.i.i.us.us = sext i32 %add33.i.i.us.us to i64 - %arrayidx35.i.i.us.us = getelementptr inbounds float, float* %8, i64 %idxprom34.i.i.us.us - %224 = load float, float* %arrayidx35.i.i.us.us, align 4, !tbaa !12 - %add36.i.i.us.us = fadd float %add30.i.i.us.us, %224 - %mul37.i.i.us.us = fmul float %add36.i.i.us.us, 0x3FC99999A0000000 - %arrayidx41.i.i.us.us = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.us - store float %mul37.i.i.us.us, float* %arrayidx41.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !30 - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.then.i.i.us.us, %pregion_for_entry.entry.i.i.us.us - %225 = add nuw nsw i64 %_local_id_x.i.0.us.us, 1 - %exitcond.not = icmp eq i64 %225, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !52 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us.us - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.body, %pregion_for_entry.pregion_for_init.i.i - %226 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond3.not = icmp eq i64 %226, 8 - br i1 %exitcond3.not, label %_pocl_kernel_runJacobi2D_kernel1.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !36 - -_pocl_kernel_runJacobi2D_kernel1.exit: ; preds = %pregion_for_end.i.i - ret void -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_runJacobi2D_kernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to i32** - %13 = load i32*, i32** %12, align 8 - %14 = load i32, i32* %13, align 4 - %mul3.i.i.i = shl i64 %3, 3 - %mul.i.i.i = shl i64 %2, 5 - %sub.i.i = add nsw i32 %14, -1 - %15 = trunc i64 %3 to i32 - %16 = mul i32 %14, %15 - %17 = shl i32 %16, 3 - %18 = trunc i64 %2 to i32 - %19 = shl i32 %18, 5 - %20 = add i32 %17, %19 - %21 = zext i32 %14 to i64 - %22 = add i32 %20, -8 - %23 = or i32 %22, 7 - %24 = or i32 %20, 1 - %25 = shl i32 %15, 3 - %26 = or i32 %25, 1 - %27 = mul i32 %14, %26 - %28 = add i32 %27, %19 - %29 = add i32 %25, -1 - %30 = mul i32 %14, %29 - %31 = add i32 %30, %19 - %32 = trunc i64 %3 to i32 - %33 = mul i32 %14, %32 - %34 = shl i32 %33, 3 - %35 = trunc i64 %2 to i32 - %36 = shl i32 %35, 5 - %37 = add i32 %34, %36 - %38 = zext i32 %14 to i64 - %scevgep17 = getelementptr float, float* %10, i64 32 - %39 = shl i32 %32, 3 - %40 = add i32 %39, -1 - %41 = mul i32 %14, %40 - %42 = add i32 %41, %36 - %scevgep22 = getelementptr float, float* %7, i64 32 - %43 = or i32 %39, 1 - %44 = mul i32 %14, %43 - %45 = add i32 %44, %36 - %scevgep27 = getelementptr float, float* %7, i64 32 - %46 = or i32 %37, 1 - %47 = zext i32 %46 to i64 - %scevgep32 = getelementptr float, float* %7, i64 32 - %48 = add i32 %34, %36 - %49 = add i32 %48, -8 - %50 = or i32 %49, 7 - %scevgep37 = getelementptr float, float* %7, i64 32 - %scevgep42 = getelementptr float, float* %7, i64 32 - %bound056 = icmp ult float* %10, %scevgep42 - %bound157 = icmp ult float* %7, %scevgep17 - %found.conflict58 = and i1 %bound056, %bound157 - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert60 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat61 = shufflevector <8 x i32> %broadcast.splatinsert60, <8 x i32> undef, <8 x i32> zeroinitializer - %51 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %52 = or <8 x i32> %51, - %53 = icmp sgt <8 x i32> %52, zeroinitializer - %54 = icmp sgt <8 x i32> %broadcast.splat61, %52 - %55 = and <8 x i1> %54, %53 - %56 = extractelement <8 x i32> %52, i32 0 - %57 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %58 = or <8 x i32> %57, - %59 = icmp sgt <8 x i32> %58, zeroinitializer - %60 = icmp sgt <8 x i32> %broadcast.splat61, %58 - %61 = and <8 x i1> %60, %59 - %62 = extractelement <8 x i32> %58, i32 0 - %63 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %64 = or <8 x i32> %63, - %65 = icmp sgt <8 x i32> %64, zeroinitializer - %66 = icmp sgt <8 x i32> %broadcast.splat61, %64 - %67 = and <8 x i1> %66, %65 - %68 = extractelement <8 x i32> %64, i32 0 - %69 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %70 = or <8 x i32> %69, - %71 = icmp sgt <8 x i32> %70, zeroinitializer - %72 = icmp sgt <8 x i32> %broadcast.splat61, %70 - %73 = and <8 x i1> %72, %71 - %74 = extractelement <8 x i32> %70, i32 0 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %224, %pregion_for_end.i.i ] - %75 = mul i64 %_local_id_y.i.0, %38 - %76 = trunc i64 %75 to i32 - %77 = add i32 %37, %76 - %78 = sext i32 %77 to i64 - %scevgep = getelementptr float, float* %10, i64 %78 - %scevgep18 = getelementptr float, float* %scevgep17, i64 %78 - %79 = trunc i64 %75 to i32 - %80 = add i32 %42, %79 - %81 = sext i32 %80 to i64 - %scevgep20 = getelementptr float, float* %7, i64 %81 - %scevgep23 = getelementptr float, float* %scevgep22, i64 %81 - %82 = trunc i64 %75 to i32 - %83 = add i32 %45, %82 - %84 = sext i32 %83 to i64 - %scevgep25 = getelementptr float, float* %7, i64 %84 - %scevgep28 = getelementptr float, float* %scevgep27, i64 %84 - %85 = add i64 %75, %47 - %sext = shl i64 %85, 32 - %86 = ashr exact i64 %sext, 32 - %scevgep30 = getelementptr float, float* %7, i64 %86 - %scevgep33 = getelementptr float, float* %scevgep32, i64 %86 - %87 = trunc i64 %75 to i32 - %88 = add i32 %50, %87 - %89 = sext i32 %88 to i64 - %scevgep35 = getelementptr float, float* %7, i64 %89 - %scevgep38 = getelementptr float, float* %scevgep37, i64 %89 - %90 = mul i64 %_local_id_y.i.0, %21 - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %conv.i.i, 0 - %mul.i.i = mul nsw i32 %14, %conv.i.i - %add25.i.i = add nuw nsw i32 %conv.i.i, 1 - %mul26.i.i = mul nsw i32 %add25.i.i, %14 - %sub31.i.i = add nsw i32 %conv.i.i, -1 - %mul32.i.i = mul nsw i32 %sub31.i.i, %14 - %cmp4.i.i = icmp sgt i32 %sub.i.i, %conv.i.i - %or.cond = and i1 %cmp.i.i, %cmp4.i.i - br i1 %or.cond, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %91 = trunc i64 %90 to i32 - %92 = add i32 %31, %91 - %93 = trunc i64 %90 to i32 - %94 = add i32 %28, %93 - %95 = trunc i64 %90 to i32 - %96 = add i32 %24, %95 - %97 = trunc i64 %90 to i32 - %98 = add i32 %23, %97 - %99 = trunc i64 %90 to i32 - %100 = add i32 %20, %99 - %101 = icmp sgt i32 %100, 2147483616 - %102 = icmp sgt i32 %98, 2147483616 - %103 = or i1 %101, %102 - %104 = icmp sgt i32 %96, 2147483616 - %105 = or i1 %103, %104 - %106 = icmp sgt i32 %94, 2147483616 - %107 = or i1 %105, %106 - %108 = icmp sgt i32 %92, 2147483616 - %109 = or i1 %107, %108 - br i1 %109, label %pregion_for_entry.entry.i.i.us.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us.us - -vector.memcheck: ; preds = %vector.scevcheck - %bound0 = icmp ult float* %scevgep, %scevgep23 - %bound1 = icmp ult float* %scevgep20, %scevgep18 - %found.conflict = and i1 %bound0, %bound1 - %bound045 = icmp ult float* %scevgep, %scevgep28 - %bound146 = icmp ult float* %scevgep25, %scevgep18 - %found.conflict47 = and i1 %bound045, %bound146 - %conflict.rdx = or i1 %found.conflict, %found.conflict47 - %bound048 = icmp ult float* %scevgep, %scevgep33 - %bound149 = icmp ult float* %scevgep30, %scevgep18 - %found.conflict50 = and i1 %bound048, %bound149 - %conflict.rdx51 = or i1 %conflict.rdx, %found.conflict50 - %bound052 = icmp ult float* %scevgep, %scevgep38 - %bound153 = icmp ult float* %scevgep35, %scevgep18 - %found.conflict54 = and i1 %bound052, %bound153 - %conflict.rdx55 = or i1 %conflict.rdx51, %found.conflict54 - %conflict.rdx59 = or i1 %conflict.rdx55, %found.conflict58 - br i1 %conflict.rdx59, label %pregion_for_entry.entry.i.i.us.us.preheader, label %vector.body - -vector.body: ; preds = %vector.memcheck - %110 = add i32 %mul.i.i, %56 - %111 = sext i32 %110 to i64 - %112 = getelementptr inbounds float, float* %7, i64 %111 - %113 = bitcast float* %112 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %113, i32 4, <8 x i1> %55, <8 x float> undef), !tbaa !12, !alias.scope !53 - %114 = add i32 %110, -1 - %115 = sext i32 %114 to i64 - %116 = getelementptr inbounds float, float* %7, i64 %115 - %117 = bitcast float* %116 to <8 x float>* - %wide.masked.load62 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %117, i32 4, <8 x i1> %55, <8 x float> undef), !tbaa !12, !alias.scope !56 - %118 = fadd <8 x float> %wide.masked.load, %wide.masked.load62 - %119 = add i32 %110, 1 - %120 = sext i32 %119 to i64 - %121 = getelementptr inbounds float, float* %7, i64 %120 - %122 = bitcast float* %121 to <8 x float>* - %wide.masked.load63 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %122, i32 4, <8 x i1> %55, <8 x float> undef), !tbaa !12, !alias.scope !58 - %123 = fadd <8 x float> %118, %wide.masked.load63 - %124 = add nsw i32 %mul26.i.i, %56 - %125 = sext i32 %124 to i64 - %126 = getelementptr inbounds float, float* %7, i64 %125 - %127 = bitcast float* %126 to <8 x float>* - %wide.masked.load64 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %127, i32 4, <8 x i1> %55, <8 x float> undef), !tbaa !12, !alias.scope !60 - %128 = fadd <8 x float> %123, %wide.masked.load64 - %129 = add nsw i32 %mul32.i.i, %56 - %130 = sext i32 %129 to i64 - %131 = getelementptr inbounds float, float* %7, i64 %130 - %132 = bitcast float* %131 to <8 x float>* - %wide.masked.load65 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %132, i32 4, <8 x i1> %55, <8 x float> undef), !tbaa !12, !alias.scope !62 - %133 = fadd <8 x float> %128, %wide.masked.load65 - %134 = fmul <8 x float> %133, - %135 = getelementptr inbounds float, float* %10, i64 %111 - %136 = bitcast float* %135 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %134, <8 x float>* %136, i32 4, <8 x i1> %55), !tbaa !12, !alias.scope !64, !noalias !66, !llvm.access.group !30 - %137 = add i32 %mul.i.i, %62 - %138 = sext i32 %137 to i64 - %139 = getelementptr inbounds float, float* %7, i64 %138 - %140 = bitcast float* %139 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %140, i32 4, <8 x i1> %61, <8 x float> undef), !tbaa !12, !alias.scope !53 - %141 = add i32 %137, -1 - %142 = sext i32 %141 to i64 - %143 = getelementptr inbounds float, float* %7, i64 %142 - %144 = bitcast float* %143 to <8 x float>* - %wide.masked.load62.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %144, i32 4, <8 x i1> %61, <8 x float> undef), !tbaa !12, !alias.scope !56 - %145 = fadd <8 x float> %wide.masked.load.1, %wide.masked.load62.1 - %146 = add i32 %137, 1 - %147 = sext i32 %146 to i64 - %148 = getelementptr inbounds float, float* %7, i64 %147 - %149 = bitcast float* %148 to <8 x float>* - %wide.masked.load63.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %149, i32 4, <8 x i1> %61, <8 x float> undef), !tbaa !12, !alias.scope !58 - %150 = fadd <8 x float> %145, %wide.masked.load63.1 - %151 = add nsw i32 %mul26.i.i, %62 - %152 = sext i32 %151 to i64 - %153 = getelementptr inbounds float, float* %7, i64 %152 - %154 = bitcast float* %153 to <8 x float>* - %wide.masked.load64.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %154, i32 4, <8 x i1> %61, <8 x float> undef), !tbaa !12, !alias.scope !60 - %155 = fadd <8 x float> %150, %wide.masked.load64.1 - %156 = add nsw i32 %mul32.i.i, %62 - %157 = sext i32 %156 to i64 - %158 = getelementptr inbounds float, float* %7, i64 %157 - %159 = bitcast float* %158 to <8 x float>* - %wide.masked.load65.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %159, i32 4, <8 x i1> %61, <8 x float> undef), !tbaa !12, !alias.scope !62 - %160 = fadd <8 x float> %155, %wide.masked.load65.1 - %161 = fmul <8 x float> %160, - %162 = getelementptr inbounds float, float* %10, i64 %138 - %163 = bitcast float* %162 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %161, <8 x float>* %163, i32 4, <8 x i1> %61), !tbaa !12, !alias.scope !64, !noalias !66, !llvm.access.group !30 - %164 = add i32 %mul.i.i, %68 - %165 = sext i32 %164 to i64 - %166 = getelementptr inbounds float, float* %7, i64 %165 - %167 = bitcast float* %166 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %167, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12, !alias.scope !53 - %168 = add i32 %164, -1 - %169 = sext i32 %168 to i64 - %170 = getelementptr inbounds float, float* %7, i64 %169 - %171 = bitcast float* %170 to <8 x float>* - %wide.masked.load62.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %171, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12, !alias.scope !56 - %172 = fadd <8 x float> %wide.masked.load.2, %wide.masked.load62.2 - %173 = add i32 %164, 1 - %174 = sext i32 %173 to i64 - %175 = getelementptr inbounds float, float* %7, i64 %174 - %176 = bitcast float* %175 to <8 x float>* - %wide.masked.load63.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %176, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12, !alias.scope !58 - %177 = fadd <8 x float> %172, %wide.masked.load63.2 - %178 = add nsw i32 %mul26.i.i, %68 - %179 = sext i32 %178 to i64 - %180 = getelementptr inbounds float, float* %7, i64 %179 - %181 = bitcast float* %180 to <8 x float>* - %wide.masked.load64.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %181, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12, !alias.scope !60 - %182 = fadd <8 x float> %177, %wide.masked.load64.2 - %183 = add nsw i32 %mul32.i.i, %68 - %184 = sext i32 %183 to i64 - %185 = getelementptr inbounds float, float* %7, i64 %184 - %186 = bitcast float* %185 to <8 x float>* - %wide.masked.load65.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %186, i32 4, <8 x i1> %67, <8 x float> undef), !tbaa !12, !alias.scope !62 - %187 = fadd <8 x float> %182, %wide.masked.load65.2 - %188 = fmul <8 x float> %187, - %189 = getelementptr inbounds float, float* %10, i64 %165 - %190 = bitcast float* %189 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %188, <8 x float>* %190, i32 4, <8 x i1> %67), !tbaa !12, !alias.scope !64, !noalias !66, !llvm.access.group !30 - %191 = add i32 %mul.i.i, %74 - %192 = sext i32 %191 to i64 - %193 = getelementptr inbounds float, float* %7, i64 %192 - %194 = bitcast float* %193 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %194, i32 4, <8 x i1> %73, <8 x float> undef), !tbaa !12, !alias.scope !53 - %195 = add i32 %191, -1 - %196 = sext i32 %195 to i64 - %197 = getelementptr inbounds float, float* %7, i64 %196 - %198 = bitcast float* %197 to <8 x float>* - %wide.masked.load62.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %198, i32 4, <8 x i1> %73, <8 x float> undef), !tbaa !12, !alias.scope !56 - %199 = fadd <8 x float> %wide.masked.load.3, %wide.masked.load62.3 - %200 = add i32 %191, 1 - %201 = sext i32 %200 to i64 - %202 = getelementptr inbounds float, float* %7, i64 %201 - %203 = bitcast float* %202 to <8 x float>* - %wide.masked.load63.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %203, i32 4, <8 x i1> %73, <8 x float> undef), !tbaa !12, !alias.scope !58 - %204 = fadd <8 x float> %199, %wide.masked.load63.3 - %205 = add nsw i32 %mul26.i.i, %74 - %206 = sext i32 %205 to i64 - %207 = getelementptr inbounds float, float* %7, i64 %206 - %208 = bitcast float* %207 to <8 x float>* - %wide.masked.load64.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %208, i32 4, <8 x i1> %73, <8 x float> undef), !tbaa !12, !alias.scope !60 - %209 = fadd <8 x float> %204, %wide.masked.load64.3 - %210 = add nsw i32 %mul32.i.i, %74 - %211 = sext i32 %210 to i64 - %212 = getelementptr inbounds float, float* %7, i64 %211 - %213 = bitcast float* %212 to <8 x float>* - %wide.masked.load65.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %213, i32 4, <8 x i1> %73, <8 x float> undef), !tbaa !12, !alias.scope !62 - %214 = fadd <8 x float> %209, %wide.masked.load65.3 - %215 = fmul <8 x float> %214, - %216 = getelementptr inbounds float, float* %10, i64 %192 - %217 = bitcast float* %216 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %215, <8 x float>* %217, i32 4, <8 x i1> %73), !tbaa !12, !alias.scope !64, !noalias !66, !llvm.access.group !30 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %223, %if.end.i.i.us.us ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv2.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp7.i.i.us.us = icmp sgt i32 %conv2.i.i.us.us, 0 - %cmp11.i.i.us.us = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us - %or.cond69.i.i.us.us = and i1 %cmp11.i.i.us.us, %cmp7.i.i.us.us - br i1 %or.cond69.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add i32 %mul.i.i, %conv2.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us - %218 = load float, float* %arrayidx.i.i.us.us, align 4, !tbaa !12 - %add15.i.i.us.us = add i32 %add.i.i.us.us, -1 - %idxprom16.i.i.us.us = sext i32 %add15.i.i.us.us to i64 - %arrayidx17.i.i.us.us = getelementptr inbounds float, float* %7, i64 %idxprom16.i.i.us.us - %219 = load float, float* %arrayidx17.i.i.us.us, align 4, !tbaa !12 - %add18.i.i.us.us = fadd float %218, %219 - %add21.i.i.us.us = add i32 %add.i.i.us.us, 1 - %idxprom22.i.i.us.us = sext i32 %add21.i.i.us.us to i64 - %arrayidx23.i.i.us.us = getelementptr inbounds float, float* %7, i64 %idxprom22.i.i.us.us - %220 = load float, float* %arrayidx23.i.i.us.us, align 4, !tbaa !12 - %add24.i.i.us.us = fadd float %add18.i.i.us.us, %220 - %add27.i.i.us.us = add nsw i32 %mul26.i.i, %conv2.i.i.us.us - %idxprom28.i.i.us.us = sext i32 %add27.i.i.us.us to i64 - %arrayidx29.i.i.us.us = getelementptr inbounds float, float* %7, i64 %idxprom28.i.i.us.us - %221 = load float, float* %arrayidx29.i.i.us.us, align 4, !tbaa !12 - %add30.i.i.us.us = fadd float %add24.i.i.us.us, %221 - %add33.i.i.us.us = add nsw i32 %mul32.i.i, %conv2.i.i.us.us - %idxprom34.i.i.us.us = sext i32 %add33.i.i.us.us to i64 - %arrayidx35.i.i.us.us = getelementptr inbounds float, float* %7, i64 %idxprom34.i.i.us.us - %222 = load float, float* %arrayidx35.i.i.us.us, align 4, !tbaa !12 - %add36.i.i.us.us = fadd float %add30.i.i.us.us, %222 - %mul37.i.i.us.us = fmul float %add36.i.i.us.us, 0x3FC99999A0000000 - %arrayidx41.i.i.us.us = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.us - store float %mul37.i.i.us.us, float* %arrayidx41.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !30 - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.then.i.i.us.us, %pregion_for_entry.entry.i.i.us.us - %223 = add nuw nsw i64 %_local_id_x.i.0.us.us, 1 - %exitcond.not = icmp eq i64 %223, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !67 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us.us - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.body, %pregion_for_entry.pregion_for_init.i.i - %224 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond3.not = icmp eq i64 %224, 8 - br i1 %exitcond3.not, label %_pocl_kernel_runJacobi2D_kernel1.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !36 - -_pocl_kernel_runJacobi2D_kernel1.exit: ; preds = %pregion_for_end.i.i - ret void -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #2 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #3 - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nofree norecurse nounwind } -attributes #2 = { argmemonly nounwind readonly willreturn } -attributes #3 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"float*", !"float*", !"int"} -!9 = !{!"", !"", !""} -!10 = !{!"A", !"B", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20} -!20 = distinct !{!20, !18} -!21 = !{!22} -!22 = distinct !{!22, !18} -!23 = !{!24} -!24 = distinct !{!24, !18} -!25 = !{!26} -!26 = distinct !{!26, !18} -!27 = !{!28} -!28 = distinct !{!28, !18} -!29 = !{!26, !24, !22, !20, !17} -!30 = !{!31, !32} -!31 = distinct !{} -!32 = distinct !{} -!33 = distinct !{!33, !34, !35} -!34 = !{!"llvm.loop.parallel_accesses", !31} -!35 = !{!"llvm.loop.isvectorized", i32 1} -!36 = distinct !{!36, !37} -!37 = !{!"llvm.loop.parallel_accesses", !32} -!38 = !{!39} -!39 = distinct !{!39, !40} -!40 = distinct !{!40, !"LVerDomain"} -!41 = !{!42} -!42 = distinct !{!42, !40} -!43 = !{!44} -!44 = distinct !{!44, !40} -!45 = !{!46} -!46 = distinct !{!46, !40} -!47 = !{!48} -!48 = distinct !{!48, !40} -!49 = !{!50} -!50 = distinct !{!50, !40} -!51 = !{!48, !46, !44, !42, !39} -!52 = distinct !{!52, !34, !35} -!53 = !{!54} -!54 = distinct !{!54, !55} -!55 = distinct !{!55, !"LVerDomain"} -!56 = !{!57} -!57 = distinct !{!57, !55} -!58 = !{!59} -!59 = distinct !{!59, !55} -!60 = !{!61} -!61 = distinct !{!61, !55} -!62 = !{!63} -!63 = distinct !{!63, !55} -!64 = !{!65} -!65 = distinct !{!65, !55} -!66 = !{!63, !61, !59, !57, !54} -!67 = distinct !{!67, !34, !35} diff --git a/pocl_irs/jacobi2D_kernel2.ll b/pocl_irs/jacobi2D_kernel2.ll deleted file mode 100644 index b34c2b2..0000000 --- a/pocl_irs/jacobi2D_kernel2.ll +++ /dev/null @@ -1,4730 +0,0 @@ -; ModuleID = './PK/NHAONNAKOLMMJBLNPENBBBCNNPFLDHLMKECDA/runJacobi2D_kernel2/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_runJacobi2D_kernel2(float* nocapture %0, float* nocapture readonly %1, i32 %2, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %3, i64 %4, i64 %5, i64 %6) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { -pregion_for_entry.pregion_for_init.i: - %mul3.i.i = shl i64 %5, 3 - %mul.i.i = shl i64 %4, 5 - %sub.i = add nsw i32 %2, -1 - %conv.i = trunc i64 %mul3.i.i to i32 - %cmp.i = icmp sgt i32 %conv.i, 0 - %mul.i = mul nsw i32 %conv.i, %2 - %cmp4.i = icmp sgt i32 %sub.i, %conv.i - %or.cond = and i1 %cmp.i, %cmp4.i - br i1 %or.cond, label %vector.scevcheck, label %pregion_for_end.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i - %7 = trunc i64 %5 to i32 - %8 = mul i32 %7, %2 - %9 = shl i32 %8, 3 - %10 = trunc i64 %4 to i32 - %11 = shl i32 %10, 5 - %12 = add i32 %9, %11 - %13 = icmp sgt i32 %12, 2147483616 - br i1 %13, label %pregion_for_entry.entry.i.us.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.us.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.us.us - -vector.memcheck: ; preds = %vector.scevcheck - %14 = trunc i64 %5 to i32 - %15 = mul i32 %14, %2 - %16 = shl i32 %15, 3 - %17 = trunc i64 %4 to i32 - %18 = shl i32 %17, 5 - %19 = add i32 %16, %18 - %20 = sext i32 %19 to i64 - %scevgep = getelementptr float, float* %0, i64 %20 - %21 = add nsw i64 %20, 32 - %scevgep12 = getelementptr float, float* %0, i64 %21 - %scevgep14 = getelementptr float, float* %1, i64 %20 - %scevgep16 = getelementptr float, float* %1, i64 %21 - %bound0 = icmp ult float* %scevgep, %scevgep16 - %bound1 = icmp ult float* %scevgep14, %scevgep12 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.us.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <8 x i32> undef, i32 %sub.i, i32 0 - %broadcast.splat19 = shufflevector <8 x i32> %broadcast.splatinsert18, <8 x i32> undef, <8 x i32> zeroinitializer - %22 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %23 = or <8 x i32> %22, - %24 = icmp sgt <8 x i32> %23, zeroinitializer - %25 = icmp sgt <8 x i32> %broadcast.splat19, %23 - %26 = and <8 x i1> %25, %24 - %27 = extractelement <8 x i32> %23, i32 0 - %28 = add nsw i32 %mul.i, %27 - %29 = sext i32 %28 to i64 - %30 = getelementptr inbounds float, float* %1, i64 %29 - %31 = bitcast float* %30 to <8 x i32>* - %wide.masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %31, i32 4, <8 x i1> %26, <8 x i32> undef), !tbaa !12, !alias.scope !16 - %32 = getelementptr inbounds float, float* %0, i64 %29 - %33 = bitcast float* %32 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load, <8 x i32>* %33, i32 4, <8 x i1> %26), !tbaa !12, !alias.scope !19, !noalias !16, !llvm.access.group !21 - %34 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %35 = or <8 x i32> %34, - %36 = icmp sgt <8 x i32> %35, zeroinitializer - %37 = icmp sgt <8 x i32> %broadcast.splat19, %35 - %38 = and <8 x i1> %37, %36 - %39 = extractelement <8 x i32> %35, i32 0 - %40 = add nsw i32 %mul.i, %39 - %41 = sext i32 %40 to i64 - %42 = getelementptr inbounds float, float* %1, i64 %41 - %43 = bitcast float* %42 to <8 x i32>* - %wide.masked.load.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %43, i32 4, <8 x i1> %38, <8 x i32> undef), !tbaa !12, !alias.scope !16 - %44 = getelementptr inbounds float, float* %0, i64 %41 - %45 = bitcast float* %44 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.1, <8 x i32>* %45, i32 4, <8 x i1> %38), !tbaa !12, !alias.scope !19, !noalias !16, !llvm.access.group !21 - %46 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %47 = or <8 x i32> %46, - %48 = icmp sgt <8 x i32> %47, zeroinitializer - %49 = icmp sgt <8 x i32> %broadcast.splat19, %47 - %50 = and <8 x i1> %49, %48 - %51 = extractelement <8 x i32> %47, i32 0 - %52 = add nsw i32 %mul.i, %51 - %53 = sext i32 %52 to i64 - %54 = getelementptr inbounds float, float* %1, i64 %53 - %55 = bitcast float* %54 to <8 x i32>* - %wide.masked.load.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %55, i32 4, <8 x i1> %50, <8 x i32> undef), !tbaa !12, !alias.scope !16 - %56 = getelementptr inbounds float, float* %0, i64 %53 - %57 = bitcast float* %56 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.2, <8 x i32>* %57, i32 4, <8 x i1> %50), !tbaa !12, !alias.scope !19, !noalias !16, !llvm.access.group !21 - %58 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %59 = or <8 x i32> %58, - %60 = icmp sgt <8 x i32> %59, zeroinitializer - %61 = icmp sgt <8 x i32> %broadcast.splat19, %59 - %62 = and <8 x i1> %61, %60 - %63 = extractelement <8 x i32> %59, i32 0 - %64 = add nsw i32 %mul.i, %63 - %65 = sext i32 %64 to i64 - %66 = getelementptr inbounds float, float* %1, i64 %65 - %67 = bitcast float* %66 to <8 x i32>* - %wide.masked.load.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %67, i32 4, <8 x i1> %62, <8 x i32> undef), !tbaa !12, !alias.scope !16 - %68 = getelementptr inbounds float, float* %0, i64 %65 - %69 = bitcast float* %68 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.3, <8 x i32>* %69, i32 4, <8 x i1> %62), !tbaa !12, !alias.scope !19, !noalias !16, !llvm.access.group !21 - br label %pregion_for_end.i - -pregion_for_entry.entry.i.us.us: ; preds = %if.end.i.us.us.3304, %pregion_for_entry.entry.i.us.us.preheader - %_local_id_x.0.us.us = phi i64 [ 0, %pregion_for_entry.entry.i.us.us.preheader ], [ %617, %if.end.i.us.us.3304 ] - %add1.i.i.us.us = add nuw nsw i64 %_local_id_x.0.us.us, %mul.i.i - %conv2.i.us.us = trunc i64 %add1.i.i.us.us to i32 - %cmp7.i.us.us = icmp sgt i32 %conv2.i.us.us, 0 - %cmp11.i.us.us = icmp sgt i32 %sub.i, %conv2.i.us.us - %or.cond28.i.us.us = and i1 %cmp11.i.us.us, %cmp7.i.us.us - br i1 %or.cond28.i.us.us, label %if.then.i.us.us, label %if.end.i.us.us - -if.then.i.us.us: ; preds = %pregion_for_entry.entry.i.us.us - %add.i.us.us = add nsw i32 %mul.i, %conv2.i.us.us - %idxprom.i.us.us = sext i32 %add.i.us.us to i64 - %arrayidx.i.us.us = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us - %70 = bitcast float* %arrayidx.i.us.us to i32* - %71 = load i32, i32* %70, align 4, !tbaa !12 - %arrayidx16.i.us.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us - %72 = bitcast float* %arrayidx16.i.us.us to i32* - store i32 %71, i32* %72, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us - -if.end.i.us.us: ; preds = %if.then.i.us.us, %pregion_for_entry.entry.i.us.us - %73 = or i64 %_local_id_x.0.us.us, 1 - %add1.i.i.us.us.1267 = add nuw nsw i64 %73, %mul.i.i - %conv2.i.us.us.1268 = trunc i64 %add1.i.i.us.us.1267 to i32 - %cmp7.i.us.us.1269 = icmp sgt i32 %conv2.i.us.us.1268, 0 - %cmp11.i.us.us.1270 = icmp sgt i32 %sub.i, %conv2.i.us.us.1268 - %or.cond28.i.us.us.1271 = and i1 %cmp11.i.us.us.1270, %cmp7.i.us.us.1269 - br i1 %or.cond28.i.us.us.1271, label %if.then.i.us.us.1277, label %if.end.i.us.us.1278 - -pregion_for_end.i.loopexit: ; preds = %if.end.i.us.us.3304 - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i - %74 = trunc i64 %mul3.i.i to i32 - %conv.i.1 = or i32 %74, 1 - %cmp.i.1 = icmp sgt i32 %conv.i.1, 0 - %mul.i.1 = mul nsw i32 %conv.i.1, %2 - %cmp4.i.1 = icmp sgt i32 %sub.i, %conv.i.1 - %or.cond4 = and i1 %cmp.i.1, %cmp4.i.1 - br i1 %or.cond4, label %vector.scevcheck27, label %pregion_for_end.i.1 - -vector.scevcheck27: ; preds = %pregion_for_end.i - %75 = mul i32 %conv.i.1, %2 - %76 = trunc i64 %4 to i32 - %77 = shl i32 %76, 5 - %78 = add i32 %75, %77 - %79 = icmp sgt i32 %78, 2147483616 - br i1 %79, label %pregion_for_entry.entry.i.us.us.1.preheader, label %vector.memcheck41 - -pregion_for_entry.entry.i.us.us.1.preheader: ; preds = %vector.memcheck41, %vector.scevcheck27 - br label %pregion_for_entry.entry.i.us.us.1 - -vector.memcheck41: ; preds = %vector.scevcheck27 - %80 = mul i32 %conv.i.1, %2 - %81 = trunc i64 %4 to i32 - %82 = shl i32 %81, 5 - %83 = add i32 %80, %82 - %84 = sext i32 %83 to i64 - %scevgep29 = getelementptr float, float* %0, i64 %84 - %85 = add nsw i64 %84, 32 - %scevgep31 = getelementptr float, float* %0, i64 %85 - %scevgep33 = getelementptr float, float* %1, i64 %84 - %scevgep35 = getelementptr float, float* %1, i64 %85 - %bound037 = icmp ult float* %scevgep29, %scevgep35 - %bound138 = icmp ult float* %scevgep33, %scevgep31 - %found.conflict39 = and i1 %bound037, %bound138 - br i1 %found.conflict39, label %pregion_for_entry.entry.i.us.us.1.preheader, label %vector.ph42 - -vector.ph42: ; preds = %vector.memcheck41 - %broadcast.splatinsert49 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat50 = shufflevector <8 x i64> %broadcast.splatinsert49, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert51 = insertelement <8 x i32> undef, i32 %sub.i, i32 0 - %broadcast.splat52 = shufflevector <8 x i32> %broadcast.splatinsert51, <8 x i32> undef, <8 x i32> zeroinitializer - %86 = trunc <8 x i64> %broadcast.splat50 to <8 x i32> - %87 = or <8 x i32> %86, - %88 = icmp sgt <8 x i32> %87, zeroinitializer - %89 = icmp sgt <8 x i32> %broadcast.splat52, %87 - %90 = and <8 x i1> %89, %88 - %91 = extractelement <8 x i32> %87, i32 0 - %92 = add nsw i32 %mul.i.1, %91 - %93 = sext i32 %92 to i64 - %94 = getelementptr inbounds float, float* %1, i64 %93 - %95 = bitcast float* %94 to <8 x i32>* - %wide.masked.load53 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %95, i32 4, <8 x i1> %90, <8 x i32> undef), !tbaa !12, !alias.scope !24 - %96 = getelementptr inbounds float, float* %0, i64 %93 - %97 = bitcast float* %96 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load53, <8 x i32>* %97, i32 4, <8 x i1> %90), !tbaa !12, !alias.scope !27, !noalias !24, !llvm.access.group !21 - %98 = trunc <8 x i64> %broadcast.splat50 to <8 x i32> - %99 = or <8 x i32> %98, - %100 = icmp sgt <8 x i32> %99, zeroinitializer - %101 = icmp sgt <8 x i32> %broadcast.splat52, %99 - %102 = and <8 x i1> %101, %100 - %103 = extractelement <8 x i32> %99, i32 0 - %104 = add nsw i32 %mul.i.1, %103 - %105 = sext i32 %104 to i64 - %106 = getelementptr inbounds float, float* %1, i64 %105 - %107 = bitcast float* %106 to <8 x i32>* - %wide.masked.load53.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %107, i32 4, <8 x i1> %102, <8 x i32> undef), !tbaa !12, !alias.scope !24 - %108 = getelementptr inbounds float, float* %0, i64 %105 - %109 = bitcast float* %108 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load53.1, <8 x i32>* %109, i32 4, <8 x i1> %102), !tbaa !12, !alias.scope !27, !noalias !24, !llvm.access.group !21 - %110 = trunc <8 x i64> %broadcast.splat50 to <8 x i32> - %111 = or <8 x i32> %110, - %112 = icmp sgt <8 x i32> %111, zeroinitializer - %113 = icmp sgt <8 x i32> %broadcast.splat52, %111 - %114 = and <8 x i1> %113, %112 - %115 = extractelement <8 x i32> %111, i32 0 - %116 = add nsw i32 %mul.i.1, %115 - %117 = sext i32 %116 to i64 - %118 = getelementptr inbounds float, float* %1, i64 %117 - %119 = bitcast float* %118 to <8 x i32>* - %wide.masked.load53.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %119, i32 4, <8 x i1> %114, <8 x i32> undef), !tbaa !12, !alias.scope !24 - %120 = getelementptr inbounds float, float* %0, i64 %117 - %121 = bitcast float* %120 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load53.2, <8 x i32>* %121, i32 4, <8 x i1> %114), !tbaa !12, !alias.scope !27, !noalias !24, !llvm.access.group !21 - %122 = trunc <8 x i64> %broadcast.splat50 to <8 x i32> - %123 = or <8 x i32> %122, - %124 = icmp sgt <8 x i32> %123, zeroinitializer - %125 = icmp sgt <8 x i32> %broadcast.splat52, %123 - %126 = and <8 x i1> %125, %124 - %127 = extractelement <8 x i32> %123, i32 0 - %128 = add nsw i32 %mul.i.1, %127 - %129 = sext i32 %128 to i64 - %130 = getelementptr inbounds float, float* %1, i64 %129 - %131 = bitcast float* %130 to <8 x i32>* - %wide.masked.load53.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %131, i32 4, <8 x i1> %126, <8 x i32> undef), !tbaa !12, !alias.scope !24 - %132 = getelementptr inbounds float, float* %0, i64 %129 - %133 = bitcast float* %132 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load53.3, <8 x i32>* %133, i32 4, <8 x i1> %126), !tbaa !12, !alias.scope !27, !noalias !24, !llvm.access.group !21 - br label %pregion_for_end.i.1 - -pregion_for_entry.entry.i.us.us.1: ; preds = %if.end.i.us.us.1.3, %pregion_for_entry.entry.i.us.us.1.preheader - %_local_id_x.0.us.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.us.us.1.preheader ], [ %605, %if.end.i.us.us.1.3 ] - %add1.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.0.us.us.1, %mul.i.i - %conv2.i.us.us.1 = trunc i64 %add1.i.i.us.us.1 to i32 - %cmp7.i.us.us.1 = icmp sgt i32 %conv2.i.us.us.1, 0 - %cmp11.i.us.us.1 = icmp sgt i32 %sub.i, %conv2.i.us.us.1 - %or.cond28.i.us.us.1 = and i1 %cmp11.i.us.us.1, %cmp7.i.us.us.1 - br i1 %or.cond28.i.us.us.1, label %if.then.i.us.us.1, label %if.end.i.us.us.1 - -if.then.i.us.us.1: ; preds = %pregion_for_entry.entry.i.us.us.1 - %add.i.us.us.1 = add nsw i32 %mul.i.1, %conv2.i.us.us.1 - %idxprom.i.us.us.1 = sext i32 %add.i.us.us.1 to i64 - %arrayidx.i.us.us.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.1 - %134 = bitcast float* %arrayidx.i.us.us.1 to i32* - %135 = load i32, i32* %134, align 4, !tbaa !12 - %arrayidx16.i.us.us.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.1 - %136 = bitcast float* %arrayidx16.i.us.us.1 to i32* - store i32 %135, i32* %136, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.1 - -if.end.i.us.us.1: ; preds = %if.then.i.us.us.1, %pregion_for_entry.entry.i.us.us.1 - %137 = or i64 %_local_id_x.0.us.us.1, 1 - %add1.i.i.us.us.1.1 = add nuw nsw i64 %137, %mul.i.i - %conv2.i.us.us.1.1 = trunc i64 %add1.i.i.us.us.1.1 to i32 - %cmp7.i.us.us.1.1 = icmp sgt i32 %conv2.i.us.us.1.1, 0 - %cmp11.i.us.us.1.1 = icmp sgt i32 %sub.i, %conv2.i.us.us.1.1 - %or.cond28.i.us.us.1.1 = and i1 %cmp11.i.us.us.1.1, %cmp7.i.us.us.1.1 - br i1 %or.cond28.i.us.us.1.1, label %if.then.i.us.us.1.1, label %if.end.i.us.us.1.1 - -pregion_for_end.i.1.loopexit: ; preds = %if.end.i.us.us.1.3 - br label %pregion_for_end.i.1 - -pregion_for_end.i.1: ; preds = %pregion_for_end.i.1.loopexit, %vector.ph42, %pregion_for_end.i - %138 = trunc i64 %mul3.i.i to i32 - %conv.i.2 = or i32 %138, 2 - %cmp.i.2 = icmp sgt i32 %conv.i.2, 0 - %mul.i.2 = mul nsw i32 %conv.i.2, %2 - %cmp4.i.2 = icmp sgt i32 %sub.i, %conv.i.2 - %or.cond5 = and i1 %cmp.i.2, %cmp4.i.2 - br i1 %or.cond5, label %vector.scevcheck61, label %pregion_for_end.i.2 - -vector.scevcheck61: ; preds = %pregion_for_end.i.1 - %139 = mul i32 %conv.i.2, %2 - %140 = trunc i64 %4 to i32 - %141 = shl i32 %140, 5 - %142 = add i32 %139, %141 - %143 = icmp sgt i32 %142, 2147483616 - br i1 %143, label %pregion_for_entry.entry.i.us.us.2.preheader, label %vector.memcheck75 - -pregion_for_entry.entry.i.us.us.2.preheader: ; preds = %vector.memcheck75, %vector.scevcheck61 - br label %pregion_for_entry.entry.i.us.us.2 - -vector.memcheck75: ; preds = %vector.scevcheck61 - %144 = mul i32 %conv.i.2, %2 - %145 = trunc i64 %4 to i32 - %146 = shl i32 %145, 5 - %147 = add i32 %144, %146 - %148 = sext i32 %147 to i64 - %scevgep63 = getelementptr float, float* %0, i64 %148 - %149 = add nsw i64 %148, 32 - %scevgep65 = getelementptr float, float* %0, i64 %149 - %scevgep67 = getelementptr float, float* %1, i64 %148 - %scevgep69 = getelementptr float, float* %1, i64 %149 - %bound071 = icmp ult float* %scevgep63, %scevgep69 - %bound172 = icmp ult float* %scevgep67, %scevgep65 - %found.conflict73 = and i1 %bound071, %bound172 - br i1 %found.conflict73, label %pregion_for_entry.entry.i.us.us.2.preheader, label %vector.ph76 - -vector.ph76: ; preds = %vector.memcheck75 - %broadcast.splatinsert83 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat84 = shufflevector <8 x i64> %broadcast.splatinsert83, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert85 = insertelement <8 x i32> undef, i32 %sub.i, i32 0 - %broadcast.splat86 = shufflevector <8 x i32> %broadcast.splatinsert85, <8 x i32> undef, <8 x i32> zeroinitializer - %150 = trunc <8 x i64> %broadcast.splat84 to <8 x i32> - %151 = or <8 x i32> %150, - %152 = icmp sgt <8 x i32> %151, zeroinitializer - %153 = icmp sgt <8 x i32> %broadcast.splat86, %151 - %154 = and <8 x i1> %153, %152 - %155 = extractelement <8 x i32> %151, i32 0 - %156 = add nsw i32 %mul.i.2, %155 - %157 = sext i32 %156 to i64 - %158 = getelementptr inbounds float, float* %1, i64 %157 - %159 = bitcast float* %158 to <8 x i32>* - %wide.masked.load87 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %159, i32 4, <8 x i1> %154, <8 x i32> undef), !tbaa !12, !alias.scope !29 - %160 = getelementptr inbounds float, float* %0, i64 %157 - %161 = bitcast float* %160 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load87, <8 x i32>* %161, i32 4, <8 x i1> %154), !tbaa !12, !alias.scope !32, !noalias !29, !llvm.access.group !21 - %162 = trunc <8 x i64> %broadcast.splat84 to <8 x i32> - %163 = or <8 x i32> %162, - %164 = icmp sgt <8 x i32> %163, zeroinitializer - %165 = icmp sgt <8 x i32> %broadcast.splat86, %163 - %166 = and <8 x i1> %165, %164 - %167 = extractelement <8 x i32> %163, i32 0 - %168 = add nsw i32 %mul.i.2, %167 - %169 = sext i32 %168 to i64 - %170 = getelementptr inbounds float, float* %1, i64 %169 - %171 = bitcast float* %170 to <8 x i32>* - %wide.masked.load87.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %171, i32 4, <8 x i1> %166, <8 x i32> undef), !tbaa !12, !alias.scope !29 - %172 = getelementptr inbounds float, float* %0, i64 %169 - %173 = bitcast float* %172 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load87.1, <8 x i32>* %173, i32 4, <8 x i1> %166), !tbaa !12, !alias.scope !32, !noalias !29, !llvm.access.group !21 - %174 = trunc <8 x i64> %broadcast.splat84 to <8 x i32> - %175 = or <8 x i32> %174, - %176 = icmp sgt <8 x i32> %175, zeroinitializer - %177 = icmp sgt <8 x i32> %broadcast.splat86, %175 - %178 = and <8 x i1> %177, %176 - %179 = extractelement <8 x i32> %175, i32 0 - %180 = add nsw i32 %mul.i.2, %179 - %181 = sext i32 %180 to i64 - %182 = getelementptr inbounds float, float* %1, i64 %181 - %183 = bitcast float* %182 to <8 x i32>* - %wide.masked.load87.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %183, i32 4, <8 x i1> %178, <8 x i32> undef), !tbaa !12, !alias.scope !29 - %184 = getelementptr inbounds float, float* %0, i64 %181 - %185 = bitcast float* %184 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load87.2, <8 x i32>* %185, i32 4, <8 x i1> %178), !tbaa !12, !alias.scope !32, !noalias !29, !llvm.access.group !21 - %186 = trunc <8 x i64> %broadcast.splat84 to <8 x i32> - %187 = or <8 x i32> %186, - %188 = icmp sgt <8 x i32> %187, zeroinitializer - %189 = icmp sgt <8 x i32> %broadcast.splat86, %187 - %190 = and <8 x i1> %189, %188 - %191 = extractelement <8 x i32> %187, i32 0 - %192 = add nsw i32 %mul.i.2, %191 - %193 = sext i32 %192 to i64 - %194 = getelementptr inbounds float, float* %1, i64 %193 - %195 = bitcast float* %194 to <8 x i32>* - %wide.masked.load87.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %195, i32 4, <8 x i1> %190, <8 x i32> undef), !tbaa !12, !alias.scope !29 - %196 = getelementptr inbounds float, float* %0, i64 %193 - %197 = bitcast float* %196 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load87.3, <8 x i32>* %197, i32 4, <8 x i1> %190), !tbaa !12, !alias.scope !32, !noalias !29, !llvm.access.group !21 - br label %pregion_for_end.i.2 - -pregion_for_entry.entry.i.us.us.2: ; preds = %if.end.i.us.us.2.3, %pregion_for_entry.entry.i.us.us.2.preheader - %_local_id_x.0.us.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.us.us.2.preheader ], [ %593, %if.end.i.us.us.2.3 ] - %add1.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.0.us.us.2, %mul.i.i - %conv2.i.us.us.2 = trunc i64 %add1.i.i.us.us.2 to i32 - %cmp7.i.us.us.2 = icmp sgt i32 %conv2.i.us.us.2, 0 - %cmp11.i.us.us.2 = icmp sgt i32 %sub.i, %conv2.i.us.us.2 - %or.cond28.i.us.us.2 = and i1 %cmp11.i.us.us.2, %cmp7.i.us.us.2 - br i1 %or.cond28.i.us.us.2, label %if.then.i.us.us.2, label %if.end.i.us.us.2 - -if.then.i.us.us.2: ; preds = %pregion_for_entry.entry.i.us.us.2 - %add.i.us.us.2 = add nsw i32 %mul.i.2, %conv2.i.us.us.2 - %idxprom.i.us.us.2 = sext i32 %add.i.us.us.2 to i64 - %arrayidx.i.us.us.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.2 - %198 = bitcast float* %arrayidx.i.us.us.2 to i32* - %199 = load i32, i32* %198, align 4, !tbaa !12 - %arrayidx16.i.us.us.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.2 - %200 = bitcast float* %arrayidx16.i.us.us.2 to i32* - store i32 %199, i32* %200, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.2 - -if.end.i.us.us.2: ; preds = %if.then.i.us.us.2, %pregion_for_entry.entry.i.us.us.2 - %201 = or i64 %_local_id_x.0.us.us.2, 1 - %add1.i.i.us.us.2.1 = add nuw nsw i64 %201, %mul.i.i - %conv2.i.us.us.2.1 = trunc i64 %add1.i.i.us.us.2.1 to i32 - %cmp7.i.us.us.2.1 = icmp sgt i32 %conv2.i.us.us.2.1, 0 - %cmp11.i.us.us.2.1 = icmp sgt i32 %sub.i, %conv2.i.us.us.2.1 - %or.cond28.i.us.us.2.1 = and i1 %cmp11.i.us.us.2.1, %cmp7.i.us.us.2.1 - br i1 %or.cond28.i.us.us.2.1, label %if.then.i.us.us.2.1, label %if.end.i.us.us.2.1 - -pregion_for_end.i.2.loopexit: ; preds = %if.end.i.us.us.2.3 - br label %pregion_for_end.i.2 - -pregion_for_end.i.2: ; preds = %pregion_for_end.i.2.loopexit, %vector.ph76, %pregion_for_end.i.1 - %202 = trunc i64 %mul3.i.i to i32 - %conv.i.3 = or i32 %202, 3 - %cmp.i.3 = icmp sgt i32 %conv.i.3, 0 - %mul.i.3 = mul nsw i32 %conv.i.3, %2 - %cmp4.i.3 = icmp sgt i32 %sub.i, %conv.i.3 - %or.cond6 = and i1 %cmp.i.3, %cmp4.i.3 - br i1 %or.cond6, label %vector.scevcheck95, label %pregion_for_end.i.3 - -vector.scevcheck95: ; preds = %pregion_for_end.i.2 - %203 = mul i32 %conv.i.3, %2 - %204 = trunc i64 %4 to i32 - %205 = shl i32 %204, 5 - %206 = add i32 %203, %205 - %207 = icmp sgt i32 %206, 2147483616 - br i1 %207, label %pregion_for_entry.entry.i.us.us.3.preheader, label %vector.memcheck109 - -pregion_for_entry.entry.i.us.us.3.preheader: ; preds = %vector.memcheck109, %vector.scevcheck95 - br label %pregion_for_entry.entry.i.us.us.3 - -vector.memcheck109: ; preds = %vector.scevcheck95 - %208 = mul i32 %conv.i.3, %2 - %209 = trunc i64 %4 to i32 - %210 = shl i32 %209, 5 - %211 = add i32 %208, %210 - %212 = sext i32 %211 to i64 - %scevgep97 = getelementptr float, float* %0, i64 %212 - %213 = add nsw i64 %212, 32 - %scevgep99 = getelementptr float, float* %0, i64 %213 - %scevgep101 = getelementptr float, float* %1, i64 %212 - %scevgep103 = getelementptr float, float* %1, i64 %213 - %bound0105 = icmp ult float* %scevgep97, %scevgep103 - %bound1106 = icmp ult float* %scevgep101, %scevgep99 - %found.conflict107 = and i1 %bound0105, %bound1106 - br i1 %found.conflict107, label %pregion_for_entry.entry.i.us.us.3.preheader, label %vector.ph110 - -vector.ph110: ; preds = %vector.memcheck109 - %broadcast.splatinsert117 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat118 = shufflevector <8 x i64> %broadcast.splatinsert117, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert119 = insertelement <8 x i32> undef, i32 %sub.i, i32 0 - %broadcast.splat120 = shufflevector <8 x i32> %broadcast.splatinsert119, <8 x i32> undef, <8 x i32> zeroinitializer - %214 = trunc <8 x i64> %broadcast.splat118 to <8 x i32> - %215 = or <8 x i32> %214, - %216 = icmp sgt <8 x i32> %215, zeroinitializer - %217 = icmp sgt <8 x i32> %broadcast.splat120, %215 - %218 = and <8 x i1> %217, %216 - %219 = extractelement <8 x i32> %215, i32 0 - %220 = add nsw i32 %mul.i.3, %219 - %221 = sext i32 %220 to i64 - %222 = getelementptr inbounds float, float* %1, i64 %221 - %223 = bitcast float* %222 to <8 x i32>* - %wide.masked.load121 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %223, i32 4, <8 x i1> %218, <8 x i32> undef), !tbaa !12, !alias.scope !34 - %224 = getelementptr inbounds float, float* %0, i64 %221 - %225 = bitcast float* %224 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load121, <8 x i32>* %225, i32 4, <8 x i1> %218), !tbaa !12, !alias.scope !37, !noalias !34, !llvm.access.group !21 - %226 = trunc <8 x i64> %broadcast.splat118 to <8 x i32> - %227 = or <8 x i32> %226, - %228 = icmp sgt <8 x i32> %227, zeroinitializer - %229 = icmp sgt <8 x i32> %broadcast.splat120, %227 - %230 = and <8 x i1> %229, %228 - %231 = extractelement <8 x i32> %227, i32 0 - %232 = add nsw i32 %mul.i.3, %231 - %233 = sext i32 %232 to i64 - %234 = getelementptr inbounds float, float* %1, i64 %233 - %235 = bitcast float* %234 to <8 x i32>* - %wide.masked.load121.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %235, i32 4, <8 x i1> %230, <8 x i32> undef), !tbaa !12, !alias.scope !34 - %236 = getelementptr inbounds float, float* %0, i64 %233 - %237 = bitcast float* %236 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load121.1, <8 x i32>* %237, i32 4, <8 x i1> %230), !tbaa !12, !alias.scope !37, !noalias !34, !llvm.access.group !21 - %238 = trunc <8 x i64> %broadcast.splat118 to <8 x i32> - %239 = or <8 x i32> %238, - %240 = icmp sgt <8 x i32> %239, zeroinitializer - %241 = icmp sgt <8 x i32> %broadcast.splat120, %239 - %242 = and <8 x i1> %241, %240 - %243 = extractelement <8 x i32> %239, i32 0 - %244 = add nsw i32 %mul.i.3, %243 - %245 = sext i32 %244 to i64 - %246 = getelementptr inbounds float, float* %1, i64 %245 - %247 = bitcast float* %246 to <8 x i32>* - %wide.masked.load121.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %247, i32 4, <8 x i1> %242, <8 x i32> undef), !tbaa !12, !alias.scope !34 - %248 = getelementptr inbounds float, float* %0, i64 %245 - %249 = bitcast float* %248 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load121.2, <8 x i32>* %249, i32 4, <8 x i1> %242), !tbaa !12, !alias.scope !37, !noalias !34, !llvm.access.group !21 - %250 = trunc <8 x i64> %broadcast.splat118 to <8 x i32> - %251 = or <8 x i32> %250, - %252 = icmp sgt <8 x i32> %251, zeroinitializer - %253 = icmp sgt <8 x i32> %broadcast.splat120, %251 - %254 = and <8 x i1> %253, %252 - %255 = extractelement <8 x i32> %251, i32 0 - %256 = add nsw i32 %mul.i.3, %255 - %257 = sext i32 %256 to i64 - %258 = getelementptr inbounds float, float* %1, i64 %257 - %259 = bitcast float* %258 to <8 x i32>* - %wide.masked.load121.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %259, i32 4, <8 x i1> %254, <8 x i32> undef), !tbaa !12, !alias.scope !34 - %260 = getelementptr inbounds float, float* %0, i64 %257 - %261 = bitcast float* %260 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load121.3, <8 x i32>* %261, i32 4, <8 x i1> %254), !tbaa !12, !alias.scope !37, !noalias !34, !llvm.access.group !21 - br label %pregion_for_end.i.3 - -pregion_for_entry.entry.i.us.us.3: ; preds = %if.end.i.us.us.3.3, %pregion_for_entry.entry.i.us.us.3.preheader - %_local_id_x.0.us.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.us.us.3.preheader ], [ %581, %if.end.i.us.us.3.3 ] - %add1.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.0.us.us.3, %mul.i.i - %conv2.i.us.us.3 = trunc i64 %add1.i.i.us.us.3 to i32 - %cmp7.i.us.us.3 = icmp sgt i32 %conv2.i.us.us.3, 0 - %cmp11.i.us.us.3 = icmp sgt i32 %sub.i, %conv2.i.us.us.3 - %or.cond28.i.us.us.3 = and i1 %cmp11.i.us.us.3, %cmp7.i.us.us.3 - br i1 %or.cond28.i.us.us.3, label %if.then.i.us.us.3, label %if.end.i.us.us.3 - -if.then.i.us.us.3: ; preds = %pregion_for_entry.entry.i.us.us.3 - %add.i.us.us.3 = add nsw i32 %mul.i.3, %conv2.i.us.us.3 - %idxprom.i.us.us.3 = sext i32 %add.i.us.us.3 to i64 - %arrayidx.i.us.us.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.3 - %262 = bitcast float* %arrayidx.i.us.us.3 to i32* - %263 = load i32, i32* %262, align 4, !tbaa !12 - %arrayidx16.i.us.us.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.3 - %264 = bitcast float* %arrayidx16.i.us.us.3 to i32* - store i32 %263, i32* %264, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.3 - -if.end.i.us.us.3: ; preds = %if.then.i.us.us.3, %pregion_for_entry.entry.i.us.us.3 - %265 = or i64 %_local_id_x.0.us.us.3, 1 - %add1.i.i.us.us.3.1 = add nuw nsw i64 %265, %mul.i.i - %conv2.i.us.us.3.1 = trunc i64 %add1.i.i.us.us.3.1 to i32 - %cmp7.i.us.us.3.1 = icmp sgt i32 %conv2.i.us.us.3.1, 0 - %cmp11.i.us.us.3.1 = icmp sgt i32 %sub.i, %conv2.i.us.us.3.1 - %or.cond28.i.us.us.3.1 = and i1 %cmp11.i.us.us.3.1, %cmp7.i.us.us.3.1 - br i1 %or.cond28.i.us.us.3.1, label %if.then.i.us.us.3.1, label %if.end.i.us.us.3.1 - -pregion_for_end.i.3.loopexit: ; preds = %if.end.i.us.us.3.3 - br label %pregion_for_end.i.3 - -pregion_for_end.i.3: ; preds = %pregion_for_end.i.3.loopexit, %vector.ph110, %pregion_for_end.i.2 - %266 = trunc i64 %mul3.i.i to i32 - %conv.i.4 = or i32 %266, 4 - %cmp.i.4 = icmp sgt i32 %conv.i.4, 0 - %mul.i.4 = mul nsw i32 %conv.i.4, %2 - %cmp4.i.4 = icmp sgt i32 %sub.i, %conv.i.4 - %or.cond7 = and i1 %cmp.i.4, %cmp4.i.4 - br i1 %or.cond7, label %vector.scevcheck129, label %pregion_for_end.i.4 - -vector.scevcheck129: ; preds = %pregion_for_end.i.3 - %267 = mul i32 %conv.i.4, %2 - %268 = trunc i64 %4 to i32 - %269 = shl i32 %268, 5 - %270 = add i32 %267, %269 - %271 = icmp sgt i32 %270, 2147483616 - br i1 %271, label %pregion_for_entry.entry.i.us.us.4.preheader, label %vector.memcheck143 - -pregion_for_entry.entry.i.us.us.4.preheader: ; preds = %vector.memcheck143, %vector.scevcheck129 - br label %pregion_for_entry.entry.i.us.us.4 - -vector.memcheck143: ; preds = %vector.scevcheck129 - %272 = mul i32 %conv.i.4, %2 - %273 = trunc i64 %4 to i32 - %274 = shl i32 %273, 5 - %275 = add i32 %272, %274 - %276 = sext i32 %275 to i64 - %scevgep131 = getelementptr float, float* %0, i64 %276 - %277 = add nsw i64 %276, 32 - %scevgep133 = getelementptr float, float* %0, i64 %277 - %scevgep135 = getelementptr float, float* %1, i64 %276 - %scevgep137 = getelementptr float, float* %1, i64 %277 - %bound0139 = icmp ult float* %scevgep131, %scevgep137 - %bound1140 = icmp ult float* %scevgep135, %scevgep133 - %found.conflict141 = and i1 %bound0139, %bound1140 - br i1 %found.conflict141, label %pregion_for_entry.entry.i.us.us.4.preheader, label %vector.ph144 - -vector.ph144: ; preds = %vector.memcheck143 - %broadcast.splatinsert151 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat152 = shufflevector <8 x i64> %broadcast.splatinsert151, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert153 = insertelement <8 x i32> undef, i32 %sub.i, i32 0 - %broadcast.splat154 = shufflevector <8 x i32> %broadcast.splatinsert153, <8 x i32> undef, <8 x i32> zeroinitializer - %278 = trunc <8 x i64> %broadcast.splat152 to <8 x i32> - %279 = or <8 x i32> %278, - %280 = icmp sgt <8 x i32> %279, zeroinitializer - %281 = icmp sgt <8 x i32> %broadcast.splat154, %279 - %282 = and <8 x i1> %281, %280 - %283 = extractelement <8 x i32> %279, i32 0 - %284 = add nsw i32 %mul.i.4, %283 - %285 = sext i32 %284 to i64 - %286 = getelementptr inbounds float, float* %1, i64 %285 - %287 = bitcast float* %286 to <8 x i32>* - %wide.masked.load155 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %287, i32 4, <8 x i1> %282, <8 x i32> undef), !tbaa !12, !alias.scope !39 - %288 = getelementptr inbounds float, float* %0, i64 %285 - %289 = bitcast float* %288 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load155, <8 x i32>* %289, i32 4, <8 x i1> %282), !tbaa !12, !alias.scope !42, !noalias !39, !llvm.access.group !21 - %290 = trunc <8 x i64> %broadcast.splat152 to <8 x i32> - %291 = or <8 x i32> %290, - %292 = icmp sgt <8 x i32> %291, zeroinitializer - %293 = icmp sgt <8 x i32> %broadcast.splat154, %291 - %294 = and <8 x i1> %293, %292 - %295 = extractelement <8 x i32> %291, i32 0 - %296 = add nsw i32 %mul.i.4, %295 - %297 = sext i32 %296 to i64 - %298 = getelementptr inbounds float, float* %1, i64 %297 - %299 = bitcast float* %298 to <8 x i32>* - %wide.masked.load155.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %299, i32 4, <8 x i1> %294, <8 x i32> undef), !tbaa !12, !alias.scope !39 - %300 = getelementptr inbounds float, float* %0, i64 %297 - %301 = bitcast float* %300 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load155.1, <8 x i32>* %301, i32 4, <8 x i1> %294), !tbaa !12, !alias.scope !42, !noalias !39, !llvm.access.group !21 - %302 = trunc <8 x i64> %broadcast.splat152 to <8 x i32> - %303 = or <8 x i32> %302, - %304 = icmp sgt <8 x i32> %303, zeroinitializer - %305 = icmp sgt <8 x i32> %broadcast.splat154, %303 - %306 = and <8 x i1> %305, %304 - %307 = extractelement <8 x i32> %303, i32 0 - %308 = add nsw i32 %mul.i.4, %307 - %309 = sext i32 %308 to i64 - %310 = getelementptr inbounds float, float* %1, i64 %309 - %311 = bitcast float* %310 to <8 x i32>* - %wide.masked.load155.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %311, i32 4, <8 x i1> %306, <8 x i32> undef), !tbaa !12, !alias.scope !39 - %312 = getelementptr inbounds float, float* %0, i64 %309 - %313 = bitcast float* %312 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load155.2, <8 x i32>* %313, i32 4, <8 x i1> %306), !tbaa !12, !alias.scope !42, !noalias !39, !llvm.access.group !21 - %314 = trunc <8 x i64> %broadcast.splat152 to <8 x i32> - %315 = or <8 x i32> %314, - %316 = icmp sgt <8 x i32> %315, zeroinitializer - %317 = icmp sgt <8 x i32> %broadcast.splat154, %315 - %318 = and <8 x i1> %317, %316 - %319 = extractelement <8 x i32> %315, i32 0 - %320 = add nsw i32 %mul.i.4, %319 - %321 = sext i32 %320 to i64 - %322 = getelementptr inbounds float, float* %1, i64 %321 - %323 = bitcast float* %322 to <8 x i32>* - %wide.masked.load155.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %323, i32 4, <8 x i1> %318, <8 x i32> undef), !tbaa !12, !alias.scope !39 - %324 = getelementptr inbounds float, float* %0, i64 %321 - %325 = bitcast float* %324 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load155.3, <8 x i32>* %325, i32 4, <8 x i1> %318), !tbaa !12, !alias.scope !42, !noalias !39, !llvm.access.group !21 - br label %pregion_for_end.i.4 - -pregion_for_entry.entry.i.us.us.4: ; preds = %if.end.i.us.us.4.3, %pregion_for_entry.entry.i.us.us.4.preheader - %_local_id_x.0.us.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.us.us.4.preheader ], [ %569, %if.end.i.us.us.4.3 ] - %add1.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.0.us.us.4, %mul.i.i - %conv2.i.us.us.4 = trunc i64 %add1.i.i.us.us.4 to i32 - %cmp7.i.us.us.4 = icmp sgt i32 %conv2.i.us.us.4, 0 - %cmp11.i.us.us.4 = icmp sgt i32 %sub.i, %conv2.i.us.us.4 - %or.cond28.i.us.us.4 = and i1 %cmp11.i.us.us.4, %cmp7.i.us.us.4 - br i1 %or.cond28.i.us.us.4, label %if.then.i.us.us.4, label %if.end.i.us.us.4 - -if.then.i.us.us.4: ; preds = %pregion_for_entry.entry.i.us.us.4 - %add.i.us.us.4 = add nsw i32 %mul.i.4, %conv2.i.us.us.4 - %idxprom.i.us.us.4 = sext i32 %add.i.us.us.4 to i64 - %arrayidx.i.us.us.4 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.4 - %326 = bitcast float* %arrayidx.i.us.us.4 to i32* - %327 = load i32, i32* %326, align 4, !tbaa !12 - %arrayidx16.i.us.us.4 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.4 - %328 = bitcast float* %arrayidx16.i.us.us.4 to i32* - store i32 %327, i32* %328, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.4 - -if.end.i.us.us.4: ; preds = %if.then.i.us.us.4, %pregion_for_entry.entry.i.us.us.4 - %329 = or i64 %_local_id_x.0.us.us.4, 1 - %add1.i.i.us.us.4.1 = add nuw nsw i64 %329, %mul.i.i - %conv2.i.us.us.4.1 = trunc i64 %add1.i.i.us.us.4.1 to i32 - %cmp7.i.us.us.4.1 = icmp sgt i32 %conv2.i.us.us.4.1, 0 - %cmp11.i.us.us.4.1 = icmp sgt i32 %sub.i, %conv2.i.us.us.4.1 - %or.cond28.i.us.us.4.1 = and i1 %cmp11.i.us.us.4.1, %cmp7.i.us.us.4.1 - br i1 %or.cond28.i.us.us.4.1, label %if.then.i.us.us.4.1, label %if.end.i.us.us.4.1 - -pregion_for_end.i.4.loopexit: ; preds = %if.end.i.us.us.4.3 - br label %pregion_for_end.i.4 - -pregion_for_end.i.4: ; preds = %pregion_for_end.i.4.loopexit, %vector.ph144, %pregion_for_end.i.3 - %330 = trunc i64 %mul3.i.i to i32 - %conv.i.5 = or i32 %330, 5 - %cmp.i.5 = icmp sgt i32 %conv.i.5, 0 - %mul.i.5 = mul nsw i32 %conv.i.5, %2 - %cmp4.i.5 = icmp sgt i32 %sub.i, %conv.i.5 - %or.cond8 = and i1 %cmp.i.5, %cmp4.i.5 - br i1 %or.cond8, label %vector.scevcheck163, label %pregion_for_end.i.5 - -vector.scevcheck163: ; preds = %pregion_for_end.i.4 - %331 = mul i32 %conv.i.5, %2 - %332 = trunc i64 %4 to i32 - %333 = shl i32 %332, 5 - %334 = add i32 %331, %333 - %335 = icmp sgt i32 %334, 2147483616 - br i1 %335, label %pregion_for_entry.entry.i.us.us.5.preheader, label %vector.memcheck177 - -pregion_for_entry.entry.i.us.us.5.preheader: ; preds = %vector.memcheck177, %vector.scevcheck163 - br label %pregion_for_entry.entry.i.us.us.5 - -vector.memcheck177: ; preds = %vector.scevcheck163 - %336 = mul i32 %conv.i.5, %2 - %337 = trunc i64 %4 to i32 - %338 = shl i32 %337, 5 - %339 = add i32 %336, %338 - %340 = sext i32 %339 to i64 - %scevgep165 = getelementptr float, float* %0, i64 %340 - %341 = add nsw i64 %340, 32 - %scevgep167 = getelementptr float, float* %0, i64 %341 - %scevgep169 = getelementptr float, float* %1, i64 %340 - %scevgep171 = getelementptr float, float* %1, i64 %341 - %bound0173 = icmp ult float* %scevgep165, %scevgep171 - %bound1174 = icmp ult float* %scevgep169, %scevgep167 - %found.conflict175 = and i1 %bound0173, %bound1174 - br i1 %found.conflict175, label %pregion_for_entry.entry.i.us.us.5.preheader, label %vector.ph178 - -vector.ph178: ; preds = %vector.memcheck177 - %broadcast.splatinsert185 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat186 = shufflevector <8 x i64> %broadcast.splatinsert185, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert187 = insertelement <8 x i32> undef, i32 %sub.i, i32 0 - %broadcast.splat188 = shufflevector <8 x i32> %broadcast.splatinsert187, <8 x i32> undef, <8 x i32> zeroinitializer - %342 = trunc <8 x i64> %broadcast.splat186 to <8 x i32> - %343 = or <8 x i32> %342, - %344 = icmp sgt <8 x i32> %343, zeroinitializer - %345 = icmp sgt <8 x i32> %broadcast.splat188, %343 - %346 = and <8 x i1> %345, %344 - %347 = extractelement <8 x i32> %343, i32 0 - %348 = add nsw i32 %mul.i.5, %347 - %349 = sext i32 %348 to i64 - %350 = getelementptr inbounds float, float* %1, i64 %349 - %351 = bitcast float* %350 to <8 x i32>* - %wide.masked.load189 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %351, i32 4, <8 x i1> %346, <8 x i32> undef), !tbaa !12, !alias.scope !44 - %352 = getelementptr inbounds float, float* %0, i64 %349 - %353 = bitcast float* %352 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load189, <8 x i32>* %353, i32 4, <8 x i1> %346), !tbaa !12, !alias.scope !47, !noalias !44, !llvm.access.group !21 - %354 = trunc <8 x i64> %broadcast.splat186 to <8 x i32> - %355 = or <8 x i32> %354, - %356 = icmp sgt <8 x i32> %355, zeroinitializer - %357 = icmp sgt <8 x i32> %broadcast.splat188, %355 - %358 = and <8 x i1> %357, %356 - %359 = extractelement <8 x i32> %355, i32 0 - %360 = add nsw i32 %mul.i.5, %359 - %361 = sext i32 %360 to i64 - %362 = getelementptr inbounds float, float* %1, i64 %361 - %363 = bitcast float* %362 to <8 x i32>* - %wide.masked.load189.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %363, i32 4, <8 x i1> %358, <8 x i32> undef), !tbaa !12, !alias.scope !44 - %364 = getelementptr inbounds float, float* %0, i64 %361 - %365 = bitcast float* %364 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load189.1, <8 x i32>* %365, i32 4, <8 x i1> %358), !tbaa !12, !alias.scope !47, !noalias !44, !llvm.access.group !21 - %366 = trunc <8 x i64> %broadcast.splat186 to <8 x i32> - %367 = or <8 x i32> %366, - %368 = icmp sgt <8 x i32> %367, zeroinitializer - %369 = icmp sgt <8 x i32> %broadcast.splat188, %367 - %370 = and <8 x i1> %369, %368 - %371 = extractelement <8 x i32> %367, i32 0 - %372 = add nsw i32 %mul.i.5, %371 - %373 = sext i32 %372 to i64 - %374 = getelementptr inbounds float, float* %1, i64 %373 - %375 = bitcast float* %374 to <8 x i32>* - %wide.masked.load189.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %375, i32 4, <8 x i1> %370, <8 x i32> undef), !tbaa !12, !alias.scope !44 - %376 = getelementptr inbounds float, float* %0, i64 %373 - %377 = bitcast float* %376 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load189.2, <8 x i32>* %377, i32 4, <8 x i1> %370), !tbaa !12, !alias.scope !47, !noalias !44, !llvm.access.group !21 - %378 = trunc <8 x i64> %broadcast.splat186 to <8 x i32> - %379 = or <8 x i32> %378, - %380 = icmp sgt <8 x i32> %379, zeroinitializer - %381 = icmp sgt <8 x i32> %broadcast.splat188, %379 - %382 = and <8 x i1> %381, %380 - %383 = extractelement <8 x i32> %379, i32 0 - %384 = add nsw i32 %mul.i.5, %383 - %385 = sext i32 %384 to i64 - %386 = getelementptr inbounds float, float* %1, i64 %385 - %387 = bitcast float* %386 to <8 x i32>* - %wide.masked.load189.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %387, i32 4, <8 x i1> %382, <8 x i32> undef), !tbaa !12, !alias.scope !44 - %388 = getelementptr inbounds float, float* %0, i64 %385 - %389 = bitcast float* %388 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load189.3, <8 x i32>* %389, i32 4, <8 x i1> %382), !tbaa !12, !alias.scope !47, !noalias !44, !llvm.access.group !21 - br label %pregion_for_end.i.5 - -pregion_for_entry.entry.i.us.us.5: ; preds = %if.end.i.us.us.5.3, %pregion_for_entry.entry.i.us.us.5.preheader - %_local_id_x.0.us.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.us.us.5.preheader ], [ %557, %if.end.i.us.us.5.3 ] - %add1.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.0.us.us.5, %mul.i.i - %conv2.i.us.us.5 = trunc i64 %add1.i.i.us.us.5 to i32 - %cmp7.i.us.us.5 = icmp sgt i32 %conv2.i.us.us.5, 0 - %cmp11.i.us.us.5 = icmp sgt i32 %sub.i, %conv2.i.us.us.5 - %or.cond28.i.us.us.5 = and i1 %cmp11.i.us.us.5, %cmp7.i.us.us.5 - br i1 %or.cond28.i.us.us.5, label %if.then.i.us.us.5, label %if.end.i.us.us.5 - -if.then.i.us.us.5: ; preds = %pregion_for_entry.entry.i.us.us.5 - %add.i.us.us.5 = add nsw i32 %mul.i.5, %conv2.i.us.us.5 - %idxprom.i.us.us.5 = sext i32 %add.i.us.us.5 to i64 - %arrayidx.i.us.us.5 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.5 - %390 = bitcast float* %arrayidx.i.us.us.5 to i32* - %391 = load i32, i32* %390, align 4, !tbaa !12 - %arrayidx16.i.us.us.5 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.5 - %392 = bitcast float* %arrayidx16.i.us.us.5 to i32* - store i32 %391, i32* %392, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.5 - -if.end.i.us.us.5: ; preds = %if.then.i.us.us.5, %pregion_for_entry.entry.i.us.us.5 - %393 = or i64 %_local_id_x.0.us.us.5, 1 - %add1.i.i.us.us.5.1 = add nuw nsw i64 %393, %mul.i.i - %conv2.i.us.us.5.1 = trunc i64 %add1.i.i.us.us.5.1 to i32 - %cmp7.i.us.us.5.1 = icmp sgt i32 %conv2.i.us.us.5.1, 0 - %cmp11.i.us.us.5.1 = icmp sgt i32 %sub.i, %conv2.i.us.us.5.1 - %or.cond28.i.us.us.5.1 = and i1 %cmp11.i.us.us.5.1, %cmp7.i.us.us.5.1 - br i1 %or.cond28.i.us.us.5.1, label %if.then.i.us.us.5.1, label %if.end.i.us.us.5.1 - -pregion_for_end.i.5.loopexit: ; preds = %if.end.i.us.us.5.3 - br label %pregion_for_end.i.5 - -pregion_for_end.i.5: ; preds = %pregion_for_end.i.5.loopexit, %vector.ph178, %pregion_for_end.i.4 - %394 = trunc i64 %mul3.i.i to i32 - %conv.i.6 = or i32 %394, 6 - %cmp.i.6 = icmp sgt i32 %conv.i.6, 0 - %mul.i.6 = mul nsw i32 %conv.i.6, %2 - %cmp4.i.6 = icmp sgt i32 %sub.i, %conv.i.6 - %or.cond9 = and i1 %cmp.i.6, %cmp4.i.6 - br i1 %or.cond9, label %vector.scevcheck197, label %pregion_for_end.i.6 - -vector.scevcheck197: ; preds = %pregion_for_end.i.5 - %395 = mul i32 %conv.i.6, %2 - %396 = trunc i64 %4 to i32 - %397 = shl i32 %396, 5 - %398 = add i32 %395, %397 - %399 = icmp sgt i32 %398, 2147483616 - br i1 %399, label %pregion_for_entry.entry.i.us.us.6.preheader, label %vector.memcheck211 - -pregion_for_entry.entry.i.us.us.6.preheader: ; preds = %vector.memcheck211, %vector.scevcheck197 - br label %pregion_for_entry.entry.i.us.us.6 - -vector.memcheck211: ; preds = %vector.scevcheck197 - %400 = mul i32 %conv.i.6, %2 - %401 = trunc i64 %4 to i32 - %402 = shl i32 %401, 5 - %403 = add i32 %400, %402 - %404 = sext i32 %403 to i64 - %scevgep199 = getelementptr float, float* %0, i64 %404 - %405 = add nsw i64 %404, 32 - %scevgep201 = getelementptr float, float* %0, i64 %405 - %scevgep203 = getelementptr float, float* %1, i64 %404 - %scevgep205 = getelementptr float, float* %1, i64 %405 - %bound0207 = icmp ult float* %scevgep199, %scevgep205 - %bound1208 = icmp ult float* %scevgep203, %scevgep201 - %found.conflict209 = and i1 %bound0207, %bound1208 - br i1 %found.conflict209, label %pregion_for_entry.entry.i.us.us.6.preheader, label %vector.ph212 - -vector.ph212: ; preds = %vector.memcheck211 - %broadcast.splatinsert219 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat220 = shufflevector <8 x i64> %broadcast.splatinsert219, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert221 = insertelement <8 x i32> undef, i32 %sub.i, i32 0 - %broadcast.splat222 = shufflevector <8 x i32> %broadcast.splatinsert221, <8 x i32> undef, <8 x i32> zeroinitializer - %406 = trunc <8 x i64> %broadcast.splat220 to <8 x i32> - %407 = or <8 x i32> %406, - %408 = icmp sgt <8 x i32> %407, zeroinitializer - %409 = icmp sgt <8 x i32> %broadcast.splat222, %407 - %410 = and <8 x i1> %409, %408 - %411 = extractelement <8 x i32> %407, i32 0 - %412 = add nsw i32 %mul.i.6, %411 - %413 = sext i32 %412 to i64 - %414 = getelementptr inbounds float, float* %1, i64 %413 - %415 = bitcast float* %414 to <8 x i32>* - %wide.masked.load223 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %415, i32 4, <8 x i1> %410, <8 x i32> undef), !tbaa !12, !alias.scope !49 - %416 = getelementptr inbounds float, float* %0, i64 %413 - %417 = bitcast float* %416 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load223, <8 x i32>* %417, i32 4, <8 x i1> %410), !tbaa !12, !alias.scope !52, !noalias !49, !llvm.access.group !21 - %418 = trunc <8 x i64> %broadcast.splat220 to <8 x i32> - %419 = or <8 x i32> %418, - %420 = icmp sgt <8 x i32> %419, zeroinitializer - %421 = icmp sgt <8 x i32> %broadcast.splat222, %419 - %422 = and <8 x i1> %421, %420 - %423 = extractelement <8 x i32> %419, i32 0 - %424 = add nsw i32 %mul.i.6, %423 - %425 = sext i32 %424 to i64 - %426 = getelementptr inbounds float, float* %1, i64 %425 - %427 = bitcast float* %426 to <8 x i32>* - %wide.masked.load223.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %427, i32 4, <8 x i1> %422, <8 x i32> undef), !tbaa !12, !alias.scope !49 - %428 = getelementptr inbounds float, float* %0, i64 %425 - %429 = bitcast float* %428 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load223.1, <8 x i32>* %429, i32 4, <8 x i1> %422), !tbaa !12, !alias.scope !52, !noalias !49, !llvm.access.group !21 - %430 = trunc <8 x i64> %broadcast.splat220 to <8 x i32> - %431 = or <8 x i32> %430, - %432 = icmp sgt <8 x i32> %431, zeroinitializer - %433 = icmp sgt <8 x i32> %broadcast.splat222, %431 - %434 = and <8 x i1> %433, %432 - %435 = extractelement <8 x i32> %431, i32 0 - %436 = add nsw i32 %mul.i.6, %435 - %437 = sext i32 %436 to i64 - %438 = getelementptr inbounds float, float* %1, i64 %437 - %439 = bitcast float* %438 to <8 x i32>* - %wide.masked.load223.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %439, i32 4, <8 x i1> %434, <8 x i32> undef), !tbaa !12, !alias.scope !49 - %440 = getelementptr inbounds float, float* %0, i64 %437 - %441 = bitcast float* %440 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load223.2, <8 x i32>* %441, i32 4, <8 x i1> %434), !tbaa !12, !alias.scope !52, !noalias !49, !llvm.access.group !21 - %442 = trunc <8 x i64> %broadcast.splat220 to <8 x i32> - %443 = or <8 x i32> %442, - %444 = icmp sgt <8 x i32> %443, zeroinitializer - %445 = icmp sgt <8 x i32> %broadcast.splat222, %443 - %446 = and <8 x i1> %445, %444 - %447 = extractelement <8 x i32> %443, i32 0 - %448 = add nsw i32 %mul.i.6, %447 - %449 = sext i32 %448 to i64 - %450 = getelementptr inbounds float, float* %1, i64 %449 - %451 = bitcast float* %450 to <8 x i32>* - %wide.masked.load223.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %451, i32 4, <8 x i1> %446, <8 x i32> undef), !tbaa !12, !alias.scope !49 - %452 = getelementptr inbounds float, float* %0, i64 %449 - %453 = bitcast float* %452 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load223.3, <8 x i32>* %453, i32 4, <8 x i1> %446), !tbaa !12, !alias.scope !52, !noalias !49, !llvm.access.group !21 - br label %pregion_for_end.i.6 - -pregion_for_entry.entry.i.us.us.6: ; preds = %if.end.i.us.us.6.3, %pregion_for_entry.entry.i.us.us.6.preheader - %_local_id_x.0.us.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.us.us.6.preheader ], [ %545, %if.end.i.us.us.6.3 ] - %add1.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.0.us.us.6, %mul.i.i - %conv2.i.us.us.6 = trunc i64 %add1.i.i.us.us.6 to i32 - %cmp7.i.us.us.6 = icmp sgt i32 %conv2.i.us.us.6, 0 - %cmp11.i.us.us.6 = icmp sgt i32 %sub.i, %conv2.i.us.us.6 - %or.cond28.i.us.us.6 = and i1 %cmp11.i.us.us.6, %cmp7.i.us.us.6 - br i1 %or.cond28.i.us.us.6, label %if.then.i.us.us.6, label %if.end.i.us.us.6 - -if.then.i.us.us.6: ; preds = %pregion_for_entry.entry.i.us.us.6 - %add.i.us.us.6 = add nsw i32 %mul.i.6, %conv2.i.us.us.6 - %idxprom.i.us.us.6 = sext i32 %add.i.us.us.6 to i64 - %arrayidx.i.us.us.6 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.6 - %454 = bitcast float* %arrayidx.i.us.us.6 to i32* - %455 = load i32, i32* %454, align 4, !tbaa !12 - %arrayidx16.i.us.us.6 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.6 - %456 = bitcast float* %arrayidx16.i.us.us.6 to i32* - store i32 %455, i32* %456, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.6 - -if.end.i.us.us.6: ; preds = %if.then.i.us.us.6, %pregion_for_entry.entry.i.us.us.6 - %457 = or i64 %_local_id_x.0.us.us.6, 1 - %add1.i.i.us.us.6.1 = add nuw nsw i64 %457, %mul.i.i - %conv2.i.us.us.6.1 = trunc i64 %add1.i.i.us.us.6.1 to i32 - %cmp7.i.us.us.6.1 = icmp sgt i32 %conv2.i.us.us.6.1, 0 - %cmp11.i.us.us.6.1 = icmp sgt i32 %sub.i, %conv2.i.us.us.6.1 - %or.cond28.i.us.us.6.1 = and i1 %cmp11.i.us.us.6.1, %cmp7.i.us.us.6.1 - br i1 %or.cond28.i.us.us.6.1, label %if.then.i.us.us.6.1, label %if.end.i.us.us.6.1 - -pregion_for_end.i.6.loopexit: ; preds = %if.end.i.us.us.6.3 - br label %pregion_for_end.i.6 - -pregion_for_end.i.6: ; preds = %pregion_for_end.i.6.loopexit, %vector.ph212, %pregion_for_end.i.5 - %458 = trunc i64 %mul3.i.i to i32 - %conv.i.7 = or i32 %458, 7 - %cmp.i.7 = icmp sgt i32 %conv.i.7, 0 - %mul.i.7 = mul nsw i32 %conv.i.7, %2 - %cmp4.i.7 = icmp sgt i32 %sub.i, %conv.i.7 - %or.cond10 = and i1 %cmp.i.7, %cmp4.i.7 - br i1 %or.cond10, label %vector.scevcheck231, label %pregion_for_end.i.7 - -vector.scevcheck231: ; preds = %pregion_for_end.i.6 - %459 = mul i32 %conv.i.7, %2 - %460 = trunc i64 %4 to i32 - %461 = shl i32 %460, 5 - %462 = add i32 %459, %461 - %463 = icmp sgt i32 %462, 2147483616 - br i1 %463, label %pregion_for_entry.entry.i.us.us.7.preheader, label %vector.memcheck245 - -pregion_for_entry.entry.i.us.us.7.preheader: ; preds = %vector.memcheck245, %vector.scevcheck231 - br label %pregion_for_entry.entry.i.us.us.7 - -vector.memcheck245: ; preds = %vector.scevcheck231 - %464 = mul i32 %conv.i.7, %2 - %465 = trunc i64 %4 to i32 - %466 = shl i32 %465, 5 - %467 = add i32 %464, %466 - %468 = sext i32 %467 to i64 - %scevgep233 = getelementptr float, float* %0, i64 %468 - %469 = add nsw i64 %468, 32 - %scevgep235 = getelementptr float, float* %0, i64 %469 - %scevgep237 = getelementptr float, float* %1, i64 %468 - %scevgep239 = getelementptr float, float* %1, i64 %469 - %bound0241 = icmp ult float* %scevgep233, %scevgep239 - %bound1242 = icmp ult float* %scevgep237, %scevgep235 - %found.conflict243 = and i1 %bound0241, %bound1242 - br i1 %found.conflict243, label %pregion_for_entry.entry.i.us.us.7.preheader, label %vector.ph246 - -vector.ph246: ; preds = %vector.memcheck245 - %broadcast.splatinsert253 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat254 = shufflevector <8 x i64> %broadcast.splatinsert253, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert255 = insertelement <8 x i32> undef, i32 %sub.i, i32 0 - %broadcast.splat256 = shufflevector <8 x i32> %broadcast.splatinsert255, <8 x i32> undef, <8 x i32> zeroinitializer - %470 = trunc <8 x i64> %broadcast.splat254 to <8 x i32> - %471 = or <8 x i32> %470, - %472 = icmp sgt <8 x i32> %471, zeroinitializer - %473 = icmp sgt <8 x i32> %broadcast.splat256, %471 - %474 = and <8 x i1> %473, %472 - %475 = extractelement <8 x i32> %471, i32 0 - %476 = add nsw i32 %mul.i.7, %475 - %477 = sext i32 %476 to i64 - %478 = getelementptr inbounds float, float* %1, i64 %477 - %479 = bitcast float* %478 to <8 x i32>* - %wide.masked.load257 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %479, i32 4, <8 x i1> %474, <8 x i32> undef), !tbaa !12, !alias.scope !54 - %480 = getelementptr inbounds float, float* %0, i64 %477 - %481 = bitcast float* %480 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load257, <8 x i32>* %481, i32 4, <8 x i1> %474), !tbaa !12, !alias.scope !57, !noalias !54, !llvm.access.group !21 - %482 = trunc <8 x i64> %broadcast.splat254 to <8 x i32> - %483 = or <8 x i32> %482, - %484 = icmp sgt <8 x i32> %483, zeroinitializer - %485 = icmp sgt <8 x i32> %broadcast.splat256, %483 - %486 = and <8 x i1> %485, %484 - %487 = extractelement <8 x i32> %483, i32 0 - %488 = add nsw i32 %mul.i.7, %487 - %489 = sext i32 %488 to i64 - %490 = getelementptr inbounds float, float* %1, i64 %489 - %491 = bitcast float* %490 to <8 x i32>* - %wide.masked.load257.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %491, i32 4, <8 x i1> %486, <8 x i32> undef), !tbaa !12, !alias.scope !54 - %492 = getelementptr inbounds float, float* %0, i64 %489 - %493 = bitcast float* %492 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load257.1, <8 x i32>* %493, i32 4, <8 x i1> %486), !tbaa !12, !alias.scope !57, !noalias !54, !llvm.access.group !21 - %494 = trunc <8 x i64> %broadcast.splat254 to <8 x i32> - %495 = or <8 x i32> %494, - %496 = icmp sgt <8 x i32> %495, zeroinitializer - %497 = icmp sgt <8 x i32> %broadcast.splat256, %495 - %498 = and <8 x i1> %497, %496 - %499 = extractelement <8 x i32> %495, i32 0 - %500 = add nsw i32 %mul.i.7, %499 - %501 = sext i32 %500 to i64 - %502 = getelementptr inbounds float, float* %1, i64 %501 - %503 = bitcast float* %502 to <8 x i32>* - %wide.masked.load257.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %503, i32 4, <8 x i1> %498, <8 x i32> undef), !tbaa !12, !alias.scope !54 - %504 = getelementptr inbounds float, float* %0, i64 %501 - %505 = bitcast float* %504 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load257.2, <8 x i32>* %505, i32 4, <8 x i1> %498), !tbaa !12, !alias.scope !57, !noalias !54, !llvm.access.group !21 - %506 = trunc <8 x i64> %broadcast.splat254 to <8 x i32> - %507 = or <8 x i32> %506, - %508 = icmp sgt <8 x i32> %507, zeroinitializer - %509 = icmp sgt <8 x i32> %broadcast.splat256, %507 - %510 = and <8 x i1> %509, %508 - %511 = extractelement <8 x i32> %507, i32 0 - %512 = add nsw i32 %mul.i.7, %511 - %513 = sext i32 %512 to i64 - %514 = getelementptr inbounds float, float* %1, i64 %513 - %515 = bitcast float* %514 to <8 x i32>* - %wide.masked.load257.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %515, i32 4, <8 x i1> %510, <8 x i32> undef), !tbaa !12, !alias.scope !54 - %516 = getelementptr inbounds float, float* %0, i64 %513 - %517 = bitcast float* %516 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load257.3, <8 x i32>* %517, i32 4, <8 x i1> %510), !tbaa !12, !alias.scope !57, !noalias !54, !llvm.access.group !21 - br label %pregion_for_end.i.7 - -pregion_for_entry.entry.i.us.us.7: ; preds = %if.end.i.us.us.7.3, %pregion_for_entry.entry.i.us.us.7.preheader - %_local_id_x.0.us.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.us.us.7.preheader ], [ %533, %if.end.i.us.us.7.3 ] - %add1.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.0.us.us.7, %mul.i.i - %conv2.i.us.us.7 = trunc i64 %add1.i.i.us.us.7 to i32 - %cmp7.i.us.us.7 = icmp sgt i32 %conv2.i.us.us.7, 0 - %cmp11.i.us.us.7 = icmp sgt i32 %sub.i, %conv2.i.us.us.7 - %or.cond28.i.us.us.7 = and i1 %cmp11.i.us.us.7, %cmp7.i.us.us.7 - br i1 %or.cond28.i.us.us.7, label %if.then.i.us.us.7, label %if.end.i.us.us.7 - -if.then.i.us.us.7: ; preds = %pregion_for_entry.entry.i.us.us.7 - %add.i.us.us.7 = add nsw i32 %mul.i.7, %conv2.i.us.us.7 - %idxprom.i.us.us.7 = sext i32 %add.i.us.us.7 to i64 - %arrayidx.i.us.us.7 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.7 - %518 = bitcast float* %arrayidx.i.us.us.7 to i32* - %519 = load i32, i32* %518, align 4, !tbaa !12 - %arrayidx16.i.us.us.7 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.7 - %520 = bitcast float* %arrayidx16.i.us.us.7 to i32* - store i32 %519, i32* %520, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.7 - -if.end.i.us.us.7: ; preds = %if.then.i.us.us.7, %pregion_for_entry.entry.i.us.us.7 - %521 = or i64 %_local_id_x.0.us.us.7, 1 - %add1.i.i.us.us.7.1 = add nuw nsw i64 %521, %mul.i.i - %conv2.i.us.us.7.1 = trunc i64 %add1.i.i.us.us.7.1 to i32 - %cmp7.i.us.us.7.1 = icmp sgt i32 %conv2.i.us.us.7.1, 0 - %cmp11.i.us.us.7.1 = icmp sgt i32 %sub.i, %conv2.i.us.us.7.1 - %or.cond28.i.us.us.7.1 = and i1 %cmp11.i.us.us.7.1, %cmp7.i.us.us.7.1 - br i1 %or.cond28.i.us.us.7.1, label %if.then.i.us.us.7.1, label %if.end.i.us.us.7.1 - -pregion_for_end.i.7.loopexit: ; preds = %if.end.i.us.us.7.3 - br label %pregion_for_end.i.7 - -pregion_for_end.i.7: ; preds = %pregion_for_end.i.7.loopexit, %vector.ph246, %pregion_for_end.i.6 - ret void - -if.then.i.us.us.7.1: ; preds = %if.end.i.us.us.7 - %add.i.us.us.7.1 = add nsw i32 %mul.i.7, %conv2.i.us.us.7.1 - %idxprom.i.us.us.7.1 = sext i32 %add.i.us.us.7.1 to i64 - %arrayidx.i.us.us.7.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.7.1 - %522 = bitcast float* %arrayidx.i.us.us.7.1 to i32* - %523 = load i32, i32* %522, align 4, !tbaa !12 - %arrayidx16.i.us.us.7.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.7.1 - %524 = bitcast float* %arrayidx16.i.us.us.7.1 to i32* - store i32 %523, i32* %524, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.7.1 - -if.end.i.us.us.7.1: ; preds = %if.then.i.us.us.7.1, %if.end.i.us.us.7 - %525 = or i64 %_local_id_x.0.us.us.7, 2 - %add1.i.i.us.us.7.2 = add nuw nsw i64 %525, %mul.i.i - %conv2.i.us.us.7.2 = trunc i64 %add1.i.i.us.us.7.2 to i32 - %cmp7.i.us.us.7.2 = icmp sgt i32 %conv2.i.us.us.7.2, 0 - %cmp11.i.us.us.7.2 = icmp sgt i32 %sub.i, %conv2.i.us.us.7.2 - %or.cond28.i.us.us.7.2 = and i1 %cmp11.i.us.us.7.2, %cmp7.i.us.us.7.2 - br i1 %or.cond28.i.us.us.7.2, label %if.then.i.us.us.7.2, label %if.end.i.us.us.7.2 - -if.then.i.us.us.7.2: ; preds = %if.end.i.us.us.7.1 - %add.i.us.us.7.2 = add nsw i32 %mul.i.7, %conv2.i.us.us.7.2 - %idxprom.i.us.us.7.2 = sext i32 %add.i.us.us.7.2 to i64 - %arrayidx.i.us.us.7.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.7.2 - %526 = bitcast float* %arrayidx.i.us.us.7.2 to i32* - %527 = load i32, i32* %526, align 4, !tbaa !12 - %arrayidx16.i.us.us.7.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.7.2 - %528 = bitcast float* %arrayidx16.i.us.us.7.2 to i32* - store i32 %527, i32* %528, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.7.2 - -if.end.i.us.us.7.2: ; preds = %if.then.i.us.us.7.2, %if.end.i.us.us.7.1 - %529 = or i64 %_local_id_x.0.us.us.7, 3 - %add1.i.i.us.us.7.3 = add nuw nsw i64 %529, %mul.i.i - %conv2.i.us.us.7.3 = trunc i64 %add1.i.i.us.us.7.3 to i32 - %cmp7.i.us.us.7.3 = icmp sgt i32 %conv2.i.us.us.7.3, 0 - %cmp11.i.us.us.7.3 = icmp sgt i32 %sub.i, %conv2.i.us.us.7.3 - %or.cond28.i.us.us.7.3 = and i1 %cmp11.i.us.us.7.3, %cmp7.i.us.us.7.3 - br i1 %or.cond28.i.us.us.7.3, label %if.then.i.us.us.7.3, label %if.end.i.us.us.7.3 - -if.then.i.us.us.7.3: ; preds = %if.end.i.us.us.7.2 - %add.i.us.us.7.3 = add nsw i32 %mul.i.7, %conv2.i.us.us.7.3 - %idxprom.i.us.us.7.3 = sext i32 %add.i.us.us.7.3 to i64 - %arrayidx.i.us.us.7.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.7.3 - %530 = bitcast float* %arrayidx.i.us.us.7.3 to i32* - %531 = load i32, i32* %530, align 4, !tbaa !12 - %arrayidx16.i.us.us.7.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.7.3 - %532 = bitcast float* %arrayidx16.i.us.us.7.3 to i32* - store i32 %531, i32* %532, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.7.3 - -if.end.i.us.us.7.3: ; preds = %if.then.i.us.us.7.3, %if.end.i.us.us.7.2 - %533 = add nuw nsw i64 %_local_id_x.0.us.us.7, 4 - %exitcond.7.not.3 = icmp eq i64 %533, 32 - br i1 %exitcond.7.not.3, label %pregion_for_end.i.7.loopexit, label %pregion_for_entry.entry.i.us.us.7, !llvm.loop !59 - -if.then.i.us.us.6.1: ; preds = %if.end.i.us.us.6 - %add.i.us.us.6.1 = add nsw i32 %mul.i.6, %conv2.i.us.us.6.1 - %idxprom.i.us.us.6.1 = sext i32 %add.i.us.us.6.1 to i64 - %arrayidx.i.us.us.6.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.6.1 - %534 = bitcast float* %arrayidx.i.us.us.6.1 to i32* - %535 = load i32, i32* %534, align 4, !tbaa !12 - %arrayidx16.i.us.us.6.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.6.1 - %536 = bitcast float* %arrayidx16.i.us.us.6.1 to i32* - store i32 %535, i32* %536, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.6.1 - -if.end.i.us.us.6.1: ; preds = %if.then.i.us.us.6.1, %if.end.i.us.us.6 - %537 = or i64 %_local_id_x.0.us.us.6, 2 - %add1.i.i.us.us.6.2 = add nuw nsw i64 %537, %mul.i.i - %conv2.i.us.us.6.2 = trunc i64 %add1.i.i.us.us.6.2 to i32 - %cmp7.i.us.us.6.2 = icmp sgt i32 %conv2.i.us.us.6.2, 0 - %cmp11.i.us.us.6.2 = icmp sgt i32 %sub.i, %conv2.i.us.us.6.2 - %or.cond28.i.us.us.6.2 = and i1 %cmp11.i.us.us.6.2, %cmp7.i.us.us.6.2 - br i1 %or.cond28.i.us.us.6.2, label %if.then.i.us.us.6.2, label %if.end.i.us.us.6.2 - -if.then.i.us.us.6.2: ; preds = %if.end.i.us.us.6.1 - %add.i.us.us.6.2 = add nsw i32 %mul.i.6, %conv2.i.us.us.6.2 - %idxprom.i.us.us.6.2 = sext i32 %add.i.us.us.6.2 to i64 - %arrayidx.i.us.us.6.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.6.2 - %538 = bitcast float* %arrayidx.i.us.us.6.2 to i32* - %539 = load i32, i32* %538, align 4, !tbaa !12 - %arrayidx16.i.us.us.6.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.6.2 - %540 = bitcast float* %arrayidx16.i.us.us.6.2 to i32* - store i32 %539, i32* %540, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.6.2 - -if.end.i.us.us.6.2: ; preds = %if.then.i.us.us.6.2, %if.end.i.us.us.6.1 - %541 = or i64 %_local_id_x.0.us.us.6, 3 - %add1.i.i.us.us.6.3 = add nuw nsw i64 %541, %mul.i.i - %conv2.i.us.us.6.3 = trunc i64 %add1.i.i.us.us.6.3 to i32 - %cmp7.i.us.us.6.3 = icmp sgt i32 %conv2.i.us.us.6.3, 0 - %cmp11.i.us.us.6.3 = icmp sgt i32 %sub.i, %conv2.i.us.us.6.3 - %or.cond28.i.us.us.6.3 = and i1 %cmp11.i.us.us.6.3, %cmp7.i.us.us.6.3 - br i1 %or.cond28.i.us.us.6.3, label %if.then.i.us.us.6.3, label %if.end.i.us.us.6.3 - -if.then.i.us.us.6.3: ; preds = %if.end.i.us.us.6.2 - %add.i.us.us.6.3 = add nsw i32 %mul.i.6, %conv2.i.us.us.6.3 - %idxprom.i.us.us.6.3 = sext i32 %add.i.us.us.6.3 to i64 - %arrayidx.i.us.us.6.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.6.3 - %542 = bitcast float* %arrayidx.i.us.us.6.3 to i32* - %543 = load i32, i32* %542, align 4, !tbaa !12 - %arrayidx16.i.us.us.6.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.6.3 - %544 = bitcast float* %arrayidx16.i.us.us.6.3 to i32* - store i32 %543, i32* %544, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.6.3 - -if.end.i.us.us.6.3: ; preds = %if.then.i.us.us.6.3, %if.end.i.us.us.6.2 - %545 = add nuw nsw i64 %_local_id_x.0.us.us.6, 4 - %exitcond.6.not.3 = icmp eq i64 %545, 32 - br i1 %exitcond.6.not.3, label %pregion_for_end.i.6.loopexit, label %pregion_for_entry.entry.i.us.us.6, !llvm.loop !62 - -if.then.i.us.us.5.1: ; preds = %if.end.i.us.us.5 - %add.i.us.us.5.1 = add nsw i32 %mul.i.5, %conv2.i.us.us.5.1 - %idxprom.i.us.us.5.1 = sext i32 %add.i.us.us.5.1 to i64 - %arrayidx.i.us.us.5.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.5.1 - %546 = bitcast float* %arrayidx.i.us.us.5.1 to i32* - %547 = load i32, i32* %546, align 4, !tbaa !12 - %arrayidx16.i.us.us.5.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.5.1 - %548 = bitcast float* %arrayidx16.i.us.us.5.1 to i32* - store i32 %547, i32* %548, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.5.1 - -if.end.i.us.us.5.1: ; preds = %if.then.i.us.us.5.1, %if.end.i.us.us.5 - %549 = or i64 %_local_id_x.0.us.us.5, 2 - %add1.i.i.us.us.5.2 = add nuw nsw i64 %549, %mul.i.i - %conv2.i.us.us.5.2 = trunc i64 %add1.i.i.us.us.5.2 to i32 - %cmp7.i.us.us.5.2 = icmp sgt i32 %conv2.i.us.us.5.2, 0 - %cmp11.i.us.us.5.2 = icmp sgt i32 %sub.i, %conv2.i.us.us.5.2 - %or.cond28.i.us.us.5.2 = and i1 %cmp11.i.us.us.5.2, %cmp7.i.us.us.5.2 - br i1 %or.cond28.i.us.us.5.2, label %if.then.i.us.us.5.2, label %if.end.i.us.us.5.2 - -if.then.i.us.us.5.2: ; preds = %if.end.i.us.us.5.1 - %add.i.us.us.5.2 = add nsw i32 %mul.i.5, %conv2.i.us.us.5.2 - %idxprom.i.us.us.5.2 = sext i32 %add.i.us.us.5.2 to i64 - %arrayidx.i.us.us.5.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.5.2 - %550 = bitcast float* %arrayidx.i.us.us.5.2 to i32* - %551 = load i32, i32* %550, align 4, !tbaa !12 - %arrayidx16.i.us.us.5.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.5.2 - %552 = bitcast float* %arrayidx16.i.us.us.5.2 to i32* - store i32 %551, i32* %552, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.5.2 - -if.end.i.us.us.5.2: ; preds = %if.then.i.us.us.5.2, %if.end.i.us.us.5.1 - %553 = or i64 %_local_id_x.0.us.us.5, 3 - %add1.i.i.us.us.5.3 = add nuw nsw i64 %553, %mul.i.i - %conv2.i.us.us.5.3 = trunc i64 %add1.i.i.us.us.5.3 to i32 - %cmp7.i.us.us.5.3 = icmp sgt i32 %conv2.i.us.us.5.3, 0 - %cmp11.i.us.us.5.3 = icmp sgt i32 %sub.i, %conv2.i.us.us.5.3 - %or.cond28.i.us.us.5.3 = and i1 %cmp11.i.us.us.5.3, %cmp7.i.us.us.5.3 - br i1 %or.cond28.i.us.us.5.3, label %if.then.i.us.us.5.3, label %if.end.i.us.us.5.3 - -if.then.i.us.us.5.3: ; preds = %if.end.i.us.us.5.2 - %add.i.us.us.5.3 = add nsw i32 %mul.i.5, %conv2.i.us.us.5.3 - %idxprom.i.us.us.5.3 = sext i32 %add.i.us.us.5.3 to i64 - %arrayidx.i.us.us.5.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.5.3 - %554 = bitcast float* %arrayidx.i.us.us.5.3 to i32* - %555 = load i32, i32* %554, align 4, !tbaa !12 - %arrayidx16.i.us.us.5.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.5.3 - %556 = bitcast float* %arrayidx16.i.us.us.5.3 to i32* - store i32 %555, i32* %556, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.5.3 - -if.end.i.us.us.5.3: ; preds = %if.then.i.us.us.5.3, %if.end.i.us.us.5.2 - %557 = add nuw nsw i64 %_local_id_x.0.us.us.5, 4 - %exitcond.5.not.3 = icmp eq i64 %557, 32 - br i1 %exitcond.5.not.3, label %pregion_for_end.i.5.loopexit, label %pregion_for_entry.entry.i.us.us.5, !llvm.loop !63 - -if.then.i.us.us.4.1: ; preds = %if.end.i.us.us.4 - %add.i.us.us.4.1 = add nsw i32 %mul.i.4, %conv2.i.us.us.4.1 - %idxprom.i.us.us.4.1 = sext i32 %add.i.us.us.4.1 to i64 - %arrayidx.i.us.us.4.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.4.1 - %558 = bitcast float* %arrayidx.i.us.us.4.1 to i32* - %559 = load i32, i32* %558, align 4, !tbaa !12 - %arrayidx16.i.us.us.4.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.4.1 - %560 = bitcast float* %arrayidx16.i.us.us.4.1 to i32* - store i32 %559, i32* %560, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.4.1 - -if.end.i.us.us.4.1: ; preds = %if.then.i.us.us.4.1, %if.end.i.us.us.4 - %561 = or i64 %_local_id_x.0.us.us.4, 2 - %add1.i.i.us.us.4.2 = add nuw nsw i64 %561, %mul.i.i - %conv2.i.us.us.4.2 = trunc i64 %add1.i.i.us.us.4.2 to i32 - %cmp7.i.us.us.4.2 = icmp sgt i32 %conv2.i.us.us.4.2, 0 - %cmp11.i.us.us.4.2 = icmp sgt i32 %sub.i, %conv2.i.us.us.4.2 - %or.cond28.i.us.us.4.2 = and i1 %cmp11.i.us.us.4.2, %cmp7.i.us.us.4.2 - br i1 %or.cond28.i.us.us.4.2, label %if.then.i.us.us.4.2, label %if.end.i.us.us.4.2 - -if.then.i.us.us.4.2: ; preds = %if.end.i.us.us.4.1 - %add.i.us.us.4.2 = add nsw i32 %mul.i.4, %conv2.i.us.us.4.2 - %idxprom.i.us.us.4.2 = sext i32 %add.i.us.us.4.2 to i64 - %arrayidx.i.us.us.4.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.4.2 - %562 = bitcast float* %arrayidx.i.us.us.4.2 to i32* - %563 = load i32, i32* %562, align 4, !tbaa !12 - %arrayidx16.i.us.us.4.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.4.2 - %564 = bitcast float* %arrayidx16.i.us.us.4.2 to i32* - store i32 %563, i32* %564, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.4.2 - -if.end.i.us.us.4.2: ; preds = %if.then.i.us.us.4.2, %if.end.i.us.us.4.1 - %565 = or i64 %_local_id_x.0.us.us.4, 3 - %add1.i.i.us.us.4.3 = add nuw nsw i64 %565, %mul.i.i - %conv2.i.us.us.4.3 = trunc i64 %add1.i.i.us.us.4.3 to i32 - %cmp7.i.us.us.4.3 = icmp sgt i32 %conv2.i.us.us.4.3, 0 - %cmp11.i.us.us.4.3 = icmp sgt i32 %sub.i, %conv2.i.us.us.4.3 - %or.cond28.i.us.us.4.3 = and i1 %cmp11.i.us.us.4.3, %cmp7.i.us.us.4.3 - br i1 %or.cond28.i.us.us.4.3, label %if.then.i.us.us.4.3, label %if.end.i.us.us.4.3 - -if.then.i.us.us.4.3: ; preds = %if.end.i.us.us.4.2 - %add.i.us.us.4.3 = add nsw i32 %mul.i.4, %conv2.i.us.us.4.3 - %idxprom.i.us.us.4.3 = sext i32 %add.i.us.us.4.3 to i64 - %arrayidx.i.us.us.4.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.4.3 - %566 = bitcast float* %arrayidx.i.us.us.4.3 to i32* - %567 = load i32, i32* %566, align 4, !tbaa !12 - %arrayidx16.i.us.us.4.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.4.3 - %568 = bitcast float* %arrayidx16.i.us.us.4.3 to i32* - store i32 %567, i32* %568, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.4.3 - -if.end.i.us.us.4.3: ; preds = %if.then.i.us.us.4.3, %if.end.i.us.us.4.2 - %569 = add nuw nsw i64 %_local_id_x.0.us.us.4, 4 - %exitcond.4.not.3 = icmp eq i64 %569, 32 - br i1 %exitcond.4.not.3, label %pregion_for_end.i.4.loopexit, label %pregion_for_entry.entry.i.us.us.4, !llvm.loop !64 - -if.then.i.us.us.3.1: ; preds = %if.end.i.us.us.3 - %add.i.us.us.3.1 = add nsw i32 %mul.i.3, %conv2.i.us.us.3.1 - %idxprom.i.us.us.3.1 = sext i32 %add.i.us.us.3.1 to i64 - %arrayidx.i.us.us.3.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.3.1 - %570 = bitcast float* %arrayidx.i.us.us.3.1 to i32* - %571 = load i32, i32* %570, align 4, !tbaa !12 - %arrayidx16.i.us.us.3.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.3.1 - %572 = bitcast float* %arrayidx16.i.us.us.3.1 to i32* - store i32 %571, i32* %572, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.3.1 - -if.end.i.us.us.3.1: ; preds = %if.then.i.us.us.3.1, %if.end.i.us.us.3 - %573 = or i64 %_local_id_x.0.us.us.3, 2 - %add1.i.i.us.us.3.2 = add nuw nsw i64 %573, %mul.i.i - %conv2.i.us.us.3.2 = trunc i64 %add1.i.i.us.us.3.2 to i32 - %cmp7.i.us.us.3.2 = icmp sgt i32 %conv2.i.us.us.3.2, 0 - %cmp11.i.us.us.3.2 = icmp sgt i32 %sub.i, %conv2.i.us.us.3.2 - %or.cond28.i.us.us.3.2 = and i1 %cmp11.i.us.us.3.2, %cmp7.i.us.us.3.2 - br i1 %or.cond28.i.us.us.3.2, label %if.then.i.us.us.3.2, label %if.end.i.us.us.3.2 - -if.then.i.us.us.3.2: ; preds = %if.end.i.us.us.3.1 - %add.i.us.us.3.2 = add nsw i32 %mul.i.3, %conv2.i.us.us.3.2 - %idxprom.i.us.us.3.2 = sext i32 %add.i.us.us.3.2 to i64 - %arrayidx.i.us.us.3.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.3.2 - %574 = bitcast float* %arrayidx.i.us.us.3.2 to i32* - %575 = load i32, i32* %574, align 4, !tbaa !12 - %arrayidx16.i.us.us.3.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.3.2 - %576 = bitcast float* %arrayidx16.i.us.us.3.2 to i32* - store i32 %575, i32* %576, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.3.2 - -if.end.i.us.us.3.2: ; preds = %if.then.i.us.us.3.2, %if.end.i.us.us.3.1 - %577 = or i64 %_local_id_x.0.us.us.3, 3 - %add1.i.i.us.us.3.3 = add nuw nsw i64 %577, %mul.i.i - %conv2.i.us.us.3.3 = trunc i64 %add1.i.i.us.us.3.3 to i32 - %cmp7.i.us.us.3.3 = icmp sgt i32 %conv2.i.us.us.3.3, 0 - %cmp11.i.us.us.3.3 = icmp sgt i32 %sub.i, %conv2.i.us.us.3.3 - %or.cond28.i.us.us.3.3 = and i1 %cmp11.i.us.us.3.3, %cmp7.i.us.us.3.3 - br i1 %or.cond28.i.us.us.3.3, label %if.then.i.us.us.3.3, label %if.end.i.us.us.3.3 - -if.then.i.us.us.3.3: ; preds = %if.end.i.us.us.3.2 - %add.i.us.us.3.3 = add nsw i32 %mul.i.3, %conv2.i.us.us.3.3 - %idxprom.i.us.us.3.3 = sext i32 %add.i.us.us.3.3 to i64 - %arrayidx.i.us.us.3.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.3.3 - %578 = bitcast float* %arrayidx.i.us.us.3.3 to i32* - %579 = load i32, i32* %578, align 4, !tbaa !12 - %arrayidx16.i.us.us.3.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.3.3 - %580 = bitcast float* %arrayidx16.i.us.us.3.3 to i32* - store i32 %579, i32* %580, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.3.3 - -if.end.i.us.us.3.3: ; preds = %if.then.i.us.us.3.3, %if.end.i.us.us.3.2 - %581 = add nuw nsw i64 %_local_id_x.0.us.us.3, 4 - %exitcond.3.not.3 = icmp eq i64 %581, 32 - br i1 %exitcond.3.not.3, label %pregion_for_end.i.3.loopexit, label %pregion_for_entry.entry.i.us.us.3, !llvm.loop !65 - -if.then.i.us.us.2.1: ; preds = %if.end.i.us.us.2 - %add.i.us.us.2.1 = add nsw i32 %mul.i.2, %conv2.i.us.us.2.1 - %idxprom.i.us.us.2.1 = sext i32 %add.i.us.us.2.1 to i64 - %arrayidx.i.us.us.2.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.2.1 - %582 = bitcast float* %arrayidx.i.us.us.2.1 to i32* - %583 = load i32, i32* %582, align 4, !tbaa !12 - %arrayidx16.i.us.us.2.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.2.1 - %584 = bitcast float* %arrayidx16.i.us.us.2.1 to i32* - store i32 %583, i32* %584, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.2.1 - -if.end.i.us.us.2.1: ; preds = %if.then.i.us.us.2.1, %if.end.i.us.us.2 - %585 = or i64 %_local_id_x.0.us.us.2, 2 - %add1.i.i.us.us.2.2 = add nuw nsw i64 %585, %mul.i.i - %conv2.i.us.us.2.2 = trunc i64 %add1.i.i.us.us.2.2 to i32 - %cmp7.i.us.us.2.2 = icmp sgt i32 %conv2.i.us.us.2.2, 0 - %cmp11.i.us.us.2.2 = icmp sgt i32 %sub.i, %conv2.i.us.us.2.2 - %or.cond28.i.us.us.2.2 = and i1 %cmp11.i.us.us.2.2, %cmp7.i.us.us.2.2 - br i1 %or.cond28.i.us.us.2.2, label %if.then.i.us.us.2.2, label %if.end.i.us.us.2.2 - -if.then.i.us.us.2.2: ; preds = %if.end.i.us.us.2.1 - %add.i.us.us.2.2 = add nsw i32 %mul.i.2, %conv2.i.us.us.2.2 - %idxprom.i.us.us.2.2 = sext i32 %add.i.us.us.2.2 to i64 - %arrayidx.i.us.us.2.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.2.2 - %586 = bitcast float* %arrayidx.i.us.us.2.2 to i32* - %587 = load i32, i32* %586, align 4, !tbaa !12 - %arrayidx16.i.us.us.2.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.2.2 - %588 = bitcast float* %arrayidx16.i.us.us.2.2 to i32* - store i32 %587, i32* %588, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.2.2 - -if.end.i.us.us.2.2: ; preds = %if.then.i.us.us.2.2, %if.end.i.us.us.2.1 - %589 = or i64 %_local_id_x.0.us.us.2, 3 - %add1.i.i.us.us.2.3 = add nuw nsw i64 %589, %mul.i.i - %conv2.i.us.us.2.3 = trunc i64 %add1.i.i.us.us.2.3 to i32 - %cmp7.i.us.us.2.3 = icmp sgt i32 %conv2.i.us.us.2.3, 0 - %cmp11.i.us.us.2.3 = icmp sgt i32 %sub.i, %conv2.i.us.us.2.3 - %or.cond28.i.us.us.2.3 = and i1 %cmp11.i.us.us.2.3, %cmp7.i.us.us.2.3 - br i1 %or.cond28.i.us.us.2.3, label %if.then.i.us.us.2.3, label %if.end.i.us.us.2.3 - -if.then.i.us.us.2.3: ; preds = %if.end.i.us.us.2.2 - %add.i.us.us.2.3 = add nsw i32 %mul.i.2, %conv2.i.us.us.2.3 - %idxprom.i.us.us.2.3 = sext i32 %add.i.us.us.2.3 to i64 - %arrayidx.i.us.us.2.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.2.3 - %590 = bitcast float* %arrayidx.i.us.us.2.3 to i32* - %591 = load i32, i32* %590, align 4, !tbaa !12 - %arrayidx16.i.us.us.2.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.2.3 - %592 = bitcast float* %arrayidx16.i.us.us.2.3 to i32* - store i32 %591, i32* %592, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.2.3 - -if.end.i.us.us.2.3: ; preds = %if.then.i.us.us.2.3, %if.end.i.us.us.2.2 - %593 = add nuw nsw i64 %_local_id_x.0.us.us.2, 4 - %exitcond.2.not.3 = icmp eq i64 %593, 32 - br i1 %exitcond.2.not.3, label %pregion_for_end.i.2.loopexit, label %pregion_for_entry.entry.i.us.us.2, !llvm.loop !66 - -if.then.i.us.us.1.1: ; preds = %if.end.i.us.us.1 - %add.i.us.us.1.1 = add nsw i32 %mul.i.1, %conv2.i.us.us.1.1 - %idxprom.i.us.us.1.1 = sext i32 %add.i.us.us.1.1 to i64 - %arrayidx.i.us.us.1.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.1.1 - %594 = bitcast float* %arrayidx.i.us.us.1.1 to i32* - %595 = load i32, i32* %594, align 4, !tbaa !12 - %arrayidx16.i.us.us.1.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.1.1 - %596 = bitcast float* %arrayidx16.i.us.us.1.1 to i32* - store i32 %595, i32* %596, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.1.1 - -if.end.i.us.us.1.1: ; preds = %if.then.i.us.us.1.1, %if.end.i.us.us.1 - %597 = or i64 %_local_id_x.0.us.us.1, 2 - %add1.i.i.us.us.1.2 = add nuw nsw i64 %597, %mul.i.i - %conv2.i.us.us.1.2 = trunc i64 %add1.i.i.us.us.1.2 to i32 - %cmp7.i.us.us.1.2 = icmp sgt i32 %conv2.i.us.us.1.2, 0 - %cmp11.i.us.us.1.2 = icmp sgt i32 %sub.i, %conv2.i.us.us.1.2 - %or.cond28.i.us.us.1.2 = and i1 %cmp11.i.us.us.1.2, %cmp7.i.us.us.1.2 - br i1 %or.cond28.i.us.us.1.2, label %if.then.i.us.us.1.2, label %if.end.i.us.us.1.2 - -if.then.i.us.us.1.2: ; preds = %if.end.i.us.us.1.1 - %add.i.us.us.1.2 = add nsw i32 %mul.i.1, %conv2.i.us.us.1.2 - %idxprom.i.us.us.1.2 = sext i32 %add.i.us.us.1.2 to i64 - %arrayidx.i.us.us.1.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.1.2 - %598 = bitcast float* %arrayidx.i.us.us.1.2 to i32* - %599 = load i32, i32* %598, align 4, !tbaa !12 - %arrayidx16.i.us.us.1.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.1.2 - %600 = bitcast float* %arrayidx16.i.us.us.1.2 to i32* - store i32 %599, i32* %600, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.1.2 - -if.end.i.us.us.1.2: ; preds = %if.then.i.us.us.1.2, %if.end.i.us.us.1.1 - %601 = or i64 %_local_id_x.0.us.us.1, 3 - %add1.i.i.us.us.1.3 = add nuw nsw i64 %601, %mul.i.i - %conv2.i.us.us.1.3 = trunc i64 %add1.i.i.us.us.1.3 to i32 - %cmp7.i.us.us.1.3 = icmp sgt i32 %conv2.i.us.us.1.3, 0 - %cmp11.i.us.us.1.3 = icmp sgt i32 %sub.i, %conv2.i.us.us.1.3 - %or.cond28.i.us.us.1.3 = and i1 %cmp11.i.us.us.1.3, %cmp7.i.us.us.1.3 - br i1 %or.cond28.i.us.us.1.3, label %if.then.i.us.us.1.3, label %if.end.i.us.us.1.3 - -if.then.i.us.us.1.3: ; preds = %if.end.i.us.us.1.2 - %add.i.us.us.1.3 = add nsw i32 %mul.i.1, %conv2.i.us.us.1.3 - %idxprom.i.us.us.1.3 = sext i32 %add.i.us.us.1.3 to i64 - %arrayidx.i.us.us.1.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.1.3 - %602 = bitcast float* %arrayidx.i.us.us.1.3 to i32* - %603 = load i32, i32* %602, align 4, !tbaa !12 - %arrayidx16.i.us.us.1.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.1.3 - %604 = bitcast float* %arrayidx16.i.us.us.1.3 to i32* - store i32 %603, i32* %604, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.1.3 - -if.end.i.us.us.1.3: ; preds = %if.then.i.us.us.1.3, %if.end.i.us.us.1.2 - %605 = add nuw nsw i64 %_local_id_x.0.us.us.1, 4 - %exitcond.1.not.3 = icmp eq i64 %605, 32 - br i1 %exitcond.1.not.3, label %pregion_for_end.i.1.loopexit, label %pregion_for_entry.entry.i.us.us.1, !llvm.loop !67 - -if.then.i.us.us.1277: ; preds = %if.end.i.us.us - %add.i.us.us.1273 = add nsw i32 %mul.i, %conv2.i.us.us.1268 - %idxprom.i.us.us.1274 = sext i32 %add.i.us.us.1273 to i64 - %arrayidx.i.us.us.1275 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.1274 - %606 = bitcast float* %arrayidx.i.us.us.1275 to i32* - %607 = load i32, i32* %606, align 4, !tbaa !12 - %arrayidx16.i.us.us.1276 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.1274 - %608 = bitcast float* %arrayidx16.i.us.us.1276 to i32* - store i32 %607, i32* %608, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.1278 - -if.end.i.us.us.1278: ; preds = %if.then.i.us.us.1277, %if.end.i.us.us - %609 = or i64 %_local_id_x.0.us.us, 2 - %add1.i.i.us.us.2280 = add nuw nsw i64 %609, %mul.i.i - %conv2.i.us.us.2281 = trunc i64 %add1.i.i.us.us.2280 to i32 - %cmp7.i.us.us.2282 = icmp sgt i32 %conv2.i.us.us.2281, 0 - %cmp11.i.us.us.2283 = icmp sgt i32 %sub.i, %conv2.i.us.us.2281 - %or.cond28.i.us.us.2284 = and i1 %cmp11.i.us.us.2283, %cmp7.i.us.us.2282 - br i1 %or.cond28.i.us.us.2284, label %if.then.i.us.us.2290, label %if.end.i.us.us.2291 - -if.then.i.us.us.2290: ; preds = %if.end.i.us.us.1278 - %add.i.us.us.2286 = add nsw i32 %mul.i, %conv2.i.us.us.2281 - %idxprom.i.us.us.2287 = sext i32 %add.i.us.us.2286 to i64 - %arrayidx.i.us.us.2288 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.2287 - %610 = bitcast float* %arrayidx.i.us.us.2288 to i32* - %611 = load i32, i32* %610, align 4, !tbaa !12 - %arrayidx16.i.us.us.2289 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.2287 - %612 = bitcast float* %arrayidx16.i.us.us.2289 to i32* - store i32 %611, i32* %612, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.2291 - -if.end.i.us.us.2291: ; preds = %if.then.i.us.us.2290, %if.end.i.us.us.1278 - %613 = or i64 %_local_id_x.0.us.us, 3 - %add1.i.i.us.us.3293 = add nuw nsw i64 %613, %mul.i.i - %conv2.i.us.us.3294 = trunc i64 %add1.i.i.us.us.3293 to i32 - %cmp7.i.us.us.3295 = icmp sgt i32 %conv2.i.us.us.3294, 0 - %cmp11.i.us.us.3296 = icmp sgt i32 %sub.i, %conv2.i.us.us.3294 - %or.cond28.i.us.us.3297 = and i1 %cmp11.i.us.us.3296, %cmp7.i.us.us.3295 - br i1 %or.cond28.i.us.us.3297, label %if.then.i.us.us.3303, label %if.end.i.us.us.3304 - -if.then.i.us.us.3303: ; preds = %if.end.i.us.us.2291 - %add.i.us.us.3299 = add nsw i32 %mul.i, %conv2.i.us.us.3294 - %idxprom.i.us.us.3300 = sext i32 %add.i.us.us.3299 to i64 - %arrayidx.i.us.us.3301 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.3300 - %614 = bitcast float* %arrayidx.i.us.us.3301 to i32* - %615 = load i32, i32* %614, align 4, !tbaa !12 - %arrayidx16.i.us.us.3302 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.us.3300 - %616 = bitcast float* %arrayidx16.i.us.us.3302 to i32* - store i32 %615, i32* %616, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.us.us.3304 - -if.end.i.us.us.3304: ; preds = %if.then.i.us.us.3303, %if.end.i.us.us.2291 - %617 = add nuw nsw i64 %_local_id_x.0.us.us, 4 - %exitcond.not.3 = icmp eq i64 %617, 32 - br i1 %exitcond.not.3, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us.us, !llvm.loop !68 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_runJacobi2D_kernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -pregion_for_entry.pregion_for_init.i.i: - %5 = bitcast i8** %0 to float*** - %6 = load float**, float*** %5, align 8 - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float*** - %10 = load float**, float*** %9, align 8 - %11 = load float*, float** %10, align 8 - %12 = getelementptr i8*, i8** %0, i64 2 - %13 = bitcast i8** %12 to i32** - %14 = load i32*, i32** %13, align 8 - %15 = load i32, i32* %14, align 4 - %mul3.i.i.i = shl i64 %3, 3 - %mul.i.i.i = shl i64 %2, 5 - %sub.i.i = add nsw i32 %15, -1 - %conv.i.i = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %conv.i.i, 0 - %mul.i.i = mul nsw i32 %15, %conv.i.i - %cmp4.i.i = icmp sgt i32 %sub.i.i, %conv.i.i - %or.cond = and i1 %cmp.i.i, %cmp4.i.i - br i1 %or.cond, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %16 = trunc i64 %3 to i32 - %17 = mul i32 %15, %16 - %18 = shl i32 %17, 3 - %19 = trunc i64 %2 to i32 - %20 = shl i32 %19, 5 - %21 = add i32 %18, %20 - %22 = icmp sgt i32 %21, 2147483616 - br i1 %22, label %pregion_for_entry.entry.i.i.us.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us.us - -vector.memcheck: ; preds = %vector.scevcheck - %23 = trunc i64 %3 to i32 - %24 = mul i32 %15, %23 - %25 = shl i32 %24, 3 - %26 = trunc i64 %2 to i32 - %27 = shl i32 %26, 5 - %28 = add i32 %25, %27 - %29 = sext i32 %28 to i64 - %scevgep = getelementptr float, float* %7, i64 %29 - %30 = add nsw i64 %29, 32 - %scevgep12 = getelementptr float, float* %7, i64 %30 - %scevgep14 = getelementptr float, float* %11, i64 %29 - %scevgep16 = getelementptr float, float* %11, i64 %30 - %bound0 = icmp ult float* %scevgep, %scevgep16 - %bound1 = icmp ult float* %scevgep14, %scevgep12 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.us.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat19 = shufflevector <8 x i32> %broadcast.splatinsert18, <8 x i32> undef, <8 x i32> zeroinitializer - %31 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %32 = or <8 x i32> %31, - %33 = icmp sgt <8 x i32> %32, zeroinitializer - %34 = icmp sgt <8 x i32> %broadcast.splat19, %32 - %35 = and <8 x i1> %34, %33 - %36 = extractelement <8 x i32> %32, i32 0 - %37 = add nsw i32 %mul.i.i, %36 - %38 = sext i32 %37 to i64 - %39 = getelementptr inbounds float, float* %11, i64 %38 - %40 = bitcast float* %39 to <8 x i32>* - %wide.masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %40, i32 4, <8 x i1> %35, <8 x i32> undef), !tbaa !12, !alias.scope !69 - %41 = getelementptr inbounds float, float* %7, i64 %38 - %42 = bitcast float* %41 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load, <8 x i32>* %42, i32 4, <8 x i1> %35), !tbaa !12, !alias.scope !72, !noalias !69, !llvm.access.group !21 - %43 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %44 = or <8 x i32> %43, - %45 = icmp sgt <8 x i32> %44, zeroinitializer - %46 = icmp sgt <8 x i32> %broadcast.splat19, %44 - %47 = and <8 x i1> %46, %45 - %48 = extractelement <8 x i32> %44, i32 0 - %49 = add nsw i32 %mul.i.i, %48 - %50 = sext i32 %49 to i64 - %51 = getelementptr inbounds float, float* %11, i64 %50 - %52 = bitcast float* %51 to <8 x i32>* - %wide.masked.load.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %52, i32 4, <8 x i1> %47, <8 x i32> undef), !tbaa !12, !alias.scope !69 - %53 = getelementptr inbounds float, float* %7, i64 %50 - %54 = bitcast float* %53 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.1, <8 x i32>* %54, i32 4, <8 x i1> %47), !tbaa !12, !alias.scope !72, !noalias !69, !llvm.access.group !21 - %55 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %56 = or <8 x i32> %55, - %57 = icmp sgt <8 x i32> %56, zeroinitializer - %58 = icmp sgt <8 x i32> %broadcast.splat19, %56 - %59 = and <8 x i1> %58, %57 - %60 = extractelement <8 x i32> %56, i32 0 - %61 = add nsw i32 %mul.i.i, %60 - %62 = sext i32 %61 to i64 - %63 = getelementptr inbounds float, float* %11, i64 %62 - %64 = bitcast float* %63 to <8 x i32>* - %wide.masked.load.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %64, i32 4, <8 x i1> %59, <8 x i32> undef), !tbaa !12, !alias.scope !69 - %65 = getelementptr inbounds float, float* %7, i64 %62 - %66 = bitcast float* %65 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.2, <8 x i32>* %66, i32 4, <8 x i1> %59), !tbaa !12, !alias.scope !72, !noalias !69, !llvm.access.group !21 - %67 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %68 = or <8 x i32> %67, - %69 = icmp sgt <8 x i32> %68, zeroinitializer - %70 = icmp sgt <8 x i32> %broadcast.splat19, %68 - %71 = and <8 x i1> %70, %69 - %72 = extractelement <8 x i32> %68, i32 0 - %73 = add nsw i32 %mul.i.i, %72 - %74 = sext i32 %73 to i64 - %75 = getelementptr inbounds float, float* %11, i64 %74 - %76 = bitcast float* %75 to <8 x i32>* - %wide.masked.load.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %76, i32 4, <8 x i1> %71, <8 x i32> undef), !tbaa !12, !alias.scope !69 - %77 = getelementptr inbounds float, float* %7, i64 %74 - %78 = bitcast float* %77 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.3, <8 x i32>* %78, i32 4, <8 x i1> %71), !tbaa !12, !alias.scope !72, !noalias !69, !llvm.access.group !21 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us.3304, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ], [ %626, %if.end.i.i.us.us.3304 ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv2.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp7.i.i.us.us = icmp sgt i32 %conv2.i.i.us.us, 0 - %cmp11.i.i.us.us = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us - %or.cond28.i.i.us.us = and i1 %cmp11.i.i.us.us, %cmp7.i.i.us.us - br i1 %or.cond28.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i, %conv2.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us - %79 = bitcast float* %arrayidx.i.i.us.us to i32* - %80 = load i32, i32* %79, align 4, !tbaa !12 - %arrayidx16.i.i.us.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us - %81 = bitcast float* %arrayidx16.i.i.us.us to i32* - store i32 %80, i32* %81, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.then.i.i.us.us, %pregion_for_entry.entry.i.i.us.us - %82 = or i64 %_local_id_x.i.0.us.us, 1 - %add1.i.i.i.us.us.1267 = add nuw nsw i64 %82, %mul.i.i.i - %conv2.i.i.us.us.1268 = trunc i64 %add1.i.i.i.us.us.1267 to i32 - %cmp7.i.i.us.us.1269 = icmp sgt i32 %conv2.i.i.us.us.1268, 0 - %cmp11.i.i.us.us.1270 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.1268 - %or.cond28.i.i.us.us.1271 = and i1 %cmp11.i.i.us.us.1270, %cmp7.i.i.us.us.1269 - br i1 %or.cond28.i.i.us.us.1271, label %if.then.i.i.us.us.1277, label %if.end.i.i.us.us.1278 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us.us.3304 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i - %83 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.1 = or i32 %83, 1 - %cmp.i.i.1 = icmp sgt i32 %conv.i.i.1, 0 - %mul.i.i.1 = mul nsw i32 %15, %conv.i.i.1 - %cmp4.i.i.1 = icmp sgt i32 %sub.i.i, %conv.i.i.1 - %or.cond4 = and i1 %cmp.i.i.1, %cmp4.i.i.1 - br i1 %or.cond4, label %vector.scevcheck27, label %pregion_for_end.i.i.1 - -vector.scevcheck27: ; preds = %pregion_for_end.i.i - %84 = mul i32 %15, %conv.i.i.1 - %85 = trunc i64 %2 to i32 - %86 = shl i32 %85, 5 - %87 = add i32 %84, %86 - %88 = icmp sgt i32 %87, 2147483616 - br i1 %88, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %vector.memcheck41 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %vector.memcheck41, %vector.scevcheck27 - br label %pregion_for_entry.entry.i.i.us.us.1 - -vector.memcheck41: ; preds = %vector.scevcheck27 - %89 = mul i32 %15, %conv.i.i.1 - %90 = trunc i64 %2 to i32 - %91 = shl i32 %90, 5 - %92 = add i32 %89, %91 - %93 = sext i32 %92 to i64 - %scevgep29 = getelementptr float, float* %7, i64 %93 - %94 = add nsw i64 %93, 32 - %scevgep31 = getelementptr float, float* %7, i64 %94 - %scevgep33 = getelementptr float, float* %11, i64 %93 - %scevgep35 = getelementptr float, float* %11, i64 %94 - %bound037 = icmp ult float* %scevgep29, %scevgep35 - %bound138 = icmp ult float* %scevgep33, %scevgep31 - %found.conflict39 = and i1 %bound037, %bound138 - br i1 %found.conflict39, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %vector.ph42 - -vector.ph42: ; preds = %vector.memcheck41 - %broadcast.splatinsert49 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat50 = shufflevector <8 x i64> %broadcast.splatinsert49, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert51 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat52 = shufflevector <8 x i32> %broadcast.splatinsert51, <8 x i32> undef, <8 x i32> zeroinitializer - %95 = trunc <8 x i64> %broadcast.splat50 to <8 x i32> - %96 = or <8 x i32> %95, - %97 = icmp sgt <8 x i32> %96, zeroinitializer - %98 = icmp sgt <8 x i32> %broadcast.splat52, %96 - %99 = and <8 x i1> %98, %97 - %100 = extractelement <8 x i32> %96, i32 0 - %101 = add nsw i32 %mul.i.i.1, %100 - %102 = sext i32 %101 to i64 - %103 = getelementptr inbounds float, float* %11, i64 %102 - %104 = bitcast float* %103 to <8 x i32>* - %wide.masked.load53 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %104, i32 4, <8 x i1> %99, <8 x i32> undef), !tbaa !12, !alias.scope !74 - %105 = getelementptr inbounds float, float* %7, i64 %102 - %106 = bitcast float* %105 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load53, <8 x i32>* %106, i32 4, <8 x i1> %99), !tbaa !12, !alias.scope !77, !noalias !74, !llvm.access.group !21 - %107 = trunc <8 x i64> %broadcast.splat50 to <8 x i32> - %108 = or <8 x i32> %107, - %109 = icmp sgt <8 x i32> %108, zeroinitializer - %110 = icmp sgt <8 x i32> %broadcast.splat52, %108 - %111 = and <8 x i1> %110, %109 - %112 = extractelement <8 x i32> %108, i32 0 - %113 = add nsw i32 %mul.i.i.1, %112 - %114 = sext i32 %113 to i64 - %115 = getelementptr inbounds float, float* %11, i64 %114 - %116 = bitcast float* %115 to <8 x i32>* - %wide.masked.load53.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %116, i32 4, <8 x i1> %111, <8 x i32> undef), !tbaa !12, !alias.scope !74 - %117 = getelementptr inbounds float, float* %7, i64 %114 - %118 = bitcast float* %117 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load53.1, <8 x i32>* %118, i32 4, <8 x i1> %111), !tbaa !12, !alias.scope !77, !noalias !74, !llvm.access.group !21 - %119 = trunc <8 x i64> %broadcast.splat50 to <8 x i32> - %120 = or <8 x i32> %119, - %121 = icmp sgt <8 x i32> %120, zeroinitializer - %122 = icmp sgt <8 x i32> %broadcast.splat52, %120 - %123 = and <8 x i1> %122, %121 - %124 = extractelement <8 x i32> %120, i32 0 - %125 = add nsw i32 %mul.i.i.1, %124 - %126 = sext i32 %125 to i64 - %127 = getelementptr inbounds float, float* %11, i64 %126 - %128 = bitcast float* %127 to <8 x i32>* - %wide.masked.load53.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %128, i32 4, <8 x i1> %123, <8 x i32> undef), !tbaa !12, !alias.scope !74 - %129 = getelementptr inbounds float, float* %7, i64 %126 - %130 = bitcast float* %129 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load53.2, <8 x i32>* %130, i32 4, <8 x i1> %123), !tbaa !12, !alias.scope !77, !noalias !74, !llvm.access.group !21 - %131 = trunc <8 x i64> %broadcast.splat50 to <8 x i32> - %132 = or <8 x i32> %131, - %133 = icmp sgt <8 x i32> %132, zeroinitializer - %134 = icmp sgt <8 x i32> %broadcast.splat52, %132 - %135 = and <8 x i1> %134, %133 - %136 = extractelement <8 x i32> %132, i32 0 - %137 = add nsw i32 %mul.i.i.1, %136 - %138 = sext i32 %137 to i64 - %139 = getelementptr inbounds float, float* %11, i64 %138 - %140 = bitcast float* %139 to <8 x i32>* - %wide.masked.load53.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %140, i32 4, <8 x i1> %135, <8 x i32> undef), !tbaa !12, !alias.scope !74 - %141 = getelementptr inbounds float, float* %7, i64 %138 - %142 = bitcast float* %141 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load53.3, <8 x i32>* %142, i32 4, <8 x i1> %135), !tbaa !12, !alias.scope !77, !noalias !74, !llvm.access.group !21 - br label %pregion_for_end.i.i.1 - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.3, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ], [ %614, %if.end.i.i.us.us.1.3 ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv2.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp7.i.i.us.us.1 = icmp sgt i32 %conv2.i.i.us.us.1, 0 - %cmp11.i.i.us.us.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.1 - %or.cond28.i.i.us.us.1 = and i1 %cmp11.i.i.us.us.1, %cmp7.i.i.us.us.1 - br i1 %or.cond28.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.1, %conv2.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.1 - %143 = bitcast float* %arrayidx.i.i.us.us.1 to i32* - %144 = load i32, i32* %143, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.1 - %145 = bitcast float* %arrayidx16.i.i.us.us.1 to i32* - store i32 %144, i32* %145, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.then.i.i.us.us.1, %pregion_for_entry.entry.i.i.us.us.1 - %146 = or i64 %_local_id_x.i.0.us.us.1, 1 - %add1.i.i.i.us.us.1.1 = add nuw nsw i64 %146, %mul.i.i.i - %conv2.i.i.us.us.1.1 = trunc i64 %add1.i.i.i.us.us.1.1 to i32 - %cmp7.i.i.us.us.1.1 = icmp sgt i32 %conv2.i.i.us.us.1.1, 0 - %cmp11.i.i.us.us.1.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.1.1 - %or.cond28.i.i.us.us.1.1 = and i1 %cmp11.i.i.us.us.1.1, %cmp7.i.i.us.us.1.1 - br i1 %or.cond28.i.i.us.us.1.1, label %if.then.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.i.i.us.us.1.3 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph42, %pregion_for_end.i.i - %147 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.2 = or i32 %147, 2 - %cmp.i.i.2 = icmp sgt i32 %conv.i.i.2, 0 - %mul.i.i.2 = mul nsw i32 %15, %conv.i.i.2 - %cmp4.i.i.2 = icmp sgt i32 %sub.i.i, %conv.i.i.2 - %or.cond5 = and i1 %cmp.i.i.2, %cmp4.i.i.2 - br i1 %or.cond5, label %vector.scevcheck61, label %pregion_for_end.i.i.2 - -vector.scevcheck61: ; preds = %pregion_for_end.i.i.1 - %148 = mul i32 %15, %conv.i.i.2 - %149 = trunc i64 %2 to i32 - %150 = shl i32 %149, 5 - %151 = add i32 %148, %150 - %152 = icmp sgt i32 %151, 2147483616 - br i1 %152, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %vector.memcheck75 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %vector.memcheck75, %vector.scevcheck61 - br label %pregion_for_entry.entry.i.i.us.us.2 - -vector.memcheck75: ; preds = %vector.scevcheck61 - %153 = mul i32 %15, %conv.i.i.2 - %154 = trunc i64 %2 to i32 - %155 = shl i32 %154, 5 - %156 = add i32 %153, %155 - %157 = sext i32 %156 to i64 - %scevgep63 = getelementptr float, float* %7, i64 %157 - %158 = add nsw i64 %157, 32 - %scevgep65 = getelementptr float, float* %7, i64 %158 - %scevgep67 = getelementptr float, float* %11, i64 %157 - %scevgep69 = getelementptr float, float* %11, i64 %158 - %bound071 = icmp ult float* %scevgep63, %scevgep69 - %bound172 = icmp ult float* %scevgep67, %scevgep65 - %found.conflict73 = and i1 %bound071, %bound172 - br i1 %found.conflict73, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %vector.ph76 - -vector.ph76: ; preds = %vector.memcheck75 - %broadcast.splatinsert83 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat84 = shufflevector <8 x i64> %broadcast.splatinsert83, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert85 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat86 = shufflevector <8 x i32> %broadcast.splatinsert85, <8 x i32> undef, <8 x i32> zeroinitializer - %159 = trunc <8 x i64> %broadcast.splat84 to <8 x i32> - %160 = or <8 x i32> %159, - %161 = icmp sgt <8 x i32> %160, zeroinitializer - %162 = icmp sgt <8 x i32> %broadcast.splat86, %160 - %163 = and <8 x i1> %162, %161 - %164 = extractelement <8 x i32> %160, i32 0 - %165 = add nsw i32 %mul.i.i.2, %164 - %166 = sext i32 %165 to i64 - %167 = getelementptr inbounds float, float* %11, i64 %166 - %168 = bitcast float* %167 to <8 x i32>* - %wide.masked.load87 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %168, i32 4, <8 x i1> %163, <8 x i32> undef), !tbaa !12, !alias.scope !79 - %169 = getelementptr inbounds float, float* %7, i64 %166 - %170 = bitcast float* %169 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load87, <8 x i32>* %170, i32 4, <8 x i1> %163), !tbaa !12, !alias.scope !82, !noalias !79, !llvm.access.group !21 - %171 = trunc <8 x i64> %broadcast.splat84 to <8 x i32> - %172 = or <8 x i32> %171, - %173 = icmp sgt <8 x i32> %172, zeroinitializer - %174 = icmp sgt <8 x i32> %broadcast.splat86, %172 - %175 = and <8 x i1> %174, %173 - %176 = extractelement <8 x i32> %172, i32 0 - %177 = add nsw i32 %mul.i.i.2, %176 - %178 = sext i32 %177 to i64 - %179 = getelementptr inbounds float, float* %11, i64 %178 - %180 = bitcast float* %179 to <8 x i32>* - %wide.masked.load87.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %180, i32 4, <8 x i1> %175, <8 x i32> undef), !tbaa !12, !alias.scope !79 - %181 = getelementptr inbounds float, float* %7, i64 %178 - %182 = bitcast float* %181 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load87.1, <8 x i32>* %182, i32 4, <8 x i1> %175), !tbaa !12, !alias.scope !82, !noalias !79, !llvm.access.group !21 - %183 = trunc <8 x i64> %broadcast.splat84 to <8 x i32> - %184 = or <8 x i32> %183, - %185 = icmp sgt <8 x i32> %184, zeroinitializer - %186 = icmp sgt <8 x i32> %broadcast.splat86, %184 - %187 = and <8 x i1> %186, %185 - %188 = extractelement <8 x i32> %184, i32 0 - %189 = add nsw i32 %mul.i.i.2, %188 - %190 = sext i32 %189 to i64 - %191 = getelementptr inbounds float, float* %11, i64 %190 - %192 = bitcast float* %191 to <8 x i32>* - %wide.masked.load87.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %192, i32 4, <8 x i1> %187, <8 x i32> undef), !tbaa !12, !alias.scope !79 - %193 = getelementptr inbounds float, float* %7, i64 %190 - %194 = bitcast float* %193 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load87.2, <8 x i32>* %194, i32 4, <8 x i1> %187), !tbaa !12, !alias.scope !82, !noalias !79, !llvm.access.group !21 - %195 = trunc <8 x i64> %broadcast.splat84 to <8 x i32> - %196 = or <8 x i32> %195, - %197 = icmp sgt <8 x i32> %196, zeroinitializer - %198 = icmp sgt <8 x i32> %broadcast.splat86, %196 - %199 = and <8 x i1> %198, %197 - %200 = extractelement <8 x i32> %196, i32 0 - %201 = add nsw i32 %mul.i.i.2, %200 - %202 = sext i32 %201 to i64 - %203 = getelementptr inbounds float, float* %11, i64 %202 - %204 = bitcast float* %203 to <8 x i32>* - %wide.masked.load87.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %204, i32 4, <8 x i1> %199, <8 x i32> undef), !tbaa !12, !alias.scope !79 - %205 = getelementptr inbounds float, float* %7, i64 %202 - %206 = bitcast float* %205 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load87.3, <8 x i32>* %206, i32 4, <8 x i1> %199), !tbaa !12, !alias.scope !82, !noalias !79, !llvm.access.group !21 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.3, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ], [ %602, %if.end.i.i.us.us.2.3 ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv2.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp7.i.i.us.us.2 = icmp sgt i32 %conv2.i.i.us.us.2, 0 - %cmp11.i.i.us.us.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.2 - %or.cond28.i.i.us.us.2 = and i1 %cmp11.i.i.us.us.2, %cmp7.i.i.us.us.2 - br i1 %or.cond28.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.2, %conv2.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.2 - %207 = bitcast float* %arrayidx.i.i.us.us.2 to i32* - %208 = load i32, i32* %207, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.2 - %209 = bitcast float* %arrayidx16.i.i.us.us.2 to i32* - store i32 %208, i32* %209, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.then.i.i.us.us.2, %pregion_for_entry.entry.i.i.us.us.2 - %210 = or i64 %_local_id_x.i.0.us.us.2, 1 - %add1.i.i.i.us.us.2.1 = add nuw nsw i64 %210, %mul.i.i.i - %conv2.i.i.us.us.2.1 = trunc i64 %add1.i.i.i.us.us.2.1 to i32 - %cmp7.i.i.us.us.2.1 = icmp sgt i32 %conv2.i.i.us.us.2.1, 0 - %cmp11.i.i.us.us.2.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.2.1 - %or.cond28.i.i.us.us.2.1 = and i1 %cmp11.i.i.us.us.2.1, %cmp7.i.i.us.us.2.1 - br i1 %or.cond28.i.i.us.us.2.1, label %if.then.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.i.i.us.us.2.3 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph76, %pregion_for_end.i.i.1 - %211 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.3 = or i32 %211, 3 - %cmp.i.i.3 = icmp sgt i32 %conv.i.i.3, 0 - %mul.i.i.3 = mul nsw i32 %15, %conv.i.i.3 - %cmp4.i.i.3 = icmp sgt i32 %sub.i.i, %conv.i.i.3 - %or.cond6 = and i1 %cmp.i.i.3, %cmp4.i.i.3 - br i1 %or.cond6, label %vector.scevcheck95, label %pregion_for_end.i.i.3 - -vector.scevcheck95: ; preds = %pregion_for_end.i.i.2 - %212 = mul i32 %15, %conv.i.i.3 - %213 = trunc i64 %2 to i32 - %214 = shl i32 %213, 5 - %215 = add i32 %212, %214 - %216 = icmp sgt i32 %215, 2147483616 - br i1 %216, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %vector.memcheck109 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %vector.memcheck109, %vector.scevcheck95 - br label %pregion_for_entry.entry.i.i.us.us.3 - -vector.memcheck109: ; preds = %vector.scevcheck95 - %217 = mul i32 %15, %conv.i.i.3 - %218 = trunc i64 %2 to i32 - %219 = shl i32 %218, 5 - %220 = add i32 %217, %219 - %221 = sext i32 %220 to i64 - %scevgep97 = getelementptr float, float* %7, i64 %221 - %222 = add nsw i64 %221, 32 - %scevgep99 = getelementptr float, float* %7, i64 %222 - %scevgep101 = getelementptr float, float* %11, i64 %221 - %scevgep103 = getelementptr float, float* %11, i64 %222 - %bound0105 = icmp ult float* %scevgep97, %scevgep103 - %bound1106 = icmp ult float* %scevgep101, %scevgep99 - %found.conflict107 = and i1 %bound0105, %bound1106 - br i1 %found.conflict107, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %vector.ph110 - -vector.ph110: ; preds = %vector.memcheck109 - %broadcast.splatinsert117 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat118 = shufflevector <8 x i64> %broadcast.splatinsert117, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert119 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat120 = shufflevector <8 x i32> %broadcast.splatinsert119, <8 x i32> undef, <8 x i32> zeroinitializer - %223 = trunc <8 x i64> %broadcast.splat118 to <8 x i32> - %224 = or <8 x i32> %223, - %225 = icmp sgt <8 x i32> %224, zeroinitializer - %226 = icmp sgt <8 x i32> %broadcast.splat120, %224 - %227 = and <8 x i1> %226, %225 - %228 = extractelement <8 x i32> %224, i32 0 - %229 = add nsw i32 %mul.i.i.3, %228 - %230 = sext i32 %229 to i64 - %231 = getelementptr inbounds float, float* %11, i64 %230 - %232 = bitcast float* %231 to <8 x i32>* - %wide.masked.load121 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %232, i32 4, <8 x i1> %227, <8 x i32> undef), !tbaa !12, !alias.scope !84 - %233 = getelementptr inbounds float, float* %7, i64 %230 - %234 = bitcast float* %233 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load121, <8 x i32>* %234, i32 4, <8 x i1> %227), !tbaa !12, !alias.scope !87, !noalias !84, !llvm.access.group !21 - %235 = trunc <8 x i64> %broadcast.splat118 to <8 x i32> - %236 = or <8 x i32> %235, - %237 = icmp sgt <8 x i32> %236, zeroinitializer - %238 = icmp sgt <8 x i32> %broadcast.splat120, %236 - %239 = and <8 x i1> %238, %237 - %240 = extractelement <8 x i32> %236, i32 0 - %241 = add nsw i32 %mul.i.i.3, %240 - %242 = sext i32 %241 to i64 - %243 = getelementptr inbounds float, float* %11, i64 %242 - %244 = bitcast float* %243 to <8 x i32>* - %wide.masked.load121.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %244, i32 4, <8 x i1> %239, <8 x i32> undef), !tbaa !12, !alias.scope !84 - %245 = getelementptr inbounds float, float* %7, i64 %242 - %246 = bitcast float* %245 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load121.1, <8 x i32>* %246, i32 4, <8 x i1> %239), !tbaa !12, !alias.scope !87, !noalias !84, !llvm.access.group !21 - %247 = trunc <8 x i64> %broadcast.splat118 to <8 x i32> - %248 = or <8 x i32> %247, - %249 = icmp sgt <8 x i32> %248, zeroinitializer - %250 = icmp sgt <8 x i32> %broadcast.splat120, %248 - %251 = and <8 x i1> %250, %249 - %252 = extractelement <8 x i32> %248, i32 0 - %253 = add nsw i32 %mul.i.i.3, %252 - %254 = sext i32 %253 to i64 - %255 = getelementptr inbounds float, float* %11, i64 %254 - %256 = bitcast float* %255 to <8 x i32>* - %wide.masked.load121.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %256, i32 4, <8 x i1> %251, <8 x i32> undef), !tbaa !12, !alias.scope !84 - %257 = getelementptr inbounds float, float* %7, i64 %254 - %258 = bitcast float* %257 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load121.2, <8 x i32>* %258, i32 4, <8 x i1> %251), !tbaa !12, !alias.scope !87, !noalias !84, !llvm.access.group !21 - %259 = trunc <8 x i64> %broadcast.splat118 to <8 x i32> - %260 = or <8 x i32> %259, - %261 = icmp sgt <8 x i32> %260, zeroinitializer - %262 = icmp sgt <8 x i32> %broadcast.splat120, %260 - %263 = and <8 x i1> %262, %261 - %264 = extractelement <8 x i32> %260, i32 0 - %265 = add nsw i32 %mul.i.i.3, %264 - %266 = sext i32 %265 to i64 - %267 = getelementptr inbounds float, float* %11, i64 %266 - %268 = bitcast float* %267 to <8 x i32>* - %wide.masked.load121.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %268, i32 4, <8 x i1> %263, <8 x i32> undef), !tbaa !12, !alias.scope !84 - %269 = getelementptr inbounds float, float* %7, i64 %266 - %270 = bitcast float* %269 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load121.3, <8 x i32>* %270, i32 4, <8 x i1> %263), !tbaa !12, !alias.scope !87, !noalias !84, !llvm.access.group !21 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.3, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ], [ %590, %if.end.i.i.us.us.3.3 ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv2.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp7.i.i.us.us.3 = icmp sgt i32 %conv2.i.i.us.us.3, 0 - %cmp11.i.i.us.us.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.3 - %or.cond28.i.i.us.us.3 = and i1 %cmp11.i.i.us.us.3, %cmp7.i.i.us.us.3 - br i1 %or.cond28.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.3, %conv2.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.3 - %271 = bitcast float* %arrayidx.i.i.us.us.3 to i32* - %272 = load i32, i32* %271, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.3 - %273 = bitcast float* %arrayidx16.i.i.us.us.3 to i32* - store i32 %272, i32* %273, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.then.i.i.us.us.3, %pregion_for_entry.entry.i.i.us.us.3 - %274 = or i64 %_local_id_x.i.0.us.us.3, 1 - %add1.i.i.i.us.us.3.1 = add nuw nsw i64 %274, %mul.i.i.i - %conv2.i.i.us.us.3.1 = trunc i64 %add1.i.i.i.us.us.3.1 to i32 - %cmp7.i.i.us.us.3.1 = icmp sgt i32 %conv2.i.i.us.us.3.1, 0 - %cmp11.i.i.us.us.3.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.3.1 - %or.cond28.i.i.us.us.3.1 = and i1 %cmp11.i.i.us.us.3.1, %cmp7.i.i.us.us.3.1 - br i1 %or.cond28.i.i.us.us.3.1, label %if.then.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.i.i.us.us.3.3 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph110, %pregion_for_end.i.i.2 - %275 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.4 = or i32 %275, 4 - %cmp.i.i.4 = icmp sgt i32 %conv.i.i.4, 0 - %mul.i.i.4 = mul nsw i32 %15, %conv.i.i.4 - %cmp4.i.i.4 = icmp sgt i32 %sub.i.i, %conv.i.i.4 - %or.cond7 = and i1 %cmp.i.i.4, %cmp4.i.i.4 - br i1 %or.cond7, label %vector.scevcheck129, label %pregion_for_end.i.i.4 - -vector.scevcheck129: ; preds = %pregion_for_end.i.i.3 - %276 = mul i32 %15, %conv.i.i.4 - %277 = trunc i64 %2 to i32 - %278 = shl i32 %277, 5 - %279 = add i32 %276, %278 - %280 = icmp sgt i32 %279, 2147483616 - br i1 %280, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %vector.memcheck143 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %vector.memcheck143, %vector.scevcheck129 - br label %pregion_for_entry.entry.i.i.us.us.4 - -vector.memcheck143: ; preds = %vector.scevcheck129 - %281 = mul i32 %15, %conv.i.i.4 - %282 = trunc i64 %2 to i32 - %283 = shl i32 %282, 5 - %284 = add i32 %281, %283 - %285 = sext i32 %284 to i64 - %scevgep131 = getelementptr float, float* %7, i64 %285 - %286 = add nsw i64 %285, 32 - %scevgep133 = getelementptr float, float* %7, i64 %286 - %scevgep135 = getelementptr float, float* %11, i64 %285 - %scevgep137 = getelementptr float, float* %11, i64 %286 - %bound0139 = icmp ult float* %scevgep131, %scevgep137 - %bound1140 = icmp ult float* %scevgep135, %scevgep133 - %found.conflict141 = and i1 %bound0139, %bound1140 - br i1 %found.conflict141, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %vector.ph144 - -vector.ph144: ; preds = %vector.memcheck143 - %broadcast.splatinsert151 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat152 = shufflevector <8 x i64> %broadcast.splatinsert151, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert153 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat154 = shufflevector <8 x i32> %broadcast.splatinsert153, <8 x i32> undef, <8 x i32> zeroinitializer - %287 = trunc <8 x i64> %broadcast.splat152 to <8 x i32> - %288 = or <8 x i32> %287, - %289 = icmp sgt <8 x i32> %288, zeroinitializer - %290 = icmp sgt <8 x i32> %broadcast.splat154, %288 - %291 = and <8 x i1> %290, %289 - %292 = extractelement <8 x i32> %288, i32 0 - %293 = add nsw i32 %mul.i.i.4, %292 - %294 = sext i32 %293 to i64 - %295 = getelementptr inbounds float, float* %11, i64 %294 - %296 = bitcast float* %295 to <8 x i32>* - %wide.masked.load155 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %296, i32 4, <8 x i1> %291, <8 x i32> undef), !tbaa !12, !alias.scope !89 - %297 = getelementptr inbounds float, float* %7, i64 %294 - %298 = bitcast float* %297 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load155, <8 x i32>* %298, i32 4, <8 x i1> %291), !tbaa !12, !alias.scope !92, !noalias !89, !llvm.access.group !21 - %299 = trunc <8 x i64> %broadcast.splat152 to <8 x i32> - %300 = or <8 x i32> %299, - %301 = icmp sgt <8 x i32> %300, zeroinitializer - %302 = icmp sgt <8 x i32> %broadcast.splat154, %300 - %303 = and <8 x i1> %302, %301 - %304 = extractelement <8 x i32> %300, i32 0 - %305 = add nsw i32 %mul.i.i.4, %304 - %306 = sext i32 %305 to i64 - %307 = getelementptr inbounds float, float* %11, i64 %306 - %308 = bitcast float* %307 to <8 x i32>* - %wide.masked.load155.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %308, i32 4, <8 x i1> %303, <8 x i32> undef), !tbaa !12, !alias.scope !89 - %309 = getelementptr inbounds float, float* %7, i64 %306 - %310 = bitcast float* %309 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load155.1, <8 x i32>* %310, i32 4, <8 x i1> %303), !tbaa !12, !alias.scope !92, !noalias !89, !llvm.access.group !21 - %311 = trunc <8 x i64> %broadcast.splat152 to <8 x i32> - %312 = or <8 x i32> %311, - %313 = icmp sgt <8 x i32> %312, zeroinitializer - %314 = icmp sgt <8 x i32> %broadcast.splat154, %312 - %315 = and <8 x i1> %314, %313 - %316 = extractelement <8 x i32> %312, i32 0 - %317 = add nsw i32 %mul.i.i.4, %316 - %318 = sext i32 %317 to i64 - %319 = getelementptr inbounds float, float* %11, i64 %318 - %320 = bitcast float* %319 to <8 x i32>* - %wide.masked.load155.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %320, i32 4, <8 x i1> %315, <8 x i32> undef), !tbaa !12, !alias.scope !89 - %321 = getelementptr inbounds float, float* %7, i64 %318 - %322 = bitcast float* %321 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load155.2, <8 x i32>* %322, i32 4, <8 x i1> %315), !tbaa !12, !alias.scope !92, !noalias !89, !llvm.access.group !21 - %323 = trunc <8 x i64> %broadcast.splat152 to <8 x i32> - %324 = or <8 x i32> %323, - %325 = icmp sgt <8 x i32> %324, zeroinitializer - %326 = icmp sgt <8 x i32> %broadcast.splat154, %324 - %327 = and <8 x i1> %326, %325 - %328 = extractelement <8 x i32> %324, i32 0 - %329 = add nsw i32 %mul.i.i.4, %328 - %330 = sext i32 %329 to i64 - %331 = getelementptr inbounds float, float* %11, i64 %330 - %332 = bitcast float* %331 to <8 x i32>* - %wide.masked.load155.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %332, i32 4, <8 x i1> %327, <8 x i32> undef), !tbaa !12, !alias.scope !89 - %333 = getelementptr inbounds float, float* %7, i64 %330 - %334 = bitcast float* %333 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load155.3, <8 x i32>* %334, i32 4, <8 x i1> %327), !tbaa !12, !alias.scope !92, !noalias !89, !llvm.access.group !21 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.3, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ], [ %578, %if.end.i.i.us.us.4.3 ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv2.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp7.i.i.us.us.4 = icmp sgt i32 %conv2.i.i.us.us.4, 0 - %cmp11.i.i.us.us.4 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.4 - %or.cond28.i.i.us.us.4 = and i1 %cmp11.i.i.us.us.4, %cmp7.i.i.us.us.4 - br i1 %or.cond28.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.4, %conv2.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.4 - %335 = bitcast float* %arrayidx.i.i.us.us.4 to i32* - %336 = load i32, i32* %335, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.4 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.4 - %337 = bitcast float* %arrayidx16.i.i.us.us.4 to i32* - store i32 %336, i32* %337, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.then.i.i.us.us.4, %pregion_for_entry.entry.i.i.us.us.4 - %338 = or i64 %_local_id_x.i.0.us.us.4, 1 - %add1.i.i.i.us.us.4.1 = add nuw nsw i64 %338, %mul.i.i.i - %conv2.i.i.us.us.4.1 = trunc i64 %add1.i.i.i.us.us.4.1 to i32 - %cmp7.i.i.us.us.4.1 = icmp sgt i32 %conv2.i.i.us.us.4.1, 0 - %cmp11.i.i.us.us.4.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.4.1 - %or.cond28.i.i.us.us.4.1 = and i1 %cmp11.i.i.us.us.4.1, %cmp7.i.i.us.us.4.1 - br i1 %or.cond28.i.i.us.us.4.1, label %if.then.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.i.i.us.us.4.3 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph144, %pregion_for_end.i.i.3 - %339 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.5 = or i32 %339, 5 - %cmp.i.i.5 = icmp sgt i32 %conv.i.i.5, 0 - %mul.i.i.5 = mul nsw i32 %15, %conv.i.i.5 - %cmp4.i.i.5 = icmp sgt i32 %sub.i.i, %conv.i.i.5 - %or.cond8 = and i1 %cmp.i.i.5, %cmp4.i.i.5 - br i1 %or.cond8, label %vector.scevcheck163, label %pregion_for_end.i.i.5 - -vector.scevcheck163: ; preds = %pregion_for_end.i.i.4 - %340 = mul i32 %15, %conv.i.i.5 - %341 = trunc i64 %2 to i32 - %342 = shl i32 %341, 5 - %343 = add i32 %340, %342 - %344 = icmp sgt i32 %343, 2147483616 - br i1 %344, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %vector.memcheck177 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %vector.memcheck177, %vector.scevcheck163 - br label %pregion_for_entry.entry.i.i.us.us.5 - -vector.memcheck177: ; preds = %vector.scevcheck163 - %345 = mul i32 %15, %conv.i.i.5 - %346 = trunc i64 %2 to i32 - %347 = shl i32 %346, 5 - %348 = add i32 %345, %347 - %349 = sext i32 %348 to i64 - %scevgep165 = getelementptr float, float* %7, i64 %349 - %350 = add nsw i64 %349, 32 - %scevgep167 = getelementptr float, float* %7, i64 %350 - %scevgep169 = getelementptr float, float* %11, i64 %349 - %scevgep171 = getelementptr float, float* %11, i64 %350 - %bound0173 = icmp ult float* %scevgep165, %scevgep171 - %bound1174 = icmp ult float* %scevgep169, %scevgep167 - %found.conflict175 = and i1 %bound0173, %bound1174 - br i1 %found.conflict175, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %vector.ph178 - -vector.ph178: ; preds = %vector.memcheck177 - %broadcast.splatinsert185 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat186 = shufflevector <8 x i64> %broadcast.splatinsert185, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert187 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat188 = shufflevector <8 x i32> %broadcast.splatinsert187, <8 x i32> undef, <8 x i32> zeroinitializer - %351 = trunc <8 x i64> %broadcast.splat186 to <8 x i32> - %352 = or <8 x i32> %351, - %353 = icmp sgt <8 x i32> %352, zeroinitializer - %354 = icmp sgt <8 x i32> %broadcast.splat188, %352 - %355 = and <8 x i1> %354, %353 - %356 = extractelement <8 x i32> %352, i32 0 - %357 = add nsw i32 %mul.i.i.5, %356 - %358 = sext i32 %357 to i64 - %359 = getelementptr inbounds float, float* %11, i64 %358 - %360 = bitcast float* %359 to <8 x i32>* - %wide.masked.load189 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %360, i32 4, <8 x i1> %355, <8 x i32> undef), !tbaa !12, !alias.scope !94 - %361 = getelementptr inbounds float, float* %7, i64 %358 - %362 = bitcast float* %361 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load189, <8 x i32>* %362, i32 4, <8 x i1> %355), !tbaa !12, !alias.scope !97, !noalias !94, !llvm.access.group !21 - %363 = trunc <8 x i64> %broadcast.splat186 to <8 x i32> - %364 = or <8 x i32> %363, - %365 = icmp sgt <8 x i32> %364, zeroinitializer - %366 = icmp sgt <8 x i32> %broadcast.splat188, %364 - %367 = and <8 x i1> %366, %365 - %368 = extractelement <8 x i32> %364, i32 0 - %369 = add nsw i32 %mul.i.i.5, %368 - %370 = sext i32 %369 to i64 - %371 = getelementptr inbounds float, float* %11, i64 %370 - %372 = bitcast float* %371 to <8 x i32>* - %wide.masked.load189.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %372, i32 4, <8 x i1> %367, <8 x i32> undef), !tbaa !12, !alias.scope !94 - %373 = getelementptr inbounds float, float* %7, i64 %370 - %374 = bitcast float* %373 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load189.1, <8 x i32>* %374, i32 4, <8 x i1> %367), !tbaa !12, !alias.scope !97, !noalias !94, !llvm.access.group !21 - %375 = trunc <8 x i64> %broadcast.splat186 to <8 x i32> - %376 = or <8 x i32> %375, - %377 = icmp sgt <8 x i32> %376, zeroinitializer - %378 = icmp sgt <8 x i32> %broadcast.splat188, %376 - %379 = and <8 x i1> %378, %377 - %380 = extractelement <8 x i32> %376, i32 0 - %381 = add nsw i32 %mul.i.i.5, %380 - %382 = sext i32 %381 to i64 - %383 = getelementptr inbounds float, float* %11, i64 %382 - %384 = bitcast float* %383 to <8 x i32>* - %wide.masked.load189.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %384, i32 4, <8 x i1> %379, <8 x i32> undef), !tbaa !12, !alias.scope !94 - %385 = getelementptr inbounds float, float* %7, i64 %382 - %386 = bitcast float* %385 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load189.2, <8 x i32>* %386, i32 4, <8 x i1> %379), !tbaa !12, !alias.scope !97, !noalias !94, !llvm.access.group !21 - %387 = trunc <8 x i64> %broadcast.splat186 to <8 x i32> - %388 = or <8 x i32> %387, - %389 = icmp sgt <8 x i32> %388, zeroinitializer - %390 = icmp sgt <8 x i32> %broadcast.splat188, %388 - %391 = and <8 x i1> %390, %389 - %392 = extractelement <8 x i32> %388, i32 0 - %393 = add nsw i32 %mul.i.i.5, %392 - %394 = sext i32 %393 to i64 - %395 = getelementptr inbounds float, float* %11, i64 %394 - %396 = bitcast float* %395 to <8 x i32>* - %wide.masked.load189.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %396, i32 4, <8 x i1> %391, <8 x i32> undef), !tbaa !12, !alias.scope !94 - %397 = getelementptr inbounds float, float* %7, i64 %394 - %398 = bitcast float* %397 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load189.3, <8 x i32>* %398, i32 4, <8 x i1> %391), !tbaa !12, !alias.scope !97, !noalias !94, !llvm.access.group !21 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.3, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ], [ %566, %if.end.i.i.us.us.5.3 ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv2.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp7.i.i.us.us.5 = icmp sgt i32 %conv2.i.i.us.us.5, 0 - %cmp11.i.i.us.us.5 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.5 - %or.cond28.i.i.us.us.5 = and i1 %cmp11.i.i.us.us.5, %cmp7.i.i.us.us.5 - br i1 %or.cond28.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.5, %conv2.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.5 - %399 = bitcast float* %arrayidx.i.i.us.us.5 to i32* - %400 = load i32, i32* %399, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.5 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.5 - %401 = bitcast float* %arrayidx16.i.i.us.us.5 to i32* - store i32 %400, i32* %401, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.then.i.i.us.us.5, %pregion_for_entry.entry.i.i.us.us.5 - %402 = or i64 %_local_id_x.i.0.us.us.5, 1 - %add1.i.i.i.us.us.5.1 = add nuw nsw i64 %402, %mul.i.i.i - %conv2.i.i.us.us.5.1 = trunc i64 %add1.i.i.i.us.us.5.1 to i32 - %cmp7.i.i.us.us.5.1 = icmp sgt i32 %conv2.i.i.us.us.5.1, 0 - %cmp11.i.i.us.us.5.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.5.1 - %or.cond28.i.i.us.us.5.1 = and i1 %cmp11.i.i.us.us.5.1, %cmp7.i.i.us.us.5.1 - br i1 %or.cond28.i.i.us.us.5.1, label %if.then.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.i.i.us.us.5.3 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph178, %pregion_for_end.i.i.4 - %403 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.6 = or i32 %403, 6 - %cmp.i.i.6 = icmp sgt i32 %conv.i.i.6, 0 - %mul.i.i.6 = mul nsw i32 %15, %conv.i.i.6 - %cmp4.i.i.6 = icmp sgt i32 %sub.i.i, %conv.i.i.6 - %or.cond9 = and i1 %cmp.i.i.6, %cmp4.i.i.6 - br i1 %or.cond9, label %vector.scevcheck197, label %pregion_for_end.i.i.6 - -vector.scevcheck197: ; preds = %pregion_for_end.i.i.5 - %404 = mul i32 %15, %conv.i.i.6 - %405 = trunc i64 %2 to i32 - %406 = shl i32 %405, 5 - %407 = add i32 %404, %406 - %408 = icmp sgt i32 %407, 2147483616 - br i1 %408, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %vector.memcheck211 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %vector.memcheck211, %vector.scevcheck197 - br label %pregion_for_entry.entry.i.i.us.us.6 - -vector.memcheck211: ; preds = %vector.scevcheck197 - %409 = mul i32 %15, %conv.i.i.6 - %410 = trunc i64 %2 to i32 - %411 = shl i32 %410, 5 - %412 = add i32 %409, %411 - %413 = sext i32 %412 to i64 - %scevgep199 = getelementptr float, float* %7, i64 %413 - %414 = add nsw i64 %413, 32 - %scevgep201 = getelementptr float, float* %7, i64 %414 - %scevgep203 = getelementptr float, float* %11, i64 %413 - %scevgep205 = getelementptr float, float* %11, i64 %414 - %bound0207 = icmp ult float* %scevgep199, %scevgep205 - %bound1208 = icmp ult float* %scevgep203, %scevgep201 - %found.conflict209 = and i1 %bound0207, %bound1208 - br i1 %found.conflict209, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %vector.ph212 - -vector.ph212: ; preds = %vector.memcheck211 - %broadcast.splatinsert219 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat220 = shufflevector <8 x i64> %broadcast.splatinsert219, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert221 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat222 = shufflevector <8 x i32> %broadcast.splatinsert221, <8 x i32> undef, <8 x i32> zeroinitializer - %415 = trunc <8 x i64> %broadcast.splat220 to <8 x i32> - %416 = or <8 x i32> %415, - %417 = icmp sgt <8 x i32> %416, zeroinitializer - %418 = icmp sgt <8 x i32> %broadcast.splat222, %416 - %419 = and <8 x i1> %418, %417 - %420 = extractelement <8 x i32> %416, i32 0 - %421 = add nsw i32 %mul.i.i.6, %420 - %422 = sext i32 %421 to i64 - %423 = getelementptr inbounds float, float* %11, i64 %422 - %424 = bitcast float* %423 to <8 x i32>* - %wide.masked.load223 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %424, i32 4, <8 x i1> %419, <8 x i32> undef), !tbaa !12, !alias.scope !99 - %425 = getelementptr inbounds float, float* %7, i64 %422 - %426 = bitcast float* %425 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load223, <8 x i32>* %426, i32 4, <8 x i1> %419), !tbaa !12, !alias.scope !102, !noalias !99, !llvm.access.group !21 - %427 = trunc <8 x i64> %broadcast.splat220 to <8 x i32> - %428 = or <8 x i32> %427, - %429 = icmp sgt <8 x i32> %428, zeroinitializer - %430 = icmp sgt <8 x i32> %broadcast.splat222, %428 - %431 = and <8 x i1> %430, %429 - %432 = extractelement <8 x i32> %428, i32 0 - %433 = add nsw i32 %mul.i.i.6, %432 - %434 = sext i32 %433 to i64 - %435 = getelementptr inbounds float, float* %11, i64 %434 - %436 = bitcast float* %435 to <8 x i32>* - %wide.masked.load223.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %436, i32 4, <8 x i1> %431, <8 x i32> undef), !tbaa !12, !alias.scope !99 - %437 = getelementptr inbounds float, float* %7, i64 %434 - %438 = bitcast float* %437 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load223.1, <8 x i32>* %438, i32 4, <8 x i1> %431), !tbaa !12, !alias.scope !102, !noalias !99, !llvm.access.group !21 - %439 = trunc <8 x i64> %broadcast.splat220 to <8 x i32> - %440 = or <8 x i32> %439, - %441 = icmp sgt <8 x i32> %440, zeroinitializer - %442 = icmp sgt <8 x i32> %broadcast.splat222, %440 - %443 = and <8 x i1> %442, %441 - %444 = extractelement <8 x i32> %440, i32 0 - %445 = add nsw i32 %mul.i.i.6, %444 - %446 = sext i32 %445 to i64 - %447 = getelementptr inbounds float, float* %11, i64 %446 - %448 = bitcast float* %447 to <8 x i32>* - %wide.masked.load223.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %448, i32 4, <8 x i1> %443, <8 x i32> undef), !tbaa !12, !alias.scope !99 - %449 = getelementptr inbounds float, float* %7, i64 %446 - %450 = bitcast float* %449 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load223.2, <8 x i32>* %450, i32 4, <8 x i1> %443), !tbaa !12, !alias.scope !102, !noalias !99, !llvm.access.group !21 - %451 = trunc <8 x i64> %broadcast.splat220 to <8 x i32> - %452 = or <8 x i32> %451, - %453 = icmp sgt <8 x i32> %452, zeroinitializer - %454 = icmp sgt <8 x i32> %broadcast.splat222, %452 - %455 = and <8 x i1> %454, %453 - %456 = extractelement <8 x i32> %452, i32 0 - %457 = add nsw i32 %mul.i.i.6, %456 - %458 = sext i32 %457 to i64 - %459 = getelementptr inbounds float, float* %11, i64 %458 - %460 = bitcast float* %459 to <8 x i32>* - %wide.masked.load223.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %460, i32 4, <8 x i1> %455, <8 x i32> undef), !tbaa !12, !alias.scope !99 - %461 = getelementptr inbounds float, float* %7, i64 %458 - %462 = bitcast float* %461 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load223.3, <8 x i32>* %462, i32 4, <8 x i1> %455), !tbaa !12, !alias.scope !102, !noalias !99, !llvm.access.group !21 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.3, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ], [ %554, %if.end.i.i.us.us.6.3 ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv2.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp7.i.i.us.us.6 = icmp sgt i32 %conv2.i.i.us.us.6, 0 - %cmp11.i.i.us.us.6 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.6 - %or.cond28.i.i.us.us.6 = and i1 %cmp11.i.i.us.us.6, %cmp7.i.i.us.us.6 - br i1 %or.cond28.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.6, %conv2.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.6 - %463 = bitcast float* %arrayidx.i.i.us.us.6 to i32* - %464 = load i32, i32* %463, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.6 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.6 - %465 = bitcast float* %arrayidx16.i.i.us.us.6 to i32* - store i32 %464, i32* %465, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.then.i.i.us.us.6, %pregion_for_entry.entry.i.i.us.us.6 - %466 = or i64 %_local_id_x.i.0.us.us.6, 1 - %add1.i.i.i.us.us.6.1 = add nuw nsw i64 %466, %mul.i.i.i - %conv2.i.i.us.us.6.1 = trunc i64 %add1.i.i.i.us.us.6.1 to i32 - %cmp7.i.i.us.us.6.1 = icmp sgt i32 %conv2.i.i.us.us.6.1, 0 - %cmp11.i.i.us.us.6.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.6.1 - %or.cond28.i.i.us.us.6.1 = and i1 %cmp11.i.i.us.us.6.1, %cmp7.i.i.us.us.6.1 - br i1 %or.cond28.i.i.us.us.6.1, label %if.then.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.i.i.us.us.6.3 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph212, %pregion_for_end.i.i.5 - %467 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.7 = or i32 %467, 7 - %cmp.i.i.7 = icmp sgt i32 %conv.i.i.7, 0 - %mul.i.i.7 = mul nsw i32 %15, %conv.i.i.7 - %cmp4.i.i.7 = icmp sgt i32 %sub.i.i, %conv.i.i.7 - %or.cond10 = and i1 %cmp.i.i.7, %cmp4.i.i.7 - br i1 %or.cond10, label %vector.scevcheck231, label %pregion_for_end.i.i.7 - -vector.scevcheck231: ; preds = %pregion_for_end.i.i.6 - %468 = mul i32 %15, %conv.i.i.7 - %469 = trunc i64 %2 to i32 - %470 = shl i32 %469, 5 - %471 = add i32 %468, %470 - %472 = icmp sgt i32 %471, 2147483616 - br i1 %472, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %vector.memcheck245 - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %vector.memcheck245, %vector.scevcheck231 - br label %pregion_for_entry.entry.i.i.us.us.7 - -vector.memcheck245: ; preds = %vector.scevcheck231 - %473 = mul i32 %15, %conv.i.i.7 - %474 = trunc i64 %2 to i32 - %475 = shl i32 %474, 5 - %476 = add i32 %473, %475 - %477 = sext i32 %476 to i64 - %scevgep233 = getelementptr float, float* %7, i64 %477 - %478 = add nsw i64 %477, 32 - %scevgep235 = getelementptr float, float* %7, i64 %478 - %scevgep237 = getelementptr float, float* %11, i64 %477 - %scevgep239 = getelementptr float, float* %11, i64 %478 - %bound0241 = icmp ult float* %scevgep233, %scevgep239 - %bound1242 = icmp ult float* %scevgep237, %scevgep235 - %found.conflict243 = and i1 %bound0241, %bound1242 - br i1 %found.conflict243, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %vector.ph246 - -vector.ph246: ; preds = %vector.memcheck245 - %broadcast.splatinsert253 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat254 = shufflevector <8 x i64> %broadcast.splatinsert253, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert255 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat256 = shufflevector <8 x i32> %broadcast.splatinsert255, <8 x i32> undef, <8 x i32> zeroinitializer - %479 = trunc <8 x i64> %broadcast.splat254 to <8 x i32> - %480 = or <8 x i32> %479, - %481 = icmp sgt <8 x i32> %480, zeroinitializer - %482 = icmp sgt <8 x i32> %broadcast.splat256, %480 - %483 = and <8 x i1> %482, %481 - %484 = extractelement <8 x i32> %480, i32 0 - %485 = add nsw i32 %mul.i.i.7, %484 - %486 = sext i32 %485 to i64 - %487 = getelementptr inbounds float, float* %11, i64 %486 - %488 = bitcast float* %487 to <8 x i32>* - %wide.masked.load257 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %488, i32 4, <8 x i1> %483, <8 x i32> undef), !tbaa !12, !alias.scope !104 - %489 = getelementptr inbounds float, float* %7, i64 %486 - %490 = bitcast float* %489 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load257, <8 x i32>* %490, i32 4, <8 x i1> %483), !tbaa !12, !alias.scope !107, !noalias !104, !llvm.access.group !21 - %491 = trunc <8 x i64> %broadcast.splat254 to <8 x i32> - %492 = or <8 x i32> %491, - %493 = icmp sgt <8 x i32> %492, zeroinitializer - %494 = icmp sgt <8 x i32> %broadcast.splat256, %492 - %495 = and <8 x i1> %494, %493 - %496 = extractelement <8 x i32> %492, i32 0 - %497 = add nsw i32 %mul.i.i.7, %496 - %498 = sext i32 %497 to i64 - %499 = getelementptr inbounds float, float* %11, i64 %498 - %500 = bitcast float* %499 to <8 x i32>* - %wide.masked.load257.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %500, i32 4, <8 x i1> %495, <8 x i32> undef), !tbaa !12, !alias.scope !104 - %501 = getelementptr inbounds float, float* %7, i64 %498 - %502 = bitcast float* %501 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load257.1, <8 x i32>* %502, i32 4, <8 x i1> %495), !tbaa !12, !alias.scope !107, !noalias !104, !llvm.access.group !21 - %503 = trunc <8 x i64> %broadcast.splat254 to <8 x i32> - %504 = or <8 x i32> %503, - %505 = icmp sgt <8 x i32> %504, zeroinitializer - %506 = icmp sgt <8 x i32> %broadcast.splat256, %504 - %507 = and <8 x i1> %506, %505 - %508 = extractelement <8 x i32> %504, i32 0 - %509 = add nsw i32 %mul.i.i.7, %508 - %510 = sext i32 %509 to i64 - %511 = getelementptr inbounds float, float* %11, i64 %510 - %512 = bitcast float* %511 to <8 x i32>* - %wide.masked.load257.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %512, i32 4, <8 x i1> %507, <8 x i32> undef), !tbaa !12, !alias.scope !104 - %513 = getelementptr inbounds float, float* %7, i64 %510 - %514 = bitcast float* %513 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load257.2, <8 x i32>* %514, i32 4, <8 x i1> %507), !tbaa !12, !alias.scope !107, !noalias !104, !llvm.access.group !21 - %515 = trunc <8 x i64> %broadcast.splat254 to <8 x i32> - %516 = or <8 x i32> %515, - %517 = icmp sgt <8 x i32> %516, zeroinitializer - %518 = icmp sgt <8 x i32> %broadcast.splat256, %516 - %519 = and <8 x i1> %518, %517 - %520 = extractelement <8 x i32> %516, i32 0 - %521 = add nsw i32 %mul.i.i.7, %520 - %522 = sext i32 %521 to i64 - %523 = getelementptr inbounds float, float* %11, i64 %522 - %524 = bitcast float* %523 to <8 x i32>* - %wide.masked.load257.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %524, i32 4, <8 x i1> %519, <8 x i32> undef), !tbaa !12, !alias.scope !104 - %525 = getelementptr inbounds float, float* %7, i64 %522 - %526 = bitcast float* %525 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load257.3, <8 x i32>* %526, i32 4, <8 x i1> %519), !tbaa !12, !alias.scope !107, !noalias !104, !llvm.access.group !21 - br label %pregion_for_end.i.i.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.3, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ], [ %542, %if.end.i.i.us.us.7.3 ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv2.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp7.i.i.us.us.7 = icmp sgt i32 %conv2.i.i.us.us.7, 0 - %cmp11.i.i.us.us.7 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.7 - %or.cond28.i.i.us.us.7 = and i1 %cmp11.i.i.us.us.7, %cmp7.i.i.us.us.7 - br i1 %or.cond28.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.7, %conv2.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.7 - %527 = bitcast float* %arrayidx.i.i.us.us.7 to i32* - %528 = load i32, i32* %527, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.7 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.7 - %529 = bitcast float* %arrayidx16.i.i.us.us.7 to i32* - store i32 %528, i32* %529, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.then.i.i.us.us.7, %pregion_for_entry.entry.i.i.us.us.7 - %530 = or i64 %_local_id_x.i.0.us.us.7, 1 - %add1.i.i.i.us.us.7.1 = add nuw nsw i64 %530, %mul.i.i.i - %conv2.i.i.us.us.7.1 = trunc i64 %add1.i.i.i.us.us.7.1 to i32 - %cmp7.i.i.us.us.7.1 = icmp sgt i32 %conv2.i.i.us.us.7.1, 0 - %cmp11.i.i.us.us.7.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.7.1 - %or.cond28.i.i.us.us.7.1 = and i1 %cmp11.i.i.us.us.7.1, %cmp7.i.i.us.us.7.1 - br i1 %or.cond28.i.i.us.us.7.1, label %if.then.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1 - -pregion_for_end.i.i.7.loopexit: ; preds = %if.end.i.i.us.us.7.3 - br label %pregion_for_end.i.i.7 - -pregion_for_end.i.i.7: ; preds = %pregion_for_end.i.i.7.loopexit, %vector.ph246, %pregion_for_end.i.i.6 - ret void - -if.then.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7 - %add.i.i.us.us.7.1 = add nsw i32 %mul.i.i.7, %conv2.i.i.us.us.7.1 - %idxprom.i.i.us.us.7.1 = sext i32 %add.i.i.us.us.7.1 to i64 - %arrayidx.i.i.us.us.7.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.7.1 - %531 = bitcast float* %arrayidx.i.i.us.us.7.1 to i32* - %532 = load i32, i32* %531, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.7.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.7.1 - %533 = bitcast float* %arrayidx16.i.i.us.us.7.1 to i32* - store i32 %532, i32* %533, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.7.1 - -if.end.i.i.us.us.7.1: ; preds = %if.then.i.i.us.us.7.1, %if.end.i.i.us.us.7 - %534 = or i64 %_local_id_x.i.0.us.us.7, 2 - %add1.i.i.i.us.us.7.2 = add nuw nsw i64 %534, %mul.i.i.i - %conv2.i.i.us.us.7.2 = trunc i64 %add1.i.i.i.us.us.7.2 to i32 - %cmp7.i.i.us.us.7.2 = icmp sgt i32 %conv2.i.i.us.us.7.2, 0 - %cmp11.i.i.us.us.7.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.7.2 - %or.cond28.i.i.us.us.7.2 = and i1 %cmp11.i.i.us.us.7.2, %cmp7.i.i.us.us.7.2 - br i1 %or.cond28.i.i.us.us.7.2, label %if.then.i.i.us.us.7.2, label %if.end.i.i.us.us.7.2 - -if.then.i.i.us.us.7.2: ; preds = %if.end.i.i.us.us.7.1 - %add.i.i.us.us.7.2 = add nsw i32 %mul.i.i.7, %conv2.i.i.us.us.7.2 - %idxprom.i.i.us.us.7.2 = sext i32 %add.i.i.us.us.7.2 to i64 - %arrayidx.i.i.us.us.7.2 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.7.2 - %535 = bitcast float* %arrayidx.i.i.us.us.7.2 to i32* - %536 = load i32, i32* %535, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.7.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.7.2 - %537 = bitcast float* %arrayidx16.i.i.us.us.7.2 to i32* - store i32 %536, i32* %537, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.7.2 - -if.end.i.i.us.us.7.2: ; preds = %if.then.i.i.us.us.7.2, %if.end.i.i.us.us.7.1 - %538 = or i64 %_local_id_x.i.0.us.us.7, 3 - %add1.i.i.i.us.us.7.3 = add nuw nsw i64 %538, %mul.i.i.i - %conv2.i.i.us.us.7.3 = trunc i64 %add1.i.i.i.us.us.7.3 to i32 - %cmp7.i.i.us.us.7.3 = icmp sgt i32 %conv2.i.i.us.us.7.3, 0 - %cmp11.i.i.us.us.7.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.7.3 - %or.cond28.i.i.us.us.7.3 = and i1 %cmp11.i.i.us.us.7.3, %cmp7.i.i.us.us.7.3 - br i1 %or.cond28.i.i.us.us.7.3, label %if.then.i.i.us.us.7.3, label %if.end.i.i.us.us.7.3 - -if.then.i.i.us.us.7.3: ; preds = %if.end.i.i.us.us.7.2 - %add.i.i.us.us.7.3 = add nsw i32 %mul.i.i.7, %conv2.i.i.us.us.7.3 - %idxprom.i.i.us.us.7.3 = sext i32 %add.i.i.us.us.7.3 to i64 - %arrayidx.i.i.us.us.7.3 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.7.3 - %539 = bitcast float* %arrayidx.i.i.us.us.7.3 to i32* - %540 = load i32, i32* %539, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.7.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.7.3 - %541 = bitcast float* %arrayidx16.i.i.us.us.7.3 to i32* - store i32 %540, i32* %541, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.7.3 - -if.end.i.i.us.us.7.3: ; preds = %if.then.i.i.us.us.7.3, %if.end.i.i.us.us.7.2 - %542 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 4 - %exitcond.7.not.3 = icmp eq i64 %542, 32 - br i1 %exitcond.7.not.3, label %pregion_for_end.i.i.7.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !109 - -if.then.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6 - %add.i.i.us.us.6.1 = add nsw i32 %mul.i.i.6, %conv2.i.i.us.us.6.1 - %idxprom.i.i.us.us.6.1 = sext i32 %add.i.i.us.us.6.1 to i64 - %arrayidx.i.i.us.us.6.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.6.1 - %543 = bitcast float* %arrayidx.i.i.us.us.6.1 to i32* - %544 = load i32, i32* %543, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.6.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.6.1 - %545 = bitcast float* %arrayidx16.i.i.us.us.6.1 to i32* - store i32 %544, i32* %545, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.6.1 - -if.end.i.i.us.us.6.1: ; preds = %if.then.i.i.us.us.6.1, %if.end.i.i.us.us.6 - %546 = or i64 %_local_id_x.i.0.us.us.6, 2 - %add1.i.i.i.us.us.6.2 = add nuw nsw i64 %546, %mul.i.i.i - %conv2.i.i.us.us.6.2 = trunc i64 %add1.i.i.i.us.us.6.2 to i32 - %cmp7.i.i.us.us.6.2 = icmp sgt i32 %conv2.i.i.us.us.6.2, 0 - %cmp11.i.i.us.us.6.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.6.2 - %or.cond28.i.i.us.us.6.2 = and i1 %cmp11.i.i.us.us.6.2, %cmp7.i.i.us.us.6.2 - br i1 %or.cond28.i.i.us.us.6.2, label %if.then.i.i.us.us.6.2, label %if.end.i.i.us.us.6.2 - -if.then.i.i.us.us.6.2: ; preds = %if.end.i.i.us.us.6.1 - %add.i.i.us.us.6.2 = add nsw i32 %mul.i.i.6, %conv2.i.i.us.us.6.2 - %idxprom.i.i.us.us.6.2 = sext i32 %add.i.i.us.us.6.2 to i64 - %arrayidx.i.i.us.us.6.2 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.6.2 - %547 = bitcast float* %arrayidx.i.i.us.us.6.2 to i32* - %548 = load i32, i32* %547, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.6.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.6.2 - %549 = bitcast float* %arrayidx16.i.i.us.us.6.2 to i32* - store i32 %548, i32* %549, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.6.2 - -if.end.i.i.us.us.6.2: ; preds = %if.then.i.i.us.us.6.2, %if.end.i.i.us.us.6.1 - %550 = or i64 %_local_id_x.i.0.us.us.6, 3 - %add1.i.i.i.us.us.6.3 = add nuw nsw i64 %550, %mul.i.i.i - %conv2.i.i.us.us.6.3 = trunc i64 %add1.i.i.i.us.us.6.3 to i32 - %cmp7.i.i.us.us.6.3 = icmp sgt i32 %conv2.i.i.us.us.6.3, 0 - %cmp11.i.i.us.us.6.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.6.3 - %or.cond28.i.i.us.us.6.3 = and i1 %cmp11.i.i.us.us.6.3, %cmp7.i.i.us.us.6.3 - br i1 %or.cond28.i.i.us.us.6.3, label %if.then.i.i.us.us.6.3, label %if.end.i.i.us.us.6.3 - -if.then.i.i.us.us.6.3: ; preds = %if.end.i.i.us.us.6.2 - %add.i.i.us.us.6.3 = add nsw i32 %mul.i.i.6, %conv2.i.i.us.us.6.3 - %idxprom.i.i.us.us.6.3 = sext i32 %add.i.i.us.us.6.3 to i64 - %arrayidx.i.i.us.us.6.3 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.6.3 - %551 = bitcast float* %arrayidx.i.i.us.us.6.3 to i32* - %552 = load i32, i32* %551, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.6.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.6.3 - %553 = bitcast float* %arrayidx16.i.i.us.us.6.3 to i32* - store i32 %552, i32* %553, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.6.3 - -if.end.i.i.us.us.6.3: ; preds = %if.then.i.i.us.us.6.3, %if.end.i.i.us.us.6.2 - %554 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 4 - %exitcond.6.not.3 = icmp eq i64 %554, 32 - br i1 %exitcond.6.not.3, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !110 - -if.then.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5 - %add.i.i.us.us.5.1 = add nsw i32 %mul.i.i.5, %conv2.i.i.us.us.5.1 - %idxprom.i.i.us.us.5.1 = sext i32 %add.i.i.us.us.5.1 to i64 - %arrayidx.i.i.us.us.5.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.5.1 - %555 = bitcast float* %arrayidx.i.i.us.us.5.1 to i32* - %556 = load i32, i32* %555, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.5.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.5.1 - %557 = bitcast float* %arrayidx16.i.i.us.us.5.1 to i32* - store i32 %556, i32* %557, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.5.1 - -if.end.i.i.us.us.5.1: ; preds = %if.then.i.i.us.us.5.1, %if.end.i.i.us.us.5 - %558 = or i64 %_local_id_x.i.0.us.us.5, 2 - %add1.i.i.i.us.us.5.2 = add nuw nsw i64 %558, %mul.i.i.i - %conv2.i.i.us.us.5.2 = trunc i64 %add1.i.i.i.us.us.5.2 to i32 - %cmp7.i.i.us.us.5.2 = icmp sgt i32 %conv2.i.i.us.us.5.2, 0 - %cmp11.i.i.us.us.5.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.5.2 - %or.cond28.i.i.us.us.5.2 = and i1 %cmp11.i.i.us.us.5.2, %cmp7.i.i.us.us.5.2 - br i1 %or.cond28.i.i.us.us.5.2, label %if.then.i.i.us.us.5.2, label %if.end.i.i.us.us.5.2 - -if.then.i.i.us.us.5.2: ; preds = %if.end.i.i.us.us.5.1 - %add.i.i.us.us.5.2 = add nsw i32 %mul.i.i.5, %conv2.i.i.us.us.5.2 - %idxprom.i.i.us.us.5.2 = sext i32 %add.i.i.us.us.5.2 to i64 - %arrayidx.i.i.us.us.5.2 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.5.2 - %559 = bitcast float* %arrayidx.i.i.us.us.5.2 to i32* - %560 = load i32, i32* %559, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.5.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.5.2 - %561 = bitcast float* %arrayidx16.i.i.us.us.5.2 to i32* - store i32 %560, i32* %561, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.5.2 - -if.end.i.i.us.us.5.2: ; preds = %if.then.i.i.us.us.5.2, %if.end.i.i.us.us.5.1 - %562 = or i64 %_local_id_x.i.0.us.us.5, 3 - %add1.i.i.i.us.us.5.3 = add nuw nsw i64 %562, %mul.i.i.i - %conv2.i.i.us.us.5.3 = trunc i64 %add1.i.i.i.us.us.5.3 to i32 - %cmp7.i.i.us.us.5.3 = icmp sgt i32 %conv2.i.i.us.us.5.3, 0 - %cmp11.i.i.us.us.5.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.5.3 - %or.cond28.i.i.us.us.5.3 = and i1 %cmp11.i.i.us.us.5.3, %cmp7.i.i.us.us.5.3 - br i1 %or.cond28.i.i.us.us.5.3, label %if.then.i.i.us.us.5.3, label %if.end.i.i.us.us.5.3 - -if.then.i.i.us.us.5.3: ; preds = %if.end.i.i.us.us.5.2 - %add.i.i.us.us.5.3 = add nsw i32 %mul.i.i.5, %conv2.i.i.us.us.5.3 - %idxprom.i.i.us.us.5.3 = sext i32 %add.i.i.us.us.5.3 to i64 - %arrayidx.i.i.us.us.5.3 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.5.3 - %563 = bitcast float* %arrayidx.i.i.us.us.5.3 to i32* - %564 = load i32, i32* %563, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.5.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.5.3 - %565 = bitcast float* %arrayidx16.i.i.us.us.5.3 to i32* - store i32 %564, i32* %565, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.5.3 - -if.end.i.i.us.us.5.3: ; preds = %if.then.i.i.us.us.5.3, %if.end.i.i.us.us.5.2 - %566 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 4 - %exitcond.5.not.3 = icmp eq i64 %566, 32 - br i1 %exitcond.5.not.3, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !111 - -if.then.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4 - %add.i.i.us.us.4.1 = add nsw i32 %mul.i.i.4, %conv2.i.i.us.us.4.1 - %idxprom.i.i.us.us.4.1 = sext i32 %add.i.i.us.us.4.1 to i64 - %arrayidx.i.i.us.us.4.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.4.1 - %567 = bitcast float* %arrayidx.i.i.us.us.4.1 to i32* - %568 = load i32, i32* %567, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.4.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.4.1 - %569 = bitcast float* %arrayidx16.i.i.us.us.4.1 to i32* - store i32 %568, i32* %569, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.4.1 - -if.end.i.i.us.us.4.1: ; preds = %if.then.i.i.us.us.4.1, %if.end.i.i.us.us.4 - %570 = or i64 %_local_id_x.i.0.us.us.4, 2 - %add1.i.i.i.us.us.4.2 = add nuw nsw i64 %570, %mul.i.i.i - %conv2.i.i.us.us.4.2 = trunc i64 %add1.i.i.i.us.us.4.2 to i32 - %cmp7.i.i.us.us.4.2 = icmp sgt i32 %conv2.i.i.us.us.4.2, 0 - %cmp11.i.i.us.us.4.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.4.2 - %or.cond28.i.i.us.us.4.2 = and i1 %cmp11.i.i.us.us.4.2, %cmp7.i.i.us.us.4.2 - br i1 %or.cond28.i.i.us.us.4.2, label %if.then.i.i.us.us.4.2, label %if.end.i.i.us.us.4.2 - -if.then.i.i.us.us.4.2: ; preds = %if.end.i.i.us.us.4.1 - %add.i.i.us.us.4.2 = add nsw i32 %mul.i.i.4, %conv2.i.i.us.us.4.2 - %idxprom.i.i.us.us.4.2 = sext i32 %add.i.i.us.us.4.2 to i64 - %arrayidx.i.i.us.us.4.2 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.4.2 - %571 = bitcast float* %arrayidx.i.i.us.us.4.2 to i32* - %572 = load i32, i32* %571, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.4.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.4.2 - %573 = bitcast float* %arrayidx16.i.i.us.us.4.2 to i32* - store i32 %572, i32* %573, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.4.2 - -if.end.i.i.us.us.4.2: ; preds = %if.then.i.i.us.us.4.2, %if.end.i.i.us.us.4.1 - %574 = or i64 %_local_id_x.i.0.us.us.4, 3 - %add1.i.i.i.us.us.4.3 = add nuw nsw i64 %574, %mul.i.i.i - %conv2.i.i.us.us.4.3 = trunc i64 %add1.i.i.i.us.us.4.3 to i32 - %cmp7.i.i.us.us.4.3 = icmp sgt i32 %conv2.i.i.us.us.4.3, 0 - %cmp11.i.i.us.us.4.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.4.3 - %or.cond28.i.i.us.us.4.3 = and i1 %cmp11.i.i.us.us.4.3, %cmp7.i.i.us.us.4.3 - br i1 %or.cond28.i.i.us.us.4.3, label %if.then.i.i.us.us.4.3, label %if.end.i.i.us.us.4.3 - -if.then.i.i.us.us.4.3: ; preds = %if.end.i.i.us.us.4.2 - %add.i.i.us.us.4.3 = add nsw i32 %mul.i.i.4, %conv2.i.i.us.us.4.3 - %idxprom.i.i.us.us.4.3 = sext i32 %add.i.i.us.us.4.3 to i64 - %arrayidx.i.i.us.us.4.3 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.4.3 - %575 = bitcast float* %arrayidx.i.i.us.us.4.3 to i32* - %576 = load i32, i32* %575, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.4.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.4.3 - %577 = bitcast float* %arrayidx16.i.i.us.us.4.3 to i32* - store i32 %576, i32* %577, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.4.3 - -if.end.i.i.us.us.4.3: ; preds = %if.then.i.i.us.us.4.3, %if.end.i.i.us.us.4.2 - %578 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 4 - %exitcond.4.not.3 = icmp eq i64 %578, 32 - br i1 %exitcond.4.not.3, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !112 - -if.then.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3 - %add.i.i.us.us.3.1 = add nsw i32 %mul.i.i.3, %conv2.i.i.us.us.3.1 - %idxprom.i.i.us.us.3.1 = sext i32 %add.i.i.us.us.3.1 to i64 - %arrayidx.i.i.us.us.3.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.3.1 - %579 = bitcast float* %arrayidx.i.i.us.us.3.1 to i32* - %580 = load i32, i32* %579, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.3.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.3.1 - %581 = bitcast float* %arrayidx16.i.i.us.us.3.1 to i32* - store i32 %580, i32* %581, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.3.1 - -if.end.i.i.us.us.3.1: ; preds = %if.then.i.i.us.us.3.1, %if.end.i.i.us.us.3 - %582 = or i64 %_local_id_x.i.0.us.us.3, 2 - %add1.i.i.i.us.us.3.2 = add nuw nsw i64 %582, %mul.i.i.i - %conv2.i.i.us.us.3.2 = trunc i64 %add1.i.i.i.us.us.3.2 to i32 - %cmp7.i.i.us.us.3.2 = icmp sgt i32 %conv2.i.i.us.us.3.2, 0 - %cmp11.i.i.us.us.3.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.3.2 - %or.cond28.i.i.us.us.3.2 = and i1 %cmp11.i.i.us.us.3.2, %cmp7.i.i.us.us.3.2 - br i1 %or.cond28.i.i.us.us.3.2, label %if.then.i.i.us.us.3.2, label %if.end.i.i.us.us.3.2 - -if.then.i.i.us.us.3.2: ; preds = %if.end.i.i.us.us.3.1 - %add.i.i.us.us.3.2 = add nsw i32 %mul.i.i.3, %conv2.i.i.us.us.3.2 - %idxprom.i.i.us.us.3.2 = sext i32 %add.i.i.us.us.3.2 to i64 - %arrayidx.i.i.us.us.3.2 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.3.2 - %583 = bitcast float* %arrayidx.i.i.us.us.3.2 to i32* - %584 = load i32, i32* %583, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.3.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.3.2 - %585 = bitcast float* %arrayidx16.i.i.us.us.3.2 to i32* - store i32 %584, i32* %585, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.3.2 - -if.end.i.i.us.us.3.2: ; preds = %if.then.i.i.us.us.3.2, %if.end.i.i.us.us.3.1 - %586 = or i64 %_local_id_x.i.0.us.us.3, 3 - %add1.i.i.i.us.us.3.3 = add nuw nsw i64 %586, %mul.i.i.i - %conv2.i.i.us.us.3.3 = trunc i64 %add1.i.i.i.us.us.3.3 to i32 - %cmp7.i.i.us.us.3.3 = icmp sgt i32 %conv2.i.i.us.us.3.3, 0 - %cmp11.i.i.us.us.3.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.3.3 - %or.cond28.i.i.us.us.3.3 = and i1 %cmp11.i.i.us.us.3.3, %cmp7.i.i.us.us.3.3 - br i1 %or.cond28.i.i.us.us.3.3, label %if.then.i.i.us.us.3.3, label %if.end.i.i.us.us.3.3 - -if.then.i.i.us.us.3.3: ; preds = %if.end.i.i.us.us.3.2 - %add.i.i.us.us.3.3 = add nsw i32 %mul.i.i.3, %conv2.i.i.us.us.3.3 - %idxprom.i.i.us.us.3.3 = sext i32 %add.i.i.us.us.3.3 to i64 - %arrayidx.i.i.us.us.3.3 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.3.3 - %587 = bitcast float* %arrayidx.i.i.us.us.3.3 to i32* - %588 = load i32, i32* %587, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.3.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.3.3 - %589 = bitcast float* %arrayidx16.i.i.us.us.3.3 to i32* - store i32 %588, i32* %589, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.3.3 - -if.end.i.i.us.us.3.3: ; preds = %if.then.i.i.us.us.3.3, %if.end.i.i.us.us.3.2 - %590 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 4 - %exitcond.3.not.3 = icmp eq i64 %590, 32 - br i1 %exitcond.3.not.3, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !113 - -if.then.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2 - %add.i.i.us.us.2.1 = add nsw i32 %mul.i.i.2, %conv2.i.i.us.us.2.1 - %idxprom.i.i.us.us.2.1 = sext i32 %add.i.i.us.us.2.1 to i64 - %arrayidx.i.i.us.us.2.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.2.1 - %591 = bitcast float* %arrayidx.i.i.us.us.2.1 to i32* - %592 = load i32, i32* %591, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.2.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.2.1 - %593 = bitcast float* %arrayidx16.i.i.us.us.2.1 to i32* - store i32 %592, i32* %593, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.2.1 - -if.end.i.i.us.us.2.1: ; preds = %if.then.i.i.us.us.2.1, %if.end.i.i.us.us.2 - %594 = or i64 %_local_id_x.i.0.us.us.2, 2 - %add1.i.i.i.us.us.2.2 = add nuw nsw i64 %594, %mul.i.i.i - %conv2.i.i.us.us.2.2 = trunc i64 %add1.i.i.i.us.us.2.2 to i32 - %cmp7.i.i.us.us.2.2 = icmp sgt i32 %conv2.i.i.us.us.2.2, 0 - %cmp11.i.i.us.us.2.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.2.2 - %or.cond28.i.i.us.us.2.2 = and i1 %cmp11.i.i.us.us.2.2, %cmp7.i.i.us.us.2.2 - br i1 %or.cond28.i.i.us.us.2.2, label %if.then.i.i.us.us.2.2, label %if.end.i.i.us.us.2.2 - -if.then.i.i.us.us.2.2: ; preds = %if.end.i.i.us.us.2.1 - %add.i.i.us.us.2.2 = add nsw i32 %mul.i.i.2, %conv2.i.i.us.us.2.2 - %idxprom.i.i.us.us.2.2 = sext i32 %add.i.i.us.us.2.2 to i64 - %arrayidx.i.i.us.us.2.2 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.2.2 - %595 = bitcast float* %arrayidx.i.i.us.us.2.2 to i32* - %596 = load i32, i32* %595, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.2.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.2.2 - %597 = bitcast float* %arrayidx16.i.i.us.us.2.2 to i32* - store i32 %596, i32* %597, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.2.2 - -if.end.i.i.us.us.2.2: ; preds = %if.then.i.i.us.us.2.2, %if.end.i.i.us.us.2.1 - %598 = or i64 %_local_id_x.i.0.us.us.2, 3 - %add1.i.i.i.us.us.2.3 = add nuw nsw i64 %598, %mul.i.i.i - %conv2.i.i.us.us.2.3 = trunc i64 %add1.i.i.i.us.us.2.3 to i32 - %cmp7.i.i.us.us.2.3 = icmp sgt i32 %conv2.i.i.us.us.2.3, 0 - %cmp11.i.i.us.us.2.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.2.3 - %or.cond28.i.i.us.us.2.3 = and i1 %cmp11.i.i.us.us.2.3, %cmp7.i.i.us.us.2.3 - br i1 %or.cond28.i.i.us.us.2.3, label %if.then.i.i.us.us.2.3, label %if.end.i.i.us.us.2.3 - -if.then.i.i.us.us.2.3: ; preds = %if.end.i.i.us.us.2.2 - %add.i.i.us.us.2.3 = add nsw i32 %mul.i.i.2, %conv2.i.i.us.us.2.3 - %idxprom.i.i.us.us.2.3 = sext i32 %add.i.i.us.us.2.3 to i64 - %arrayidx.i.i.us.us.2.3 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.2.3 - %599 = bitcast float* %arrayidx.i.i.us.us.2.3 to i32* - %600 = load i32, i32* %599, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.2.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.2.3 - %601 = bitcast float* %arrayidx16.i.i.us.us.2.3 to i32* - store i32 %600, i32* %601, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.2.3 - -if.end.i.i.us.us.2.3: ; preds = %if.then.i.i.us.us.2.3, %if.end.i.i.us.us.2.2 - %602 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 4 - %exitcond.2.not.3 = icmp eq i64 %602, 32 - br i1 %exitcond.2.not.3, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !114 - -if.then.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1 - %add.i.i.us.us.1.1 = add nsw i32 %mul.i.i.1, %conv2.i.i.us.us.1.1 - %idxprom.i.i.us.us.1.1 = sext i32 %add.i.i.us.us.1.1 to i64 - %arrayidx.i.i.us.us.1.1 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.1.1 - %603 = bitcast float* %arrayidx.i.i.us.us.1.1 to i32* - %604 = load i32, i32* %603, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.1.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.1.1 - %605 = bitcast float* %arrayidx16.i.i.us.us.1.1 to i32* - store i32 %604, i32* %605, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.1.1 - -if.end.i.i.us.us.1.1: ; preds = %if.then.i.i.us.us.1.1, %if.end.i.i.us.us.1 - %606 = or i64 %_local_id_x.i.0.us.us.1, 2 - %add1.i.i.i.us.us.1.2 = add nuw nsw i64 %606, %mul.i.i.i - %conv2.i.i.us.us.1.2 = trunc i64 %add1.i.i.i.us.us.1.2 to i32 - %cmp7.i.i.us.us.1.2 = icmp sgt i32 %conv2.i.i.us.us.1.2, 0 - %cmp11.i.i.us.us.1.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.1.2 - %or.cond28.i.i.us.us.1.2 = and i1 %cmp11.i.i.us.us.1.2, %cmp7.i.i.us.us.1.2 - br i1 %or.cond28.i.i.us.us.1.2, label %if.then.i.i.us.us.1.2, label %if.end.i.i.us.us.1.2 - -if.then.i.i.us.us.1.2: ; preds = %if.end.i.i.us.us.1.1 - %add.i.i.us.us.1.2 = add nsw i32 %mul.i.i.1, %conv2.i.i.us.us.1.2 - %idxprom.i.i.us.us.1.2 = sext i32 %add.i.i.us.us.1.2 to i64 - %arrayidx.i.i.us.us.1.2 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.1.2 - %607 = bitcast float* %arrayidx.i.i.us.us.1.2 to i32* - %608 = load i32, i32* %607, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.1.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.1.2 - %609 = bitcast float* %arrayidx16.i.i.us.us.1.2 to i32* - store i32 %608, i32* %609, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.1.2 - -if.end.i.i.us.us.1.2: ; preds = %if.then.i.i.us.us.1.2, %if.end.i.i.us.us.1.1 - %610 = or i64 %_local_id_x.i.0.us.us.1, 3 - %add1.i.i.i.us.us.1.3 = add nuw nsw i64 %610, %mul.i.i.i - %conv2.i.i.us.us.1.3 = trunc i64 %add1.i.i.i.us.us.1.3 to i32 - %cmp7.i.i.us.us.1.3 = icmp sgt i32 %conv2.i.i.us.us.1.3, 0 - %cmp11.i.i.us.us.1.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.1.3 - %or.cond28.i.i.us.us.1.3 = and i1 %cmp11.i.i.us.us.1.3, %cmp7.i.i.us.us.1.3 - br i1 %or.cond28.i.i.us.us.1.3, label %if.then.i.i.us.us.1.3, label %if.end.i.i.us.us.1.3 - -if.then.i.i.us.us.1.3: ; preds = %if.end.i.i.us.us.1.2 - %add.i.i.us.us.1.3 = add nsw i32 %mul.i.i.1, %conv2.i.i.us.us.1.3 - %idxprom.i.i.us.us.1.3 = sext i32 %add.i.i.us.us.1.3 to i64 - %arrayidx.i.i.us.us.1.3 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.1.3 - %611 = bitcast float* %arrayidx.i.i.us.us.1.3 to i32* - %612 = load i32, i32* %611, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.1.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.1.3 - %613 = bitcast float* %arrayidx16.i.i.us.us.1.3 to i32* - store i32 %612, i32* %613, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.1.3 - -if.end.i.i.us.us.1.3: ; preds = %if.then.i.i.us.us.1.3, %if.end.i.i.us.us.1.2 - %614 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 4 - %exitcond.1.not.3 = icmp eq i64 %614, 32 - br i1 %exitcond.1.not.3, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !115 - -if.then.i.i.us.us.1277: ; preds = %if.end.i.i.us.us - %add.i.i.us.us.1273 = add nsw i32 %mul.i.i, %conv2.i.i.us.us.1268 - %idxprom.i.i.us.us.1274 = sext i32 %add.i.i.us.us.1273 to i64 - %arrayidx.i.i.us.us.1275 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.1274 - %615 = bitcast float* %arrayidx.i.i.us.us.1275 to i32* - %616 = load i32, i32* %615, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.1276 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.1274 - %617 = bitcast float* %arrayidx16.i.i.us.us.1276 to i32* - store i32 %616, i32* %617, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.1278 - -if.end.i.i.us.us.1278: ; preds = %if.then.i.i.us.us.1277, %if.end.i.i.us.us - %618 = or i64 %_local_id_x.i.0.us.us, 2 - %add1.i.i.i.us.us.2280 = add nuw nsw i64 %618, %mul.i.i.i - %conv2.i.i.us.us.2281 = trunc i64 %add1.i.i.i.us.us.2280 to i32 - %cmp7.i.i.us.us.2282 = icmp sgt i32 %conv2.i.i.us.us.2281, 0 - %cmp11.i.i.us.us.2283 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.2281 - %or.cond28.i.i.us.us.2284 = and i1 %cmp11.i.i.us.us.2283, %cmp7.i.i.us.us.2282 - br i1 %or.cond28.i.i.us.us.2284, label %if.then.i.i.us.us.2290, label %if.end.i.i.us.us.2291 - -if.then.i.i.us.us.2290: ; preds = %if.end.i.i.us.us.1278 - %add.i.i.us.us.2286 = add nsw i32 %mul.i.i, %conv2.i.i.us.us.2281 - %idxprom.i.i.us.us.2287 = sext i32 %add.i.i.us.us.2286 to i64 - %arrayidx.i.i.us.us.2288 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.2287 - %619 = bitcast float* %arrayidx.i.i.us.us.2288 to i32* - %620 = load i32, i32* %619, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.2289 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.2287 - %621 = bitcast float* %arrayidx16.i.i.us.us.2289 to i32* - store i32 %620, i32* %621, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.2291 - -if.end.i.i.us.us.2291: ; preds = %if.then.i.i.us.us.2290, %if.end.i.i.us.us.1278 - %622 = or i64 %_local_id_x.i.0.us.us, 3 - %add1.i.i.i.us.us.3293 = add nuw nsw i64 %622, %mul.i.i.i - %conv2.i.i.us.us.3294 = trunc i64 %add1.i.i.i.us.us.3293 to i32 - %cmp7.i.i.us.us.3295 = icmp sgt i32 %conv2.i.i.us.us.3294, 0 - %cmp11.i.i.us.us.3296 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.3294 - %or.cond28.i.i.us.us.3297 = and i1 %cmp11.i.i.us.us.3296, %cmp7.i.i.us.us.3295 - br i1 %or.cond28.i.i.us.us.3297, label %if.then.i.i.us.us.3303, label %if.end.i.i.us.us.3304 - -if.then.i.i.us.us.3303: ; preds = %if.end.i.i.us.us.2291 - %add.i.i.us.us.3299 = add nsw i32 %mul.i.i, %conv2.i.i.us.us.3294 - %idxprom.i.i.us.us.3300 = sext i32 %add.i.i.us.us.3299 to i64 - %arrayidx.i.i.us.us.3301 = getelementptr inbounds float, float* %11, i64 %idxprom.i.i.us.us.3300 - %623 = bitcast float* %arrayidx.i.i.us.us.3301 to i32* - %624 = load i32, i32* %623, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.3302 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.us.3300 - %625 = bitcast float* %arrayidx16.i.i.us.us.3302 to i32* - store i32 %624, i32* %625, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.3304 - -if.end.i.i.us.us.3304: ; preds = %if.then.i.i.us.us.3303, %if.end.i.i.us.us.2291 - %626 = add nuw nsw i64 %_local_id_x.i.0.us.us, 4 - %exitcond.not.3 = icmp eq i64 %626, 32 - br i1 %exitcond.not.3, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !116 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_runJacobi2D_kernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { -pregion_for_entry.pregion_for_init.i.i: - %5 = bitcast i8** %0 to float** - %6 = load float*, float** %5, align 8 - %7 = getelementptr i8*, i8** %0, i64 1 - %8 = bitcast i8** %7 to float** - %9 = load float*, float** %8, align 8 - %10 = getelementptr i8*, i8** %0, i64 2 - %11 = bitcast i8** %10 to i32** - %12 = load i32*, i32** %11, align 8 - %13 = load i32, i32* %12, align 4 - %mul3.i.i.i = shl i64 %3, 3 - %mul.i.i.i = shl i64 %2, 5 - %sub.i.i = add nsw i32 %13, -1 - %conv.i.i = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %conv.i.i, 0 - %mul.i.i = mul nsw i32 %13, %conv.i.i - %cmp4.i.i = icmp sgt i32 %sub.i.i, %conv.i.i - %or.cond = and i1 %cmp.i.i, %cmp4.i.i - br i1 %or.cond, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %14 = trunc i64 %3 to i32 - %15 = mul i32 %13, %14 - %16 = shl i32 %15, 3 - %17 = trunc i64 %2 to i32 - %18 = shl i32 %17, 5 - %19 = add i32 %16, %18 - %20 = icmp sgt i32 %19, 2147483616 - br i1 %20, label %pregion_for_entry.entry.i.i.us.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us.us - -vector.memcheck: ; preds = %vector.scevcheck - %21 = trunc i64 %3 to i32 - %22 = mul i32 %13, %21 - %23 = shl i32 %22, 3 - %24 = trunc i64 %2 to i32 - %25 = shl i32 %24, 5 - %26 = add i32 %23, %25 - %27 = sext i32 %26 to i64 - %scevgep = getelementptr float, float* %6, i64 %27 - %28 = add nsw i64 %27, 32 - %scevgep12 = getelementptr float, float* %6, i64 %28 - %scevgep14 = getelementptr float, float* %9, i64 %27 - %scevgep16 = getelementptr float, float* %9, i64 %28 - %bound0 = icmp ult float* %scevgep, %scevgep16 - %bound1 = icmp ult float* %scevgep14, %scevgep12 - %found.conflict = and i1 %bound0, %bound1 - br i1 %found.conflict, label %pregion_for_entry.entry.i.i.us.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat19 = shufflevector <8 x i32> %broadcast.splatinsert18, <8 x i32> undef, <8 x i32> zeroinitializer - %29 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %30 = or <8 x i32> %29, - %31 = icmp sgt <8 x i32> %30, zeroinitializer - %32 = icmp sgt <8 x i32> %broadcast.splat19, %30 - %33 = and <8 x i1> %32, %31 - %34 = extractelement <8 x i32> %30, i32 0 - %35 = add nsw i32 %mul.i.i, %34 - %36 = sext i32 %35 to i64 - %37 = getelementptr inbounds float, float* %9, i64 %36 - %38 = bitcast float* %37 to <8 x i32>* - %wide.masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %38, i32 4, <8 x i1> %33, <8 x i32> undef), !tbaa !12, !alias.scope !117 - %39 = getelementptr inbounds float, float* %6, i64 %36 - %40 = bitcast float* %39 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load, <8 x i32>* %40, i32 4, <8 x i1> %33), !tbaa !12, !alias.scope !120, !noalias !117, !llvm.access.group !21 - %41 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %42 = or <8 x i32> %41, - %43 = icmp sgt <8 x i32> %42, zeroinitializer - %44 = icmp sgt <8 x i32> %broadcast.splat19, %42 - %45 = and <8 x i1> %44, %43 - %46 = extractelement <8 x i32> %42, i32 0 - %47 = add nsw i32 %mul.i.i, %46 - %48 = sext i32 %47 to i64 - %49 = getelementptr inbounds float, float* %9, i64 %48 - %50 = bitcast float* %49 to <8 x i32>* - %wide.masked.load.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %50, i32 4, <8 x i1> %45, <8 x i32> undef), !tbaa !12, !alias.scope !117 - %51 = getelementptr inbounds float, float* %6, i64 %48 - %52 = bitcast float* %51 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.1, <8 x i32>* %52, i32 4, <8 x i1> %45), !tbaa !12, !alias.scope !120, !noalias !117, !llvm.access.group !21 - %53 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %54 = or <8 x i32> %53, - %55 = icmp sgt <8 x i32> %54, zeroinitializer - %56 = icmp sgt <8 x i32> %broadcast.splat19, %54 - %57 = and <8 x i1> %56, %55 - %58 = extractelement <8 x i32> %54, i32 0 - %59 = add nsw i32 %mul.i.i, %58 - %60 = sext i32 %59 to i64 - %61 = getelementptr inbounds float, float* %9, i64 %60 - %62 = bitcast float* %61 to <8 x i32>* - %wide.masked.load.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %62, i32 4, <8 x i1> %57, <8 x i32> undef), !tbaa !12, !alias.scope !117 - %63 = getelementptr inbounds float, float* %6, i64 %60 - %64 = bitcast float* %63 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.2, <8 x i32>* %64, i32 4, <8 x i1> %57), !tbaa !12, !alias.scope !120, !noalias !117, !llvm.access.group !21 - %65 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %66 = or <8 x i32> %65, - %67 = icmp sgt <8 x i32> %66, zeroinitializer - %68 = icmp sgt <8 x i32> %broadcast.splat19, %66 - %69 = and <8 x i1> %68, %67 - %70 = extractelement <8 x i32> %66, i32 0 - %71 = add nsw i32 %mul.i.i, %70 - %72 = sext i32 %71 to i64 - %73 = getelementptr inbounds float, float* %9, i64 %72 - %74 = bitcast float* %73 to <8 x i32>* - %wide.masked.load.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %74, i32 4, <8 x i1> %69, <8 x i32> undef), !tbaa !12, !alias.scope !117 - %75 = getelementptr inbounds float, float* %6, i64 %72 - %76 = bitcast float* %75 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load.3, <8 x i32>* %76, i32 4, <8 x i1> %69), !tbaa !12, !alias.scope !120, !noalias !117, !llvm.access.group !21 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us.3304, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ], [ %624, %if.end.i.i.us.us.3304 ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv2.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp7.i.i.us.us = icmp sgt i32 %conv2.i.i.us.us, 0 - %cmp11.i.i.us.us = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us - %or.cond28.i.i.us.us = and i1 %cmp11.i.i.us.us, %cmp7.i.i.us.us - br i1 %or.cond28.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i, %conv2.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us - %77 = bitcast float* %arrayidx.i.i.us.us to i32* - %78 = load i32, i32* %77, align 4, !tbaa !12 - %arrayidx16.i.i.us.us = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us - %79 = bitcast float* %arrayidx16.i.i.us.us to i32* - store i32 %78, i32* %79, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.then.i.i.us.us, %pregion_for_entry.entry.i.i.us.us - %80 = or i64 %_local_id_x.i.0.us.us, 1 - %add1.i.i.i.us.us.1267 = add nuw nsw i64 %80, %mul.i.i.i - %conv2.i.i.us.us.1268 = trunc i64 %add1.i.i.i.us.us.1267 to i32 - %cmp7.i.i.us.us.1269 = icmp sgt i32 %conv2.i.i.us.us.1268, 0 - %cmp11.i.i.us.us.1270 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.1268 - %or.cond28.i.i.us.us.1271 = and i1 %cmp11.i.i.us.us.1270, %cmp7.i.i.us.us.1269 - br i1 %or.cond28.i.i.us.us.1271, label %if.then.i.i.us.us.1277, label %if.end.i.i.us.us.1278 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us.us.3304 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i - %81 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.1 = or i32 %81, 1 - %cmp.i.i.1 = icmp sgt i32 %conv.i.i.1, 0 - %mul.i.i.1 = mul nsw i32 %13, %conv.i.i.1 - %cmp4.i.i.1 = icmp sgt i32 %sub.i.i, %conv.i.i.1 - %or.cond4 = and i1 %cmp.i.i.1, %cmp4.i.i.1 - br i1 %or.cond4, label %vector.scevcheck27, label %pregion_for_end.i.i.1 - -vector.scevcheck27: ; preds = %pregion_for_end.i.i - %82 = mul i32 %13, %conv.i.i.1 - %83 = trunc i64 %2 to i32 - %84 = shl i32 %83, 5 - %85 = add i32 %82, %84 - %86 = icmp sgt i32 %85, 2147483616 - br i1 %86, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %vector.memcheck41 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %vector.memcheck41, %vector.scevcheck27 - br label %pregion_for_entry.entry.i.i.us.us.1 - -vector.memcheck41: ; preds = %vector.scevcheck27 - %87 = mul i32 %13, %conv.i.i.1 - %88 = trunc i64 %2 to i32 - %89 = shl i32 %88, 5 - %90 = add i32 %87, %89 - %91 = sext i32 %90 to i64 - %scevgep29 = getelementptr float, float* %6, i64 %91 - %92 = add nsw i64 %91, 32 - %scevgep31 = getelementptr float, float* %6, i64 %92 - %scevgep33 = getelementptr float, float* %9, i64 %91 - %scevgep35 = getelementptr float, float* %9, i64 %92 - %bound037 = icmp ult float* %scevgep29, %scevgep35 - %bound138 = icmp ult float* %scevgep33, %scevgep31 - %found.conflict39 = and i1 %bound037, %bound138 - br i1 %found.conflict39, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %vector.ph42 - -vector.ph42: ; preds = %vector.memcheck41 - %broadcast.splatinsert49 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat50 = shufflevector <8 x i64> %broadcast.splatinsert49, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert51 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat52 = shufflevector <8 x i32> %broadcast.splatinsert51, <8 x i32> undef, <8 x i32> zeroinitializer - %93 = trunc <8 x i64> %broadcast.splat50 to <8 x i32> - %94 = or <8 x i32> %93, - %95 = icmp sgt <8 x i32> %94, zeroinitializer - %96 = icmp sgt <8 x i32> %broadcast.splat52, %94 - %97 = and <8 x i1> %96, %95 - %98 = extractelement <8 x i32> %94, i32 0 - %99 = add nsw i32 %mul.i.i.1, %98 - %100 = sext i32 %99 to i64 - %101 = getelementptr inbounds float, float* %9, i64 %100 - %102 = bitcast float* %101 to <8 x i32>* - %wide.masked.load53 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %102, i32 4, <8 x i1> %97, <8 x i32> undef), !tbaa !12, !alias.scope !122 - %103 = getelementptr inbounds float, float* %6, i64 %100 - %104 = bitcast float* %103 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load53, <8 x i32>* %104, i32 4, <8 x i1> %97), !tbaa !12, !alias.scope !125, !noalias !122, !llvm.access.group !21 - %105 = trunc <8 x i64> %broadcast.splat50 to <8 x i32> - %106 = or <8 x i32> %105, - %107 = icmp sgt <8 x i32> %106, zeroinitializer - %108 = icmp sgt <8 x i32> %broadcast.splat52, %106 - %109 = and <8 x i1> %108, %107 - %110 = extractelement <8 x i32> %106, i32 0 - %111 = add nsw i32 %mul.i.i.1, %110 - %112 = sext i32 %111 to i64 - %113 = getelementptr inbounds float, float* %9, i64 %112 - %114 = bitcast float* %113 to <8 x i32>* - %wide.masked.load53.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %114, i32 4, <8 x i1> %109, <8 x i32> undef), !tbaa !12, !alias.scope !122 - %115 = getelementptr inbounds float, float* %6, i64 %112 - %116 = bitcast float* %115 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load53.1, <8 x i32>* %116, i32 4, <8 x i1> %109), !tbaa !12, !alias.scope !125, !noalias !122, !llvm.access.group !21 - %117 = trunc <8 x i64> %broadcast.splat50 to <8 x i32> - %118 = or <8 x i32> %117, - %119 = icmp sgt <8 x i32> %118, zeroinitializer - %120 = icmp sgt <8 x i32> %broadcast.splat52, %118 - %121 = and <8 x i1> %120, %119 - %122 = extractelement <8 x i32> %118, i32 0 - %123 = add nsw i32 %mul.i.i.1, %122 - %124 = sext i32 %123 to i64 - %125 = getelementptr inbounds float, float* %9, i64 %124 - %126 = bitcast float* %125 to <8 x i32>* - %wide.masked.load53.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %126, i32 4, <8 x i1> %121, <8 x i32> undef), !tbaa !12, !alias.scope !122 - %127 = getelementptr inbounds float, float* %6, i64 %124 - %128 = bitcast float* %127 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load53.2, <8 x i32>* %128, i32 4, <8 x i1> %121), !tbaa !12, !alias.scope !125, !noalias !122, !llvm.access.group !21 - %129 = trunc <8 x i64> %broadcast.splat50 to <8 x i32> - %130 = or <8 x i32> %129, - %131 = icmp sgt <8 x i32> %130, zeroinitializer - %132 = icmp sgt <8 x i32> %broadcast.splat52, %130 - %133 = and <8 x i1> %132, %131 - %134 = extractelement <8 x i32> %130, i32 0 - %135 = add nsw i32 %mul.i.i.1, %134 - %136 = sext i32 %135 to i64 - %137 = getelementptr inbounds float, float* %9, i64 %136 - %138 = bitcast float* %137 to <8 x i32>* - %wide.masked.load53.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %138, i32 4, <8 x i1> %133, <8 x i32> undef), !tbaa !12, !alias.scope !122 - %139 = getelementptr inbounds float, float* %6, i64 %136 - %140 = bitcast float* %139 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load53.3, <8 x i32>* %140, i32 4, <8 x i1> %133), !tbaa !12, !alias.scope !125, !noalias !122, !llvm.access.group !21 - br label %pregion_for_end.i.i.1 - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.3, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ], [ %612, %if.end.i.i.us.us.1.3 ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv2.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp7.i.i.us.us.1 = icmp sgt i32 %conv2.i.i.us.us.1, 0 - %cmp11.i.i.us.us.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.1 - %or.cond28.i.i.us.us.1 = and i1 %cmp11.i.i.us.us.1, %cmp7.i.i.us.us.1 - br i1 %or.cond28.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.1, %conv2.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.1 - %141 = bitcast float* %arrayidx.i.i.us.us.1 to i32* - %142 = load i32, i32* %141, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.1 - %143 = bitcast float* %arrayidx16.i.i.us.us.1 to i32* - store i32 %142, i32* %143, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.then.i.i.us.us.1, %pregion_for_entry.entry.i.i.us.us.1 - %144 = or i64 %_local_id_x.i.0.us.us.1, 1 - %add1.i.i.i.us.us.1.1 = add nuw nsw i64 %144, %mul.i.i.i - %conv2.i.i.us.us.1.1 = trunc i64 %add1.i.i.i.us.us.1.1 to i32 - %cmp7.i.i.us.us.1.1 = icmp sgt i32 %conv2.i.i.us.us.1.1, 0 - %cmp11.i.i.us.us.1.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.1.1 - %or.cond28.i.i.us.us.1.1 = and i1 %cmp11.i.i.us.us.1.1, %cmp7.i.i.us.us.1.1 - br i1 %or.cond28.i.i.us.us.1.1, label %if.then.i.i.us.us.1.1, label %if.end.i.i.us.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.i.i.us.us.1.3 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph42, %pregion_for_end.i.i - %145 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.2 = or i32 %145, 2 - %cmp.i.i.2 = icmp sgt i32 %conv.i.i.2, 0 - %mul.i.i.2 = mul nsw i32 %13, %conv.i.i.2 - %cmp4.i.i.2 = icmp sgt i32 %sub.i.i, %conv.i.i.2 - %or.cond5 = and i1 %cmp.i.i.2, %cmp4.i.i.2 - br i1 %or.cond5, label %vector.scevcheck61, label %pregion_for_end.i.i.2 - -vector.scevcheck61: ; preds = %pregion_for_end.i.i.1 - %146 = mul i32 %13, %conv.i.i.2 - %147 = trunc i64 %2 to i32 - %148 = shl i32 %147, 5 - %149 = add i32 %146, %148 - %150 = icmp sgt i32 %149, 2147483616 - br i1 %150, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %vector.memcheck75 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %vector.memcheck75, %vector.scevcheck61 - br label %pregion_for_entry.entry.i.i.us.us.2 - -vector.memcheck75: ; preds = %vector.scevcheck61 - %151 = mul i32 %13, %conv.i.i.2 - %152 = trunc i64 %2 to i32 - %153 = shl i32 %152, 5 - %154 = add i32 %151, %153 - %155 = sext i32 %154 to i64 - %scevgep63 = getelementptr float, float* %6, i64 %155 - %156 = add nsw i64 %155, 32 - %scevgep65 = getelementptr float, float* %6, i64 %156 - %scevgep67 = getelementptr float, float* %9, i64 %155 - %scevgep69 = getelementptr float, float* %9, i64 %156 - %bound071 = icmp ult float* %scevgep63, %scevgep69 - %bound172 = icmp ult float* %scevgep67, %scevgep65 - %found.conflict73 = and i1 %bound071, %bound172 - br i1 %found.conflict73, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %vector.ph76 - -vector.ph76: ; preds = %vector.memcheck75 - %broadcast.splatinsert83 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat84 = shufflevector <8 x i64> %broadcast.splatinsert83, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert85 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat86 = shufflevector <8 x i32> %broadcast.splatinsert85, <8 x i32> undef, <8 x i32> zeroinitializer - %157 = trunc <8 x i64> %broadcast.splat84 to <8 x i32> - %158 = or <8 x i32> %157, - %159 = icmp sgt <8 x i32> %158, zeroinitializer - %160 = icmp sgt <8 x i32> %broadcast.splat86, %158 - %161 = and <8 x i1> %160, %159 - %162 = extractelement <8 x i32> %158, i32 0 - %163 = add nsw i32 %mul.i.i.2, %162 - %164 = sext i32 %163 to i64 - %165 = getelementptr inbounds float, float* %9, i64 %164 - %166 = bitcast float* %165 to <8 x i32>* - %wide.masked.load87 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %166, i32 4, <8 x i1> %161, <8 x i32> undef), !tbaa !12, !alias.scope !127 - %167 = getelementptr inbounds float, float* %6, i64 %164 - %168 = bitcast float* %167 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load87, <8 x i32>* %168, i32 4, <8 x i1> %161), !tbaa !12, !alias.scope !130, !noalias !127, !llvm.access.group !21 - %169 = trunc <8 x i64> %broadcast.splat84 to <8 x i32> - %170 = or <8 x i32> %169, - %171 = icmp sgt <8 x i32> %170, zeroinitializer - %172 = icmp sgt <8 x i32> %broadcast.splat86, %170 - %173 = and <8 x i1> %172, %171 - %174 = extractelement <8 x i32> %170, i32 0 - %175 = add nsw i32 %mul.i.i.2, %174 - %176 = sext i32 %175 to i64 - %177 = getelementptr inbounds float, float* %9, i64 %176 - %178 = bitcast float* %177 to <8 x i32>* - %wide.masked.load87.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %178, i32 4, <8 x i1> %173, <8 x i32> undef), !tbaa !12, !alias.scope !127 - %179 = getelementptr inbounds float, float* %6, i64 %176 - %180 = bitcast float* %179 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load87.1, <8 x i32>* %180, i32 4, <8 x i1> %173), !tbaa !12, !alias.scope !130, !noalias !127, !llvm.access.group !21 - %181 = trunc <8 x i64> %broadcast.splat84 to <8 x i32> - %182 = or <8 x i32> %181, - %183 = icmp sgt <8 x i32> %182, zeroinitializer - %184 = icmp sgt <8 x i32> %broadcast.splat86, %182 - %185 = and <8 x i1> %184, %183 - %186 = extractelement <8 x i32> %182, i32 0 - %187 = add nsw i32 %mul.i.i.2, %186 - %188 = sext i32 %187 to i64 - %189 = getelementptr inbounds float, float* %9, i64 %188 - %190 = bitcast float* %189 to <8 x i32>* - %wide.masked.load87.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %190, i32 4, <8 x i1> %185, <8 x i32> undef), !tbaa !12, !alias.scope !127 - %191 = getelementptr inbounds float, float* %6, i64 %188 - %192 = bitcast float* %191 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load87.2, <8 x i32>* %192, i32 4, <8 x i1> %185), !tbaa !12, !alias.scope !130, !noalias !127, !llvm.access.group !21 - %193 = trunc <8 x i64> %broadcast.splat84 to <8 x i32> - %194 = or <8 x i32> %193, - %195 = icmp sgt <8 x i32> %194, zeroinitializer - %196 = icmp sgt <8 x i32> %broadcast.splat86, %194 - %197 = and <8 x i1> %196, %195 - %198 = extractelement <8 x i32> %194, i32 0 - %199 = add nsw i32 %mul.i.i.2, %198 - %200 = sext i32 %199 to i64 - %201 = getelementptr inbounds float, float* %9, i64 %200 - %202 = bitcast float* %201 to <8 x i32>* - %wide.masked.load87.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %202, i32 4, <8 x i1> %197, <8 x i32> undef), !tbaa !12, !alias.scope !127 - %203 = getelementptr inbounds float, float* %6, i64 %200 - %204 = bitcast float* %203 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load87.3, <8 x i32>* %204, i32 4, <8 x i1> %197), !tbaa !12, !alias.scope !130, !noalias !127, !llvm.access.group !21 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.3, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ], [ %600, %if.end.i.i.us.us.2.3 ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv2.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp7.i.i.us.us.2 = icmp sgt i32 %conv2.i.i.us.us.2, 0 - %cmp11.i.i.us.us.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.2 - %or.cond28.i.i.us.us.2 = and i1 %cmp11.i.i.us.us.2, %cmp7.i.i.us.us.2 - br i1 %or.cond28.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.2, %conv2.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.2 - %205 = bitcast float* %arrayidx.i.i.us.us.2 to i32* - %206 = load i32, i32* %205, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.2 - %207 = bitcast float* %arrayidx16.i.i.us.us.2 to i32* - store i32 %206, i32* %207, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.then.i.i.us.us.2, %pregion_for_entry.entry.i.i.us.us.2 - %208 = or i64 %_local_id_x.i.0.us.us.2, 1 - %add1.i.i.i.us.us.2.1 = add nuw nsw i64 %208, %mul.i.i.i - %conv2.i.i.us.us.2.1 = trunc i64 %add1.i.i.i.us.us.2.1 to i32 - %cmp7.i.i.us.us.2.1 = icmp sgt i32 %conv2.i.i.us.us.2.1, 0 - %cmp11.i.i.us.us.2.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.2.1 - %or.cond28.i.i.us.us.2.1 = and i1 %cmp11.i.i.us.us.2.1, %cmp7.i.i.us.us.2.1 - br i1 %or.cond28.i.i.us.us.2.1, label %if.then.i.i.us.us.2.1, label %if.end.i.i.us.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.i.i.us.us.2.3 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph76, %pregion_for_end.i.i.1 - %209 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.3 = or i32 %209, 3 - %cmp.i.i.3 = icmp sgt i32 %conv.i.i.3, 0 - %mul.i.i.3 = mul nsw i32 %13, %conv.i.i.3 - %cmp4.i.i.3 = icmp sgt i32 %sub.i.i, %conv.i.i.3 - %or.cond6 = and i1 %cmp.i.i.3, %cmp4.i.i.3 - br i1 %or.cond6, label %vector.scevcheck95, label %pregion_for_end.i.i.3 - -vector.scevcheck95: ; preds = %pregion_for_end.i.i.2 - %210 = mul i32 %13, %conv.i.i.3 - %211 = trunc i64 %2 to i32 - %212 = shl i32 %211, 5 - %213 = add i32 %210, %212 - %214 = icmp sgt i32 %213, 2147483616 - br i1 %214, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %vector.memcheck109 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %vector.memcheck109, %vector.scevcheck95 - br label %pregion_for_entry.entry.i.i.us.us.3 - -vector.memcheck109: ; preds = %vector.scevcheck95 - %215 = mul i32 %13, %conv.i.i.3 - %216 = trunc i64 %2 to i32 - %217 = shl i32 %216, 5 - %218 = add i32 %215, %217 - %219 = sext i32 %218 to i64 - %scevgep97 = getelementptr float, float* %6, i64 %219 - %220 = add nsw i64 %219, 32 - %scevgep99 = getelementptr float, float* %6, i64 %220 - %scevgep101 = getelementptr float, float* %9, i64 %219 - %scevgep103 = getelementptr float, float* %9, i64 %220 - %bound0105 = icmp ult float* %scevgep97, %scevgep103 - %bound1106 = icmp ult float* %scevgep101, %scevgep99 - %found.conflict107 = and i1 %bound0105, %bound1106 - br i1 %found.conflict107, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %vector.ph110 - -vector.ph110: ; preds = %vector.memcheck109 - %broadcast.splatinsert117 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat118 = shufflevector <8 x i64> %broadcast.splatinsert117, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert119 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat120 = shufflevector <8 x i32> %broadcast.splatinsert119, <8 x i32> undef, <8 x i32> zeroinitializer - %221 = trunc <8 x i64> %broadcast.splat118 to <8 x i32> - %222 = or <8 x i32> %221, - %223 = icmp sgt <8 x i32> %222, zeroinitializer - %224 = icmp sgt <8 x i32> %broadcast.splat120, %222 - %225 = and <8 x i1> %224, %223 - %226 = extractelement <8 x i32> %222, i32 0 - %227 = add nsw i32 %mul.i.i.3, %226 - %228 = sext i32 %227 to i64 - %229 = getelementptr inbounds float, float* %9, i64 %228 - %230 = bitcast float* %229 to <8 x i32>* - %wide.masked.load121 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %230, i32 4, <8 x i1> %225, <8 x i32> undef), !tbaa !12, !alias.scope !132 - %231 = getelementptr inbounds float, float* %6, i64 %228 - %232 = bitcast float* %231 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load121, <8 x i32>* %232, i32 4, <8 x i1> %225), !tbaa !12, !alias.scope !135, !noalias !132, !llvm.access.group !21 - %233 = trunc <8 x i64> %broadcast.splat118 to <8 x i32> - %234 = or <8 x i32> %233, - %235 = icmp sgt <8 x i32> %234, zeroinitializer - %236 = icmp sgt <8 x i32> %broadcast.splat120, %234 - %237 = and <8 x i1> %236, %235 - %238 = extractelement <8 x i32> %234, i32 0 - %239 = add nsw i32 %mul.i.i.3, %238 - %240 = sext i32 %239 to i64 - %241 = getelementptr inbounds float, float* %9, i64 %240 - %242 = bitcast float* %241 to <8 x i32>* - %wide.masked.load121.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %242, i32 4, <8 x i1> %237, <8 x i32> undef), !tbaa !12, !alias.scope !132 - %243 = getelementptr inbounds float, float* %6, i64 %240 - %244 = bitcast float* %243 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load121.1, <8 x i32>* %244, i32 4, <8 x i1> %237), !tbaa !12, !alias.scope !135, !noalias !132, !llvm.access.group !21 - %245 = trunc <8 x i64> %broadcast.splat118 to <8 x i32> - %246 = or <8 x i32> %245, - %247 = icmp sgt <8 x i32> %246, zeroinitializer - %248 = icmp sgt <8 x i32> %broadcast.splat120, %246 - %249 = and <8 x i1> %248, %247 - %250 = extractelement <8 x i32> %246, i32 0 - %251 = add nsw i32 %mul.i.i.3, %250 - %252 = sext i32 %251 to i64 - %253 = getelementptr inbounds float, float* %9, i64 %252 - %254 = bitcast float* %253 to <8 x i32>* - %wide.masked.load121.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %254, i32 4, <8 x i1> %249, <8 x i32> undef), !tbaa !12, !alias.scope !132 - %255 = getelementptr inbounds float, float* %6, i64 %252 - %256 = bitcast float* %255 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load121.2, <8 x i32>* %256, i32 4, <8 x i1> %249), !tbaa !12, !alias.scope !135, !noalias !132, !llvm.access.group !21 - %257 = trunc <8 x i64> %broadcast.splat118 to <8 x i32> - %258 = or <8 x i32> %257, - %259 = icmp sgt <8 x i32> %258, zeroinitializer - %260 = icmp sgt <8 x i32> %broadcast.splat120, %258 - %261 = and <8 x i1> %260, %259 - %262 = extractelement <8 x i32> %258, i32 0 - %263 = add nsw i32 %mul.i.i.3, %262 - %264 = sext i32 %263 to i64 - %265 = getelementptr inbounds float, float* %9, i64 %264 - %266 = bitcast float* %265 to <8 x i32>* - %wide.masked.load121.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %266, i32 4, <8 x i1> %261, <8 x i32> undef), !tbaa !12, !alias.scope !132 - %267 = getelementptr inbounds float, float* %6, i64 %264 - %268 = bitcast float* %267 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load121.3, <8 x i32>* %268, i32 4, <8 x i1> %261), !tbaa !12, !alias.scope !135, !noalias !132, !llvm.access.group !21 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.3, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ], [ %588, %if.end.i.i.us.us.3.3 ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv2.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp7.i.i.us.us.3 = icmp sgt i32 %conv2.i.i.us.us.3, 0 - %cmp11.i.i.us.us.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.3 - %or.cond28.i.i.us.us.3 = and i1 %cmp11.i.i.us.us.3, %cmp7.i.i.us.us.3 - br i1 %or.cond28.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.3, %conv2.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.3 - %269 = bitcast float* %arrayidx.i.i.us.us.3 to i32* - %270 = load i32, i32* %269, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.3 - %271 = bitcast float* %arrayidx16.i.i.us.us.3 to i32* - store i32 %270, i32* %271, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.then.i.i.us.us.3, %pregion_for_entry.entry.i.i.us.us.3 - %272 = or i64 %_local_id_x.i.0.us.us.3, 1 - %add1.i.i.i.us.us.3.1 = add nuw nsw i64 %272, %mul.i.i.i - %conv2.i.i.us.us.3.1 = trunc i64 %add1.i.i.i.us.us.3.1 to i32 - %cmp7.i.i.us.us.3.1 = icmp sgt i32 %conv2.i.i.us.us.3.1, 0 - %cmp11.i.i.us.us.3.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.3.1 - %or.cond28.i.i.us.us.3.1 = and i1 %cmp11.i.i.us.us.3.1, %cmp7.i.i.us.us.3.1 - br i1 %or.cond28.i.i.us.us.3.1, label %if.then.i.i.us.us.3.1, label %if.end.i.i.us.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.i.i.us.us.3.3 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph110, %pregion_for_end.i.i.2 - %273 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.4 = or i32 %273, 4 - %cmp.i.i.4 = icmp sgt i32 %conv.i.i.4, 0 - %mul.i.i.4 = mul nsw i32 %13, %conv.i.i.4 - %cmp4.i.i.4 = icmp sgt i32 %sub.i.i, %conv.i.i.4 - %or.cond7 = and i1 %cmp.i.i.4, %cmp4.i.i.4 - br i1 %or.cond7, label %vector.scevcheck129, label %pregion_for_end.i.i.4 - -vector.scevcheck129: ; preds = %pregion_for_end.i.i.3 - %274 = mul i32 %13, %conv.i.i.4 - %275 = trunc i64 %2 to i32 - %276 = shl i32 %275, 5 - %277 = add i32 %274, %276 - %278 = icmp sgt i32 %277, 2147483616 - br i1 %278, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %vector.memcheck143 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %vector.memcheck143, %vector.scevcheck129 - br label %pregion_for_entry.entry.i.i.us.us.4 - -vector.memcheck143: ; preds = %vector.scevcheck129 - %279 = mul i32 %13, %conv.i.i.4 - %280 = trunc i64 %2 to i32 - %281 = shl i32 %280, 5 - %282 = add i32 %279, %281 - %283 = sext i32 %282 to i64 - %scevgep131 = getelementptr float, float* %6, i64 %283 - %284 = add nsw i64 %283, 32 - %scevgep133 = getelementptr float, float* %6, i64 %284 - %scevgep135 = getelementptr float, float* %9, i64 %283 - %scevgep137 = getelementptr float, float* %9, i64 %284 - %bound0139 = icmp ult float* %scevgep131, %scevgep137 - %bound1140 = icmp ult float* %scevgep135, %scevgep133 - %found.conflict141 = and i1 %bound0139, %bound1140 - br i1 %found.conflict141, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %vector.ph144 - -vector.ph144: ; preds = %vector.memcheck143 - %broadcast.splatinsert151 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat152 = shufflevector <8 x i64> %broadcast.splatinsert151, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert153 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat154 = shufflevector <8 x i32> %broadcast.splatinsert153, <8 x i32> undef, <8 x i32> zeroinitializer - %285 = trunc <8 x i64> %broadcast.splat152 to <8 x i32> - %286 = or <8 x i32> %285, - %287 = icmp sgt <8 x i32> %286, zeroinitializer - %288 = icmp sgt <8 x i32> %broadcast.splat154, %286 - %289 = and <8 x i1> %288, %287 - %290 = extractelement <8 x i32> %286, i32 0 - %291 = add nsw i32 %mul.i.i.4, %290 - %292 = sext i32 %291 to i64 - %293 = getelementptr inbounds float, float* %9, i64 %292 - %294 = bitcast float* %293 to <8 x i32>* - %wide.masked.load155 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %294, i32 4, <8 x i1> %289, <8 x i32> undef), !tbaa !12, !alias.scope !137 - %295 = getelementptr inbounds float, float* %6, i64 %292 - %296 = bitcast float* %295 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load155, <8 x i32>* %296, i32 4, <8 x i1> %289), !tbaa !12, !alias.scope !140, !noalias !137, !llvm.access.group !21 - %297 = trunc <8 x i64> %broadcast.splat152 to <8 x i32> - %298 = or <8 x i32> %297, - %299 = icmp sgt <8 x i32> %298, zeroinitializer - %300 = icmp sgt <8 x i32> %broadcast.splat154, %298 - %301 = and <8 x i1> %300, %299 - %302 = extractelement <8 x i32> %298, i32 0 - %303 = add nsw i32 %mul.i.i.4, %302 - %304 = sext i32 %303 to i64 - %305 = getelementptr inbounds float, float* %9, i64 %304 - %306 = bitcast float* %305 to <8 x i32>* - %wide.masked.load155.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %306, i32 4, <8 x i1> %301, <8 x i32> undef), !tbaa !12, !alias.scope !137 - %307 = getelementptr inbounds float, float* %6, i64 %304 - %308 = bitcast float* %307 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load155.1, <8 x i32>* %308, i32 4, <8 x i1> %301), !tbaa !12, !alias.scope !140, !noalias !137, !llvm.access.group !21 - %309 = trunc <8 x i64> %broadcast.splat152 to <8 x i32> - %310 = or <8 x i32> %309, - %311 = icmp sgt <8 x i32> %310, zeroinitializer - %312 = icmp sgt <8 x i32> %broadcast.splat154, %310 - %313 = and <8 x i1> %312, %311 - %314 = extractelement <8 x i32> %310, i32 0 - %315 = add nsw i32 %mul.i.i.4, %314 - %316 = sext i32 %315 to i64 - %317 = getelementptr inbounds float, float* %9, i64 %316 - %318 = bitcast float* %317 to <8 x i32>* - %wide.masked.load155.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %318, i32 4, <8 x i1> %313, <8 x i32> undef), !tbaa !12, !alias.scope !137 - %319 = getelementptr inbounds float, float* %6, i64 %316 - %320 = bitcast float* %319 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load155.2, <8 x i32>* %320, i32 4, <8 x i1> %313), !tbaa !12, !alias.scope !140, !noalias !137, !llvm.access.group !21 - %321 = trunc <8 x i64> %broadcast.splat152 to <8 x i32> - %322 = or <8 x i32> %321, - %323 = icmp sgt <8 x i32> %322, zeroinitializer - %324 = icmp sgt <8 x i32> %broadcast.splat154, %322 - %325 = and <8 x i1> %324, %323 - %326 = extractelement <8 x i32> %322, i32 0 - %327 = add nsw i32 %mul.i.i.4, %326 - %328 = sext i32 %327 to i64 - %329 = getelementptr inbounds float, float* %9, i64 %328 - %330 = bitcast float* %329 to <8 x i32>* - %wide.masked.load155.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %330, i32 4, <8 x i1> %325, <8 x i32> undef), !tbaa !12, !alias.scope !137 - %331 = getelementptr inbounds float, float* %6, i64 %328 - %332 = bitcast float* %331 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load155.3, <8 x i32>* %332, i32 4, <8 x i1> %325), !tbaa !12, !alias.scope !140, !noalias !137, !llvm.access.group !21 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.3, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ], [ %576, %if.end.i.i.us.us.4.3 ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv2.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp7.i.i.us.us.4 = icmp sgt i32 %conv2.i.i.us.us.4, 0 - %cmp11.i.i.us.us.4 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.4 - %or.cond28.i.i.us.us.4 = and i1 %cmp11.i.i.us.us.4, %cmp7.i.i.us.us.4 - br i1 %or.cond28.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.4, %conv2.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.4 - %333 = bitcast float* %arrayidx.i.i.us.us.4 to i32* - %334 = load i32, i32* %333, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.4 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.4 - %335 = bitcast float* %arrayidx16.i.i.us.us.4 to i32* - store i32 %334, i32* %335, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.then.i.i.us.us.4, %pregion_for_entry.entry.i.i.us.us.4 - %336 = or i64 %_local_id_x.i.0.us.us.4, 1 - %add1.i.i.i.us.us.4.1 = add nuw nsw i64 %336, %mul.i.i.i - %conv2.i.i.us.us.4.1 = trunc i64 %add1.i.i.i.us.us.4.1 to i32 - %cmp7.i.i.us.us.4.1 = icmp sgt i32 %conv2.i.i.us.us.4.1, 0 - %cmp11.i.i.us.us.4.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.4.1 - %or.cond28.i.i.us.us.4.1 = and i1 %cmp11.i.i.us.us.4.1, %cmp7.i.i.us.us.4.1 - br i1 %or.cond28.i.i.us.us.4.1, label %if.then.i.i.us.us.4.1, label %if.end.i.i.us.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.i.i.us.us.4.3 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph144, %pregion_for_end.i.i.3 - %337 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.5 = or i32 %337, 5 - %cmp.i.i.5 = icmp sgt i32 %conv.i.i.5, 0 - %mul.i.i.5 = mul nsw i32 %13, %conv.i.i.5 - %cmp4.i.i.5 = icmp sgt i32 %sub.i.i, %conv.i.i.5 - %or.cond8 = and i1 %cmp.i.i.5, %cmp4.i.i.5 - br i1 %or.cond8, label %vector.scevcheck163, label %pregion_for_end.i.i.5 - -vector.scevcheck163: ; preds = %pregion_for_end.i.i.4 - %338 = mul i32 %13, %conv.i.i.5 - %339 = trunc i64 %2 to i32 - %340 = shl i32 %339, 5 - %341 = add i32 %338, %340 - %342 = icmp sgt i32 %341, 2147483616 - br i1 %342, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %vector.memcheck177 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %vector.memcheck177, %vector.scevcheck163 - br label %pregion_for_entry.entry.i.i.us.us.5 - -vector.memcheck177: ; preds = %vector.scevcheck163 - %343 = mul i32 %13, %conv.i.i.5 - %344 = trunc i64 %2 to i32 - %345 = shl i32 %344, 5 - %346 = add i32 %343, %345 - %347 = sext i32 %346 to i64 - %scevgep165 = getelementptr float, float* %6, i64 %347 - %348 = add nsw i64 %347, 32 - %scevgep167 = getelementptr float, float* %6, i64 %348 - %scevgep169 = getelementptr float, float* %9, i64 %347 - %scevgep171 = getelementptr float, float* %9, i64 %348 - %bound0173 = icmp ult float* %scevgep165, %scevgep171 - %bound1174 = icmp ult float* %scevgep169, %scevgep167 - %found.conflict175 = and i1 %bound0173, %bound1174 - br i1 %found.conflict175, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %vector.ph178 - -vector.ph178: ; preds = %vector.memcheck177 - %broadcast.splatinsert185 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat186 = shufflevector <8 x i64> %broadcast.splatinsert185, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert187 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat188 = shufflevector <8 x i32> %broadcast.splatinsert187, <8 x i32> undef, <8 x i32> zeroinitializer - %349 = trunc <8 x i64> %broadcast.splat186 to <8 x i32> - %350 = or <8 x i32> %349, - %351 = icmp sgt <8 x i32> %350, zeroinitializer - %352 = icmp sgt <8 x i32> %broadcast.splat188, %350 - %353 = and <8 x i1> %352, %351 - %354 = extractelement <8 x i32> %350, i32 0 - %355 = add nsw i32 %mul.i.i.5, %354 - %356 = sext i32 %355 to i64 - %357 = getelementptr inbounds float, float* %9, i64 %356 - %358 = bitcast float* %357 to <8 x i32>* - %wide.masked.load189 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %358, i32 4, <8 x i1> %353, <8 x i32> undef), !tbaa !12, !alias.scope !142 - %359 = getelementptr inbounds float, float* %6, i64 %356 - %360 = bitcast float* %359 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load189, <8 x i32>* %360, i32 4, <8 x i1> %353), !tbaa !12, !alias.scope !145, !noalias !142, !llvm.access.group !21 - %361 = trunc <8 x i64> %broadcast.splat186 to <8 x i32> - %362 = or <8 x i32> %361, - %363 = icmp sgt <8 x i32> %362, zeroinitializer - %364 = icmp sgt <8 x i32> %broadcast.splat188, %362 - %365 = and <8 x i1> %364, %363 - %366 = extractelement <8 x i32> %362, i32 0 - %367 = add nsw i32 %mul.i.i.5, %366 - %368 = sext i32 %367 to i64 - %369 = getelementptr inbounds float, float* %9, i64 %368 - %370 = bitcast float* %369 to <8 x i32>* - %wide.masked.load189.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %370, i32 4, <8 x i1> %365, <8 x i32> undef), !tbaa !12, !alias.scope !142 - %371 = getelementptr inbounds float, float* %6, i64 %368 - %372 = bitcast float* %371 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load189.1, <8 x i32>* %372, i32 4, <8 x i1> %365), !tbaa !12, !alias.scope !145, !noalias !142, !llvm.access.group !21 - %373 = trunc <8 x i64> %broadcast.splat186 to <8 x i32> - %374 = or <8 x i32> %373, - %375 = icmp sgt <8 x i32> %374, zeroinitializer - %376 = icmp sgt <8 x i32> %broadcast.splat188, %374 - %377 = and <8 x i1> %376, %375 - %378 = extractelement <8 x i32> %374, i32 0 - %379 = add nsw i32 %mul.i.i.5, %378 - %380 = sext i32 %379 to i64 - %381 = getelementptr inbounds float, float* %9, i64 %380 - %382 = bitcast float* %381 to <8 x i32>* - %wide.masked.load189.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %382, i32 4, <8 x i1> %377, <8 x i32> undef), !tbaa !12, !alias.scope !142 - %383 = getelementptr inbounds float, float* %6, i64 %380 - %384 = bitcast float* %383 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load189.2, <8 x i32>* %384, i32 4, <8 x i1> %377), !tbaa !12, !alias.scope !145, !noalias !142, !llvm.access.group !21 - %385 = trunc <8 x i64> %broadcast.splat186 to <8 x i32> - %386 = or <8 x i32> %385, - %387 = icmp sgt <8 x i32> %386, zeroinitializer - %388 = icmp sgt <8 x i32> %broadcast.splat188, %386 - %389 = and <8 x i1> %388, %387 - %390 = extractelement <8 x i32> %386, i32 0 - %391 = add nsw i32 %mul.i.i.5, %390 - %392 = sext i32 %391 to i64 - %393 = getelementptr inbounds float, float* %9, i64 %392 - %394 = bitcast float* %393 to <8 x i32>* - %wide.masked.load189.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %394, i32 4, <8 x i1> %389, <8 x i32> undef), !tbaa !12, !alias.scope !142 - %395 = getelementptr inbounds float, float* %6, i64 %392 - %396 = bitcast float* %395 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load189.3, <8 x i32>* %396, i32 4, <8 x i1> %389), !tbaa !12, !alias.scope !145, !noalias !142, !llvm.access.group !21 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.3, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ], [ %564, %if.end.i.i.us.us.5.3 ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv2.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp7.i.i.us.us.5 = icmp sgt i32 %conv2.i.i.us.us.5, 0 - %cmp11.i.i.us.us.5 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.5 - %or.cond28.i.i.us.us.5 = and i1 %cmp11.i.i.us.us.5, %cmp7.i.i.us.us.5 - br i1 %or.cond28.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.5, %conv2.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.5 - %397 = bitcast float* %arrayidx.i.i.us.us.5 to i32* - %398 = load i32, i32* %397, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.5 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.5 - %399 = bitcast float* %arrayidx16.i.i.us.us.5 to i32* - store i32 %398, i32* %399, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.then.i.i.us.us.5, %pregion_for_entry.entry.i.i.us.us.5 - %400 = or i64 %_local_id_x.i.0.us.us.5, 1 - %add1.i.i.i.us.us.5.1 = add nuw nsw i64 %400, %mul.i.i.i - %conv2.i.i.us.us.5.1 = trunc i64 %add1.i.i.i.us.us.5.1 to i32 - %cmp7.i.i.us.us.5.1 = icmp sgt i32 %conv2.i.i.us.us.5.1, 0 - %cmp11.i.i.us.us.5.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.5.1 - %or.cond28.i.i.us.us.5.1 = and i1 %cmp11.i.i.us.us.5.1, %cmp7.i.i.us.us.5.1 - br i1 %or.cond28.i.i.us.us.5.1, label %if.then.i.i.us.us.5.1, label %if.end.i.i.us.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.i.i.us.us.5.3 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph178, %pregion_for_end.i.i.4 - %401 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.6 = or i32 %401, 6 - %cmp.i.i.6 = icmp sgt i32 %conv.i.i.6, 0 - %mul.i.i.6 = mul nsw i32 %13, %conv.i.i.6 - %cmp4.i.i.6 = icmp sgt i32 %sub.i.i, %conv.i.i.6 - %or.cond9 = and i1 %cmp.i.i.6, %cmp4.i.i.6 - br i1 %or.cond9, label %vector.scevcheck197, label %pregion_for_end.i.i.6 - -vector.scevcheck197: ; preds = %pregion_for_end.i.i.5 - %402 = mul i32 %13, %conv.i.i.6 - %403 = trunc i64 %2 to i32 - %404 = shl i32 %403, 5 - %405 = add i32 %402, %404 - %406 = icmp sgt i32 %405, 2147483616 - br i1 %406, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %vector.memcheck211 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %vector.memcheck211, %vector.scevcheck197 - br label %pregion_for_entry.entry.i.i.us.us.6 - -vector.memcheck211: ; preds = %vector.scevcheck197 - %407 = mul i32 %13, %conv.i.i.6 - %408 = trunc i64 %2 to i32 - %409 = shl i32 %408, 5 - %410 = add i32 %407, %409 - %411 = sext i32 %410 to i64 - %scevgep199 = getelementptr float, float* %6, i64 %411 - %412 = add nsw i64 %411, 32 - %scevgep201 = getelementptr float, float* %6, i64 %412 - %scevgep203 = getelementptr float, float* %9, i64 %411 - %scevgep205 = getelementptr float, float* %9, i64 %412 - %bound0207 = icmp ult float* %scevgep199, %scevgep205 - %bound1208 = icmp ult float* %scevgep203, %scevgep201 - %found.conflict209 = and i1 %bound0207, %bound1208 - br i1 %found.conflict209, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %vector.ph212 - -vector.ph212: ; preds = %vector.memcheck211 - %broadcast.splatinsert219 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat220 = shufflevector <8 x i64> %broadcast.splatinsert219, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert221 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat222 = shufflevector <8 x i32> %broadcast.splatinsert221, <8 x i32> undef, <8 x i32> zeroinitializer - %413 = trunc <8 x i64> %broadcast.splat220 to <8 x i32> - %414 = or <8 x i32> %413, - %415 = icmp sgt <8 x i32> %414, zeroinitializer - %416 = icmp sgt <8 x i32> %broadcast.splat222, %414 - %417 = and <8 x i1> %416, %415 - %418 = extractelement <8 x i32> %414, i32 0 - %419 = add nsw i32 %mul.i.i.6, %418 - %420 = sext i32 %419 to i64 - %421 = getelementptr inbounds float, float* %9, i64 %420 - %422 = bitcast float* %421 to <8 x i32>* - %wide.masked.load223 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %422, i32 4, <8 x i1> %417, <8 x i32> undef), !tbaa !12, !alias.scope !147 - %423 = getelementptr inbounds float, float* %6, i64 %420 - %424 = bitcast float* %423 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load223, <8 x i32>* %424, i32 4, <8 x i1> %417), !tbaa !12, !alias.scope !150, !noalias !147, !llvm.access.group !21 - %425 = trunc <8 x i64> %broadcast.splat220 to <8 x i32> - %426 = or <8 x i32> %425, - %427 = icmp sgt <8 x i32> %426, zeroinitializer - %428 = icmp sgt <8 x i32> %broadcast.splat222, %426 - %429 = and <8 x i1> %428, %427 - %430 = extractelement <8 x i32> %426, i32 0 - %431 = add nsw i32 %mul.i.i.6, %430 - %432 = sext i32 %431 to i64 - %433 = getelementptr inbounds float, float* %9, i64 %432 - %434 = bitcast float* %433 to <8 x i32>* - %wide.masked.load223.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %434, i32 4, <8 x i1> %429, <8 x i32> undef), !tbaa !12, !alias.scope !147 - %435 = getelementptr inbounds float, float* %6, i64 %432 - %436 = bitcast float* %435 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load223.1, <8 x i32>* %436, i32 4, <8 x i1> %429), !tbaa !12, !alias.scope !150, !noalias !147, !llvm.access.group !21 - %437 = trunc <8 x i64> %broadcast.splat220 to <8 x i32> - %438 = or <8 x i32> %437, - %439 = icmp sgt <8 x i32> %438, zeroinitializer - %440 = icmp sgt <8 x i32> %broadcast.splat222, %438 - %441 = and <8 x i1> %440, %439 - %442 = extractelement <8 x i32> %438, i32 0 - %443 = add nsw i32 %mul.i.i.6, %442 - %444 = sext i32 %443 to i64 - %445 = getelementptr inbounds float, float* %9, i64 %444 - %446 = bitcast float* %445 to <8 x i32>* - %wide.masked.load223.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %446, i32 4, <8 x i1> %441, <8 x i32> undef), !tbaa !12, !alias.scope !147 - %447 = getelementptr inbounds float, float* %6, i64 %444 - %448 = bitcast float* %447 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load223.2, <8 x i32>* %448, i32 4, <8 x i1> %441), !tbaa !12, !alias.scope !150, !noalias !147, !llvm.access.group !21 - %449 = trunc <8 x i64> %broadcast.splat220 to <8 x i32> - %450 = or <8 x i32> %449, - %451 = icmp sgt <8 x i32> %450, zeroinitializer - %452 = icmp sgt <8 x i32> %broadcast.splat222, %450 - %453 = and <8 x i1> %452, %451 - %454 = extractelement <8 x i32> %450, i32 0 - %455 = add nsw i32 %mul.i.i.6, %454 - %456 = sext i32 %455 to i64 - %457 = getelementptr inbounds float, float* %9, i64 %456 - %458 = bitcast float* %457 to <8 x i32>* - %wide.masked.load223.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %458, i32 4, <8 x i1> %453, <8 x i32> undef), !tbaa !12, !alias.scope !147 - %459 = getelementptr inbounds float, float* %6, i64 %456 - %460 = bitcast float* %459 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load223.3, <8 x i32>* %460, i32 4, <8 x i1> %453), !tbaa !12, !alias.scope !150, !noalias !147, !llvm.access.group !21 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.3, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ], [ %552, %if.end.i.i.us.us.6.3 ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv2.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp7.i.i.us.us.6 = icmp sgt i32 %conv2.i.i.us.us.6, 0 - %cmp11.i.i.us.us.6 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.6 - %or.cond28.i.i.us.us.6 = and i1 %cmp11.i.i.us.us.6, %cmp7.i.i.us.us.6 - br i1 %or.cond28.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.6, %conv2.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.6 - %461 = bitcast float* %arrayidx.i.i.us.us.6 to i32* - %462 = load i32, i32* %461, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.6 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.6 - %463 = bitcast float* %arrayidx16.i.i.us.us.6 to i32* - store i32 %462, i32* %463, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.then.i.i.us.us.6, %pregion_for_entry.entry.i.i.us.us.6 - %464 = or i64 %_local_id_x.i.0.us.us.6, 1 - %add1.i.i.i.us.us.6.1 = add nuw nsw i64 %464, %mul.i.i.i - %conv2.i.i.us.us.6.1 = trunc i64 %add1.i.i.i.us.us.6.1 to i32 - %cmp7.i.i.us.us.6.1 = icmp sgt i32 %conv2.i.i.us.us.6.1, 0 - %cmp11.i.i.us.us.6.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.6.1 - %or.cond28.i.i.us.us.6.1 = and i1 %cmp11.i.i.us.us.6.1, %cmp7.i.i.us.us.6.1 - br i1 %or.cond28.i.i.us.us.6.1, label %if.then.i.i.us.us.6.1, label %if.end.i.i.us.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.i.i.us.us.6.3 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph212, %pregion_for_end.i.i.5 - %465 = trunc i64 %mul3.i.i.i to i32 - %conv.i.i.7 = or i32 %465, 7 - %cmp.i.i.7 = icmp sgt i32 %conv.i.i.7, 0 - %mul.i.i.7 = mul nsw i32 %13, %conv.i.i.7 - %cmp4.i.i.7 = icmp sgt i32 %sub.i.i, %conv.i.i.7 - %or.cond10 = and i1 %cmp.i.i.7, %cmp4.i.i.7 - br i1 %or.cond10, label %vector.scevcheck231, label %pregion_for_end.i.i.7 - -vector.scevcheck231: ; preds = %pregion_for_end.i.i.6 - %466 = mul i32 %13, %conv.i.i.7 - %467 = trunc i64 %2 to i32 - %468 = shl i32 %467, 5 - %469 = add i32 %466, %468 - %470 = icmp sgt i32 %469, 2147483616 - br i1 %470, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %vector.memcheck245 - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %vector.memcheck245, %vector.scevcheck231 - br label %pregion_for_entry.entry.i.i.us.us.7 - -vector.memcheck245: ; preds = %vector.scevcheck231 - %471 = mul i32 %13, %conv.i.i.7 - %472 = trunc i64 %2 to i32 - %473 = shl i32 %472, 5 - %474 = add i32 %471, %473 - %475 = sext i32 %474 to i64 - %scevgep233 = getelementptr float, float* %6, i64 %475 - %476 = add nsw i64 %475, 32 - %scevgep235 = getelementptr float, float* %6, i64 %476 - %scevgep237 = getelementptr float, float* %9, i64 %475 - %scevgep239 = getelementptr float, float* %9, i64 %476 - %bound0241 = icmp ult float* %scevgep233, %scevgep239 - %bound1242 = icmp ult float* %scevgep237, %scevgep235 - %found.conflict243 = and i1 %bound0241, %bound1242 - br i1 %found.conflict243, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %vector.ph246 - -vector.ph246: ; preds = %vector.memcheck245 - %broadcast.splatinsert253 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat254 = shufflevector <8 x i64> %broadcast.splatinsert253, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert255 = insertelement <8 x i32> undef, i32 %sub.i.i, i32 0 - %broadcast.splat256 = shufflevector <8 x i32> %broadcast.splatinsert255, <8 x i32> undef, <8 x i32> zeroinitializer - %477 = trunc <8 x i64> %broadcast.splat254 to <8 x i32> - %478 = or <8 x i32> %477, - %479 = icmp sgt <8 x i32> %478, zeroinitializer - %480 = icmp sgt <8 x i32> %broadcast.splat256, %478 - %481 = and <8 x i1> %480, %479 - %482 = extractelement <8 x i32> %478, i32 0 - %483 = add nsw i32 %mul.i.i.7, %482 - %484 = sext i32 %483 to i64 - %485 = getelementptr inbounds float, float* %9, i64 %484 - %486 = bitcast float* %485 to <8 x i32>* - %wide.masked.load257 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %486, i32 4, <8 x i1> %481, <8 x i32> undef), !tbaa !12, !alias.scope !152 - %487 = getelementptr inbounds float, float* %6, i64 %484 - %488 = bitcast float* %487 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load257, <8 x i32>* %488, i32 4, <8 x i1> %481), !tbaa !12, !alias.scope !155, !noalias !152, !llvm.access.group !21 - %489 = trunc <8 x i64> %broadcast.splat254 to <8 x i32> - %490 = or <8 x i32> %489, - %491 = icmp sgt <8 x i32> %490, zeroinitializer - %492 = icmp sgt <8 x i32> %broadcast.splat256, %490 - %493 = and <8 x i1> %492, %491 - %494 = extractelement <8 x i32> %490, i32 0 - %495 = add nsw i32 %mul.i.i.7, %494 - %496 = sext i32 %495 to i64 - %497 = getelementptr inbounds float, float* %9, i64 %496 - %498 = bitcast float* %497 to <8 x i32>* - %wide.masked.load257.1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %498, i32 4, <8 x i1> %493, <8 x i32> undef), !tbaa !12, !alias.scope !152 - %499 = getelementptr inbounds float, float* %6, i64 %496 - %500 = bitcast float* %499 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load257.1, <8 x i32>* %500, i32 4, <8 x i1> %493), !tbaa !12, !alias.scope !155, !noalias !152, !llvm.access.group !21 - %501 = trunc <8 x i64> %broadcast.splat254 to <8 x i32> - %502 = or <8 x i32> %501, - %503 = icmp sgt <8 x i32> %502, zeroinitializer - %504 = icmp sgt <8 x i32> %broadcast.splat256, %502 - %505 = and <8 x i1> %504, %503 - %506 = extractelement <8 x i32> %502, i32 0 - %507 = add nsw i32 %mul.i.i.7, %506 - %508 = sext i32 %507 to i64 - %509 = getelementptr inbounds float, float* %9, i64 %508 - %510 = bitcast float* %509 to <8 x i32>* - %wide.masked.load257.2 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %510, i32 4, <8 x i1> %505, <8 x i32> undef), !tbaa !12, !alias.scope !152 - %511 = getelementptr inbounds float, float* %6, i64 %508 - %512 = bitcast float* %511 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load257.2, <8 x i32>* %512, i32 4, <8 x i1> %505), !tbaa !12, !alias.scope !155, !noalias !152, !llvm.access.group !21 - %513 = trunc <8 x i64> %broadcast.splat254 to <8 x i32> - %514 = or <8 x i32> %513, - %515 = icmp sgt <8 x i32> %514, zeroinitializer - %516 = icmp sgt <8 x i32> %broadcast.splat256, %514 - %517 = and <8 x i1> %516, %515 - %518 = extractelement <8 x i32> %514, i32 0 - %519 = add nsw i32 %mul.i.i.7, %518 - %520 = sext i32 %519 to i64 - %521 = getelementptr inbounds float, float* %9, i64 %520 - %522 = bitcast float* %521 to <8 x i32>* - %wide.masked.load257.3 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %522, i32 4, <8 x i1> %517, <8 x i32> undef), !tbaa !12, !alias.scope !152 - %523 = getelementptr inbounds float, float* %6, i64 %520 - %524 = bitcast float* %523 to <8 x i32>* - call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %wide.masked.load257.3, <8 x i32>* %524, i32 4, <8 x i1> %517), !tbaa !12, !alias.scope !155, !noalias !152, !llvm.access.group !21 - br label %pregion_for_end.i.i.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.3, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ], [ %540, %if.end.i.i.us.us.7.3 ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv2.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp7.i.i.us.us.7 = icmp sgt i32 %conv2.i.i.us.us.7, 0 - %cmp11.i.i.us.us.7 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.7 - %or.cond28.i.i.us.us.7 = and i1 %cmp11.i.i.us.us.7, %cmp7.i.i.us.us.7 - br i1 %or.cond28.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.7, %conv2.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.7 - %525 = bitcast float* %arrayidx.i.i.us.us.7 to i32* - %526 = load i32, i32* %525, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.7 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.7 - %527 = bitcast float* %arrayidx16.i.i.us.us.7 to i32* - store i32 %526, i32* %527, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.then.i.i.us.us.7, %pregion_for_entry.entry.i.i.us.us.7 - %528 = or i64 %_local_id_x.i.0.us.us.7, 1 - %add1.i.i.i.us.us.7.1 = add nuw nsw i64 %528, %mul.i.i.i - %conv2.i.i.us.us.7.1 = trunc i64 %add1.i.i.i.us.us.7.1 to i32 - %cmp7.i.i.us.us.7.1 = icmp sgt i32 %conv2.i.i.us.us.7.1, 0 - %cmp11.i.i.us.us.7.1 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.7.1 - %or.cond28.i.i.us.us.7.1 = and i1 %cmp11.i.i.us.us.7.1, %cmp7.i.i.us.us.7.1 - br i1 %or.cond28.i.i.us.us.7.1, label %if.then.i.i.us.us.7.1, label %if.end.i.i.us.us.7.1 - -pregion_for_end.i.i.7.loopexit: ; preds = %if.end.i.i.us.us.7.3 - br label %pregion_for_end.i.i.7 - -pregion_for_end.i.i.7: ; preds = %pregion_for_end.i.i.7.loopexit, %vector.ph246, %pregion_for_end.i.i.6 - ret void - -if.then.i.i.us.us.7.1: ; preds = %if.end.i.i.us.us.7 - %add.i.i.us.us.7.1 = add nsw i32 %mul.i.i.7, %conv2.i.i.us.us.7.1 - %idxprom.i.i.us.us.7.1 = sext i32 %add.i.i.us.us.7.1 to i64 - %arrayidx.i.i.us.us.7.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.7.1 - %529 = bitcast float* %arrayidx.i.i.us.us.7.1 to i32* - %530 = load i32, i32* %529, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.7.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.7.1 - %531 = bitcast float* %arrayidx16.i.i.us.us.7.1 to i32* - store i32 %530, i32* %531, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.7.1 - -if.end.i.i.us.us.7.1: ; preds = %if.then.i.i.us.us.7.1, %if.end.i.i.us.us.7 - %532 = or i64 %_local_id_x.i.0.us.us.7, 2 - %add1.i.i.i.us.us.7.2 = add nuw nsw i64 %532, %mul.i.i.i - %conv2.i.i.us.us.7.2 = trunc i64 %add1.i.i.i.us.us.7.2 to i32 - %cmp7.i.i.us.us.7.2 = icmp sgt i32 %conv2.i.i.us.us.7.2, 0 - %cmp11.i.i.us.us.7.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.7.2 - %or.cond28.i.i.us.us.7.2 = and i1 %cmp11.i.i.us.us.7.2, %cmp7.i.i.us.us.7.2 - br i1 %or.cond28.i.i.us.us.7.2, label %if.then.i.i.us.us.7.2, label %if.end.i.i.us.us.7.2 - -if.then.i.i.us.us.7.2: ; preds = %if.end.i.i.us.us.7.1 - %add.i.i.us.us.7.2 = add nsw i32 %mul.i.i.7, %conv2.i.i.us.us.7.2 - %idxprom.i.i.us.us.7.2 = sext i32 %add.i.i.us.us.7.2 to i64 - %arrayidx.i.i.us.us.7.2 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.7.2 - %533 = bitcast float* %arrayidx.i.i.us.us.7.2 to i32* - %534 = load i32, i32* %533, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.7.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.7.2 - %535 = bitcast float* %arrayidx16.i.i.us.us.7.2 to i32* - store i32 %534, i32* %535, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.7.2 - -if.end.i.i.us.us.7.2: ; preds = %if.then.i.i.us.us.7.2, %if.end.i.i.us.us.7.1 - %536 = or i64 %_local_id_x.i.0.us.us.7, 3 - %add1.i.i.i.us.us.7.3 = add nuw nsw i64 %536, %mul.i.i.i - %conv2.i.i.us.us.7.3 = trunc i64 %add1.i.i.i.us.us.7.3 to i32 - %cmp7.i.i.us.us.7.3 = icmp sgt i32 %conv2.i.i.us.us.7.3, 0 - %cmp11.i.i.us.us.7.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.7.3 - %or.cond28.i.i.us.us.7.3 = and i1 %cmp11.i.i.us.us.7.3, %cmp7.i.i.us.us.7.3 - br i1 %or.cond28.i.i.us.us.7.3, label %if.then.i.i.us.us.7.3, label %if.end.i.i.us.us.7.3 - -if.then.i.i.us.us.7.3: ; preds = %if.end.i.i.us.us.7.2 - %add.i.i.us.us.7.3 = add nsw i32 %mul.i.i.7, %conv2.i.i.us.us.7.3 - %idxprom.i.i.us.us.7.3 = sext i32 %add.i.i.us.us.7.3 to i64 - %arrayidx.i.i.us.us.7.3 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.7.3 - %537 = bitcast float* %arrayidx.i.i.us.us.7.3 to i32* - %538 = load i32, i32* %537, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.7.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.7.3 - %539 = bitcast float* %arrayidx16.i.i.us.us.7.3 to i32* - store i32 %538, i32* %539, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.7.3 - -if.end.i.i.us.us.7.3: ; preds = %if.then.i.i.us.us.7.3, %if.end.i.i.us.us.7.2 - %540 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 4 - %exitcond.7.not.3 = icmp eq i64 %540, 32 - br i1 %exitcond.7.not.3, label %pregion_for_end.i.i.7.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !157 - -if.then.i.i.us.us.6.1: ; preds = %if.end.i.i.us.us.6 - %add.i.i.us.us.6.1 = add nsw i32 %mul.i.i.6, %conv2.i.i.us.us.6.1 - %idxprom.i.i.us.us.6.1 = sext i32 %add.i.i.us.us.6.1 to i64 - %arrayidx.i.i.us.us.6.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.6.1 - %541 = bitcast float* %arrayidx.i.i.us.us.6.1 to i32* - %542 = load i32, i32* %541, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.6.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.6.1 - %543 = bitcast float* %arrayidx16.i.i.us.us.6.1 to i32* - store i32 %542, i32* %543, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.6.1 - -if.end.i.i.us.us.6.1: ; preds = %if.then.i.i.us.us.6.1, %if.end.i.i.us.us.6 - %544 = or i64 %_local_id_x.i.0.us.us.6, 2 - %add1.i.i.i.us.us.6.2 = add nuw nsw i64 %544, %mul.i.i.i - %conv2.i.i.us.us.6.2 = trunc i64 %add1.i.i.i.us.us.6.2 to i32 - %cmp7.i.i.us.us.6.2 = icmp sgt i32 %conv2.i.i.us.us.6.2, 0 - %cmp11.i.i.us.us.6.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.6.2 - %or.cond28.i.i.us.us.6.2 = and i1 %cmp11.i.i.us.us.6.2, %cmp7.i.i.us.us.6.2 - br i1 %or.cond28.i.i.us.us.6.2, label %if.then.i.i.us.us.6.2, label %if.end.i.i.us.us.6.2 - -if.then.i.i.us.us.6.2: ; preds = %if.end.i.i.us.us.6.1 - %add.i.i.us.us.6.2 = add nsw i32 %mul.i.i.6, %conv2.i.i.us.us.6.2 - %idxprom.i.i.us.us.6.2 = sext i32 %add.i.i.us.us.6.2 to i64 - %arrayidx.i.i.us.us.6.2 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.6.2 - %545 = bitcast float* %arrayidx.i.i.us.us.6.2 to i32* - %546 = load i32, i32* %545, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.6.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.6.2 - %547 = bitcast float* %arrayidx16.i.i.us.us.6.2 to i32* - store i32 %546, i32* %547, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.6.2 - -if.end.i.i.us.us.6.2: ; preds = %if.then.i.i.us.us.6.2, %if.end.i.i.us.us.6.1 - %548 = or i64 %_local_id_x.i.0.us.us.6, 3 - %add1.i.i.i.us.us.6.3 = add nuw nsw i64 %548, %mul.i.i.i - %conv2.i.i.us.us.6.3 = trunc i64 %add1.i.i.i.us.us.6.3 to i32 - %cmp7.i.i.us.us.6.3 = icmp sgt i32 %conv2.i.i.us.us.6.3, 0 - %cmp11.i.i.us.us.6.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.6.3 - %or.cond28.i.i.us.us.6.3 = and i1 %cmp11.i.i.us.us.6.3, %cmp7.i.i.us.us.6.3 - br i1 %or.cond28.i.i.us.us.6.3, label %if.then.i.i.us.us.6.3, label %if.end.i.i.us.us.6.3 - -if.then.i.i.us.us.6.3: ; preds = %if.end.i.i.us.us.6.2 - %add.i.i.us.us.6.3 = add nsw i32 %mul.i.i.6, %conv2.i.i.us.us.6.3 - %idxprom.i.i.us.us.6.3 = sext i32 %add.i.i.us.us.6.3 to i64 - %arrayidx.i.i.us.us.6.3 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.6.3 - %549 = bitcast float* %arrayidx.i.i.us.us.6.3 to i32* - %550 = load i32, i32* %549, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.6.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.6.3 - %551 = bitcast float* %arrayidx16.i.i.us.us.6.3 to i32* - store i32 %550, i32* %551, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.6.3 - -if.end.i.i.us.us.6.3: ; preds = %if.then.i.i.us.us.6.3, %if.end.i.i.us.us.6.2 - %552 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 4 - %exitcond.6.not.3 = icmp eq i64 %552, 32 - br i1 %exitcond.6.not.3, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !158 - -if.then.i.i.us.us.5.1: ; preds = %if.end.i.i.us.us.5 - %add.i.i.us.us.5.1 = add nsw i32 %mul.i.i.5, %conv2.i.i.us.us.5.1 - %idxprom.i.i.us.us.5.1 = sext i32 %add.i.i.us.us.5.1 to i64 - %arrayidx.i.i.us.us.5.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.5.1 - %553 = bitcast float* %arrayidx.i.i.us.us.5.1 to i32* - %554 = load i32, i32* %553, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.5.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.5.1 - %555 = bitcast float* %arrayidx16.i.i.us.us.5.1 to i32* - store i32 %554, i32* %555, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.5.1 - -if.end.i.i.us.us.5.1: ; preds = %if.then.i.i.us.us.5.1, %if.end.i.i.us.us.5 - %556 = or i64 %_local_id_x.i.0.us.us.5, 2 - %add1.i.i.i.us.us.5.2 = add nuw nsw i64 %556, %mul.i.i.i - %conv2.i.i.us.us.5.2 = trunc i64 %add1.i.i.i.us.us.5.2 to i32 - %cmp7.i.i.us.us.5.2 = icmp sgt i32 %conv2.i.i.us.us.5.2, 0 - %cmp11.i.i.us.us.5.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.5.2 - %or.cond28.i.i.us.us.5.2 = and i1 %cmp11.i.i.us.us.5.2, %cmp7.i.i.us.us.5.2 - br i1 %or.cond28.i.i.us.us.5.2, label %if.then.i.i.us.us.5.2, label %if.end.i.i.us.us.5.2 - -if.then.i.i.us.us.5.2: ; preds = %if.end.i.i.us.us.5.1 - %add.i.i.us.us.5.2 = add nsw i32 %mul.i.i.5, %conv2.i.i.us.us.5.2 - %idxprom.i.i.us.us.5.2 = sext i32 %add.i.i.us.us.5.2 to i64 - %arrayidx.i.i.us.us.5.2 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.5.2 - %557 = bitcast float* %arrayidx.i.i.us.us.5.2 to i32* - %558 = load i32, i32* %557, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.5.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.5.2 - %559 = bitcast float* %arrayidx16.i.i.us.us.5.2 to i32* - store i32 %558, i32* %559, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.5.2 - -if.end.i.i.us.us.5.2: ; preds = %if.then.i.i.us.us.5.2, %if.end.i.i.us.us.5.1 - %560 = or i64 %_local_id_x.i.0.us.us.5, 3 - %add1.i.i.i.us.us.5.3 = add nuw nsw i64 %560, %mul.i.i.i - %conv2.i.i.us.us.5.3 = trunc i64 %add1.i.i.i.us.us.5.3 to i32 - %cmp7.i.i.us.us.5.3 = icmp sgt i32 %conv2.i.i.us.us.5.3, 0 - %cmp11.i.i.us.us.5.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.5.3 - %or.cond28.i.i.us.us.5.3 = and i1 %cmp11.i.i.us.us.5.3, %cmp7.i.i.us.us.5.3 - br i1 %or.cond28.i.i.us.us.5.3, label %if.then.i.i.us.us.5.3, label %if.end.i.i.us.us.5.3 - -if.then.i.i.us.us.5.3: ; preds = %if.end.i.i.us.us.5.2 - %add.i.i.us.us.5.3 = add nsw i32 %mul.i.i.5, %conv2.i.i.us.us.5.3 - %idxprom.i.i.us.us.5.3 = sext i32 %add.i.i.us.us.5.3 to i64 - %arrayidx.i.i.us.us.5.3 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.5.3 - %561 = bitcast float* %arrayidx.i.i.us.us.5.3 to i32* - %562 = load i32, i32* %561, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.5.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.5.3 - %563 = bitcast float* %arrayidx16.i.i.us.us.5.3 to i32* - store i32 %562, i32* %563, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.5.3 - -if.end.i.i.us.us.5.3: ; preds = %if.then.i.i.us.us.5.3, %if.end.i.i.us.us.5.2 - %564 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 4 - %exitcond.5.not.3 = icmp eq i64 %564, 32 - br i1 %exitcond.5.not.3, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !159 - -if.then.i.i.us.us.4.1: ; preds = %if.end.i.i.us.us.4 - %add.i.i.us.us.4.1 = add nsw i32 %mul.i.i.4, %conv2.i.i.us.us.4.1 - %idxprom.i.i.us.us.4.1 = sext i32 %add.i.i.us.us.4.1 to i64 - %arrayidx.i.i.us.us.4.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.4.1 - %565 = bitcast float* %arrayidx.i.i.us.us.4.1 to i32* - %566 = load i32, i32* %565, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.4.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.4.1 - %567 = bitcast float* %arrayidx16.i.i.us.us.4.1 to i32* - store i32 %566, i32* %567, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.4.1 - -if.end.i.i.us.us.4.1: ; preds = %if.then.i.i.us.us.4.1, %if.end.i.i.us.us.4 - %568 = or i64 %_local_id_x.i.0.us.us.4, 2 - %add1.i.i.i.us.us.4.2 = add nuw nsw i64 %568, %mul.i.i.i - %conv2.i.i.us.us.4.2 = trunc i64 %add1.i.i.i.us.us.4.2 to i32 - %cmp7.i.i.us.us.4.2 = icmp sgt i32 %conv2.i.i.us.us.4.2, 0 - %cmp11.i.i.us.us.4.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.4.2 - %or.cond28.i.i.us.us.4.2 = and i1 %cmp11.i.i.us.us.4.2, %cmp7.i.i.us.us.4.2 - br i1 %or.cond28.i.i.us.us.4.2, label %if.then.i.i.us.us.4.2, label %if.end.i.i.us.us.4.2 - -if.then.i.i.us.us.4.2: ; preds = %if.end.i.i.us.us.4.1 - %add.i.i.us.us.4.2 = add nsw i32 %mul.i.i.4, %conv2.i.i.us.us.4.2 - %idxprom.i.i.us.us.4.2 = sext i32 %add.i.i.us.us.4.2 to i64 - %arrayidx.i.i.us.us.4.2 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.4.2 - %569 = bitcast float* %arrayidx.i.i.us.us.4.2 to i32* - %570 = load i32, i32* %569, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.4.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.4.2 - %571 = bitcast float* %arrayidx16.i.i.us.us.4.2 to i32* - store i32 %570, i32* %571, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.4.2 - -if.end.i.i.us.us.4.2: ; preds = %if.then.i.i.us.us.4.2, %if.end.i.i.us.us.4.1 - %572 = or i64 %_local_id_x.i.0.us.us.4, 3 - %add1.i.i.i.us.us.4.3 = add nuw nsw i64 %572, %mul.i.i.i - %conv2.i.i.us.us.4.3 = trunc i64 %add1.i.i.i.us.us.4.3 to i32 - %cmp7.i.i.us.us.4.3 = icmp sgt i32 %conv2.i.i.us.us.4.3, 0 - %cmp11.i.i.us.us.4.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.4.3 - %or.cond28.i.i.us.us.4.3 = and i1 %cmp11.i.i.us.us.4.3, %cmp7.i.i.us.us.4.3 - br i1 %or.cond28.i.i.us.us.4.3, label %if.then.i.i.us.us.4.3, label %if.end.i.i.us.us.4.3 - -if.then.i.i.us.us.4.3: ; preds = %if.end.i.i.us.us.4.2 - %add.i.i.us.us.4.3 = add nsw i32 %mul.i.i.4, %conv2.i.i.us.us.4.3 - %idxprom.i.i.us.us.4.3 = sext i32 %add.i.i.us.us.4.3 to i64 - %arrayidx.i.i.us.us.4.3 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.4.3 - %573 = bitcast float* %arrayidx.i.i.us.us.4.3 to i32* - %574 = load i32, i32* %573, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.4.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.4.3 - %575 = bitcast float* %arrayidx16.i.i.us.us.4.3 to i32* - store i32 %574, i32* %575, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.4.3 - -if.end.i.i.us.us.4.3: ; preds = %if.then.i.i.us.us.4.3, %if.end.i.i.us.us.4.2 - %576 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 4 - %exitcond.4.not.3 = icmp eq i64 %576, 32 - br i1 %exitcond.4.not.3, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !160 - -if.then.i.i.us.us.3.1: ; preds = %if.end.i.i.us.us.3 - %add.i.i.us.us.3.1 = add nsw i32 %mul.i.i.3, %conv2.i.i.us.us.3.1 - %idxprom.i.i.us.us.3.1 = sext i32 %add.i.i.us.us.3.1 to i64 - %arrayidx.i.i.us.us.3.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.3.1 - %577 = bitcast float* %arrayidx.i.i.us.us.3.1 to i32* - %578 = load i32, i32* %577, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.3.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.3.1 - %579 = bitcast float* %arrayidx16.i.i.us.us.3.1 to i32* - store i32 %578, i32* %579, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.3.1 - -if.end.i.i.us.us.3.1: ; preds = %if.then.i.i.us.us.3.1, %if.end.i.i.us.us.3 - %580 = or i64 %_local_id_x.i.0.us.us.3, 2 - %add1.i.i.i.us.us.3.2 = add nuw nsw i64 %580, %mul.i.i.i - %conv2.i.i.us.us.3.2 = trunc i64 %add1.i.i.i.us.us.3.2 to i32 - %cmp7.i.i.us.us.3.2 = icmp sgt i32 %conv2.i.i.us.us.3.2, 0 - %cmp11.i.i.us.us.3.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.3.2 - %or.cond28.i.i.us.us.3.2 = and i1 %cmp11.i.i.us.us.3.2, %cmp7.i.i.us.us.3.2 - br i1 %or.cond28.i.i.us.us.3.2, label %if.then.i.i.us.us.3.2, label %if.end.i.i.us.us.3.2 - -if.then.i.i.us.us.3.2: ; preds = %if.end.i.i.us.us.3.1 - %add.i.i.us.us.3.2 = add nsw i32 %mul.i.i.3, %conv2.i.i.us.us.3.2 - %idxprom.i.i.us.us.3.2 = sext i32 %add.i.i.us.us.3.2 to i64 - %arrayidx.i.i.us.us.3.2 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.3.2 - %581 = bitcast float* %arrayidx.i.i.us.us.3.2 to i32* - %582 = load i32, i32* %581, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.3.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.3.2 - %583 = bitcast float* %arrayidx16.i.i.us.us.3.2 to i32* - store i32 %582, i32* %583, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.3.2 - -if.end.i.i.us.us.3.2: ; preds = %if.then.i.i.us.us.3.2, %if.end.i.i.us.us.3.1 - %584 = or i64 %_local_id_x.i.0.us.us.3, 3 - %add1.i.i.i.us.us.3.3 = add nuw nsw i64 %584, %mul.i.i.i - %conv2.i.i.us.us.3.3 = trunc i64 %add1.i.i.i.us.us.3.3 to i32 - %cmp7.i.i.us.us.3.3 = icmp sgt i32 %conv2.i.i.us.us.3.3, 0 - %cmp11.i.i.us.us.3.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.3.3 - %or.cond28.i.i.us.us.3.3 = and i1 %cmp11.i.i.us.us.3.3, %cmp7.i.i.us.us.3.3 - br i1 %or.cond28.i.i.us.us.3.3, label %if.then.i.i.us.us.3.3, label %if.end.i.i.us.us.3.3 - -if.then.i.i.us.us.3.3: ; preds = %if.end.i.i.us.us.3.2 - %add.i.i.us.us.3.3 = add nsw i32 %mul.i.i.3, %conv2.i.i.us.us.3.3 - %idxprom.i.i.us.us.3.3 = sext i32 %add.i.i.us.us.3.3 to i64 - %arrayidx.i.i.us.us.3.3 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.3.3 - %585 = bitcast float* %arrayidx.i.i.us.us.3.3 to i32* - %586 = load i32, i32* %585, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.3.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.3.3 - %587 = bitcast float* %arrayidx16.i.i.us.us.3.3 to i32* - store i32 %586, i32* %587, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.3.3 - -if.end.i.i.us.us.3.3: ; preds = %if.then.i.i.us.us.3.3, %if.end.i.i.us.us.3.2 - %588 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 4 - %exitcond.3.not.3 = icmp eq i64 %588, 32 - br i1 %exitcond.3.not.3, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !161 - -if.then.i.i.us.us.2.1: ; preds = %if.end.i.i.us.us.2 - %add.i.i.us.us.2.1 = add nsw i32 %mul.i.i.2, %conv2.i.i.us.us.2.1 - %idxprom.i.i.us.us.2.1 = sext i32 %add.i.i.us.us.2.1 to i64 - %arrayidx.i.i.us.us.2.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.2.1 - %589 = bitcast float* %arrayidx.i.i.us.us.2.1 to i32* - %590 = load i32, i32* %589, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.2.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.2.1 - %591 = bitcast float* %arrayidx16.i.i.us.us.2.1 to i32* - store i32 %590, i32* %591, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.2.1 - -if.end.i.i.us.us.2.1: ; preds = %if.then.i.i.us.us.2.1, %if.end.i.i.us.us.2 - %592 = or i64 %_local_id_x.i.0.us.us.2, 2 - %add1.i.i.i.us.us.2.2 = add nuw nsw i64 %592, %mul.i.i.i - %conv2.i.i.us.us.2.2 = trunc i64 %add1.i.i.i.us.us.2.2 to i32 - %cmp7.i.i.us.us.2.2 = icmp sgt i32 %conv2.i.i.us.us.2.2, 0 - %cmp11.i.i.us.us.2.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.2.2 - %or.cond28.i.i.us.us.2.2 = and i1 %cmp11.i.i.us.us.2.2, %cmp7.i.i.us.us.2.2 - br i1 %or.cond28.i.i.us.us.2.2, label %if.then.i.i.us.us.2.2, label %if.end.i.i.us.us.2.2 - -if.then.i.i.us.us.2.2: ; preds = %if.end.i.i.us.us.2.1 - %add.i.i.us.us.2.2 = add nsw i32 %mul.i.i.2, %conv2.i.i.us.us.2.2 - %idxprom.i.i.us.us.2.2 = sext i32 %add.i.i.us.us.2.2 to i64 - %arrayidx.i.i.us.us.2.2 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.2.2 - %593 = bitcast float* %arrayidx.i.i.us.us.2.2 to i32* - %594 = load i32, i32* %593, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.2.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.2.2 - %595 = bitcast float* %arrayidx16.i.i.us.us.2.2 to i32* - store i32 %594, i32* %595, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.2.2 - -if.end.i.i.us.us.2.2: ; preds = %if.then.i.i.us.us.2.2, %if.end.i.i.us.us.2.1 - %596 = or i64 %_local_id_x.i.0.us.us.2, 3 - %add1.i.i.i.us.us.2.3 = add nuw nsw i64 %596, %mul.i.i.i - %conv2.i.i.us.us.2.3 = trunc i64 %add1.i.i.i.us.us.2.3 to i32 - %cmp7.i.i.us.us.2.3 = icmp sgt i32 %conv2.i.i.us.us.2.3, 0 - %cmp11.i.i.us.us.2.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.2.3 - %or.cond28.i.i.us.us.2.3 = and i1 %cmp11.i.i.us.us.2.3, %cmp7.i.i.us.us.2.3 - br i1 %or.cond28.i.i.us.us.2.3, label %if.then.i.i.us.us.2.3, label %if.end.i.i.us.us.2.3 - -if.then.i.i.us.us.2.3: ; preds = %if.end.i.i.us.us.2.2 - %add.i.i.us.us.2.3 = add nsw i32 %mul.i.i.2, %conv2.i.i.us.us.2.3 - %idxprom.i.i.us.us.2.3 = sext i32 %add.i.i.us.us.2.3 to i64 - %arrayidx.i.i.us.us.2.3 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.2.3 - %597 = bitcast float* %arrayidx.i.i.us.us.2.3 to i32* - %598 = load i32, i32* %597, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.2.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.2.3 - %599 = bitcast float* %arrayidx16.i.i.us.us.2.3 to i32* - store i32 %598, i32* %599, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.2.3 - -if.end.i.i.us.us.2.3: ; preds = %if.then.i.i.us.us.2.3, %if.end.i.i.us.us.2.2 - %600 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 4 - %exitcond.2.not.3 = icmp eq i64 %600, 32 - br i1 %exitcond.2.not.3, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !162 - -if.then.i.i.us.us.1.1: ; preds = %if.end.i.i.us.us.1 - %add.i.i.us.us.1.1 = add nsw i32 %mul.i.i.1, %conv2.i.i.us.us.1.1 - %idxprom.i.i.us.us.1.1 = sext i32 %add.i.i.us.us.1.1 to i64 - %arrayidx.i.i.us.us.1.1 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.1.1 - %601 = bitcast float* %arrayidx.i.i.us.us.1.1 to i32* - %602 = load i32, i32* %601, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.1.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.1.1 - %603 = bitcast float* %arrayidx16.i.i.us.us.1.1 to i32* - store i32 %602, i32* %603, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.1.1 - -if.end.i.i.us.us.1.1: ; preds = %if.then.i.i.us.us.1.1, %if.end.i.i.us.us.1 - %604 = or i64 %_local_id_x.i.0.us.us.1, 2 - %add1.i.i.i.us.us.1.2 = add nuw nsw i64 %604, %mul.i.i.i - %conv2.i.i.us.us.1.2 = trunc i64 %add1.i.i.i.us.us.1.2 to i32 - %cmp7.i.i.us.us.1.2 = icmp sgt i32 %conv2.i.i.us.us.1.2, 0 - %cmp11.i.i.us.us.1.2 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.1.2 - %or.cond28.i.i.us.us.1.2 = and i1 %cmp11.i.i.us.us.1.2, %cmp7.i.i.us.us.1.2 - br i1 %or.cond28.i.i.us.us.1.2, label %if.then.i.i.us.us.1.2, label %if.end.i.i.us.us.1.2 - -if.then.i.i.us.us.1.2: ; preds = %if.end.i.i.us.us.1.1 - %add.i.i.us.us.1.2 = add nsw i32 %mul.i.i.1, %conv2.i.i.us.us.1.2 - %idxprom.i.i.us.us.1.2 = sext i32 %add.i.i.us.us.1.2 to i64 - %arrayidx.i.i.us.us.1.2 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.1.2 - %605 = bitcast float* %arrayidx.i.i.us.us.1.2 to i32* - %606 = load i32, i32* %605, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.1.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.1.2 - %607 = bitcast float* %arrayidx16.i.i.us.us.1.2 to i32* - store i32 %606, i32* %607, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.1.2 - -if.end.i.i.us.us.1.2: ; preds = %if.then.i.i.us.us.1.2, %if.end.i.i.us.us.1.1 - %608 = or i64 %_local_id_x.i.0.us.us.1, 3 - %add1.i.i.i.us.us.1.3 = add nuw nsw i64 %608, %mul.i.i.i - %conv2.i.i.us.us.1.3 = trunc i64 %add1.i.i.i.us.us.1.3 to i32 - %cmp7.i.i.us.us.1.3 = icmp sgt i32 %conv2.i.i.us.us.1.3, 0 - %cmp11.i.i.us.us.1.3 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.1.3 - %or.cond28.i.i.us.us.1.3 = and i1 %cmp11.i.i.us.us.1.3, %cmp7.i.i.us.us.1.3 - br i1 %or.cond28.i.i.us.us.1.3, label %if.then.i.i.us.us.1.3, label %if.end.i.i.us.us.1.3 - -if.then.i.i.us.us.1.3: ; preds = %if.end.i.i.us.us.1.2 - %add.i.i.us.us.1.3 = add nsw i32 %mul.i.i.1, %conv2.i.i.us.us.1.3 - %idxprom.i.i.us.us.1.3 = sext i32 %add.i.i.us.us.1.3 to i64 - %arrayidx.i.i.us.us.1.3 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.1.3 - %609 = bitcast float* %arrayidx.i.i.us.us.1.3 to i32* - %610 = load i32, i32* %609, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.1.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.1.3 - %611 = bitcast float* %arrayidx16.i.i.us.us.1.3 to i32* - store i32 %610, i32* %611, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.1.3 - -if.end.i.i.us.us.1.3: ; preds = %if.then.i.i.us.us.1.3, %if.end.i.i.us.us.1.2 - %612 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 4 - %exitcond.1.not.3 = icmp eq i64 %612, 32 - br i1 %exitcond.1.not.3, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !163 - -if.then.i.i.us.us.1277: ; preds = %if.end.i.i.us.us - %add.i.i.us.us.1273 = add nsw i32 %mul.i.i, %conv2.i.i.us.us.1268 - %idxprom.i.i.us.us.1274 = sext i32 %add.i.i.us.us.1273 to i64 - %arrayidx.i.i.us.us.1275 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.1274 - %613 = bitcast float* %arrayidx.i.i.us.us.1275 to i32* - %614 = load i32, i32* %613, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.1276 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.1274 - %615 = bitcast float* %arrayidx16.i.i.us.us.1276 to i32* - store i32 %614, i32* %615, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.1278 - -if.end.i.i.us.us.1278: ; preds = %if.then.i.i.us.us.1277, %if.end.i.i.us.us - %616 = or i64 %_local_id_x.i.0.us.us, 2 - %add1.i.i.i.us.us.2280 = add nuw nsw i64 %616, %mul.i.i.i - %conv2.i.i.us.us.2281 = trunc i64 %add1.i.i.i.us.us.2280 to i32 - %cmp7.i.i.us.us.2282 = icmp sgt i32 %conv2.i.i.us.us.2281, 0 - %cmp11.i.i.us.us.2283 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.2281 - %or.cond28.i.i.us.us.2284 = and i1 %cmp11.i.i.us.us.2283, %cmp7.i.i.us.us.2282 - br i1 %or.cond28.i.i.us.us.2284, label %if.then.i.i.us.us.2290, label %if.end.i.i.us.us.2291 - -if.then.i.i.us.us.2290: ; preds = %if.end.i.i.us.us.1278 - %add.i.i.us.us.2286 = add nsw i32 %mul.i.i, %conv2.i.i.us.us.2281 - %idxprom.i.i.us.us.2287 = sext i32 %add.i.i.us.us.2286 to i64 - %arrayidx.i.i.us.us.2288 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.2287 - %617 = bitcast float* %arrayidx.i.i.us.us.2288 to i32* - %618 = load i32, i32* %617, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.2289 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.2287 - %619 = bitcast float* %arrayidx16.i.i.us.us.2289 to i32* - store i32 %618, i32* %619, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.2291 - -if.end.i.i.us.us.2291: ; preds = %if.then.i.i.us.us.2290, %if.end.i.i.us.us.1278 - %620 = or i64 %_local_id_x.i.0.us.us, 3 - %add1.i.i.i.us.us.3293 = add nuw nsw i64 %620, %mul.i.i.i - %conv2.i.i.us.us.3294 = trunc i64 %add1.i.i.i.us.us.3293 to i32 - %cmp7.i.i.us.us.3295 = icmp sgt i32 %conv2.i.i.us.us.3294, 0 - %cmp11.i.i.us.us.3296 = icmp sgt i32 %sub.i.i, %conv2.i.i.us.us.3294 - %or.cond28.i.i.us.us.3297 = and i1 %cmp11.i.i.us.us.3296, %cmp7.i.i.us.us.3295 - br i1 %or.cond28.i.i.us.us.3297, label %if.then.i.i.us.us.3303, label %if.end.i.i.us.us.3304 - -if.then.i.i.us.us.3303: ; preds = %if.end.i.i.us.us.2291 - %add.i.i.us.us.3299 = add nsw i32 %mul.i.i, %conv2.i.i.us.us.3294 - %idxprom.i.i.us.us.3300 = sext i32 %add.i.i.us.us.3299 to i64 - %arrayidx.i.i.us.us.3301 = getelementptr inbounds float, float* %9, i64 %idxprom.i.i.us.us.3300 - %621 = bitcast float* %arrayidx.i.i.us.us.3301 to i32* - %622 = load i32, i32* %621, align 4, !tbaa !12 - %arrayidx16.i.i.us.us.3302 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.us.3300 - %623 = bitcast float* %arrayidx16.i.i.us.us.3302 to i32* - store i32 %622, i32* %623, align 4, !tbaa !12, !llvm.access.group !21 - br label %if.end.i.i.us.us.3304 - -if.end.i.i.us.us.3304: ; preds = %if.then.i.i.us.us.3303, %if.end.i.i.us.us.2291 - %624 = add nuw nsw i64 %_local_id_x.i.0.us.us, 4 - %exitcond.not.3 = icmp eq i64 %624, 32 - br i1 %exitcond.not.3, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !164 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32 immarg, <8 x i1>, <8 x i32>) #2 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32 immarg, <8 x i1>) #3 - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nofree norecurse nounwind } -attributes #2 = { argmemonly nounwind readonly willreturn } -attributes #3 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"float*", !"float*", !"int"} -!9 = !{!"", !"", !""} -!10 = !{!"A", !"B", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20} -!20 = distinct !{!20, !18} -!21 = !{!22, !23} -!22 = distinct !{} -!23 = distinct !{} -!24 = !{!25} -!25 = distinct !{!25, !26} -!26 = distinct !{!26, !"LVerDomain"} -!27 = !{!28} -!28 = distinct !{!28, !26} -!29 = !{!30} -!30 = distinct !{!30, !31} -!31 = distinct !{!31, !"LVerDomain"} -!32 = !{!33} -!33 = distinct !{!33, !31} -!34 = !{!35} -!35 = distinct !{!35, !36} -!36 = distinct !{!36, !"LVerDomain"} -!37 = !{!38} -!38 = distinct !{!38, !36} -!39 = !{!40} -!40 = distinct !{!40, !41} -!41 = distinct !{!41, !"LVerDomain"} -!42 = !{!43} -!43 = distinct !{!43, !41} -!44 = !{!45} -!45 = distinct !{!45, !46} -!46 = distinct !{!46, !"LVerDomain"} -!47 = !{!48} -!48 = distinct !{!48, !46} -!49 = !{!50} -!50 = distinct !{!50, !51} -!51 = distinct !{!51, !"LVerDomain"} -!52 = !{!53} -!53 = distinct !{!53, !51} -!54 = !{!55} -!55 = distinct !{!55, !56} -!56 = distinct !{!56, !"LVerDomain"} -!57 = !{!58} -!58 = distinct !{!58, !56} -!59 = distinct !{!59, !60, !61} -!60 = !{!"llvm.loop.parallel_accesses", !22} -!61 = !{!"llvm.loop.isvectorized", i32 1} -!62 = distinct !{!62, !60, !61} -!63 = distinct !{!63, !60, !61} -!64 = distinct !{!64, !60, !61} -!65 = distinct !{!65, !60, !61} -!66 = distinct !{!66, !60, !61} -!67 = distinct !{!67, !60, !61} -!68 = distinct !{!68, !60, !61} -!69 = !{!70} -!70 = distinct !{!70, !71} -!71 = distinct !{!71, !"LVerDomain"} -!72 = !{!73} -!73 = distinct !{!73, !71} -!74 = !{!75} -!75 = distinct !{!75, !76} -!76 = distinct !{!76, !"LVerDomain"} -!77 = !{!78} -!78 = distinct !{!78, !76} -!79 = !{!80} -!80 = distinct !{!80, !81} -!81 = distinct !{!81, !"LVerDomain"} -!82 = !{!83} -!83 = distinct !{!83, !81} -!84 = !{!85} -!85 = distinct !{!85, !86} -!86 = distinct !{!86, !"LVerDomain"} -!87 = !{!88} -!88 = distinct !{!88, !86} -!89 = !{!90} -!90 = distinct !{!90, !91} -!91 = distinct !{!91, !"LVerDomain"} -!92 = !{!93} -!93 = distinct !{!93, !91} -!94 = !{!95} -!95 = distinct !{!95, !96} -!96 = distinct !{!96, !"LVerDomain"} -!97 = !{!98} -!98 = distinct !{!98, !96} -!99 = !{!100} -!100 = distinct !{!100, !101} -!101 = distinct !{!101, !"LVerDomain"} -!102 = !{!103} -!103 = distinct !{!103, !101} -!104 = !{!105} -!105 = distinct !{!105, !106} -!106 = distinct !{!106, !"LVerDomain"} -!107 = !{!108} -!108 = distinct !{!108, !106} -!109 = distinct !{!109, !60, !61} -!110 = distinct !{!110, !60, !61} -!111 = distinct !{!111, !60, !61} -!112 = distinct !{!112, !60, !61} -!113 = distinct !{!113, !60, !61} -!114 = distinct !{!114, !60, !61} -!115 = distinct !{!115, !60, !61} -!116 = distinct !{!116, !60, !61} -!117 = !{!118} -!118 = distinct !{!118, !119} -!119 = distinct !{!119, !"LVerDomain"} -!120 = !{!121} -!121 = distinct !{!121, !119} -!122 = !{!123} -!123 = distinct !{!123, !124} -!124 = distinct !{!124, !"LVerDomain"} -!125 = !{!126} -!126 = distinct !{!126, !124} -!127 = !{!128} -!128 = distinct !{!128, !129} -!129 = distinct !{!129, !"LVerDomain"} -!130 = !{!131} -!131 = distinct !{!131, !129} -!132 = !{!133} -!133 = distinct !{!133, !134} -!134 = distinct !{!134, !"LVerDomain"} -!135 = !{!136} -!136 = distinct !{!136, !134} -!137 = !{!138} -!138 = distinct !{!138, !139} -!139 = distinct !{!139, !"LVerDomain"} -!140 = !{!141} -!141 = distinct !{!141, !139} -!142 = !{!143} -!143 = distinct !{!143, !144} -!144 = distinct !{!144, !"LVerDomain"} -!145 = !{!146} -!146 = distinct !{!146, !144} -!147 = !{!148} -!148 = distinct !{!148, !149} -!149 = distinct !{!149, !"LVerDomain"} -!150 = !{!151} -!151 = distinct !{!151, !149} -!152 = !{!153} -!153 = distinct !{!153, !154} -!154 = distinct !{!154, !"LVerDomain"} -!155 = !{!156} -!156 = distinct !{!156, !154} -!157 = distinct !{!157, !60, !61} -!158 = distinct !{!158, !60, !61} -!159 = distinct !{!159, !60, !61} -!160 = distinct !{!160, !60, !61} -!161 = distinct !{!161, !60, !61} -!162 = distinct !{!162, !60, !61} -!163 = distinct !{!163, !60, !61} -!164 = distinct !{!164, !60, !61} diff --git a/pocl_irs/lu_kernel1.ll b/pocl_irs/lu_kernel1.ll deleted file mode 100644 index 69693ff..0000000 --- a/pocl_irs/lu_kernel1.ll +++ /dev/null @@ -1,220 +0,0 @@ -; ModuleID = './AH/OFKPFPHEIHFOOLEHKEIHHOHEKJAJACOLOPAPI/lu_kernel1/256-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_lu_kernel1(float* nocapture %0, i32 %1, i32 %2, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %3, i64 %4, i64 %5, i64 %6) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %4, 8 - %add.i = add nsw i32 %1, 1 - %mul.i = mul nsw i32 %2, %1 - %add6.i = add nsw i32 %mul.i, %1 - %idxprom7.i = sext i32 %add6.i to i64 - %arrayidx8.i = getelementptr inbounds float, float* %0, i64 %idxprom7.i - br label %pregion_for_entry.entry.i - -pregion_for_entry.entry.i: ; preds = %if.end.r_exit.i.1, %7 - %_local_id_x.0 = phi i64 [ 0, %7 ], [ %15, %if.end.r_exit.i.1 ] - %add1.i.i = add nuw nsw i64 %_local_id_x.0, %mul.i.i - %8 = trunc i64 %add1.i.i to i32 - %conv2.i = add i32 %add.i, %8 - %cmp.i = icmp slt i32 %conv2.i, %2 - br i1 %cmp.i, label %if.then.i, label %if.end.r_exit.i - -if.then.i: ; preds = %pregion_for_entry.entry.i - %add4.i = add nsw i32 %conv2.i, %mul.i - %idxprom.i = sext i32 %add4.i to i64 - %arrayidx.i = getelementptr inbounds float, float* %0, i64 %idxprom.i - %9 = load float, float* %arrayidx.i, align 4, !tbaa !12 - %10 = load float, float* %arrayidx8.i, align 4, !tbaa !12 - %div.i = fdiv float %9, %10, !fpmath !16 - store float %div.i, float* %arrayidx.i, align 4, !tbaa !12, !llvm.access.group !17 - br label %if.end.r_exit.i - -if.end.r_exit.i: ; preds = %if.then.i, %pregion_for_entry.entry.i - %11 = or i64 %_local_id_x.0, 1 - %add1.i.i.1 = add nuw nsw i64 %11, %mul.i.i - %12 = trunc i64 %add1.i.i.1 to i32 - %conv2.i.1 = add i32 %add.i, %12 - %cmp.i.1 = icmp slt i32 %conv2.i.1, %2 - br i1 %cmp.i.1, label %if.then.i.1, label %if.end.r_exit.i.1 - -lu_kernel1.exit: ; preds = %if.end.r_exit.i.1 - ret void - -if.then.i.1: ; preds = %if.end.r_exit.i - %add4.i.1 = add nsw i32 %conv2.i.1, %mul.i - %idxprom.i.1 = sext i32 %add4.i.1 to i64 - %arrayidx.i.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.1 - %13 = load float, float* %arrayidx.i.1, align 4, !tbaa !12 - %14 = load float, float* %arrayidx8.i, align 4, !tbaa !12 - %div.i.1 = fdiv float %13, %14, !fpmath !16 - store float %div.i.1, float* %arrayidx.i.1, align 4, !tbaa !12, !llvm.access.group !17 - br label %if.end.r_exit.i.1 - -if.end.r_exit.i.1: ; preds = %if.then.i.1, %if.end.r_exit.i - %15 = add nuw nsw i64 %_local_id_x.0, 2 - %exitcond.not.1 = icmp eq i64 %15, 256 - br i1 %exitcond.not.1, label %lu_kernel1.exit, label %pregion_for_entry.entry.i, !llvm.loop !19 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_lu_kernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to i32** - %11 = load i32*, i32** %10, align 8 - %12 = load i32, i32* %11, align 4 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to i32** - %15 = load i32*, i32** %14, align 8 - %16 = load i32, i32* %15, align 4 - %mul.i.i.i = shl i64 %2, 8 - %add.i.i = add nsw i32 %12, 1 - %mul.i.i = mul nsw i32 %16, %12 - %add6.i.i = add nsw i32 %mul.i.i, %12 - %idxprom7.i.i = sext i32 %add6.i.i to i64 - %arrayidx8.i.i = getelementptr inbounds float, float* %8, i64 %idxprom7.i.i - br label %pregion_for_entry.entry.i.i - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %5 - %_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %24, %if.end.r_exit.i.i.1 ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %17 = trunc i64 %add1.i.i.i to i32 - %conv2.i.i = add i32 %add.i.i, %17 - %cmp.i.i = icmp slt i32 %conv2.i.i, %16 - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %add4.i.i = add nsw i32 %conv2.i.i, %mul.i.i - %idxprom.i.i = sext i32 %add4.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %8, i64 %idxprom.i.i - %18 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %19 = load float, float* %arrayidx8.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %18, %19, !fpmath !16 - store float %div.i.i, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !17 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %20 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %20, %mul.i.i.i - %21 = trunc i64 %add1.i.i.i.1 to i32 - %conv2.i.i.1 = add i32 %add.i.i, %21 - %cmp.i.i.1 = icmp slt i32 %conv2.i.i.1, %16 - br i1 %cmp.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_lu_kernel1.exit: ; preds = %if.end.r_exit.i.i.1 - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %add4.i.i.1 = add nsw i32 %conv2.i.i.1, %mul.i.i - %idxprom.i.i.1 = sext i32 %add4.i.i.1 to i64 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %8, i64 %idxprom.i.i.1 - %22 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %23 = load float, float* %arrayidx8.i.i, align 4, !tbaa !12 - %div.i.i.1 = fdiv float %22, %23, !fpmath !16 - store float %div.i.i.1, float* %arrayidx.i.i.1, align 4, !tbaa !12, !llvm.access.group !17 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %24 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond.not.1 = icmp eq i64 %24, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_lu_kernel1.exit, label %pregion_for_entry.entry.i.i, !llvm.loop !19 -} - -; Function Attrs: nofree norecurse nounwind -define void @_pocl_kernel_lu_kernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #1 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to i32** - %10 = load i32*, i32** %9, align 8 - %11 = load i32, i32* %10, align 4 - %12 = getelementptr i8*, i8** %0, i64 2 - %13 = bitcast i8** %12 to i32** - %14 = load i32*, i32** %13, align 8 - %15 = load i32, i32* %14, align 4 - %mul.i.i.i = shl i64 %2, 8 - %add.i.i = add nsw i32 %11, 1 - %mul.i.i = mul nsw i32 %15, %11 - %add6.i.i = add nsw i32 %mul.i.i, %11 - %idxprom7.i.i = sext i32 %add6.i.i to i64 - %arrayidx8.i.i = getelementptr inbounds float, float* %7, i64 %idxprom7.i.i - br label %pregion_for_entry.entry.i.i - -pregion_for_entry.entry.i.i: ; preds = %if.end.r_exit.i.i.1, %5 - %_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %23, %if.end.r_exit.i.i.1 ] - %add1.i.i.i = add nuw nsw i64 %_local_id_x.i.0, %mul.i.i.i - %16 = trunc i64 %add1.i.i.i to i32 - %conv2.i.i = add i32 %add.i.i, %16 - %cmp.i.i = icmp slt i32 %conv2.i.i, %15 - br i1 %cmp.i.i, label %if.then.i.i, label %if.end.r_exit.i.i - -if.then.i.i: ; preds = %pregion_for_entry.entry.i.i - %add4.i.i = add nsw i32 %conv2.i.i, %mul.i.i - %idxprom.i.i = sext i32 %add4.i.i to i64 - %arrayidx.i.i = getelementptr inbounds float, float* %7, i64 %idxprom.i.i - %17 = load float, float* %arrayidx.i.i, align 4, !tbaa !12 - %18 = load float, float* %arrayidx8.i.i, align 4, !tbaa !12 - %div.i.i = fdiv float %17, %18, !fpmath !16 - store float %div.i.i, float* %arrayidx.i.i, align 4, !tbaa !12, !llvm.access.group !17 - br label %if.end.r_exit.i.i - -if.end.r_exit.i.i: ; preds = %if.then.i.i, %pregion_for_entry.entry.i.i - %19 = or i64 %_local_id_x.i.0, 1 - %add1.i.i.i.1 = add nuw nsw i64 %19, %mul.i.i.i - %20 = trunc i64 %add1.i.i.i.1 to i32 - %conv2.i.i.1 = add i32 %add.i.i, %20 - %cmp.i.i.1 = icmp slt i32 %conv2.i.i.1, %15 - br i1 %cmp.i.i.1, label %if.then.i.i.1, label %if.end.r_exit.i.i.1 - -_pocl_kernel_lu_kernel1.exit: ; preds = %if.end.r_exit.i.i.1 - ret void - -if.then.i.i.1: ; preds = %if.end.r_exit.i.i - %add4.i.i.1 = add nsw i32 %conv2.i.i.1, %mul.i.i - %idxprom.i.i.1 = sext i32 %add4.i.i.1 to i64 - %arrayidx.i.i.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.1 - %21 = load float, float* %arrayidx.i.i.1, align 4, !tbaa !12 - %22 = load float, float* %arrayidx8.i.i, align 4, !tbaa !12 - %div.i.i.1 = fdiv float %21, %22, !fpmath !16 - store float %div.i.i.1, float* %arrayidx.i.i.1, align 4, !tbaa !12, !llvm.access.group !17 - br label %if.end.r_exit.i.i.1 - -if.end.r_exit.i.i.1: ; preds = %if.then.i.i.1, %if.end.r_exit.i.i - %23 = add nuw nsw i64 %_local_id_x.i.0, 2 - %exitcond.not.1 = icmp eq i64 %23, 256 - br i1 %exitcond.not.1, label %_pocl_kernel_lu_kernel1.exit, label %pregion_for_entry.entry.i.i, !llvm.loop !19 -} - -attributes #0 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nofree norecurse nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"int", !"int"} -!9 = !{!"", !"", !""} -!10 = !{!"A", !"k", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{float 2.500000e+00} -!17 = !{!18} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.parallel_accesses", !18} diff --git a/pocl_irs/lu_kernel2.ll b/pocl_irs/lu_kernel2.ll deleted file mode 100644 index eacf60f..0000000 --- a/pocl_irs/lu_kernel2.ll +++ /dev/null @@ -1,5334 +0,0 @@ -; ModuleID = './AH/OFKPFPHEIHFOOLEHKEIHHOHEKJAJACOLOPAPI/lu_kernel2/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_lu_kernel2(float* nocapture %0, i32 %1, i32 %2, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %3, i64 %4, i64 %5, i64 %6) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { -pregion_for_entry.pregion_for_init.i: - %mul.i.i = shl i64 %4, 5 - %add.i = add nsw i32 %1, 1 - %mul3.i.i = shl i64 %5, 3 - %mul16.i = mul nsw i32 %2, %1 - %7 = trunc i64 %mul3.i.i to i32 - %conv7.i = add i32 %add.i, %7 - %cmp.i = icmp slt i32 %conv7.i, %2 - %mul.i = mul nsw i32 %conv7.i, %2 - %add13.i = add nsw i32 %mul.i, %1 - %idxprom14.i = sext i32 %add13.i to i64 - %arrayidx15.i = getelementptr inbounds float, float* %0, i64 %idxprom14.i - br i1 %cmp.i, label %vector.scevcheck, label %pregion_for_end.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i - %8 = mul i32 %conv7.i, %2 - %9 = add i32 %8, %1 - %10 = trunc i64 %4 to i32 - %11 = shl i32 %10, 5 - %12 = add i32 %9, %11 - %13 = add i32 %12, 1 - %14 = add i32 %12, 32 - %15 = icmp slt i32 %14, %13 - %16 = add i32 %2, 1 - %17 = mul i32 %16, %1 - %18 = add i32 %17, %11 - %19 = add i32 %18, 1 - %20 = add i32 %18, 32 - %21 = icmp slt i32 %20, %19 - %22 = or i1 %15, %21 - br i1 %22, label %pregion_for_entry.entry.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %23 = mul i32 %conv7.i, %2 - %24 = add i32 %23, %1 - %25 = trunc i64 %4 to i32 - %26 = shl i32 %25, 5 - %27 = add i32 %24, %26 - %28 = add i32 %27, 1 - %29 = sext i32 %28 to i64 - %scevgep = getelementptr float, float* %0, i64 %29 - %scevgep6 = bitcast float* %scevgep to i8* - %30 = add nsw i64 %29, 32 - %scevgep7 = getelementptr float, float* %0, i64 %30 - %scevgep9 = getelementptr float, float* %0, i64 %idxprom14.i - %scevgep910 = bitcast float* %scevgep9 to i8* - %uglygep = getelementptr i8, i8* %scevgep910, i64 1 - %31 = add i32 %2, 1 - %32 = mul i32 %31, %1 - %33 = add i32 %32, %26 - %34 = add i32 %33, 1 - %35 = sext i32 %34 to i64 - %scevgep11 = getelementptr float, float* %0, i64 %35 - %36 = add nsw i64 %35, 32 - %scevgep13 = getelementptr float, float* %0, i64 %36 - %bound0 = icmp ugt i8* %uglygep, %scevgep6 - %bound1 = icmp ult float* %arrayidx15.i, %scevgep7 - %found.conflict = and i1 %bound0, %bound1 - %bound015 = icmp ult float* %scevgep, %scevgep13 - %bound116 = icmp ult float* %scevgep11, %scevgep7 - %found.conflict17 = and i1 %bound015, %bound116 - %conflict.rdx = or i1 %found.conflict, %found.conflict17 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <8 x i32> undef, i32 %add.i, i32 0 - %broadcast.splat19 = shufflevector <8 x i32> %broadcast.splatinsert18, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert20 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat21 = shufflevector <8 x i32> %broadcast.splatinsert20, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert22 = insertelement <8 x float*> undef, float* %arrayidx15.i, i32 0 - %broadcast.splat23 = shufflevector <8 x float*> %broadcast.splatinsert22, <8 x float*> undef, <8 x i32> zeroinitializer - %37 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %38 = or <8 x i32> %37, - %39 = add <8 x i32> %broadcast.splat19, %38 - %40 = icmp slt <8 x i32> %39, %broadcast.splat21 - %41 = extractelement <8 x i32> %39, i32 0 - %42 = add nsw i32 %41, %mul.i - %43 = sext i32 %42 to i64 - %44 = getelementptr inbounds float, float* %0, i64 %43 - %45 = bitcast float* %44 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %45, i32 4, <8 x i1> %40, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %wide.masked.gather = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat23, i32 4, <8 x i1> %40, <8 x float> undef), !tbaa !12, !alias.scope !22 - %46 = add nsw i32 %41, %mul16.i - %47 = sext i32 %46 to i64 - %48 = getelementptr inbounds float, float* %0, i64 %47 - %49 = bitcast float* %48 to <8 x float>* - %wide.masked.load24 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %49, i32 4, <8 x i1> %40, <8 x float> undef), !tbaa !12, !alias.scope !23 - %50 = fneg <8 x float> %wide.masked.gather - %51 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %50, <8 x float> %wide.masked.load24, <8 x float> %wide.masked.load) - %52 = bitcast float* %44 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %51, <8 x float>* %52, i32 4, <8 x i1> %40), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !24 - %53 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %54 = or <8 x i32> %53, - %55 = add <8 x i32> %broadcast.splat19, %54 - %56 = icmp slt <8 x i32> %55, %broadcast.splat21 - %57 = extractelement <8 x i32> %55, i32 0 - %58 = add nsw i32 %57, %mul.i - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds float, float* %0, i64 %59 - %61 = bitcast float* %60 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %61, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %wide.masked.gather.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat23, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12, !alias.scope !22 - %62 = add nsw i32 %57, %mul16.i - %63 = sext i32 %62 to i64 - %64 = getelementptr inbounds float, float* %0, i64 %63 - %65 = bitcast float* %64 to <8 x float>* - %wide.masked.load24.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %65, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12, !alias.scope !23 - %66 = fneg <8 x float> %wide.masked.gather.1 - %67 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %66, <8 x float> %wide.masked.load24.1, <8 x float> %wide.masked.load.1) - %68 = bitcast float* %60 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %67, <8 x float>* %68, i32 4, <8 x i1> %56), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !24 - %69 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %70 = or <8 x i32> %69, - %71 = add <8 x i32> %broadcast.splat19, %70 - %72 = icmp slt <8 x i32> %71, %broadcast.splat21 - %73 = extractelement <8 x i32> %71, i32 0 - %74 = add nsw i32 %73, %mul.i - %75 = sext i32 %74 to i64 - %76 = getelementptr inbounds float, float* %0, i64 %75 - %77 = bitcast float* %76 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %77, i32 4, <8 x i1> %72, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %wide.masked.gather.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat23, i32 4, <8 x i1> %72, <8 x float> undef), !tbaa !12, !alias.scope !22 - %78 = add nsw i32 %73, %mul16.i - %79 = sext i32 %78 to i64 - %80 = getelementptr inbounds float, float* %0, i64 %79 - %81 = bitcast float* %80 to <8 x float>* - %wide.masked.load24.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %81, i32 4, <8 x i1> %72, <8 x float> undef), !tbaa !12, !alias.scope !23 - %82 = fneg <8 x float> %wide.masked.gather.2 - %83 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %82, <8 x float> %wide.masked.load24.2, <8 x float> %wide.masked.load.2) - %84 = bitcast float* %76 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %83, <8 x float>* %84, i32 4, <8 x i1> %72), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !24 - %85 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %86 = or <8 x i32> %85, - %87 = add <8 x i32> %broadcast.splat19, %86 - %88 = icmp slt <8 x i32> %87, %broadcast.splat21 - %89 = extractelement <8 x i32> %87, i32 0 - %90 = add nsw i32 %89, %mul.i - %91 = sext i32 %90 to i64 - %92 = getelementptr inbounds float, float* %0, i64 %91 - %93 = bitcast float* %92 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %93, i32 4, <8 x i1> %88, <8 x float> undef), !tbaa !12, !alias.scope !16, !noalias !19 - %wide.masked.gather.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat23, i32 4, <8 x i1> %88, <8 x float> undef), !tbaa !12, !alias.scope !22 - %94 = add nsw i32 %89, %mul16.i - %95 = sext i32 %94 to i64 - %96 = getelementptr inbounds float, float* %0, i64 %95 - %97 = bitcast float* %96 to <8 x float>* - %wide.masked.load24.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %97, i32 4, <8 x i1> %88, <8 x float> undef), !tbaa !12, !alias.scope !23 - %98 = fneg <8 x float> %wide.masked.gather.3 - %99 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %98, <8 x float> %wide.masked.load24.3, <8 x float> %wide.masked.load.3) - %100 = bitcast float* %92 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %99, <8 x float>* %100, i32 4, <8 x i1> %88), !tbaa !12, !alias.scope !16, !noalias !19, !llvm.access.group !24 - br label %pregion_for_end.i - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us.1403, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.us.preheader ], [ %861, %if.end.r_exit.i.us.1403 ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %101 = trunc i64 %add1.i.i.us to i32 - %conv2.i.us = add i32 %add.i, %101 - %cmp9.i.us = icmp slt i32 %conv2.i.us, %2 - br i1 %cmp9.i.us, label %if.then.i.us, label %if.end.r_exit.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %add11.i.us = add nsw i32 %conv2.i.us, %mul.i - %idxprom.i.us = sext i32 %add11.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %idxprom.i.us - %102 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %103 = load float, float* %arrayidx15.i, align 4, !tbaa !12 - %add17.i.us = add nsw i32 %conv2.i.us, %mul16.i - %idxprom18.i.us = sext i32 %add17.i.us to i64 - %arrayidx19.i.us = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us - %104 = load float, float* %arrayidx19.i.us, align 4, !tbaa !12 - %neg.i.us = fneg float %103 - %105 = tail call float @llvm.fmuladd.f32(float %neg.i.us, float %104, float %102) #6 - store float %105, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us - %106 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1391 = add nuw nsw i64 %106, %mul.i.i - %107 = trunc i64 %add1.i.i.us.1391 to i32 - %conv2.i.us.1392 = add i32 %add.i, %107 - %cmp9.i.us.1393 = icmp slt i32 %conv2.i.us.1392, %2 - br i1 %cmp9.i.us.1393, label %if.then.i.us.1402, label %if.end.r_exit.i.us.1403 - -pregion_for_end.i.loopexit: ; preds = %if.end.r_exit.i.us.1403 - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i - %108 = trunc i64 %mul3.i.i to i32 - %109 = or i32 %108, 1 - %conv7.i.1 = add i32 %add.i, %109 - %cmp.i.1 = icmp slt i32 %conv7.i.1, %2 - %mul.i.1 = mul nsw i32 %conv7.i.1, %2 - %add13.i.1 = add nsw i32 %mul.i.1, %1 - %idxprom14.i.1 = sext i32 %add13.i.1 to i64 - %arrayidx15.i.1 = getelementptr inbounds float, float* %0, i64 %idxprom14.i.1 - br i1 %cmp.i.1, label %vector.scevcheck35, label %pregion_for_end.i.1 - -vector.scevcheck35: ; preds = %pregion_for_end.i - %110 = mul i32 %conv7.i.1, %2 - %111 = add i32 %110, %1 - %112 = trunc i64 %4 to i32 - %113 = shl i32 %112, 5 - %114 = add i32 %111, %113 - %115 = add i32 %114, 1 - %116 = add i32 %114, 32 - %117 = icmp slt i32 %116, %115 - %118 = add i32 %2, 1 - %119 = mul i32 %118, %1 - %120 = add i32 %119, %113 - %121 = add i32 %120, 1 - %122 = add i32 %120, 32 - %123 = icmp slt i32 %122, %121 - %124 = or i1 %117, %123 - br i1 %124, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.memcheck57 - -pregion_for_entry.entry.i.us.1.preheader: ; preds = %vector.memcheck57, %vector.scevcheck35 - br label %pregion_for_entry.entry.i.us.1 - -vector.memcheck57: ; preds = %vector.scevcheck35 - %125 = mul i32 %conv7.i.1, %2 - %126 = add i32 %125, %1 - %127 = trunc i64 %4 to i32 - %128 = shl i32 %127, 5 - %129 = add i32 %126, %128 - %130 = add i32 %129, 1 - %131 = sext i32 %130 to i64 - %scevgep37 = getelementptr float, float* %0, i64 %131 - %scevgep3738 = bitcast float* %scevgep37 to i8* - %132 = add nsw i64 %131, 32 - %scevgep39 = getelementptr float, float* %0, i64 %132 - %scevgep41 = getelementptr float, float* %0, i64 %idxprom14.i.1 - %scevgep4142 = bitcast float* %scevgep41 to i8* - %uglygep43 = getelementptr i8, i8* %scevgep4142, i64 1 - %133 = add i32 %2, 1 - %134 = mul i32 %133, %1 - %135 = add i32 %134, %128 - %136 = add i32 %135, 1 - %137 = sext i32 %136 to i64 - %scevgep44 = getelementptr float, float* %0, i64 %137 - %138 = add nsw i64 %137, 32 - %scevgep46 = getelementptr float, float* %0, i64 %138 - %bound049 = icmp ugt i8* %uglygep43, %scevgep3738 - %bound150 = icmp ult float* %arrayidx15.i.1, %scevgep39 - %found.conflict51 = and i1 %bound049, %bound150 - %bound052 = icmp ult float* %scevgep37, %scevgep46 - %bound153 = icmp ult float* %scevgep44, %scevgep39 - %found.conflict54 = and i1 %bound052, %bound153 - %conflict.rdx55 = or i1 %found.conflict51, %found.conflict54 - br i1 %conflict.rdx55, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.ph58 - -vector.ph58: ; preds = %vector.memcheck57 - %broadcast.splatinsert65 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat66 = shufflevector <8 x i64> %broadcast.splatinsert65, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert67 = insertelement <8 x i32> undef, i32 %add.i, i32 0 - %broadcast.splat68 = shufflevector <8 x i32> %broadcast.splatinsert67, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert69 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat70 = shufflevector <8 x i32> %broadcast.splatinsert69, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert72 = insertelement <8 x float*> undef, float* %arrayidx15.i.1, i32 0 - %broadcast.splat73 = shufflevector <8 x float*> %broadcast.splatinsert72, <8 x float*> undef, <8 x i32> zeroinitializer - %139 = trunc <8 x i64> %broadcast.splat66 to <8 x i32> - %140 = or <8 x i32> %139, - %141 = add <8 x i32> %broadcast.splat68, %140 - %142 = icmp slt <8 x i32> %141, %broadcast.splat70 - %143 = extractelement <8 x i32> %141, i32 0 - %144 = add nsw i32 %143, %mul.i.1 - %145 = sext i32 %144 to i64 - %146 = getelementptr inbounds float, float* %0, i64 %145 - %147 = bitcast float* %146 to <8 x float>* - %wide.masked.load71 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %147, i32 4, <8 x i1> %142, <8 x float> undef), !tbaa !12, !alias.scope !27, !noalias !30 - %wide.masked.gather74 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat73, i32 4, <8 x i1> %142, <8 x float> undef), !tbaa !12, !alias.scope !33 - %148 = add nsw i32 %143, %mul16.i - %149 = sext i32 %148 to i64 - %150 = getelementptr inbounds float, float* %0, i64 %149 - %151 = bitcast float* %150 to <8 x float>* - %wide.masked.load75 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %151, i32 4, <8 x i1> %142, <8 x float> undef), !tbaa !12, !alias.scope !34 - %152 = fneg <8 x float> %wide.masked.gather74 - %153 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %152, <8 x float> %wide.masked.load75, <8 x float> %wide.masked.load71) - %154 = bitcast float* %146 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %153, <8 x float>* %154, i32 4, <8 x i1> %142), !tbaa !12, !alias.scope !27, !noalias !30, !llvm.access.group !24 - %155 = trunc <8 x i64> %broadcast.splat66 to <8 x i32> - %156 = or <8 x i32> %155, - %157 = add <8 x i32> %broadcast.splat68, %156 - %158 = icmp slt <8 x i32> %157, %broadcast.splat70 - %159 = extractelement <8 x i32> %157, i32 0 - %160 = add nsw i32 %159, %mul.i.1 - %161 = sext i32 %160 to i64 - %162 = getelementptr inbounds float, float* %0, i64 %161 - %163 = bitcast float* %162 to <8 x float>* - %wide.masked.load71.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %163, i32 4, <8 x i1> %158, <8 x float> undef), !tbaa !12, !alias.scope !27, !noalias !30 - %wide.masked.gather74.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat73, i32 4, <8 x i1> %158, <8 x float> undef), !tbaa !12, !alias.scope !33 - %164 = add nsw i32 %159, %mul16.i - %165 = sext i32 %164 to i64 - %166 = getelementptr inbounds float, float* %0, i64 %165 - %167 = bitcast float* %166 to <8 x float>* - %wide.masked.load75.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %167, i32 4, <8 x i1> %158, <8 x float> undef), !tbaa !12, !alias.scope !34 - %168 = fneg <8 x float> %wide.masked.gather74.1 - %169 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %168, <8 x float> %wide.masked.load75.1, <8 x float> %wide.masked.load71.1) - %170 = bitcast float* %162 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %169, <8 x float>* %170, i32 4, <8 x i1> %158), !tbaa !12, !alias.scope !27, !noalias !30, !llvm.access.group !24 - %171 = trunc <8 x i64> %broadcast.splat66 to <8 x i32> - %172 = or <8 x i32> %171, - %173 = add <8 x i32> %broadcast.splat68, %172 - %174 = icmp slt <8 x i32> %173, %broadcast.splat70 - %175 = extractelement <8 x i32> %173, i32 0 - %176 = add nsw i32 %175, %mul.i.1 - %177 = sext i32 %176 to i64 - %178 = getelementptr inbounds float, float* %0, i64 %177 - %179 = bitcast float* %178 to <8 x float>* - %wide.masked.load71.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %179, i32 4, <8 x i1> %174, <8 x float> undef), !tbaa !12, !alias.scope !27, !noalias !30 - %wide.masked.gather74.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat73, i32 4, <8 x i1> %174, <8 x float> undef), !tbaa !12, !alias.scope !33 - %180 = add nsw i32 %175, %mul16.i - %181 = sext i32 %180 to i64 - %182 = getelementptr inbounds float, float* %0, i64 %181 - %183 = bitcast float* %182 to <8 x float>* - %wide.masked.load75.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %183, i32 4, <8 x i1> %174, <8 x float> undef), !tbaa !12, !alias.scope !34 - %184 = fneg <8 x float> %wide.masked.gather74.2 - %185 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %184, <8 x float> %wide.masked.load75.2, <8 x float> %wide.masked.load71.2) - %186 = bitcast float* %178 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %185, <8 x float>* %186, i32 4, <8 x i1> %174), !tbaa !12, !alias.scope !27, !noalias !30, !llvm.access.group !24 - %187 = trunc <8 x i64> %broadcast.splat66 to <8 x i32> - %188 = or <8 x i32> %187, - %189 = add <8 x i32> %broadcast.splat68, %188 - %190 = icmp slt <8 x i32> %189, %broadcast.splat70 - %191 = extractelement <8 x i32> %189, i32 0 - %192 = add nsw i32 %191, %mul.i.1 - %193 = sext i32 %192 to i64 - %194 = getelementptr inbounds float, float* %0, i64 %193 - %195 = bitcast float* %194 to <8 x float>* - %wide.masked.load71.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %195, i32 4, <8 x i1> %190, <8 x float> undef), !tbaa !12, !alias.scope !27, !noalias !30 - %wide.masked.gather74.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat73, i32 4, <8 x i1> %190, <8 x float> undef), !tbaa !12, !alias.scope !33 - %196 = add nsw i32 %191, %mul16.i - %197 = sext i32 %196 to i64 - %198 = getelementptr inbounds float, float* %0, i64 %197 - %199 = bitcast float* %198 to <8 x float>* - %wide.masked.load75.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %199, i32 4, <8 x i1> %190, <8 x float> undef), !tbaa !12, !alias.scope !34 - %200 = fneg <8 x float> %wide.masked.gather74.3 - %201 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %200, <8 x float> %wide.masked.load75.3, <8 x float> %wide.masked.load71.3) - %202 = bitcast float* %194 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %201, <8 x float>* %202, i32 4, <8 x i1> %190), !tbaa !12, !alias.scope !27, !noalias !30, !llvm.access.group !24 - br label %pregion_for_end.i.1 - -pregion_for_entry.entry.i.us.1: ; preds = %if.end.r_exit.i.us.1.1, %pregion_for_entry.entry.i.us.1.preheader - %_local_id_x.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.us.1.preheader ], [ %856, %if.end.r_exit.i.us.1.1 ] - %add1.i.i.us.1 = add nuw nsw i64 %_local_id_x.0.us.1, %mul.i.i - %203 = trunc i64 %add1.i.i.us.1 to i32 - %conv2.i.us.1 = add i32 %add.i, %203 - %cmp9.i.us.1 = icmp slt i32 %conv2.i.us.1, %2 - br i1 %cmp9.i.us.1, label %if.then.i.us.1, label %if.end.r_exit.i.us.1 - -if.then.i.us.1: ; preds = %pregion_for_entry.entry.i.us.1 - %add11.i.us.1 = add nsw i32 %conv2.i.us.1, %mul.i.1 - %idxprom.i.us.1 = sext i32 %add11.i.us.1 to i64 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1 - %204 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %205 = load float, float* %arrayidx15.i.1, align 4, !tbaa !12 - %add17.i.us.1 = add nsw i32 %conv2.i.us.1, %mul16.i - %idxprom18.i.us.1 = sext i32 %add17.i.us.1 to i64 - %arrayidx19.i.us.1 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.1 - %206 = load float, float* %arrayidx19.i.us.1, align 4, !tbaa !12 - %neg.i.us.1 = fneg float %205 - %207 = tail call float @llvm.fmuladd.f32(float %neg.i.us.1, float %206, float %204) #6 - store float %207, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.1 - -if.end.r_exit.i.us.1: ; preds = %if.then.i.us.1, %pregion_for_entry.entry.i.us.1 - %208 = or i64 %_local_id_x.0.us.1, 1 - %add1.i.i.us.1.1 = add nuw nsw i64 %208, %mul.i.i - %209 = trunc i64 %add1.i.i.us.1.1 to i32 - %conv2.i.us.1.1 = add i32 %add.i, %209 - %cmp9.i.us.1.1 = icmp slt i32 %conv2.i.us.1.1, %2 - br i1 %cmp9.i.us.1.1, label %if.then.i.us.1.1, label %if.end.r_exit.i.us.1.1 - -pregion_for_end.i.1.loopexit: ; preds = %if.end.r_exit.i.us.1.1 - br label %pregion_for_end.i.1 - -pregion_for_end.i.1: ; preds = %pregion_for_end.i.1.loopexit, %vector.ph58, %pregion_for_end.i - %210 = trunc i64 %mul3.i.i to i32 - %211 = or i32 %210, 2 - %conv7.i.2 = add i32 %add.i, %211 - %cmp.i.2 = icmp slt i32 %conv7.i.2, %2 - %mul.i.2 = mul nsw i32 %conv7.i.2, %2 - %add13.i.2 = add nsw i32 %mul.i.2, %1 - %idxprom14.i.2 = sext i32 %add13.i.2 to i64 - %arrayidx15.i.2 = getelementptr inbounds float, float* %0, i64 %idxprom14.i.2 - br i1 %cmp.i.2, label %vector.scevcheck86, label %pregion_for_end.i.2 - -vector.scevcheck86: ; preds = %pregion_for_end.i.1 - %212 = mul i32 %conv7.i.2, %2 - %213 = add i32 %212, %1 - %214 = trunc i64 %4 to i32 - %215 = shl i32 %214, 5 - %216 = add i32 %213, %215 - %217 = add i32 %216, 1 - %218 = add i32 %216, 32 - %219 = icmp slt i32 %218, %217 - %220 = add i32 %2, 1 - %221 = mul i32 %220, %1 - %222 = add i32 %221, %215 - %223 = add i32 %222, 1 - %224 = add i32 %222, 32 - %225 = icmp slt i32 %224, %223 - %226 = or i1 %219, %225 - br i1 %226, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.memcheck108 - -pregion_for_entry.entry.i.us.2.preheader: ; preds = %vector.memcheck108, %vector.scevcheck86 - br label %pregion_for_entry.entry.i.us.2 - -vector.memcheck108: ; preds = %vector.scevcheck86 - %227 = mul i32 %conv7.i.2, %2 - %228 = add i32 %227, %1 - %229 = trunc i64 %4 to i32 - %230 = shl i32 %229, 5 - %231 = add i32 %228, %230 - %232 = add i32 %231, 1 - %233 = sext i32 %232 to i64 - %scevgep88 = getelementptr float, float* %0, i64 %233 - %scevgep8889 = bitcast float* %scevgep88 to i8* - %234 = add nsw i64 %233, 32 - %scevgep90 = getelementptr float, float* %0, i64 %234 - %scevgep92 = getelementptr float, float* %0, i64 %idxprom14.i.2 - %scevgep9293 = bitcast float* %scevgep92 to i8* - %uglygep94 = getelementptr i8, i8* %scevgep9293, i64 1 - %235 = add i32 %2, 1 - %236 = mul i32 %235, %1 - %237 = add i32 %236, %230 - %238 = add i32 %237, 1 - %239 = sext i32 %238 to i64 - %scevgep95 = getelementptr float, float* %0, i64 %239 - %240 = add nsw i64 %239, 32 - %scevgep97 = getelementptr float, float* %0, i64 %240 - %bound0100 = icmp ugt i8* %uglygep94, %scevgep8889 - %bound1101 = icmp ult float* %arrayidx15.i.2, %scevgep90 - %found.conflict102 = and i1 %bound0100, %bound1101 - %bound0103 = icmp ult float* %scevgep88, %scevgep97 - %bound1104 = icmp ult float* %scevgep95, %scevgep90 - %found.conflict105 = and i1 %bound0103, %bound1104 - %conflict.rdx106 = or i1 %found.conflict102, %found.conflict105 - br i1 %conflict.rdx106, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.ph109 - -vector.ph109: ; preds = %vector.memcheck108 - %broadcast.splatinsert116 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat117 = shufflevector <8 x i64> %broadcast.splatinsert116, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert118 = insertelement <8 x i32> undef, i32 %add.i, i32 0 - %broadcast.splat119 = shufflevector <8 x i32> %broadcast.splatinsert118, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert120 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat121 = shufflevector <8 x i32> %broadcast.splatinsert120, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert123 = insertelement <8 x float*> undef, float* %arrayidx15.i.2, i32 0 - %broadcast.splat124 = shufflevector <8 x float*> %broadcast.splatinsert123, <8 x float*> undef, <8 x i32> zeroinitializer - %241 = trunc <8 x i64> %broadcast.splat117 to <8 x i32> - %242 = or <8 x i32> %241, - %243 = add <8 x i32> %broadcast.splat119, %242 - %244 = icmp slt <8 x i32> %243, %broadcast.splat121 - %245 = extractelement <8 x i32> %243, i32 0 - %246 = add nsw i32 %245, %mul.i.2 - %247 = sext i32 %246 to i64 - %248 = getelementptr inbounds float, float* %0, i64 %247 - %249 = bitcast float* %248 to <8 x float>* - %wide.masked.load122 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %249, i32 4, <8 x i1> %244, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !38 - %wide.masked.gather125 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat124, i32 4, <8 x i1> %244, <8 x float> undef), !tbaa !12, !alias.scope !41 - %250 = add nsw i32 %245, %mul16.i - %251 = sext i32 %250 to i64 - %252 = getelementptr inbounds float, float* %0, i64 %251 - %253 = bitcast float* %252 to <8 x float>* - %wide.masked.load126 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %253, i32 4, <8 x i1> %244, <8 x float> undef), !tbaa !12, !alias.scope !42 - %254 = fneg <8 x float> %wide.masked.gather125 - %255 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %254, <8 x float> %wide.masked.load126, <8 x float> %wide.masked.load122) - %256 = bitcast float* %248 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %255, <8 x float>* %256, i32 4, <8 x i1> %244), !tbaa !12, !alias.scope !35, !noalias !38, !llvm.access.group !24 - %257 = trunc <8 x i64> %broadcast.splat117 to <8 x i32> - %258 = or <8 x i32> %257, - %259 = add <8 x i32> %broadcast.splat119, %258 - %260 = icmp slt <8 x i32> %259, %broadcast.splat121 - %261 = extractelement <8 x i32> %259, i32 0 - %262 = add nsw i32 %261, %mul.i.2 - %263 = sext i32 %262 to i64 - %264 = getelementptr inbounds float, float* %0, i64 %263 - %265 = bitcast float* %264 to <8 x float>* - %wide.masked.load122.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %265, i32 4, <8 x i1> %260, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !38 - %wide.masked.gather125.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat124, i32 4, <8 x i1> %260, <8 x float> undef), !tbaa !12, !alias.scope !41 - %266 = add nsw i32 %261, %mul16.i - %267 = sext i32 %266 to i64 - %268 = getelementptr inbounds float, float* %0, i64 %267 - %269 = bitcast float* %268 to <8 x float>* - %wide.masked.load126.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %269, i32 4, <8 x i1> %260, <8 x float> undef), !tbaa !12, !alias.scope !42 - %270 = fneg <8 x float> %wide.masked.gather125.1 - %271 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %270, <8 x float> %wide.masked.load126.1, <8 x float> %wide.masked.load122.1) - %272 = bitcast float* %264 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %271, <8 x float>* %272, i32 4, <8 x i1> %260), !tbaa !12, !alias.scope !35, !noalias !38, !llvm.access.group !24 - %273 = trunc <8 x i64> %broadcast.splat117 to <8 x i32> - %274 = or <8 x i32> %273, - %275 = add <8 x i32> %broadcast.splat119, %274 - %276 = icmp slt <8 x i32> %275, %broadcast.splat121 - %277 = extractelement <8 x i32> %275, i32 0 - %278 = add nsw i32 %277, %mul.i.2 - %279 = sext i32 %278 to i64 - %280 = getelementptr inbounds float, float* %0, i64 %279 - %281 = bitcast float* %280 to <8 x float>* - %wide.masked.load122.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %281, i32 4, <8 x i1> %276, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !38 - %wide.masked.gather125.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat124, i32 4, <8 x i1> %276, <8 x float> undef), !tbaa !12, !alias.scope !41 - %282 = add nsw i32 %277, %mul16.i - %283 = sext i32 %282 to i64 - %284 = getelementptr inbounds float, float* %0, i64 %283 - %285 = bitcast float* %284 to <8 x float>* - %wide.masked.load126.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %285, i32 4, <8 x i1> %276, <8 x float> undef), !tbaa !12, !alias.scope !42 - %286 = fneg <8 x float> %wide.masked.gather125.2 - %287 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %286, <8 x float> %wide.masked.load126.2, <8 x float> %wide.masked.load122.2) - %288 = bitcast float* %280 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %287, <8 x float>* %288, i32 4, <8 x i1> %276), !tbaa !12, !alias.scope !35, !noalias !38, !llvm.access.group !24 - %289 = trunc <8 x i64> %broadcast.splat117 to <8 x i32> - %290 = or <8 x i32> %289, - %291 = add <8 x i32> %broadcast.splat119, %290 - %292 = icmp slt <8 x i32> %291, %broadcast.splat121 - %293 = extractelement <8 x i32> %291, i32 0 - %294 = add nsw i32 %293, %mul.i.2 - %295 = sext i32 %294 to i64 - %296 = getelementptr inbounds float, float* %0, i64 %295 - %297 = bitcast float* %296 to <8 x float>* - %wide.masked.load122.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %297, i32 4, <8 x i1> %292, <8 x float> undef), !tbaa !12, !alias.scope !35, !noalias !38 - %wide.masked.gather125.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat124, i32 4, <8 x i1> %292, <8 x float> undef), !tbaa !12, !alias.scope !41 - %298 = add nsw i32 %293, %mul16.i - %299 = sext i32 %298 to i64 - %300 = getelementptr inbounds float, float* %0, i64 %299 - %301 = bitcast float* %300 to <8 x float>* - %wide.masked.load126.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %301, i32 4, <8 x i1> %292, <8 x float> undef), !tbaa !12, !alias.scope !42 - %302 = fneg <8 x float> %wide.masked.gather125.3 - %303 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %302, <8 x float> %wide.masked.load126.3, <8 x float> %wide.masked.load122.3) - %304 = bitcast float* %296 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %303, <8 x float>* %304, i32 4, <8 x i1> %292), !tbaa !12, !alias.scope !35, !noalias !38, !llvm.access.group !24 - br label %pregion_for_end.i.2 - -pregion_for_entry.entry.i.us.2: ; preds = %if.end.r_exit.i.us.2.1, %pregion_for_entry.entry.i.us.2.preheader - %_local_id_x.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.us.2.preheader ], [ %851, %if.end.r_exit.i.us.2.1 ] - %add1.i.i.us.2 = add nuw nsw i64 %_local_id_x.0.us.2, %mul.i.i - %305 = trunc i64 %add1.i.i.us.2 to i32 - %conv2.i.us.2 = add i32 %add.i, %305 - %cmp9.i.us.2 = icmp slt i32 %conv2.i.us.2, %2 - br i1 %cmp9.i.us.2, label %if.then.i.us.2, label %if.end.r_exit.i.us.2 - -if.then.i.us.2: ; preds = %pregion_for_entry.entry.i.us.2 - %add11.i.us.2 = add nsw i32 %conv2.i.us.2, %mul.i.2 - %idxprom.i.us.2 = sext i32 %add11.i.us.2 to i64 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.2 - %306 = load float, float* %arrayidx.i.us.2, align 4, !tbaa !12 - %307 = load float, float* %arrayidx15.i.2, align 4, !tbaa !12 - %add17.i.us.2 = add nsw i32 %conv2.i.us.2, %mul16.i - %idxprom18.i.us.2 = sext i32 %add17.i.us.2 to i64 - %arrayidx19.i.us.2 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.2 - %308 = load float, float* %arrayidx19.i.us.2, align 4, !tbaa !12 - %neg.i.us.2 = fneg float %307 - %309 = tail call float @llvm.fmuladd.f32(float %neg.i.us.2, float %308, float %306) #6 - store float %309, float* %arrayidx.i.us.2, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.2 - -if.end.r_exit.i.us.2: ; preds = %if.then.i.us.2, %pregion_for_entry.entry.i.us.2 - %310 = or i64 %_local_id_x.0.us.2, 1 - %add1.i.i.us.2.1 = add nuw nsw i64 %310, %mul.i.i - %311 = trunc i64 %add1.i.i.us.2.1 to i32 - %conv2.i.us.2.1 = add i32 %add.i, %311 - %cmp9.i.us.2.1 = icmp slt i32 %conv2.i.us.2.1, %2 - br i1 %cmp9.i.us.2.1, label %if.then.i.us.2.1, label %if.end.r_exit.i.us.2.1 - -pregion_for_end.i.2.loopexit: ; preds = %if.end.r_exit.i.us.2.1 - br label %pregion_for_end.i.2 - -pregion_for_end.i.2: ; preds = %pregion_for_end.i.2.loopexit, %vector.ph109, %pregion_for_end.i.1 - %312 = trunc i64 %mul3.i.i to i32 - %313 = or i32 %312, 3 - %conv7.i.3 = add i32 %add.i, %313 - %cmp.i.3 = icmp slt i32 %conv7.i.3, %2 - %mul.i.3 = mul nsw i32 %conv7.i.3, %2 - %add13.i.3 = add nsw i32 %mul.i.3, %1 - %idxprom14.i.3 = sext i32 %add13.i.3 to i64 - %arrayidx15.i.3 = getelementptr inbounds float, float* %0, i64 %idxprom14.i.3 - br i1 %cmp.i.3, label %vector.scevcheck137, label %pregion_for_end.i.3 - -vector.scevcheck137: ; preds = %pregion_for_end.i.2 - %314 = mul i32 %conv7.i.3, %2 - %315 = add i32 %314, %1 - %316 = trunc i64 %4 to i32 - %317 = shl i32 %316, 5 - %318 = add i32 %315, %317 - %319 = add i32 %318, 1 - %320 = add i32 %318, 32 - %321 = icmp slt i32 %320, %319 - %322 = add i32 %2, 1 - %323 = mul i32 %322, %1 - %324 = add i32 %323, %317 - %325 = add i32 %324, 1 - %326 = add i32 %324, 32 - %327 = icmp slt i32 %326, %325 - %328 = or i1 %321, %327 - br i1 %328, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.memcheck159 - -pregion_for_entry.entry.i.us.3.preheader: ; preds = %vector.memcheck159, %vector.scevcheck137 - br label %pregion_for_entry.entry.i.us.3 - -vector.memcheck159: ; preds = %vector.scevcheck137 - %329 = mul i32 %conv7.i.3, %2 - %330 = add i32 %329, %1 - %331 = trunc i64 %4 to i32 - %332 = shl i32 %331, 5 - %333 = add i32 %330, %332 - %334 = add i32 %333, 1 - %335 = sext i32 %334 to i64 - %scevgep139 = getelementptr float, float* %0, i64 %335 - %scevgep139140 = bitcast float* %scevgep139 to i8* - %336 = add nsw i64 %335, 32 - %scevgep141 = getelementptr float, float* %0, i64 %336 - %scevgep143 = getelementptr float, float* %0, i64 %idxprom14.i.3 - %scevgep143144 = bitcast float* %scevgep143 to i8* - %uglygep145 = getelementptr i8, i8* %scevgep143144, i64 1 - %337 = add i32 %2, 1 - %338 = mul i32 %337, %1 - %339 = add i32 %338, %332 - %340 = add i32 %339, 1 - %341 = sext i32 %340 to i64 - %scevgep146 = getelementptr float, float* %0, i64 %341 - %342 = add nsw i64 %341, 32 - %scevgep148 = getelementptr float, float* %0, i64 %342 - %bound0151 = icmp ugt i8* %uglygep145, %scevgep139140 - %bound1152 = icmp ult float* %arrayidx15.i.3, %scevgep141 - %found.conflict153 = and i1 %bound0151, %bound1152 - %bound0154 = icmp ult float* %scevgep139, %scevgep148 - %bound1155 = icmp ult float* %scevgep146, %scevgep141 - %found.conflict156 = and i1 %bound0154, %bound1155 - %conflict.rdx157 = or i1 %found.conflict153, %found.conflict156 - br i1 %conflict.rdx157, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.ph160 - -vector.ph160: ; preds = %vector.memcheck159 - %broadcast.splatinsert167 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat168 = shufflevector <8 x i64> %broadcast.splatinsert167, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert169 = insertelement <8 x i32> undef, i32 %add.i, i32 0 - %broadcast.splat170 = shufflevector <8 x i32> %broadcast.splatinsert169, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert171 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat172 = shufflevector <8 x i32> %broadcast.splatinsert171, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert174 = insertelement <8 x float*> undef, float* %arrayidx15.i.3, i32 0 - %broadcast.splat175 = shufflevector <8 x float*> %broadcast.splatinsert174, <8 x float*> undef, <8 x i32> zeroinitializer - %343 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %344 = or <8 x i32> %343, - %345 = add <8 x i32> %broadcast.splat170, %344 - %346 = icmp slt <8 x i32> %345, %broadcast.splat172 - %347 = extractelement <8 x i32> %345, i32 0 - %348 = add nsw i32 %347, %mul.i.3 - %349 = sext i32 %348 to i64 - %350 = getelementptr inbounds float, float* %0, i64 %349 - %351 = bitcast float* %350 to <8 x float>* - %wide.masked.load173 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %351, i32 4, <8 x i1> %346, <8 x float> undef), !tbaa !12, !alias.scope !43, !noalias !46 - %wide.masked.gather176 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat175, i32 4, <8 x i1> %346, <8 x float> undef), !tbaa !12, !alias.scope !49 - %352 = add nsw i32 %347, %mul16.i - %353 = sext i32 %352 to i64 - %354 = getelementptr inbounds float, float* %0, i64 %353 - %355 = bitcast float* %354 to <8 x float>* - %wide.masked.load177 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %355, i32 4, <8 x i1> %346, <8 x float> undef), !tbaa !12, !alias.scope !50 - %356 = fneg <8 x float> %wide.masked.gather176 - %357 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %356, <8 x float> %wide.masked.load177, <8 x float> %wide.masked.load173) - %358 = bitcast float* %350 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %357, <8 x float>* %358, i32 4, <8 x i1> %346), !tbaa !12, !alias.scope !43, !noalias !46, !llvm.access.group !24 - %359 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %360 = or <8 x i32> %359, - %361 = add <8 x i32> %broadcast.splat170, %360 - %362 = icmp slt <8 x i32> %361, %broadcast.splat172 - %363 = extractelement <8 x i32> %361, i32 0 - %364 = add nsw i32 %363, %mul.i.3 - %365 = sext i32 %364 to i64 - %366 = getelementptr inbounds float, float* %0, i64 %365 - %367 = bitcast float* %366 to <8 x float>* - %wide.masked.load173.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %367, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !43, !noalias !46 - %wide.masked.gather176.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat175, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !49 - %368 = add nsw i32 %363, %mul16.i - %369 = sext i32 %368 to i64 - %370 = getelementptr inbounds float, float* %0, i64 %369 - %371 = bitcast float* %370 to <8 x float>* - %wide.masked.load177.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %371, i32 4, <8 x i1> %362, <8 x float> undef), !tbaa !12, !alias.scope !50 - %372 = fneg <8 x float> %wide.masked.gather176.1 - %373 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %372, <8 x float> %wide.masked.load177.1, <8 x float> %wide.masked.load173.1) - %374 = bitcast float* %366 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %373, <8 x float>* %374, i32 4, <8 x i1> %362), !tbaa !12, !alias.scope !43, !noalias !46, !llvm.access.group !24 - %375 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %376 = or <8 x i32> %375, - %377 = add <8 x i32> %broadcast.splat170, %376 - %378 = icmp slt <8 x i32> %377, %broadcast.splat172 - %379 = extractelement <8 x i32> %377, i32 0 - %380 = add nsw i32 %379, %mul.i.3 - %381 = sext i32 %380 to i64 - %382 = getelementptr inbounds float, float* %0, i64 %381 - %383 = bitcast float* %382 to <8 x float>* - %wide.masked.load173.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %383, i32 4, <8 x i1> %378, <8 x float> undef), !tbaa !12, !alias.scope !43, !noalias !46 - %wide.masked.gather176.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat175, i32 4, <8 x i1> %378, <8 x float> undef), !tbaa !12, !alias.scope !49 - %384 = add nsw i32 %379, %mul16.i - %385 = sext i32 %384 to i64 - %386 = getelementptr inbounds float, float* %0, i64 %385 - %387 = bitcast float* %386 to <8 x float>* - %wide.masked.load177.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %387, i32 4, <8 x i1> %378, <8 x float> undef), !tbaa !12, !alias.scope !50 - %388 = fneg <8 x float> %wide.masked.gather176.2 - %389 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %388, <8 x float> %wide.masked.load177.2, <8 x float> %wide.masked.load173.2) - %390 = bitcast float* %382 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %389, <8 x float>* %390, i32 4, <8 x i1> %378), !tbaa !12, !alias.scope !43, !noalias !46, !llvm.access.group !24 - %391 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %392 = or <8 x i32> %391, - %393 = add <8 x i32> %broadcast.splat170, %392 - %394 = icmp slt <8 x i32> %393, %broadcast.splat172 - %395 = extractelement <8 x i32> %393, i32 0 - %396 = add nsw i32 %395, %mul.i.3 - %397 = sext i32 %396 to i64 - %398 = getelementptr inbounds float, float* %0, i64 %397 - %399 = bitcast float* %398 to <8 x float>* - %wide.masked.load173.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %399, i32 4, <8 x i1> %394, <8 x float> undef), !tbaa !12, !alias.scope !43, !noalias !46 - %wide.masked.gather176.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat175, i32 4, <8 x i1> %394, <8 x float> undef), !tbaa !12, !alias.scope !49 - %400 = add nsw i32 %395, %mul16.i - %401 = sext i32 %400 to i64 - %402 = getelementptr inbounds float, float* %0, i64 %401 - %403 = bitcast float* %402 to <8 x float>* - %wide.masked.load177.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %403, i32 4, <8 x i1> %394, <8 x float> undef), !tbaa !12, !alias.scope !50 - %404 = fneg <8 x float> %wide.masked.gather176.3 - %405 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %404, <8 x float> %wide.masked.load177.3, <8 x float> %wide.masked.load173.3) - %406 = bitcast float* %398 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %405, <8 x float>* %406, i32 4, <8 x i1> %394), !tbaa !12, !alias.scope !43, !noalias !46, !llvm.access.group !24 - br label %pregion_for_end.i.3 - -pregion_for_entry.entry.i.us.3: ; preds = %if.end.r_exit.i.us.3.1, %pregion_for_entry.entry.i.us.3.preheader - %_local_id_x.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.us.3.preheader ], [ %846, %if.end.r_exit.i.us.3.1 ] - %add1.i.i.us.3 = add nuw nsw i64 %_local_id_x.0.us.3, %mul.i.i - %407 = trunc i64 %add1.i.i.us.3 to i32 - %conv2.i.us.3 = add i32 %add.i, %407 - %cmp9.i.us.3 = icmp slt i32 %conv2.i.us.3, %2 - br i1 %cmp9.i.us.3, label %if.then.i.us.3, label %if.end.r_exit.i.us.3 - -if.then.i.us.3: ; preds = %pregion_for_entry.entry.i.us.3 - %add11.i.us.3 = add nsw i32 %conv2.i.us.3, %mul.i.3 - %idxprom.i.us.3 = sext i32 %add11.i.us.3 to i64 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.3 - %408 = load float, float* %arrayidx.i.us.3, align 4, !tbaa !12 - %409 = load float, float* %arrayidx15.i.3, align 4, !tbaa !12 - %add17.i.us.3 = add nsw i32 %conv2.i.us.3, %mul16.i - %idxprom18.i.us.3 = sext i32 %add17.i.us.3 to i64 - %arrayidx19.i.us.3 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.3 - %410 = load float, float* %arrayidx19.i.us.3, align 4, !tbaa !12 - %neg.i.us.3 = fneg float %409 - %411 = tail call float @llvm.fmuladd.f32(float %neg.i.us.3, float %410, float %408) #6 - store float %411, float* %arrayidx.i.us.3, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.3 - -if.end.r_exit.i.us.3: ; preds = %if.then.i.us.3, %pregion_for_entry.entry.i.us.3 - %412 = or i64 %_local_id_x.0.us.3, 1 - %add1.i.i.us.3.1 = add nuw nsw i64 %412, %mul.i.i - %413 = trunc i64 %add1.i.i.us.3.1 to i32 - %conv2.i.us.3.1 = add i32 %add.i, %413 - %cmp9.i.us.3.1 = icmp slt i32 %conv2.i.us.3.1, %2 - br i1 %cmp9.i.us.3.1, label %if.then.i.us.3.1, label %if.end.r_exit.i.us.3.1 - -pregion_for_end.i.3.loopexit: ; preds = %if.end.r_exit.i.us.3.1 - br label %pregion_for_end.i.3 - -pregion_for_end.i.3: ; preds = %pregion_for_end.i.3.loopexit, %vector.ph160, %pregion_for_end.i.2 - %414 = trunc i64 %mul3.i.i to i32 - %415 = or i32 %414, 4 - %conv7.i.4 = add i32 %add.i, %415 - %cmp.i.4 = icmp slt i32 %conv7.i.4, %2 - %mul.i.4 = mul nsw i32 %conv7.i.4, %2 - %add13.i.4 = add nsw i32 %mul.i.4, %1 - %idxprom14.i.4 = sext i32 %add13.i.4 to i64 - %arrayidx15.i.4 = getelementptr inbounds float, float* %0, i64 %idxprom14.i.4 - br i1 %cmp.i.4, label %vector.scevcheck188, label %pregion_for_end.i.4 - -vector.scevcheck188: ; preds = %pregion_for_end.i.3 - %416 = mul i32 %conv7.i.4, %2 - %417 = add i32 %416, %1 - %418 = trunc i64 %4 to i32 - %419 = shl i32 %418, 5 - %420 = add i32 %417, %419 - %421 = add i32 %420, 1 - %422 = add i32 %420, 32 - %423 = icmp slt i32 %422, %421 - %424 = add i32 %2, 1 - %425 = mul i32 %424, %1 - %426 = add i32 %425, %419 - %427 = add i32 %426, 1 - %428 = add i32 %426, 32 - %429 = icmp slt i32 %428, %427 - %430 = or i1 %423, %429 - br i1 %430, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.memcheck210 - -pregion_for_entry.entry.i.us.4.preheader: ; preds = %vector.memcheck210, %vector.scevcheck188 - br label %pregion_for_entry.entry.i.us.4 - -vector.memcheck210: ; preds = %vector.scevcheck188 - %431 = mul i32 %conv7.i.4, %2 - %432 = add i32 %431, %1 - %433 = trunc i64 %4 to i32 - %434 = shl i32 %433, 5 - %435 = add i32 %432, %434 - %436 = add i32 %435, 1 - %437 = sext i32 %436 to i64 - %scevgep190 = getelementptr float, float* %0, i64 %437 - %scevgep190191 = bitcast float* %scevgep190 to i8* - %438 = add nsw i64 %437, 32 - %scevgep192 = getelementptr float, float* %0, i64 %438 - %scevgep194 = getelementptr float, float* %0, i64 %idxprom14.i.4 - %scevgep194195 = bitcast float* %scevgep194 to i8* - %uglygep196 = getelementptr i8, i8* %scevgep194195, i64 1 - %439 = add i32 %2, 1 - %440 = mul i32 %439, %1 - %441 = add i32 %440, %434 - %442 = add i32 %441, 1 - %443 = sext i32 %442 to i64 - %scevgep197 = getelementptr float, float* %0, i64 %443 - %444 = add nsw i64 %443, 32 - %scevgep199 = getelementptr float, float* %0, i64 %444 - %bound0202 = icmp ugt i8* %uglygep196, %scevgep190191 - %bound1203 = icmp ult float* %arrayidx15.i.4, %scevgep192 - %found.conflict204 = and i1 %bound0202, %bound1203 - %bound0205 = icmp ult float* %scevgep190, %scevgep199 - %bound1206 = icmp ult float* %scevgep197, %scevgep192 - %found.conflict207 = and i1 %bound0205, %bound1206 - %conflict.rdx208 = or i1 %found.conflict204, %found.conflict207 - br i1 %conflict.rdx208, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.ph211 - -vector.ph211: ; preds = %vector.memcheck210 - %broadcast.splatinsert218 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat219 = shufflevector <8 x i64> %broadcast.splatinsert218, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert220 = insertelement <8 x i32> undef, i32 %add.i, i32 0 - %broadcast.splat221 = shufflevector <8 x i32> %broadcast.splatinsert220, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert222 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat223 = shufflevector <8 x i32> %broadcast.splatinsert222, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert225 = insertelement <8 x float*> undef, float* %arrayidx15.i.4, i32 0 - %broadcast.splat226 = shufflevector <8 x float*> %broadcast.splatinsert225, <8 x float*> undef, <8 x i32> zeroinitializer - %445 = trunc <8 x i64> %broadcast.splat219 to <8 x i32> - %446 = or <8 x i32> %445, - %447 = add <8 x i32> %broadcast.splat221, %446 - %448 = icmp slt <8 x i32> %447, %broadcast.splat223 - %449 = extractelement <8 x i32> %447, i32 0 - %450 = add nsw i32 %449, %mul.i.4 - %451 = sext i32 %450 to i64 - %452 = getelementptr inbounds float, float* %0, i64 %451 - %453 = bitcast float* %452 to <8 x float>* - %wide.masked.load224 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %453, i32 4, <8 x i1> %448, <8 x float> undef), !tbaa !12, !alias.scope !51, !noalias !54 - %wide.masked.gather227 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat226, i32 4, <8 x i1> %448, <8 x float> undef), !tbaa !12, !alias.scope !57 - %454 = add nsw i32 %449, %mul16.i - %455 = sext i32 %454 to i64 - %456 = getelementptr inbounds float, float* %0, i64 %455 - %457 = bitcast float* %456 to <8 x float>* - %wide.masked.load228 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %457, i32 4, <8 x i1> %448, <8 x float> undef), !tbaa !12, !alias.scope !58 - %458 = fneg <8 x float> %wide.masked.gather227 - %459 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %458, <8 x float> %wide.masked.load228, <8 x float> %wide.masked.load224) - %460 = bitcast float* %452 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %459, <8 x float>* %460, i32 4, <8 x i1> %448), !tbaa !12, !alias.scope !51, !noalias !54, !llvm.access.group !24 - %461 = trunc <8 x i64> %broadcast.splat219 to <8 x i32> - %462 = or <8 x i32> %461, - %463 = add <8 x i32> %broadcast.splat221, %462 - %464 = icmp slt <8 x i32> %463, %broadcast.splat223 - %465 = extractelement <8 x i32> %463, i32 0 - %466 = add nsw i32 %465, %mul.i.4 - %467 = sext i32 %466 to i64 - %468 = getelementptr inbounds float, float* %0, i64 %467 - %469 = bitcast float* %468 to <8 x float>* - %wide.masked.load224.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %469, i32 4, <8 x i1> %464, <8 x float> undef), !tbaa !12, !alias.scope !51, !noalias !54 - %wide.masked.gather227.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat226, i32 4, <8 x i1> %464, <8 x float> undef), !tbaa !12, !alias.scope !57 - %470 = add nsw i32 %465, %mul16.i - %471 = sext i32 %470 to i64 - %472 = getelementptr inbounds float, float* %0, i64 %471 - %473 = bitcast float* %472 to <8 x float>* - %wide.masked.load228.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %473, i32 4, <8 x i1> %464, <8 x float> undef), !tbaa !12, !alias.scope !58 - %474 = fneg <8 x float> %wide.masked.gather227.1 - %475 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %474, <8 x float> %wide.masked.load228.1, <8 x float> %wide.masked.load224.1) - %476 = bitcast float* %468 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %475, <8 x float>* %476, i32 4, <8 x i1> %464), !tbaa !12, !alias.scope !51, !noalias !54, !llvm.access.group !24 - %477 = trunc <8 x i64> %broadcast.splat219 to <8 x i32> - %478 = or <8 x i32> %477, - %479 = add <8 x i32> %broadcast.splat221, %478 - %480 = icmp slt <8 x i32> %479, %broadcast.splat223 - %481 = extractelement <8 x i32> %479, i32 0 - %482 = add nsw i32 %481, %mul.i.4 - %483 = sext i32 %482 to i64 - %484 = getelementptr inbounds float, float* %0, i64 %483 - %485 = bitcast float* %484 to <8 x float>* - %wide.masked.load224.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %485, i32 4, <8 x i1> %480, <8 x float> undef), !tbaa !12, !alias.scope !51, !noalias !54 - %wide.masked.gather227.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat226, i32 4, <8 x i1> %480, <8 x float> undef), !tbaa !12, !alias.scope !57 - %486 = add nsw i32 %481, %mul16.i - %487 = sext i32 %486 to i64 - %488 = getelementptr inbounds float, float* %0, i64 %487 - %489 = bitcast float* %488 to <8 x float>* - %wide.masked.load228.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %489, i32 4, <8 x i1> %480, <8 x float> undef), !tbaa !12, !alias.scope !58 - %490 = fneg <8 x float> %wide.masked.gather227.2 - %491 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %490, <8 x float> %wide.masked.load228.2, <8 x float> %wide.masked.load224.2) - %492 = bitcast float* %484 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %491, <8 x float>* %492, i32 4, <8 x i1> %480), !tbaa !12, !alias.scope !51, !noalias !54, !llvm.access.group !24 - %493 = trunc <8 x i64> %broadcast.splat219 to <8 x i32> - %494 = or <8 x i32> %493, - %495 = add <8 x i32> %broadcast.splat221, %494 - %496 = icmp slt <8 x i32> %495, %broadcast.splat223 - %497 = extractelement <8 x i32> %495, i32 0 - %498 = add nsw i32 %497, %mul.i.4 - %499 = sext i32 %498 to i64 - %500 = getelementptr inbounds float, float* %0, i64 %499 - %501 = bitcast float* %500 to <8 x float>* - %wide.masked.load224.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %501, i32 4, <8 x i1> %496, <8 x float> undef), !tbaa !12, !alias.scope !51, !noalias !54 - %wide.masked.gather227.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat226, i32 4, <8 x i1> %496, <8 x float> undef), !tbaa !12, !alias.scope !57 - %502 = add nsw i32 %497, %mul16.i - %503 = sext i32 %502 to i64 - %504 = getelementptr inbounds float, float* %0, i64 %503 - %505 = bitcast float* %504 to <8 x float>* - %wide.masked.load228.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %505, i32 4, <8 x i1> %496, <8 x float> undef), !tbaa !12, !alias.scope !58 - %506 = fneg <8 x float> %wide.masked.gather227.3 - %507 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %506, <8 x float> %wide.masked.load228.3, <8 x float> %wide.masked.load224.3) - %508 = bitcast float* %500 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %507, <8 x float>* %508, i32 4, <8 x i1> %496), !tbaa !12, !alias.scope !51, !noalias !54, !llvm.access.group !24 - br label %pregion_for_end.i.4 - -pregion_for_entry.entry.i.us.4: ; preds = %if.end.r_exit.i.us.4.1, %pregion_for_entry.entry.i.us.4.preheader - %_local_id_x.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.us.4.preheader ], [ %841, %if.end.r_exit.i.us.4.1 ] - %add1.i.i.us.4 = add nuw nsw i64 %_local_id_x.0.us.4, %mul.i.i - %509 = trunc i64 %add1.i.i.us.4 to i32 - %conv2.i.us.4 = add i32 %add.i, %509 - %cmp9.i.us.4 = icmp slt i32 %conv2.i.us.4, %2 - br i1 %cmp9.i.us.4, label %if.then.i.us.4, label %if.end.r_exit.i.us.4 - -if.then.i.us.4: ; preds = %pregion_for_entry.entry.i.us.4 - %add11.i.us.4 = add nsw i32 %conv2.i.us.4, %mul.i.4 - %idxprom.i.us.4 = sext i32 %add11.i.us.4 to i64 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.4 - %510 = load float, float* %arrayidx.i.us.4, align 4, !tbaa !12 - %511 = load float, float* %arrayidx15.i.4, align 4, !tbaa !12 - %add17.i.us.4 = add nsw i32 %conv2.i.us.4, %mul16.i - %idxprom18.i.us.4 = sext i32 %add17.i.us.4 to i64 - %arrayidx19.i.us.4 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.4 - %512 = load float, float* %arrayidx19.i.us.4, align 4, !tbaa !12 - %neg.i.us.4 = fneg float %511 - %513 = tail call float @llvm.fmuladd.f32(float %neg.i.us.4, float %512, float %510) #6 - store float %513, float* %arrayidx.i.us.4, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.4 - -if.end.r_exit.i.us.4: ; preds = %if.then.i.us.4, %pregion_for_entry.entry.i.us.4 - %514 = or i64 %_local_id_x.0.us.4, 1 - %add1.i.i.us.4.1 = add nuw nsw i64 %514, %mul.i.i - %515 = trunc i64 %add1.i.i.us.4.1 to i32 - %conv2.i.us.4.1 = add i32 %add.i, %515 - %cmp9.i.us.4.1 = icmp slt i32 %conv2.i.us.4.1, %2 - br i1 %cmp9.i.us.4.1, label %if.then.i.us.4.1, label %if.end.r_exit.i.us.4.1 - -pregion_for_end.i.4.loopexit: ; preds = %if.end.r_exit.i.us.4.1 - br label %pregion_for_end.i.4 - -pregion_for_end.i.4: ; preds = %pregion_for_end.i.4.loopexit, %vector.ph211, %pregion_for_end.i.3 - %516 = trunc i64 %mul3.i.i to i32 - %517 = or i32 %516, 5 - %conv7.i.5 = add i32 %add.i, %517 - %cmp.i.5 = icmp slt i32 %conv7.i.5, %2 - %mul.i.5 = mul nsw i32 %conv7.i.5, %2 - %add13.i.5 = add nsw i32 %mul.i.5, %1 - %idxprom14.i.5 = sext i32 %add13.i.5 to i64 - %arrayidx15.i.5 = getelementptr inbounds float, float* %0, i64 %idxprom14.i.5 - br i1 %cmp.i.5, label %vector.scevcheck239, label %pregion_for_end.i.5 - -vector.scevcheck239: ; preds = %pregion_for_end.i.4 - %518 = mul i32 %conv7.i.5, %2 - %519 = add i32 %518, %1 - %520 = trunc i64 %4 to i32 - %521 = shl i32 %520, 5 - %522 = add i32 %519, %521 - %523 = add i32 %522, 1 - %524 = add i32 %522, 32 - %525 = icmp slt i32 %524, %523 - %526 = add i32 %2, 1 - %527 = mul i32 %526, %1 - %528 = add i32 %527, %521 - %529 = add i32 %528, 1 - %530 = add i32 %528, 32 - %531 = icmp slt i32 %530, %529 - %532 = or i1 %525, %531 - br i1 %532, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.memcheck261 - -pregion_for_entry.entry.i.us.5.preheader: ; preds = %vector.memcheck261, %vector.scevcheck239 - br label %pregion_for_entry.entry.i.us.5 - -vector.memcheck261: ; preds = %vector.scevcheck239 - %533 = mul i32 %conv7.i.5, %2 - %534 = add i32 %533, %1 - %535 = trunc i64 %4 to i32 - %536 = shl i32 %535, 5 - %537 = add i32 %534, %536 - %538 = add i32 %537, 1 - %539 = sext i32 %538 to i64 - %scevgep241 = getelementptr float, float* %0, i64 %539 - %scevgep241242 = bitcast float* %scevgep241 to i8* - %540 = add nsw i64 %539, 32 - %scevgep243 = getelementptr float, float* %0, i64 %540 - %scevgep245 = getelementptr float, float* %0, i64 %idxprom14.i.5 - %scevgep245246 = bitcast float* %scevgep245 to i8* - %uglygep247 = getelementptr i8, i8* %scevgep245246, i64 1 - %541 = add i32 %2, 1 - %542 = mul i32 %541, %1 - %543 = add i32 %542, %536 - %544 = add i32 %543, 1 - %545 = sext i32 %544 to i64 - %scevgep248 = getelementptr float, float* %0, i64 %545 - %546 = add nsw i64 %545, 32 - %scevgep250 = getelementptr float, float* %0, i64 %546 - %bound0253 = icmp ugt i8* %uglygep247, %scevgep241242 - %bound1254 = icmp ult float* %arrayidx15.i.5, %scevgep243 - %found.conflict255 = and i1 %bound0253, %bound1254 - %bound0256 = icmp ult float* %scevgep241, %scevgep250 - %bound1257 = icmp ult float* %scevgep248, %scevgep243 - %found.conflict258 = and i1 %bound0256, %bound1257 - %conflict.rdx259 = or i1 %found.conflict255, %found.conflict258 - br i1 %conflict.rdx259, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.ph262 - -vector.ph262: ; preds = %vector.memcheck261 - %broadcast.splatinsert269 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat270 = shufflevector <8 x i64> %broadcast.splatinsert269, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert271 = insertelement <8 x i32> undef, i32 %add.i, i32 0 - %broadcast.splat272 = shufflevector <8 x i32> %broadcast.splatinsert271, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert273 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat274 = shufflevector <8 x i32> %broadcast.splatinsert273, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert276 = insertelement <8 x float*> undef, float* %arrayidx15.i.5, i32 0 - %broadcast.splat277 = shufflevector <8 x float*> %broadcast.splatinsert276, <8 x float*> undef, <8 x i32> zeroinitializer - %547 = trunc <8 x i64> %broadcast.splat270 to <8 x i32> - %548 = or <8 x i32> %547, - %549 = add <8 x i32> %broadcast.splat272, %548 - %550 = icmp slt <8 x i32> %549, %broadcast.splat274 - %551 = extractelement <8 x i32> %549, i32 0 - %552 = add nsw i32 %551, %mul.i.5 - %553 = sext i32 %552 to i64 - %554 = getelementptr inbounds float, float* %0, i64 %553 - %555 = bitcast float* %554 to <8 x float>* - %wide.masked.load275 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %555, i32 4, <8 x i1> %550, <8 x float> undef), !tbaa !12, !alias.scope !59, !noalias !62 - %wide.masked.gather278 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat277, i32 4, <8 x i1> %550, <8 x float> undef), !tbaa !12, !alias.scope !65 - %556 = add nsw i32 %551, %mul16.i - %557 = sext i32 %556 to i64 - %558 = getelementptr inbounds float, float* %0, i64 %557 - %559 = bitcast float* %558 to <8 x float>* - %wide.masked.load279 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %559, i32 4, <8 x i1> %550, <8 x float> undef), !tbaa !12, !alias.scope !66 - %560 = fneg <8 x float> %wide.masked.gather278 - %561 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %560, <8 x float> %wide.masked.load279, <8 x float> %wide.masked.load275) - %562 = bitcast float* %554 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %561, <8 x float>* %562, i32 4, <8 x i1> %550), !tbaa !12, !alias.scope !59, !noalias !62, !llvm.access.group !24 - %563 = trunc <8 x i64> %broadcast.splat270 to <8 x i32> - %564 = or <8 x i32> %563, - %565 = add <8 x i32> %broadcast.splat272, %564 - %566 = icmp slt <8 x i32> %565, %broadcast.splat274 - %567 = extractelement <8 x i32> %565, i32 0 - %568 = add nsw i32 %567, %mul.i.5 - %569 = sext i32 %568 to i64 - %570 = getelementptr inbounds float, float* %0, i64 %569 - %571 = bitcast float* %570 to <8 x float>* - %wide.masked.load275.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %571, i32 4, <8 x i1> %566, <8 x float> undef), !tbaa !12, !alias.scope !59, !noalias !62 - %wide.masked.gather278.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat277, i32 4, <8 x i1> %566, <8 x float> undef), !tbaa !12, !alias.scope !65 - %572 = add nsw i32 %567, %mul16.i - %573 = sext i32 %572 to i64 - %574 = getelementptr inbounds float, float* %0, i64 %573 - %575 = bitcast float* %574 to <8 x float>* - %wide.masked.load279.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %575, i32 4, <8 x i1> %566, <8 x float> undef), !tbaa !12, !alias.scope !66 - %576 = fneg <8 x float> %wide.masked.gather278.1 - %577 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %576, <8 x float> %wide.masked.load279.1, <8 x float> %wide.masked.load275.1) - %578 = bitcast float* %570 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %577, <8 x float>* %578, i32 4, <8 x i1> %566), !tbaa !12, !alias.scope !59, !noalias !62, !llvm.access.group !24 - %579 = trunc <8 x i64> %broadcast.splat270 to <8 x i32> - %580 = or <8 x i32> %579, - %581 = add <8 x i32> %broadcast.splat272, %580 - %582 = icmp slt <8 x i32> %581, %broadcast.splat274 - %583 = extractelement <8 x i32> %581, i32 0 - %584 = add nsw i32 %583, %mul.i.5 - %585 = sext i32 %584 to i64 - %586 = getelementptr inbounds float, float* %0, i64 %585 - %587 = bitcast float* %586 to <8 x float>* - %wide.masked.load275.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %587, i32 4, <8 x i1> %582, <8 x float> undef), !tbaa !12, !alias.scope !59, !noalias !62 - %wide.masked.gather278.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat277, i32 4, <8 x i1> %582, <8 x float> undef), !tbaa !12, !alias.scope !65 - %588 = add nsw i32 %583, %mul16.i - %589 = sext i32 %588 to i64 - %590 = getelementptr inbounds float, float* %0, i64 %589 - %591 = bitcast float* %590 to <8 x float>* - %wide.masked.load279.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %591, i32 4, <8 x i1> %582, <8 x float> undef), !tbaa !12, !alias.scope !66 - %592 = fneg <8 x float> %wide.masked.gather278.2 - %593 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %592, <8 x float> %wide.masked.load279.2, <8 x float> %wide.masked.load275.2) - %594 = bitcast float* %586 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %593, <8 x float>* %594, i32 4, <8 x i1> %582), !tbaa !12, !alias.scope !59, !noalias !62, !llvm.access.group !24 - %595 = trunc <8 x i64> %broadcast.splat270 to <8 x i32> - %596 = or <8 x i32> %595, - %597 = add <8 x i32> %broadcast.splat272, %596 - %598 = icmp slt <8 x i32> %597, %broadcast.splat274 - %599 = extractelement <8 x i32> %597, i32 0 - %600 = add nsw i32 %599, %mul.i.5 - %601 = sext i32 %600 to i64 - %602 = getelementptr inbounds float, float* %0, i64 %601 - %603 = bitcast float* %602 to <8 x float>* - %wide.masked.load275.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %603, i32 4, <8 x i1> %598, <8 x float> undef), !tbaa !12, !alias.scope !59, !noalias !62 - %wide.masked.gather278.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat277, i32 4, <8 x i1> %598, <8 x float> undef), !tbaa !12, !alias.scope !65 - %604 = add nsw i32 %599, %mul16.i - %605 = sext i32 %604 to i64 - %606 = getelementptr inbounds float, float* %0, i64 %605 - %607 = bitcast float* %606 to <8 x float>* - %wide.masked.load279.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %607, i32 4, <8 x i1> %598, <8 x float> undef), !tbaa !12, !alias.scope !66 - %608 = fneg <8 x float> %wide.masked.gather278.3 - %609 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %608, <8 x float> %wide.masked.load279.3, <8 x float> %wide.masked.load275.3) - %610 = bitcast float* %602 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %609, <8 x float>* %610, i32 4, <8 x i1> %598), !tbaa !12, !alias.scope !59, !noalias !62, !llvm.access.group !24 - br label %pregion_for_end.i.5 - -pregion_for_entry.entry.i.us.5: ; preds = %if.end.r_exit.i.us.5.1, %pregion_for_entry.entry.i.us.5.preheader - %_local_id_x.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.us.5.preheader ], [ %836, %if.end.r_exit.i.us.5.1 ] - %add1.i.i.us.5 = add nuw nsw i64 %_local_id_x.0.us.5, %mul.i.i - %611 = trunc i64 %add1.i.i.us.5 to i32 - %conv2.i.us.5 = add i32 %add.i, %611 - %cmp9.i.us.5 = icmp slt i32 %conv2.i.us.5, %2 - br i1 %cmp9.i.us.5, label %if.then.i.us.5, label %if.end.r_exit.i.us.5 - -if.then.i.us.5: ; preds = %pregion_for_entry.entry.i.us.5 - %add11.i.us.5 = add nsw i32 %conv2.i.us.5, %mul.i.5 - %idxprom.i.us.5 = sext i32 %add11.i.us.5 to i64 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.5 - %612 = load float, float* %arrayidx.i.us.5, align 4, !tbaa !12 - %613 = load float, float* %arrayidx15.i.5, align 4, !tbaa !12 - %add17.i.us.5 = add nsw i32 %conv2.i.us.5, %mul16.i - %idxprom18.i.us.5 = sext i32 %add17.i.us.5 to i64 - %arrayidx19.i.us.5 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.5 - %614 = load float, float* %arrayidx19.i.us.5, align 4, !tbaa !12 - %neg.i.us.5 = fneg float %613 - %615 = tail call float @llvm.fmuladd.f32(float %neg.i.us.5, float %614, float %612) #6 - store float %615, float* %arrayidx.i.us.5, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.5 - -if.end.r_exit.i.us.5: ; preds = %if.then.i.us.5, %pregion_for_entry.entry.i.us.5 - %616 = or i64 %_local_id_x.0.us.5, 1 - %add1.i.i.us.5.1 = add nuw nsw i64 %616, %mul.i.i - %617 = trunc i64 %add1.i.i.us.5.1 to i32 - %conv2.i.us.5.1 = add i32 %add.i, %617 - %cmp9.i.us.5.1 = icmp slt i32 %conv2.i.us.5.1, %2 - br i1 %cmp9.i.us.5.1, label %if.then.i.us.5.1, label %if.end.r_exit.i.us.5.1 - -pregion_for_end.i.5.loopexit: ; preds = %if.end.r_exit.i.us.5.1 - br label %pregion_for_end.i.5 - -pregion_for_end.i.5: ; preds = %pregion_for_end.i.5.loopexit, %vector.ph262, %pregion_for_end.i.4 - %618 = trunc i64 %mul3.i.i to i32 - %619 = or i32 %618, 6 - %conv7.i.6 = add i32 %add.i, %619 - %cmp.i.6 = icmp slt i32 %conv7.i.6, %2 - %mul.i.6 = mul nsw i32 %conv7.i.6, %2 - %add13.i.6 = add nsw i32 %mul.i.6, %1 - %idxprom14.i.6 = sext i32 %add13.i.6 to i64 - %arrayidx15.i.6 = getelementptr inbounds float, float* %0, i64 %idxprom14.i.6 - br i1 %cmp.i.6, label %vector.scevcheck290, label %pregion_for_end.i.6 - -vector.scevcheck290: ; preds = %pregion_for_end.i.5 - %620 = mul i32 %conv7.i.6, %2 - %621 = add i32 %620, %1 - %622 = trunc i64 %4 to i32 - %623 = shl i32 %622, 5 - %624 = add i32 %621, %623 - %625 = add i32 %624, 1 - %626 = add i32 %624, 32 - %627 = icmp slt i32 %626, %625 - %628 = add i32 %2, 1 - %629 = mul i32 %628, %1 - %630 = add i32 %629, %623 - %631 = add i32 %630, 1 - %632 = add i32 %630, 32 - %633 = icmp slt i32 %632, %631 - %634 = or i1 %627, %633 - br i1 %634, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.memcheck312 - -pregion_for_entry.entry.i.us.6.preheader: ; preds = %vector.memcheck312, %vector.scevcheck290 - br label %pregion_for_entry.entry.i.us.6 - -vector.memcheck312: ; preds = %vector.scevcheck290 - %635 = mul i32 %conv7.i.6, %2 - %636 = add i32 %635, %1 - %637 = trunc i64 %4 to i32 - %638 = shl i32 %637, 5 - %639 = add i32 %636, %638 - %640 = add i32 %639, 1 - %641 = sext i32 %640 to i64 - %scevgep292 = getelementptr float, float* %0, i64 %641 - %scevgep292293 = bitcast float* %scevgep292 to i8* - %642 = add nsw i64 %641, 32 - %scevgep294 = getelementptr float, float* %0, i64 %642 - %scevgep296 = getelementptr float, float* %0, i64 %idxprom14.i.6 - %scevgep296297 = bitcast float* %scevgep296 to i8* - %uglygep298 = getelementptr i8, i8* %scevgep296297, i64 1 - %643 = add i32 %2, 1 - %644 = mul i32 %643, %1 - %645 = add i32 %644, %638 - %646 = add i32 %645, 1 - %647 = sext i32 %646 to i64 - %scevgep299 = getelementptr float, float* %0, i64 %647 - %648 = add nsw i64 %647, 32 - %scevgep301 = getelementptr float, float* %0, i64 %648 - %bound0304 = icmp ugt i8* %uglygep298, %scevgep292293 - %bound1305 = icmp ult float* %arrayidx15.i.6, %scevgep294 - %found.conflict306 = and i1 %bound0304, %bound1305 - %bound0307 = icmp ult float* %scevgep292, %scevgep301 - %bound1308 = icmp ult float* %scevgep299, %scevgep294 - %found.conflict309 = and i1 %bound0307, %bound1308 - %conflict.rdx310 = or i1 %found.conflict306, %found.conflict309 - br i1 %conflict.rdx310, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.ph313 - -vector.ph313: ; preds = %vector.memcheck312 - %broadcast.splatinsert320 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat321 = shufflevector <8 x i64> %broadcast.splatinsert320, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert322 = insertelement <8 x i32> undef, i32 %add.i, i32 0 - %broadcast.splat323 = shufflevector <8 x i32> %broadcast.splatinsert322, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert324 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat325 = shufflevector <8 x i32> %broadcast.splatinsert324, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert327 = insertelement <8 x float*> undef, float* %arrayidx15.i.6, i32 0 - %broadcast.splat328 = shufflevector <8 x float*> %broadcast.splatinsert327, <8 x float*> undef, <8 x i32> zeroinitializer - %649 = trunc <8 x i64> %broadcast.splat321 to <8 x i32> - %650 = or <8 x i32> %649, - %651 = add <8 x i32> %broadcast.splat323, %650 - %652 = icmp slt <8 x i32> %651, %broadcast.splat325 - %653 = extractelement <8 x i32> %651, i32 0 - %654 = add nsw i32 %653, %mul.i.6 - %655 = sext i32 %654 to i64 - %656 = getelementptr inbounds float, float* %0, i64 %655 - %657 = bitcast float* %656 to <8 x float>* - %wide.masked.load326 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %657, i32 4, <8 x i1> %652, <8 x float> undef), !tbaa !12, !alias.scope !67, !noalias !70 - %wide.masked.gather329 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat328, i32 4, <8 x i1> %652, <8 x float> undef), !tbaa !12, !alias.scope !73 - %658 = add nsw i32 %653, %mul16.i - %659 = sext i32 %658 to i64 - %660 = getelementptr inbounds float, float* %0, i64 %659 - %661 = bitcast float* %660 to <8 x float>* - %wide.masked.load330 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %661, i32 4, <8 x i1> %652, <8 x float> undef), !tbaa !12, !alias.scope !74 - %662 = fneg <8 x float> %wide.masked.gather329 - %663 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %662, <8 x float> %wide.masked.load330, <8 x float> %wide.masked.load326) - %664 = bitcast float* %656 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %663, <8 x float>* %664, i32 4, <8 x i1> %652), !tbaa !12, !alias.scope !67, !noalias !70, !llvm.access.group !24 - %665 = trunc <8 x i64> %broadcast.splat321 to <8 x i32> - %666 = or <8 x i32> %665, - %667 = add <8 x i32> %broadcast.splat323, %666 - %668 = icmp slt <8 x i32> %667, %broadcast.splat325 - %669 = extractelement <8 x i32> %667, i32 0 - %670 = add nsw i32 %669, %mul.i.6 - %671 = sext i32 %670 to i64 - %672 = getelementptr inbounds float, float* %0, i64 %671 - %673 = bitcast float* %672 to <8 x float>* - %wide.masked.load326.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %673, i32 4, <8 x i1> %668, <8 x float> undef), !tbaa !12, !alias.scope !67, !noalias !70 - %wide.masked.gather329.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat328, i32 4, <8 x i1> %668, <8 x float> undef), !tbaa !12, !alias.scope !73 - %674 = add nsw i32 %669, %mul16.i - %675 = sext i32 %674 to i64 - %676 = getelementptr inbounds float, float* %0, i64 %675 - %677 = bitcast float* %676 to <8 x float>* - %wide.masked.load330.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %677, i32 4, <8 x i1> %668, <8 x float> undef), !tbaa !12, !alias.scope !74 - %678 = fneg <8 x float> %wide.masked.gather329.1 - %679 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %678, <8 x float> %wide.masked.load330.1, <8 x float> %wide.masked.load326.1) - %680 = bitcast float* %672 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %679, <8 x float>* %680, i32 4, <8 x i1> %668), !tbaa !12, !alias.scope !67, !noalias !70, !llvm.access.group !24 - %681 = trunc <8 x i64> %broadcast.splat321 to <8 x i32> - %682 = or <8 x i32> %681, - %683 = add <8 x i32> %broadcast.splat323, %682 - %684 = icmp slt <8 x i32> %683, %broadcast.splat325 - %685 = extractelement <8 x i32> %683, i32 0 - %686 = add nsw i32 %685, %mul.i.6 - %687 = sext i32 %686 to i64 - %688 = getelementptr inbounds float, float* %0, i64 %687 - %689 = bitcast float* %688 to <8 x float>* - %wide.masked.load326.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %689, i32 4, <8 x i1> %684, <8 x float> undef), !tbaa !12, !alias.scope !67, !noalias !70 - %wide.masked.gather329.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat328, i32 4, <8 x i1> %684, <8 x float> undef), !tbaa !12, !alias.scope !73 - %690 = add nsw i32 %685, %mul16.i - %691 = sext i32 %690 to i64 - %692 = getelementptr inbounds float, float* %0, i64 %691 - %693 = bitcast float* %692 to <8 x float>* - %wide.masked.load330.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %693, i32 4, <8 x i1> %684, <8 x float> undef), !tbaa !12, !alias.scope !74 - %694 = fneg <8 x float> %wide.masked.gather329.2 - %695 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %694, <8 x float> %wide.masked.load330.2, <8 x float> %wide.masked.load326.2) - %696 = bitcast float* %688 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %695, <8 x float>* %696, i32 4, <8 x i1> %684), !tbaa !12, !alias.scope !67, !noalias !70, !llvm.access.group !24 - %697 = trunc <8 x i64> %broadcast.splat321 to <8 x i32> - %698 = or <8 x i32> %697, - %699 = add <8 x i32> %broadcast.splat323, %698 - %700 = icmp slt <8 x i32> %699, %broadcast.splat325 - %701 = extractelement <8 x i32> %699, i32 0 - %702 = add nsw i32 %701, %mul.i.6 - %703 = sext i32 %702 to i64 - %704 = getelementptr inbounds float, float* %0, i64 %703 - %705 = bitcast float* %704 to <8 x float>* - %wide.masked.load326.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %705, i32 4, <8 x i1> %700, <8 x float> undef), !tbaa !12, !alias.scope !67, !noalias !70 - %wide.masked.gather329.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat328, i32 4, <8 x i1> %700, <8 x float> undef), !tbaa !12, !alias.scope !73 - %706 = add nsw i32 %701, %mul16.i - %707 = sext i32 %706 to i64 - %708 = getelementptr inbounds float, float* %0, i64 %707 - %709 = bitcast float* %708 to <8 x float>* - %wide.masked.load330.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %709, i32 4, <8 x i1> %700, <8 x float> undef), !tbaa !12, !alias.scope !74 - %710 = fneg <8 x float> %wide.masked.gather329.3 - %711 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %710, <8 x float> %wide.masked.load330.3, <8 x float> %wide.masked.load326.3) - %712 = bitcast float* %704 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %711, <8 x float>* %712, i32 4, <8 x i1> %700), !tbaa !12, !alias.scope !67, !noalias !70, !llvm.access.group !24 - br label %pregion_for_end.i.6 - -pregion_for_entry.entry.i.us.6: ; preds = %if.end.r_exit.i.us.6.1, %pregion_for_entry.entry.i.us.6.preheader - %_local_id_x.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.us.6.preheader ], [ %831, %if.end.r_exit.i.us.6.1 ] - %add1.i.i.us.6 = add nuw nsw i64 %_local_id_x.0.us.6, %mul.i.i - %713 = trunc i64 %add1.i.i.us.6 to i32 - %conv2.i.us.6 = add i32 %add.i, %713 - %cmp9.i.us.6 = icmp slt i32 %conv2.i.us.6, %2 - br i1 %cmp9.i.us.6, label %if.then.i.us.6, label %if.end.r_exit.i.us.6 - -if.then.i.us.6: ; preds = %pregion_for_entry.entry.i.us.6 - %add11.i.us.6 = add nsw i32 %conv2.i.us.6, %mul.i.6 - %idxprom.i.us.6 = sext i32 %add11.i.us.6 to i64 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.6 - %714 = load float, float* %arrayidx.i.us.6, align 4, !tbaa !12 - %715 = load float, float* %arrayidx15.i.6, align 4, !tbaa !12 - %add17.i.us.6 = add nsw i32 %conv2.i.us.6, %mul16.i - %idxprom18.i.us.6 = sext i32 %add17.i.us.6 to i64 - %arrayidx19.i.us.6 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.6 - %716 = load float, float* %arrayidx19.i.us.6, align 4, !tbaa !12 - %neg.i.us.6 = fneg float %715 - %717 = tail call float @llvm.fmuladd.f32(float %neg.i.us.6, float %716, float %714) #6 - store float %717, float* %arrayidx.i.us.6, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.6 - -if.end.r_exit.i.us.6: ; preds = %if.then.i.us.6, %pregion_for_entry.entry.i.us.6 - %718 = or i64 %_local_id_x.0.us.6, 1 - %add1.i.i.us.6.1 = add nuw nsw i64 %718, %mul.i.i - %719 = trunc i64 %add1.i.i.us.6.1 to i32 - %conv2.i.us.6.1 = add i32 %add.i, %719 - %cmp9.i.us.6.1 = icmp slt i32 %conv2.i.us.6.1, %2 - br i1 %cmp9.i.us.6.1, label %if.then.i.us.6.1, label %if.end.r_exit.i.us.6.1 - -pregion_for_end.i.6.loopexit: ; preds = %if.end.r_exit.i.us.6.1 - br label %pregion_for_end.i.6 - -pregion_for_end.i.6: ; preds = %pregion_for_end.i.6.loopexit, %vector.ph313, %pregion_for_end.i.5 - %720 = trunc i64 %mul3.i.i to i32 - %721 = or i32 %720, 7 - %conv7.i.7 = add i32 %add.i, %721 - %cmp.i.7 = icmp slt i32 %conv7.i.7, %2 - %mul.i.7 = mul nsw i32 %conv7.i.7, %2 - %add13.i.7 = add nsw i32 %mul.i.7, %1 - %idxprom14.i.7 = sext i32 %add13.i.7 to i64 - %arrayidx15.i.7 = getelementptr inbounds float, float* %0, i64 %idxprom14.i.7 - br i1 %cmp.i.7, label %vector.scevcheck341, label %pregion_for_end.i.7 - -vector.scevcheck341: ; preds = %pregion_for_end.i.6 - %722 = mul i32 %conv7.i.7, %2 - %723 = add i32 %722, %1 - %724 = trunc i64 %4 to i32 - %725 = shl i32 %724, 5 - %726 = add i32 %723, %725 - %727 = add i32 %726, 1 - %728 = add i32 %726, 32 - %729 = icmp slt i32 %728, %727 - %730 = add i32 %2, 1 - %731 = mul i32 %730, %1 - %732 = add i32 %731, %725 - %733 = add i32 %732, 1 - %734 = add i32 %732, 32 - %735 = icmp slt i32 %734, %733 - %736 = or i1 %729, %735 - br i1 %736, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.memcheck363 - -pregion_for_entry.entry.i.us.7.preheader: ; preds = %vector.memcheck363, %vector.scevcheck341 - br label %pregion_for_entry.entry.i.us.7 - -vector.memcheck363: ; preds = %vector.scevcheck341 - %737 = mul i32 %conv7.i.7, %2 - %738 = add i32 %737, %1 - %739 = trunc i64 %4 to i32 - %740 = shl i32 %739, 5 - %741 = add i32 %738, %740 - %742 = add i32 %741, 1 - %743 = sext i32 %742 to i64 - %scevgep343 = getelementptr float, float* %0, i64 %743 - %scevgep343344 = bitcast float* %scevgep343 to i8* - %744 = add nsw i64 %743, 32 - %scevgep345 = getelementptr float, float* %0, i64 %744 - %scevgep347 = getelementptr float, float* %0, i64 %idxprom14.i.7 - %scevgep347348 = bitcast float* %scevgep347 to i8* - %uglygep349 = getelementptr i8, i8* %scevgep347348, i64 1 - %745 = add i32 %2, 1 - %746 = mul i32 %745, %1 - %747 = add i32 %746, %740 - %748 = add i32 %747, 1 - %749 = sext i32 %748 to i64 - %scevgep350 = getelementptr float, float* %0, i64 %749 - %750 = add nsw i64 %749, 32 - %scevgep352 = getelementptr float, float* %0, i64 %750 - %bound0355 = icmp ugt i8* %uglygep349, %scevgep343344 - %bound1356 = icmp ult float* %arrayidx15.i.7, %scevgep345 - %found.conflict357 = and i1 %bound0355, %bound1356 - %bound0358 = icmp ult float* %scevgep343, %scevgep352 - %bound1359 = icmp ult float* %scevgep350, %scevgep345 - %found.conflict360 = and i1 %bound0358, %bound1359 - %conflict.rdx361 = or i1 %found.conflict357, %found.conflict360 - br i1 %conflict.rdx361, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.ph364 - -vector.ph364: ; preds = %vector.memcheck363 - %broadcast.splatinsert371 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat372 = shufflevector <8 x i64> %broadcast.splatinsert371, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert373 = insertelement <8 x i32> undef, i32 %add.i, i32 0 - %broadcast.splat374 = shufflevector <8 x i32> %broadcast.splatinsert373, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert375 = insertelement <8 x i32> undef, i32 %2, i32 0 - %broadcast.splat376 = shufflevector <8 x i32> %broadcast.splatinsert375, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert378 = insertelement <8 x float*> undef, float* %arrayidx15.i.7, i32 0 - %broadcast.splat379 = shufflevector <8 x float*> %broadcast.splatinsert378, <8 x float*> undef, <8 x i32> zeroinitializer - %751 = trunc <8 x i64> %broadcast.splat372 to <8 x i32> - %752 = or <8 x i32> %751, - %753 = add <8 x i32> %broadcast.splat374, %752 - %754 = icmp slt <8 x i32> %753, %broadcast.splat376 - %755 = extractelement <8 x i32> %753, i32 0 - %756 = add nsw i32 %755, %mul.i.7 - %757 = sext i32 %756 to i64 - %758 = getelementptr inbounds float, float* %0, i64 %757 - %759 = bitcast float* %758 to <8 x float>* - %wide.masked.load377 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %759, i32 4, <8 x i1> %754, <8 x float> undef), !tbaa !12, !alias.scope !75, !noalias !78 - %wide.masked.gather380 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat379, i32 4, <8 x i1> %754, <8 x float> undef), !tbaa !12, !alias.scope !81 - %760 = add nsw i32 %755, %mul16.i - %761 = sext i32 %760 to i64 - %762 = getelementptr inbounds float, float* %0, i64 %761 - %763 = bitcast float* %762 to <8 x float>* - %wide.masked.load381 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %763, i32 4, <8 x i1> %754, <8 x float> undef), !tbaa !12, !alias.scope !82 - %764 = fneg <8 x float> %wide.masked.gather380 - %765 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %764, <8 x float> %wide.masked.load381, <8 x float> %wide.masked.load377) - %766 = bitcast float* %758 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %765, <8 x float>* %766, i32 4, <8 x i1> %754), !tbaa !12, !alias.scope !75, !noalias !78, !llvm.access.group !24 - %767 = trunc <8 x i64> %broadcast.splat372 to <8 x i32> - %768 = or <8 x i32> %767, - %769 = add <8 x i32> %broadcast.splat374, %768 - %770 = icmp slt <8 x i32> %769, %broadcast.splat376 - %771 = extractelement <8 x i32> %769, i32 0 - %772 = add nsw i32 %771, %mul.i.7 - %773 = sext i32 %772 to i64 - %774 = getelementptr inbounds float, float* %0, i64 %773 - %775 = bitcast float* %774 to <8 x float>* - %wide.masked.load377.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %775, i32 4, <8 x i1> %770, <8 x float> undef), !tbaa !12, !alias.scope !75, !noalias !78 - %wide.masked.gather380.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat379, i32 4, <8 x i1> %770, <8 x float> undef), !tbaa !12, !alias.scope !81 - %776 = add nsw i32 %771, %mul16.i - %777 = sext i32 %776 to i64 - %778 = getelementptr inbounds float, float* %0, i64 %777 - %779 = bitcast float* %778 to <8 x float>* - %wide.masked.load381.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %779, i32 4, <8 x i1> %770, <8 x float> undef), !tbaa !12, !alias.scope !82 - %780 = fneg <8 x float> %wide.masked.gather380.1 - %781 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %780, <8 x float> %wide.masked.load381.1, <8 x float> %wide.masked.load377.1) - %782 = bitcast float* %774 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %781, <8 x float>* %782, i32 4, <8 x i1> %770), !tbaa !12, !alias.scope !75, !noalias !78, !llvm.access.group !24 - %783 = trunc <8 x i64> %broadcast.splat372 to <8 x i32> - %784 = or <8 x i32> %783, - %785 = add <8 x i32> %broadcast.splat374, %784 - %786 = icmp slt <8 x i32> %785, %broadcast.splat376 - %787 = extractelement <8 x i32> %785, i32 0 - %788 = add nsw i32 %787, %mul.i.7 - %789 = sext i32 %788 to i64 - %790 = getelementptr inbounds float, float* %0, i64 %789 - %791 = bitcast float* %790 to <8 x float>* - %wide.masked.load377.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %791, i32 4, <8 x i1> %786, <8 x float> undef), !tbaa !12, !alias.scope !75, !noalias !78 - %wide.masked.gather380.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat379, i32 4, <8 x i1> %786, <8 x float> undef), !tbaa !12, !alias.scope !81 - %792 = add nsw i32 %787, %mul16.i - %793 = sext i32 %792 to i64 - %794 = getelementptr inbounds float, float* %0, i64 %793 - %795 = bitcast float* %794 to <8 x float>* - %wide.masked.load381.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %795, i32 4, <8 x i1> %786, <8 x float> undef), !tbaa !12, !alias.scope !82 - %796 = fneg <8 x float> %wide.masked.gather380.2 - %797 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %796, <8 x float> %wide.masked.load381.2, <8 x float> %wide.masked.load377.2) - %798 = bitcast float* %790 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %797, <8 x float>* %798, i32 4, <8 x i1> %786), !tbaa !12, !alias.scope !75, !noalias !78, !llvm.access.group !24 - %799 = trunc <8 x i64> %broadcast.splat372 to <8 x i32> - %800 = or <8 x i32> %799, - %801 = add <8 x i32> %broadcast.splat374, %800 - %802 = icmp slt <8 x i32> %801, %broadcast.splat376 - %803 = extractelement <8 x i32> %801, i32 0 - %804 = add nsw i32 %803, %mul.i.7 - %805 = sext i32 %804 to i64 - %806 = getelementptr inbounds float, float* %0, i64 %805 - %807 = bitcast float* %806 to <8 x float>* - %wide.masked.load377.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %807, i32 4, <8 x i1> %802, <8 x float> undef), !tbaa !12, !alias.scope !75, !noalias !78 - %wide.masked.gather380.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat379, i32 4, <8 x i1> %802, <8 x float> undef), !tbaa !12, !alias.scope !81 - %808 = add nsw i32 %803, %mul16.i - %809 = sext i32 %808 to i64 - %810 = getelementptr inbounds float, float* %0, i64 %809 - %811 = bitcast float* %810 to <8 x float>* - %wide.masked.load381.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %811, i32 4, <8 x i1> %802, <8 x float> undef), !tbaa !12, !alias.scope !82 - %812 = fneg <8 x float> %wide.masked.gather380.3 - %813 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %812, <8 x float> %wide.masked.load381.3, <8 x float> %wide.masked.load377.3) - %814 = bitcast float* %806 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %813, <8 x float>* %814, i32 4, <8 x i1> %802), !tbaa !12, !alias.scope !75, !noalias !78, !llvm.access.group !24 - br label %pregion_for_end.i.7 - -pregion_for_entry.entry.i.us.7: ; preds = %if.end.r_exit.i.us.7.1, %pregion_for_entry.entry.i.us.7.preheader - %_local_id_x.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.us.7.preheader ], [ %826, %if.end.r_exit.i.us.7.1 ] - %add1.i.i.us.7 = add nuw nsw i64 %_local_id_x.0.us.7, %mul.i.i - %815 = trunc i64 %add1.i.i.us.7 to i32 - %conv2.i.us.7 = add i32 %add.i, %815 - %cmp9.i.us.7 = icmp slt i32 %conv2.i.us.7, %2 - br i1 %cmp9.i.us.7, label %if.then.i.us.7, label %if.end.r_exit.i.us.7 - -if.then.i.us.7: ; preds = %pregion_for_entry.entry.i.us.7 - %add11.i.us.7 = add nsw i32 %conv2.i.us.7, %mul.i.7 - %idxprom.i.us.7 = sext i32 %add11.i.us.7 to i64 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.7 - %816 = load float, float* %arrayidx.i.us.7, align 4, !tbaa !12 - %817 = load float, float* %arrayidx15.i.7, align 4, !tbaa !12 - %add17.i.us.7 = add nsw i32 %conv2.i.us.7, %mul16.i - %idxprom18.i.us.7 = sext i32 %add17.i.us.7 to i64 - %arrayidx19.i.us.7 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.7 - %818 = load float, float* %arrayidx19.i.us.7, align 4, !tbaa !12 - %neg.i.us.7 = fneg float %817 - %819 = tail call float @llvm.fmuladd.f32(float %neg.i.us.7, float %818, float %816) #6 - store float %819, float* %arrayidx.i.us.7, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.7 - -if.end.r_exit.i.us.7: ; preds = %if.then.i.us.7, %pregion_for_entry.entry.i.us.7 - %820 = or i64 %_local_id_x.0.us.7, 1 - %add1.i.i.us.7.1 = add nuw nsw i64 %820, %mul.i.i - %821 = trunc i64 %add1.i.i.us.7.1 to i32 - %conv2.i.us.7.1 = add i32 %add.i, %821 - %cmp9.i.us.7.1 = icmp slt i32 %conv2.i.us.7.1, %2 - br i1 %cmp9.i.us.7.1, label %if.then.i.us.7.1, label %if.end.r_exit.i.us.7.1 - -pregion_for_end.i.7.loopexit: ; preds = %if.end.r_exit.i.us.7.1 - br label %pregion_for_end.i.7 - -pregion_for_end.i.7: ; preds = %pregion_for_end.i.7.loopexit, %vector.ph364, %pregion_for_end.i.6 - ret void - -if.then.i.us.7.1: ; preds = %if.end.r_exit.i.us.7 - %add11.i.us.7.1 = add nsw i32 %conv2.i.us.7.1, %mul.i.7 - %idxprom.i.us.7.1 = sext i32 %add11.i.us.7.1 to i64 - %arrayidx.i.us.7.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.7.1 - %822 = load float, float* %arrayidx.i.us.7.1, align 4, !tbaa !12 - %823 = load float, float* %arrayidx15.i.7, align 4, !tbaa !12 - %add17.i.us.7.1 = add nsw i32 %conv2.i.us.7.1, %mul16.i - %idxprom18.i.us.7.1 = sext i32 %add17.i.us.7.1 to i64 - %arrayidx19.i.us.7.1 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.7.1 - %824 = load float, float* %arrayidx19.i.us.7.1, align 4, !tbaa !12 - %neg.i.us.7.1 = fneg float %823 - %825 = tail call float @llvm.fmuladd.f32(float %neg.i.us.7.1, float %824, float %822) #6 - store float %825, float* %arrayidx.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.7.1 - -if.end.r_exit.i.us.7.1: ; preds = %if.then.i.us.7.1, %if.end.r_exit.i.us.7 - %826 = add nuw nsw i64 %_local_id_x.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %826, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.7.loopexit, label %pregion_for_entry.entry.i.us.7, !llvm.loop !83 - -if.then.i.us.6.1: ; preds = %if.end.r_exit.i.us.6 - %add11.i.us.6.1 = add nsw i32 %conv2.i.us.6.1, %mul.i.6 - %idxprom.i.us.6.1 = sext i32 %add11.i.us.6.1 to i64 - %arrayidx.i.us.6.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.6.1 - %827 = load float, float* %arrayidx.i.us.6.1, align 4, !tbaa !12 - %828 = load float, float* %arrayidx15.i.6, align 4, !tbaa !12 - %add17.i.us.6.1 = add nsw i32 %conv2.i.us.6.1, %mul16.i - %idxprom18.i.us.6.1 = sext i32 %add17.i.us.6.1 to i64 - %arrayidx19.i.us.6.1 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.6.1 - %829 = load float, float* %arrayidx19.i.us.6.1, align 4, !tbaa !12 - %neg.i.us.6.1 = fneg float %828 - %830 = tail call float @llvm.fmuladd.f32(float %neg.i.us.6.1, float %829, float %827) #6 - store float %830, float* %arrayidx.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.6.1 - -if.end.r_exit.i.us.6.1: ; preds = %if.then.i.us.6.1, %if.end.r_exit.i.us.6 - %831 = add nuw nsw i64 %_local_id_x.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %831, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.6.loopexit, label %pregion_for_entry.entry.i.us.6, !llvm.loop !86 - -if.then.i.us.5.1: ; preds = %if.end.r_exit.i.us.5 - %add11.i.us.5.1 = add nsw i32 %conv2.i.us.5.1, %mul.i.5 - %idxprom.i.us.5.1 = sext i32 %add11.i.us.5.1 to i64 - %arrayidx.i.us.5.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.5.1 - %832 = load float, float* %arrayidx.i.us.5.1, align 4, !tbaa !12 - %833 = load float, float* %arrayidx15.i.5, align 4, !tbaa !12 - %add17.i.us.5.1 = add nsw i32 %conv2.i.us.5.1, %mul16.i - %idxprom18.i.us.5.1 = sext i32 %add17.i.us.5.1 to i64 - %arrayidx19.i.us.5.1 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.5.1 - %834 = load float, float* %arrayidx19.i.us.5.1, align 4, !tbaa !12 - %neg.i.us.5.1 = fneg float %833 - %835 = tail call float @llvm.fmuladd.f32(float %neg.i.us.5.1, float %834, float %832) #6 - store float %835, float* %arrayidx.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.5.1 - -if.end.r_exit.i.us.5.1: ; preds = %if.then.i.us.5.1, %if.end.r_exit.i.us.5 - %836 = add nuw nsw i64 %_local_id_x.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %836, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.5.loopexit, label %pregion_for_entry.entry.i.us.5, !llvm.loop !87 - -if.then.i.us.4.1: ; preds = %if.end.r_exit.i.us.4 - %add11.i.us.4.1 = add nsw i32 %conv2.i.us.4.1, %mul.i.4 - %idxprom.i.us.4.1 = sext i32 %add11.i.us.4.1 to i64 - %arrayidx.i.us.4.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.4.1 - %837 = load float, float* %arrayidx.i.us.4.1, align 4, !tbaa !12 - %838 = load float, float* %arrayidx15.i.4, align 4, !tbaa !12 - %add17.i.us.4.1 = add nsw i32 %conv2.i.us.4.1, %mul16.i - %idxprom18.i.us.4.1 = sext i32 %add17.i.us.4.1 to i64 - %arrayidx19.i.us.4.1 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.4.1 - %839 = load float, float* %arrayidx19.i.us.4.1, align 4, !tbaa !12 - %neg.i.us.4.1 = fneg float %838 - %840 = tail call float @llvm.fmuladd.f32(float %neg.i.us.4.1, float %839, float %837) #6 - store float %840, float* %arrayidx.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.4.1 - -if.end.r_exit.i.us.4.1: ; preds = %if.then.i.us.4.1, %if.end.r_exit.i.us.4 - %841 = add nuw nsw i64 %_local_id_x.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %841, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.4.loopexit, label %pregion_for_entry.entry.i.us.4, !llvm.loop !88 - -if.then.i.us.3.1: ; preds = %if.end.r_exit.i.us.3 - %add11.i.us.3.1 = add nsw i32 %conv2.i.us.3.1, %mul.i.3 - %idxprom.i.us.3.1 = sext i32 %add11.i.us.3.1 to i64 - %arrayidx.i.us.3.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.3.1 - %842 = load float, float* %arrayidx.i.us.3.1, align 4, !tbaa !12 - %843 = load float, float* %arrayidx15.i.3, align 4, !tbaa !12 - %add17.i.us.3.1 = add nsw i32 %conv2.i.us.3.1, %mul16.i - %idxprom18.i.us.3.1 = sext i32 %add17.i.us.3.1 to i64 - %arrayidx19.i.us.3.1 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.3.1 - %844 = load float, float* %arrayidx19.i.us.3.1, align 4, !tbaa !12 - %neg.i.us.3.1 = fneg float %843 - %845 = tail call float @llvm.fmuladd.f32(float %neg.i.us.3.1, float %844, float %842) #6 - store float %845, float* %arrayidx.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.3.1 - -if.end.r_exit.i.us.3.1: ; preds = %if.then.i.us.3.1, %if.end.r_exit.i.us.3 - %846 = add nuw nsw i64 %_local_id_x.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %846, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.3.loopexit, label %pregion_for_entry.entry.i.us.3, !llvm.loop !89 - -if.then.i.us.2.1: ; preds = %if.end.r_exit.i.us.2 - %add11.i.us.2.1 = add nsw i32 %conv2.i.us.2.1, %mul.i.2 - %idxprom.i.us.2.1 = sext i32 %add11.i.us.2.1 to i64 - %arrayidx.i.us.2.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.2.1 - %847 = load float, float* %arrayidx.i.us.2.1, align 4, !tbaa !12 - %848 = load float, float* %arrayidx15.i.2, align 4, !tbaa !12 - %add17.i.us.2.1 = add nsw i32 %conv2.i.us.2.1, %mul16.i - %idxprom18.i.us.2.1 = sext i32 %add17.i.us.2.1 to i64 - %arrayidx19.i.us.2.1 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.2.1 - %849 = load float, float* %arrayidx19.i.us.2.1, align 4, !tbaa !12 - %neg.i.us.2.1 = fneg float %848 - %850 = tail call float @llvm.fmuladd.f32(float %neg.i.us.2.1, float %849, float %847) #6 - store float %850, float* %arrayidx.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.2.1 - -if.end.r_exit.i.us.2.1: ; preds = %if.then.i.us.2.1, %if.end.r_exit.i.us.2 - %851 = add nuw nsw i64 %_local_id_x.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %851, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.2.loopexit, label %pregion_for_entry.entry.i.us.2, !llvm.loop !90 - -if.then.i.us.1.1: ; preds = %if.end.r_exit.i.us.1 - %add11.i.us.1.1 = add nsw i32 %conv2.i.us.1.1, %mul.i.1 - %idxprom.i.us.1.1 = sext i32 %add11.i.us.1.1 to i64 - %arrayidx.i.us.1.1 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1.1 - %852 = load float, float* %arrayidx.i.us.1.1, align 4, !tbaa !12 - %853 = load float, float* %arrayidx15.i.1, align 4, !tbaa !12 - %add17.i.us.1.1 = add nsw i32 %conv2.i.us.1.1, %mul16.i - %idxprom18.i.us.1.1 = sext i32 %add17.i.us.1.1 to i64 - %arrayidx19.i.us.1.1 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.1.1 - %854 = load float, float* %arrayidx19.i.us.1.1, align 4, !tbaa !12 - %neg.i.us.1.1 = fneg float %853 - %855 = tail call float @llvm.fmuladd.f32(float %neg.i.us.1.1, float %854, float %852) #6 - store float %855, float* %arrayidx.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.1.1 - -if.end.r_exit.i.us.1.1: ; preds = %if.then.i.us.1.1, %if.end.r_exit.i.us.1 - %856 = add nuw nsw i64 %_local_id_x.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %856, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.1.loopexit, label %pregion_for_entry.entry.i.us.1, !llvm.loop !91 - -if.then.i.us.1402: ; preds = %if.end.r_exit.i.us - %add11.i.us.1395 = add nsw i32 %conv2.i.us.1392, %mul.i - %idxprom.i.us.1396 = sext i32 %add11.i.us.1395 to i64 - %arrayidx.i.us.1397 = getelementptr inbounds float, float* %0, i64 %idxprom.i.us.1396 - %857 = load float, float* %arrayidx.i.us.1397, align 4, !tbaa !12 - %858 = load float, float* %arrayidx15.i, align 4, !tbaa !12 - %add17.i.us.1398 = add nsw i32 %conv2.i.us.1392, %mul16.i - %idxprom18.i.us.1399 = sext i32 %add17.i.us.1398 to i64 - %arrayidx19.i.us.1400 = getelementptr inbounds float, float* %0, i64 %idxprom18.i.us.1399 - %859 = load float, float* %arrayidx19.i.us.1400, align 4, !tbaa !12 - %neg.i.us.1401 = fneg float %858 - %860 = tail call float @llvm.fmuladd.f32(float %neg.i.us.1401, float %859, float %857) #6 - store float %860, float* %arrayidx.i.us.1397, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.us.1403 - -if.end.r_exit.i.us.1403: ; preds = %if.then.i.us.1402, %if.end.r_exit.i.us - %861 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %861, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !92 -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_lu_kernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { -pregion_for_entry.pregion_for_init.i.i: - %5 = bitcast i8** %0 to float*** - %6 = load float**, float*** %5, align 8 - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to i32** - %10 = load i32*, i32** %9, align 8 - %11 = load i32, i32* %10, align 4 - %12 = getelementptr i8*, i8** %0, i64 2 - %13 = bitcast i8** %12 to i32** - %14 = load i32*, i32** %13, align 8 - %15 = load i32, i32* %14, align 4 - %mul.i.i.i = shl i64 %2, 5 - %add.i.i = add nsw i32 %11, 1 - %mul3.i.i.i = shl i64 %3, 3 - %mul16.i.i = mul nsw i32 %15, %11 - %16 = trunc i64 %mul3.i.i.i to i32 - %conv7.i.i = add i32 %add.i.i, %16 - %cmp.i.i = icmp slt i32 %conv7.i.i, %15 - %mul.i.i = mul nsw i32 %conv7.i.i, %15 - %add13.i.i = add nsw i32 %mul.i.i, %11 - %idxprom14.i.i = sext i32 %add13.i.i to i64 - %arrayidx15.i.i = getelementptr inbounds float, float* %7, i64 %idxprom14.i.i - br i1 %cmp.i.i, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %17 = mul i32 %15, %conv7.i.i - %18 = add i32 %11, %17 - %19 = trunc i64 %2 to i32 - %20 = shl i32 %19, 5 - %21 = add i32 %18, %20 - %22 = add i32 %21, 1 - %23 = add i32 %21, 32 - %24 = icmp slt i32 %23, %22 - %25 = add i32 %15, 1 - %26 = mul i32 %11, %25 - %27 = add i32 %26, %20 - %28 = add i32 %27, 1 - %29 = add i32 %27, 32 - %30 = icmp slt i32 %29, %28 - %31 = or i1 %24, %30 - br i1 %31, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %32 = mul i32 %15, %conv7.i.i - %33 = add i32 %11, %32 - %34 = trunc i64 %2 to i32 - %35 = shl i32 %34, 5 - %36 = add i32 %33, %35 - %37 = add i32 %36, 1 - %38 = sext i32 %37 to i64 - %scevgep = getelementptr float, float* %7, i64 %38 - %scevgep6 = bitcast float* %scevgep to i8* - %39 = add nsw i64 %38, 32 - %scevgep7 = getelementptr float, float* %7, i64 %39 - %scevgep9 = getelementptr float, float* %7, i64 %idxprom14.i.i - %scevgep910 = bitcast float* %scevgep9 to i8* - %uglygep = getelementptr i8, i8* %scevgep910, i64 1 - %40 = add i32 %15, 1 - %41 = mul i32 %11, %40 - %42 = add i32 %41, %35 - %43 = add i32 %42, 1 - %44 = sext i32 %43 to i64 - %scevgep11 = getelementptr float, float* %7, i64 %44 - %45 = add nsw i64 %44, 32 - %scevgep13 = getelementptr float, float* %7, i64 %45 - %bound0 = icmp ugt i8* %uglygep, %scevgep6 - %bound1 = icmp ult float* %arrayidx15.i.i, %scevgep7 - %found.conflict = and i1 %bound0, %bound1 - %bound015 = icmp ult float* %scevgep, %scevgep13 - %bound116 = icmp ult float* %scevgep11, %scevgep7 - %found.conflict17 = and i1 %bound015, %bound116 - %conflict.rdx = or i1 %found.conflict, %found.conflict17 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat19 = shufflevector <8 x i32> %broadcast.splatinsert18, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert20 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat21 = shufflevector <8 x i32> %broadcast.splatinsert20, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert22 = insertelement <8 x float*> undef, float* %arrayidx15.i.i, i32 0 - %broadcast.splat23 = shufflevector <8 x float*> %broadcast.splatinsert22, <8 x float*> undef, <8 x i32> zeroinitializer - %46 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %47 = or <8 x i32> %46, - %48 = add <8 x i32> %broadcast.splat19, %47 - %49 = icmp slt <8 x i32> %48, %broadcast.splat21 - %50 = extractelement <8 x i32> %48, i32 0 - %51 = add nsw i32 %50, %mul.i.i - %52 = sext i32 %51 to i64 - %53 = getelementptr inbounds float, float* %7, i64 %52 - %54 = bitcast float* %53 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %54, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !93, !noalias !96 - %wide.masked.gather = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat23, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !99 - %55 = add nsw i32 %50, %mul16.i.i - %56 = sext i32 %55 to i64 - %57 = getelementptr inbounds float, float* %7, i64 %56 - %58 = bitcast float* %57 to <8 x float>* - %wide.masked.load24 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %58, i32 4, <8 x i1> %49, <8 x float> undef), !tbaa !12, !alias.scope !100 - %59 = fneg <8 x float> %wide.masked.gather - %60 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %59, <8 x float> %wide.masked.load24, <8 x float> %wide.masked.load) - %61 = bitcast float* %53 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %60, <8 x float>* %61, i32 4, <8 x i1> %49), !tbaa !12, !alias.scope !93, !noalias !96, !llvm.access.group !24 - %62 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %63 = or <8 x i32> %62, - %64 = add <8 x i32> %broadcast.splat19, %63 - %65 = icmp slt <8 x i32> %64, %broadcast.splat21 - %66 = extractelement <8 x i32> %64, i32 0 - %67 = add nsw i32 %66, %mul.i.i - %68 = sext i32 %67 to i64 - %69 = getelementptr inbounds float, float* %7, i64 %68 - %70 = bitcast float* %69 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %70, i32 4, <8 x i1> %65, <8 x float> undef), !tbaa !12, !alias.scope !93, !noalias !96 - %wide.masked.gather.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat23, i32 4, <8 x i1> %65, <8 x float> undef), !tbaa !12, !alias.scope !99 - %71 = add nsw i32 %66, %mul16.i.i - %72 = sext i32 %71 to i64 - %73 = getelementptr inbounds float, float* %7, i64 %72 - %74 = bitcast float* %73 to <8 x float>* - %wide.masked.load24.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %74, i32 4, <8 x i1> %65, <8 x float> undef), !tbaa !12, !alias.scope !100 - %75 = fneg <8 x float> %wide.masked.gather.1 - %76 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %75, <8 x float> %wide.masked.load24.1, <8 x float> %wide.masked.load.1) - %77 = bitcast float* %69 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %76, <8 x float>* %77, i32 4, <8 x i1> %65), !tbaa !12, !alias.scope !93, !noalias !96, !llvm.access.group !24 - %78 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %79 = or <8 x i32> %78, - %80 = add <8 x i32> %broadcast.splat19, %79 - %81 = icmp slt <8 x i32> %80, %broadcast.splat21 - %82 = extractelement <8 x i32> %80, i32 0 - %83 = add nsw i32 %82, %mul.i.i - %84 = sext i32 %83 to i64 - %85 = getelementptr inbounds float, float* %7, i64 %84 - %86 = bitcast float* %85 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %86, i32 4, <8 x i1> %81, <8 x float> undef), !tbaa !12, !alias.scope !93, !noalias !96 - %wide.masked.gather.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat23, i32 4, <8 x i1> %81, <8 x float> undef), !tbaa !12, !alias.scope !99 - %87 = add nsw i32 %82, %mul16.i.i - %88 = sext i32 %87 to i64 - %89 = getelementptr inbounds float, float* %7, i64 %88 - %90 = bitcast float* %89 to <8 x float>* - %wide.masked.load24.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %90, i32 4, <8 x i1> %81, <8 x float> undef), !tbaa !12, !alias.scope !100 - %91 = fneg <8 x float> %wide.masked.gather.2 - %92 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %91, <8 x float> %wide.masked.load24.2, <8 x float> %wide.masked.load.2) - %93 = bitcast float* %85 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %92, <8 x float>* %93, i32 4, <8 x i1> %81), !tbaa !12, !alias.scope !93, !noalias !96, !llvm.access.group !24 - %94 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %95 = or <8 x i32> %94, - %96 = add <8 x i32> %broadcast.splat19, %95 - %97 = icmp slt <8 x i32> %96, %broadcast.splat21 - %98 = extractelement <8 x i32> %96, i32 0 - %99 = add nsw i32 %98, %mul.i.i - %100 = sext i32 %99 to i64 - %101 = getelementptr inbounds float, float* %7, i64 %100 - %102 = bitcast float* %101 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %102, i32 4, <8 x i1> %97, <8 x float> undef), !tbaa !12, !alias.scope !93, !noalias !96 - %wide.masked.gather.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat23, i32 4, <8 x i1> %97, <8 x float> undef), !tbaa !12, !alias.scope !99 - %103 = add nsw i32 %98, %mul16.i.i - %104 = sext i32 %103 to i64 - %105 = getelementptr inbounds float, float* %7, i64 %104 - %106 = bitcast float* %105 to <8 x float>* - %wide.masked.load24.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %106, i32 4, <8 x i1> %97, <8 x float> undef), !tbaa !12, !alias.scope !100 - %107 = fneg <8 x float> %wide.masked.gather.3 - %108 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %107, <8 x float> %wide.masked.load24.3, <8 x float> %wide.masked.load.3) - %109 = bitcast float* %101 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %108, <8 x float>* %109, i32 4, <8 x i1> %97), !tbaa !12, !alias.scope !93, !noalias !96, !llvm.access.group !24 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1403, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.preheader ], [ %870, %if.end.r_exit.i.i.us.1403 ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %110 = trunc i64 %add1.i.i.i.us to i32 - %conv2.i.i.us = add i32 %add.i.i, %110 - %cmp9.i.i.us = icmp slt i32 %conv2.i.i.us, %15 - br i1 %cmp9.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add11.i.i.us = add nsw i32 %conv2.i.i.us, %mul.i.i - %idxprom.i.i.us = sext i32 %add11.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us - %111 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %112 = load float, float* %arrayidx15.i.i, align 4, !tbaa !12 - %add17.i.i.us = add nsw i32 %conv2.i.i.us, %mul16.i.i - %idxprom18.i.i.us = sext i32 %add17.i.i.us to i64 - %arrayidx19.i.i.us = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us - %113 = load float, float* %arrayidx19.i.i.us, align 4, !tbaa !12 - %neg.i.i.us = fneg float %112 - %114 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us, float %113, float %111) #6 - store float %114, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %115 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1391 = add nuw nsw i64 %115, %mul.i.i.i - %116 = trunc i64 %add1.i.i.i.us.1391 to i32 - %conv2.i.i.us.1392 = add i32 %add.i.i, %116 - %cmp9.i.i.us.1393 = icmp slt i32 %conv2.i.i.us.1392, %15 - br i1 %cmp9.i.i.us.1393, label %if.then.i.i.us.1402, label %if.end.r_exit.i.i.us.1403 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.r_exit.i.i.us.1403 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i - %117 = trunc i64 %mul3.i.i.i to i32 - %118 = or i32 %117, 1 - %conv7.i.i.1 = add i32 %add.i.i, %118 - %cmp.i.i.1 = icmp slt i32 %conv7.i.i.1, %15 - %mul.i.i.1 = mul nsw i32 %conv7.i.i.1, %15 - %add13.i.i.1 = add nsw i32 %mul.i.i.1, %11 - %idxprom14.i.i.1 = sext i32 %add13.i.i.1 to i64 - %arrayidx15.i.i.1 = getelementptr inbounds float, float* %7, i64 %idxprom14.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck35, label %pregion_for_end.i.i.1 - -vector.scevcheck35: ; preds = %pregion_for_end.i.i - %119 = mul i32 %15, %conv7.i.i.1 - %120 = add i32 %11, %119 - %121 = trunc i64 %2 to i32 - %122 = shl i32 %121, 5 - %123 = add i32 %120, %122 - %124 = add i32 %123, 1 - %125 = add i32 %123, 32 - %126 = icmp slt i32 %125, %124 - %127 = add i32 %15, 1 - %128 = mul i32 %11, %127 - %129 = add i32 %128, %122 - %130 = add i32 %129, 1 - %131 = add i32 %129, 32 - %132 = icmp slt i32 %131, %130 - %133 = or i1 %126, %132 - br i1 %133, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.memcheck57 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.memcheck57, %vector.scevcheck35 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.memcheck57: ; preds = %vector.scevcheck35 - %134 = mul i32 %15, %conv7.i.i.1 - %135 = add i32 %11, %134 - %136 = trunc i64 %2 to i32 - %137 = shl i32 %136, 5 - %138 = add i32 %135, %137 - %139 = add i32 %138, 1 - %140 = sext i32 %139 to i64 - %scevgep37 = getelementptr float, float* %7, i64 %140 - %scevgep3738 = bitcast float* %scevgep37 to i8* - %141 = add nsw i64 %140, 32 - %scevgep39 = getelementptr float, float* %7, i64 %141 - %scevgep41 = getelementptr float, float* %7, i64 %idxprom14.i.i.1 - %scevgep4142 = bitcast float* %scevgep41 to i8* - %uglygep43 = getelementptr i8, i8* %scevgep4142, i64 1 - %142 = add i32 %15, 1 - %143 = mul i32 %11, %142 - %144 = add i32 %143, %137 - %145 = add i32 %144, 1 - %146 = sext i32 %145 to i64 - %scevgep44 = getelementptr float, float* %7, i64 %146 - %147 = add nsw i64 %146, 32 - %scevgep46 = getelementptr float, float* %7, i64 %147 - %bound049 = icmp ugt i8* %uglygep43, %scevgep3738 - %bound150 = icmp ult float* %arrayidx15.i.i.1, %scevgep39 - %found.conflict51 = and i1 %bound049, %bound150 - %bound052 = icmp ult float* %scevgep37, %scevgep46 - %bound153 = icmp ult float* %scevgep44, %scevgep39 - %found.conflict54 = and i1 %bound052, %bound153 - %conflict.rdx55 = or i1 %found.conflict51, %found.conflict54 - br i1 %conflict.rdx55, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph58 - -vector.ph58: ; preds = %vector.memcheck57 - %broadcast.splatinsert65 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat66 = shufflevector <8 x i64> %broadcast.splatinsert65, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert67 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat68 = shufflevector <8 x i32> %broadcast.splatinsert67, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert69 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat70 = shufflevector <8 x i32> %broadcast.splatinsert69, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert72 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.1, i32 0 - %broadcast.splat73 = shufflevector <8 x float*> %broadcast.splatinsert72, <8 x float*> undef, <8 x i32> zeroinitializer - %148 = trunc <8 x i64> %broadcast.splat66 to <8 x i32> - %149 = or <8 x i32> %148, - %150 = add <8 x i32> %broadcast.splat68, %149 - %151 = icmp slt <8 x i32> %150, %broadcast.splat70 - %152 = extractelement <8 x i32> %150, i32 0 - %153 = add nsw i32 %152, %mul.i.i.1 - %154 = sext i32 %153 to i64 - %155 = getelementptr inbounds float, float* %7, i64 %154 - %156 = bitcast float* %155 to <8 x float>* - %wide.masked.load71 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %156, i32 4, <8 x i1> %151, <8 x float> undef), !tbaa !12, !alias.scope !101, !noalias !104 - %wide.masked.gather74 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat73, i32 4, <8 x i1> %151, <8 x float> undef), !tbaa !12, !alias.scope !107 - %157 = add nsw i32 %152, %mul16.i.i - %158 = sext i32 %157 to i64 - %159 = getelementptr inbounds float, float* %7, i64 %158 - %160 = bitcast float* %159 to <8 x float>* - %wide.masked.load75 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %160, i32 4, <8 x i1> %151, <8 x float> undef), !tbaa !12, !alias.scope !108 - %161 = fneg <8 x float> %wide.masked.gather74 - %162 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %161, <8 x float> %wide.masked.load75, <8 x float> %wide.masked.load71) - %163 = bitcast float* %155 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %162, <8 x float>* %163, i32 4, <8 x i1> %151), !tbaa !12, !alias.scope !101, !noalias !104, !llvm.access.group !24 - %164 = trunc <8 x i64> %broadcast.splat66 to <8 x i32> - %165 = or <8 x i32> %164, - %166 = add <8 x i32> %broadcast.splat68, %165 - %167 = icmp slt <8 x i32> %166, %broadcast.splat70 - %168 = extractelement <8 x i32> %166, i32 0 - %169 = add nsw i32 %168, %mul.i.i.1 - %170 = sext i32 %169 to i64 - %171 = getelementptr inbounds float, float* %7, i64 %170 - %172 = bitcast float* %171 to <8 x float>* - %wide.masked.load71.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %172, i32 4, <8 x i1> %167, <8 x float> undef), !tbaa !12, !alias.scope !101, !noalias !104 - %wide.masked.gather74.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat73, i32 4, <8 x i1> %167, <8 x float> undef), !tbaa !12, !alias.scope !107 - %173 = add nsw i32 %168, %mul16.i.i - %174 = sext i32 %173 to i64 - %175 = getelementptr inbounds float, float* %7, i64 %174 - %176 = bitcast float* %175 to <8 x float>* - %wide.masked.load75.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %176, i32 4, <8 x i1> %167, <8 x float> undef), !tbaa !12, !alias.scope !108 - %177 = fneg <8 x float> %wide.masked.gather74.1 - %178 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %177, <8 x float> %wide.masked.load75.1, <8 x float> %wide.masked.load71.1) - %179 = bitcast float* %171 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %178, <8 x float>* %179, i32 4, <8 x i1> %167), !tbaa !12, !alias.scope !101, !noalias !104, !llvm.access.group !24 - %180 = trunc <8 x i64> %broadcast.splat66 to <8 x i32> - %181 = or <8 x i32> %180, - %182 = add <8 x i32> %broadcast.splat68, %181 - %183 = icmp slt <8 x i32> %182, %broadcast.splat70 - %184 = extractelement <8 x i32> %182, i32 0 - %185 = add nsw i32 %184, %mul.i.i.1 - %186 = sext i32 %185 to i64 - %187 = getelementptr inbounds float, float* %7, i64 %186 - %188 = bitcast float* %187 to <8 x float>* - %wide.masked.load71.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %188, i32 4, <8 x i1> %183, <8 x float> undef), !tbaa !12, !alias.scope !101, !noalias !104 - %wide.masked.gather74.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat73, i32 4, <8 x i1> %183, <8 x float> undef), !tbaa !12, !alias.scope !107 - %189 = add nsw i32 %184, %mul16.i.i - %190 = sext i32 %189 to i64 - %191 = getelementptr inbounds float, float* %7, i64 %190 - %192 = bitcast float* %191 to <8 x float>* - %wide.masked.load75.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %192, i32 4, <8 x i1> %183, <8 x float> undef), !tbaa !12, !alias.scope !108 - %193 = fneg <8 x float> %wide.masked.gather74.2 - %194 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %193, <8 x float> %wide.masked.load75.2, <8 x float> %wide.masked.load71.2) - %195 = bitcast float* %187 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %194, <8 x float>* %195, i32 4, <8 x i1> %183), !tbaa !12, !alias.scope !101, !noalias !104, !llvm.access.group !24 - %196 = trunc <8 x i64> %broadcast.splat66 to <8 x i32> - %197 = or <8 x i32> %196, - %198 = add <8 x i32> %broadcast.splat68, %197 - %199 = icmp slt <8 x i32> %198, %broadcast.splat70 - %200 = extractelement <8 x i32> %198, i32 0 - %201 = add nsw i32 %200, %mul.i.i.1 - %202 = sext i32 %201 to i64 - %203 = getelementptr inbounds float, float* %7, i64 %202 - %204 = bitcast float* %203 to <8 x float>* - %wide.masked.load71.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %204, i32 4, <8 x i1> %199, <8 x float> undef), !tbaa !12, !alias.scope !101, !noalias !104 - %wide.masked.gather74.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat73, i32 4, <8 x i1> %199, <8 x float> undef), !tbaa !12, !alias.scope !107 - %205 = add nsw i32 %200, %mul16.i.i - %206 = sext i32 %205 to i64 - %207 = getelementptr inbounds float, float* %7, i64 %206 - %208 = bitcast float* %207 to <8 x float>* - %wide.masked.load75.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %208, i32 4, <8 x i1> %199, <8 x float> undef), !tbaa !12, !alias.scope !108 - %209 = fneg <8 x float> %wide.masked.gather74.3 - %210 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %209, <8 x float> %wide.masked.load75.3, <8 x float> %wide.masked.load71.3) - %211 = bitcast float* %203 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %210, <8 x float>* %211, i32 4, <8 x i1> %199), !tbaa !12, !alias.scope !101, !noalias !104, !llvm.access.group !24 - br label %pregion_for_end.i.i.1 - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.1, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ], [ %865, %if.end.r_exit.i.i.us.1.1 ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %212 = trunc i64 %add1.i.i.i.us.1 to i32 - %conv2.i.i.us.1 = add i32 %add.i.i, %212 - %cmp9.i.i.us.1 = icmp slt i32 %conv2.i.i.us.1, %15 - br i1 %cmp9.i.i.us.1, label %if.then.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %add11.i.i.us.1 = add nsw i32 %conv2.i.i.us.1, %mul.i.i.1 - %idxprom.i.i.us.1 = sext i32 %add11.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1 - %213 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %214 = load float, float* %arrayidx15.i.i.1, align 4, !tbaa !12 - %add17.i.i.us.1 = add nsw i32 %conv2.i.i.us.1, %mul16.i.i - %idxprom18.i.i.us.1 = sext i32 %add17.i.i.us.1 to i64 - %arrayidx19.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.1 - %215 = load float, float* %arrayidx19.i.i.us.1, align 4, !tbaa !12 - %neg.i.i.us.1 = fneg float %214 - %216 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.1, float %215, float %213) #6 - store float %216, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %217 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %217, %mul.i.i.i - %218 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %conv2.i.i.us.1.1 = add i32 %add.i.i, %218 - %cmp9.i.i.us.1.1 = icmp slt i32 %conv2.i.i.us.1.1, %15 - br i1 %cmp9.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.r_exit.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.r_exit.i.i.us.1.1 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph58, %pregion_for_end.i.i - %219 = trunc i64 %mul3.i.i.i to i32 - %220 = or i32 %219, 2 - %conv7.i.i.2 = add i32 %add.i.i, %220 - %cmp.i.i.2 = icmp slt i32 %conv7.i.i.2, %15 - %mul.i.i.2 = mul nsw i32 %conv7.i.i.2, %15 - %add13.i.i.2 = add nsw i32 %mul.i.i.2, %11 - %idxprom14.i.i.2 = sext i32 %add13.i.i.2 to i64 - %arrayidx15.i.i.2 = getelementptr inbounds float, float* %7, i64 %idxprom14.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck86, label %pregion_for_end.i.i.2 - -vector.scevcheck86: ; preds = %pregion_for_end.i.i.1 - %221 = mul i32 %15, %conv7.i.i.2 - %222 = add i32 %11, %221 - %223 = trunc i64 %2 to i32 - %224 = shl i32 %223, 5 - %225 = add i32 %222, %224 - %226 = add i32 %225, 1 - %227 = add i32 %225, 32 - %228 = icmp slt i32 %227, %226 - %229 = add i32 %15, 1 - %230 = mul i32 %11, %229 - %231 = add i32 %230, %224 - %232 = add i32 %231, 1 - %233 = add i32 %231, 32 - %234 = icmp slt i32 %233, %232 - %235 = or i1 %228, %234 - br i1 %235, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.memcheck108 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.memcheck108, %vector.scevcheck86 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.memcheck108: ; preds = %vector.scevcheck86 - %236 = mul i32 %15, %conv7.i.i.2 - %237 = add i32 %11, %236 - %238 = trunc i64 %2 to i32 - %239 = shl i32 %238, 5 - %240 = add i32 %237, %239 - %241 = add i32 %240, 1 - %242 = sext i32 %241 to i64 - %scevgep88 = getelementptr float, float* %7, i64 %242 - %scevgep8889 = bitcast float* %scevgep88 to i8* - %243 = add nsw i64 %242, 32 - %scevgep90 = getelementptr float, float* %7, i64 %243 - %scevgep92 = getelementptr float, float* %7, i64 %idxprom14.i.i.2 - %scevgep9293 = bitcast float* %scevgep92 to i8* - %uglygep94 = getelementptr i8, i8* %scevgep9293, i64 1 - %244 = add i32 %15, 1 - %245 = mul i32 %11, %244 - %246 = add i32 %245, %239 - %247 = add i32 %246, 1 - %248 = sext i32 %247 to i64 - %scevgep95 = getelementptr float, float* %7, i64 %248 - %249 = add nsw i64 %248, 32 - %scevgep97 = getelementptr float, float* %7, i64 %249 - %bound0100 = icmp ugt i8* %uglygep94, %scevgep8889 - %bound1101 = icmp ult float* %arrayidx15.i.i.2, %scevgep90 - %found.conflict102 = and i1 %bound0100, %bound1101 - %bound0103 = icmp ult float* %scevgep88, %scevgep97 - %bound1104 = icmp ult float* %scevgep95, %scevgep90 - %found.conflict105 = and i1 %bound0103, %bound1104 - %conflict.rdx106 = or i1 %found.conflict102, %found.conflict105 - br i1 %conflict.rdx106, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph109 - -vector.ph109: ; preds = %vector.memcheck108 - %broadcast.splatinsert116 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat117 = shufflevector <8 x i64> %broadcast.splatinsert116, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert118 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat119 = shufflevector <8 x i32> %broadcast.splatinsert118, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert120 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat121 = shufflevector <8 x i32> %broadcast.splatinsert120, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert123 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.2, i32 0 - %broadcast.splat124 = shufflevector <8 x float*> %broadcast.splatinsert123, <8 x float*> undef, <8 x i32> zeroinitializer - %250 = trunc <8 x i64> %broadcast.splat117 to <8 x i32> - %251 = or <8 x i32> %250, - %252 = add <8 x i32> %broadcast.splat119, %251 - %253 = icmp slt <8 x i32> %252, %broadcast.splat121 - %254 = extractelement <8 x i32> %252, i32 0 - %255 = add nsw i32 %254, %mul.i.i.2 - %256 = sext i32 %255 to i64 - %257 = getelementptr inbounds float, float* %7, i64 %256 - %258 = bitcast float* %257 to <8 x float>* - %wide.masked.load122 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %258, i32 4, <8 x i1> %253, <8 x float> undef), !tbaa !12, !alias.scope !109, !noalias !112 - %wide.masked.gather125 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat124, i32 4, <8 x i1> %253, <8 x float> undef), !tbaa !12, !alias.scope !115 - %259 = add nsw i32 %254, %mul16.i.i - %260 = sext i32 %259 to i64 - %261 = getelementptr inbounds float, float* %7, i64 %260 - %262 = bitcast float* %261 to <8 x float>* - %wide.masked.load126 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %262, i32 4, <8 x i1> %253, <8 x float> undef), !tbaa !12, !alias.scope !116 - %263 = fneg <8 x float> %wide.masked.gather125 - %264 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %263, <8 x float> %wide.masked.load126, <8 x float> %wide.masked.load122) - %265 = bitcast float* %257 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %264, <8 x float>* %265, i32 4, <8 x i1> %253), !tbaa !12, !alias.scope !109, !noalias !112, !llvm.access.group !24 - %266 = trunc <8 x i64> %broadcast.splat117 to <8 x i32> - %267 = or <8 x i32> %266, - %268 = add <8 x i32> %broadcast.splat119, %267 - %269 = icmp slt <8 x i32> %268, %broadcast.splat121 - %270 = extractelement <8 x i32> %268, i32 0 - %271 = add nsw i32 %270, %mul.i.i.2 - %272 = sext i32 %271 to i64 - %273 = getelementptr inbounds float, float* %7, i64 %272 - %274 = bitcast float* %273 to <8 x float>* - %wide.masked.load122.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %274, i32 4, <8 x i1> %269, <8 x float> undef), !tbaa !12, !alias.scope !109, !noalias !112 - %wide.masked.gather125.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat124, i32 4, <8 x i1> %269, <8 x float> undef), !tbaa !12, !alias.scope !115 - %275 = add nsw i32 %270, %mul16.i.i - %276 = sext i32 %275 to i64 - %277 = getelementptr inbounds float, float* %7, i64 %276 - %278 = bitcast float* %277 to <8 x float>* - %wide.masked.load126.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %278, i32 4, <8 x i1> %269, <8 x float> undef), !tbaa !12, !alias.scope !116 - %279 = fneg <8 x float> %wide.masked.gather125.1 - %280 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %279, <8 x float> %wide.masked.load126.1, <8 x float> %wide.masked.load122.1) - %281 = bitcast float* %273 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %280, <8 x float>* %281, i32 4, <8 x i1> %269), !tbaa !12, !alias.scope !109, !noalias !112, !llvm.access.group !24 - %282 = trunc <8 x i64> %broadcast.splat117 to <8 x i32> - %283 = or <8 x i32> %282, - %284 = add <8 x i32> %broadcast.splat119, %283 - %285 = icmp slt <8 x i32> %284, %broadcast.splat121 - %286 = extractelement <8 x i32> %284, i32 0 - %287 = add nsw i32 %286, %mul.i.i.2 - %288 = sext i32 %287 to i64 - %289 = getelementptr inbounds float, float* %7, i64 %288 - %290 = bitcast float* %289 to <8 x float>* - %wide.masked.load122.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %290, i32 4, <8 x i1> %285, <8 x float> undef), !tbaa !12, !alias.scope !109, !noalias !112 - %wide.masked.gather125.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat124, i32 4, <8 x i1> %285, <8 x float> undef), !tbaa !12, !alias.scope !115 - %291 = add nsw i32 %286, %mul16.i.i - %292 = sext i32 %291 to i64 - %293 = getelementptr inbounds float, float* %7, i64 %292 - %294 = bitcast float* %293 to <8 x float>* - %wide.masked.load126.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %294, i32 4, <8 x i1> %285, <8 x float> undef), !tbaa !12, !alias.scope !116 - %295 = fneg <8 x float> %wide.masked.gather125.2 - %296 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %295, <8 x float> %wide.masked.load126.2, <8 x float> %wide.masked.load122.2) - %297 = bitcast float* %289 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %296, <8 x float>* %297, i32 4, <8 x i1> %285), !tbaa !12, !alias.scope !109, !noalias !112, !llvm.access.group !24 - %298 = trunc <8 x i64> %broadcast.splat117 to <8 x i32> - %299 = or <8 x i32> %298, - %300 = add <8 x i32> %broadcast.splat119, %299 - %301 = icmp slt <8 x i32> %300, %broadcast.splat121 - %302 = extractelement <8 x i32> %300, i32 0 - %303 = add nsw i32 %302, %mul.i.i.2 - %304 = sext i32 %303 to i64 - %305 = getelementptr inbounds float, float* %7, i64 %304 - %306 = bitcast float* %305 to <8 x float>* - %wide.masked.load122.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %306, i32 4, <8 x i1> %301, <8 x float> undef), !tbaa !12, !alias.scope !109, !noalias !112 - %wide.masked.gather125.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat124, i32 4, <8 x i1> %301, <8 x float> undef), !tbaa !12, !alias.scope !115 - %307 = add nsw i32 %302, %mul16.i.i - %308 = sext i32 %307 to i64 - %309 = getelementptr inbounds float, float* %7, i64 %308 - %310 = bitcast float* %309 to <8 x float>* - %wide.masked.load126.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %310, i32 4, <8 x i1> %301, <8 x float> undef), !tbaa !12, !alias.scope !116 - %311 = fneg <8 x float> %wide.masked.gather125.3 - %312 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %311, <8 x float> %wide.masked.load126.3, <8 x float> %wide.masked.load122.3) - %313 = bitcast float* %305 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %312, <8 x float>* %313, i32 4, <8 x i1> %301), !tbaa !12, !alias.scope !109, !noalias !112, !llvm.access.group !24 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.r_exit.i.i.us.2.1, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ], [ %860, %if.end.r_exit.i.i.us.2.1 ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %314 = trunc i64 %add1.i.i.i.us.2 to i32 - %conv2.i.i.us.2 = add i32 %add.i.i, %314 - %cmp9.i.i.us.2 = icmp slt i32 %conv2.i.i.us.2, %15 - br i1 %cmp9.i.i.us.2, label %if.then.i.i.us.2, label %if.end.r_exit.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %add11.i.i.us.2 = add nsw i32 %conv2.i.i.us.2, %mul.i.i.2 - %idxprom.i.i.us.2 = sext i32 %add11.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.2 - %315 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %316 = load float, float* %arrayidx15.i.i.2, align 4, !tbaa !12 - %add17.i.i.us.2 = add nsw i32 %conv2.i.i.us.2, %mul16.i.i - %idxprom18.i.i.us.2 = sext i32 %add17.i.i.us.2 to i64 - %arrayidx19.i.i.us.2 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.2 - %317 = load float, float* %arrayidx19.i.i.us.2, align 4, !tbaa !12 - %neg.i.i.us.2 = fneg float %316 - %318 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.2, float %317, float %315) #6 - store float %318, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.2 - -if.end.r_exit.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %319 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %319, %mul.i.i.i - %320 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %conv2.i.i.us.2.1 = add i32 %add.i.i, %320 - %cmp9.i.i.us.2.1 = icmp slt i32 %conv2.i.i.us.2.1, %15 - br i1 %cmp9.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.r_exit.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.r_exit.i.i.us.2.1 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph109, %pregion_for_end.i.i.1 - %321 = trunc i64 %mul3.i.i.i to i32 - %322 = or i32 %321, 3 - %conv7.i.i.3 = add i32 %add.i.i, %322 - %cmp.i.i.3 = icmp slt i32 %conv7.i.i.3, %15 - %mul.i.i.3 = mul nsw i32 %conv7.i.i.3, %15 - %add13.i.i.3 = add nsw i32 %mul.i.i.3, %11 - %idxprom14.i.i.3 = sext i32 %add13.i.i.3 to i64 - %arrayidx15.i.i.3 = getelementptr inbounds float, float* %7, i64 %idxprom14.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck137, label %pregion_for_end.i.i.3 - -vector.scevcheck137: ; preds = %pregion_for_end.i.i.2 - %323 = mul i32 %15, %conv7.i.i.3 - %324 = add i32 %11, %323 - %325 = trunc i64 %2 to i32 - %326 = shl i32 %325, 5 - %327 = add i32 %324, %326 - %328 = add i32 %327, 1 - %329 = add i32 %327, 32 - %330 = icmp slt i32 %329, %328 - %331 = add i32 %15, 1 - %332 = mul i32 %11, %331 - %333 = add i32 %332, %326 - %334 = add i32 %333, 1 - %335 = add i32 %333, 32 - %336 = icmp slt i32 %335, %334 - %337 = or i1 %330, %336 - br i1 %337, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.memcheck159 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.memcheck159, %vector.scevcheck137 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.memcheck159: ; preds = %vector.scevcheck137 - %338 = mul i32 %15, %conv7.i.i.3 - %339 = add i32 %11, %338 - %340 = trunc i64 %2 to i32 - %341 = shl i32 %340, 5 - %342 = add i32 %339, %341 - %343 = add i32 %342, 1 - %344 = sext i32 %343 to i64 - %scevgep139 = getelementptr float, float* %7, i64 %344 - %scevgep139140 = bitcast float* %scevgep139 to i8* - %345 = add nsw i64 %344, 32 - %scevgep141 = getelementptr float, float* %7, i64 %345 - %scevgep143 = getelementptr float, float* %7, i64 %idxprom14.i.i.3 - %scevgep143144 = bitcast float* %scevgep143 to i8* - %uglygep145 = getelementptr i8, i8* %scevgep143144, i64 1 - %346 = add i32 %15, 1 - %347 = mul i32 %11, %346 - %348 = add i32 %347, %341 - %349 = add i32 %348, 1 - %350 = sext i32 %349 to i64 - %scevgep146 = getelementptr float, float* %7, i64 %350 - %351 = add nsw i64 %350, 32 - %scevgep148 = getelementptr float, float* %7, i64 %351 - %bound0151 = icmp ugt i8* %uglygep145, %scevgep139140 - %bound1152 = icmp ult float* %arrayidx15.i.i.3, %scevgep141 - %found.conflict153 = and i1 %bound0151, %bound1152 - %bound0154 = icmp ult float* %scevgep139, %scevgep148 - %bound1155 = icmp ult float* %scevgep146, %scevgep141 - %found.conflict156 = and i1 %bound0154, %bound1155 - %conflict.rdx157 = or i1 %found.conflict153, %found.conflict156 - br i1 %conflict.rdx157, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph160 - -vector.ph160: ; preds = %vector.memcheck159 - %broadcast.splatinsert167 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat168 = shufflevector <8 x i64> %broadcast.splatinsert167, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert169 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat170 = shufflevector <8 x i32> %broadcast.splatinsert169, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert171 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat172 = shufflevector <8 x i32> %broadcast.splatinsert171, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert174 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.3, i32 0 - %broadcast.splat175 = shufflevector <8 x float*> %broadcast.splatinsert174, <8 x float*> undef, <8 x i32> zeroinitializer - %352 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %353 = or <8 x i32> %352, - %354 = add <8 x i32> %broadcast.splat170, %353 - %355 = icmp slt <8 x i32> %354, %broadcast.splat172 - %356 = extractelement <8 x i32> %354, i32 0 - %357 = add nsw i32 %356, %mul.i.i.3 - %358 = sext i32 %357 to i64 - %359 = getelementptr inbounds float, float* %7, i64 %358 - %360 = bitcast float* %359 to <8 x float>* - %wide.masked.load173 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %360, i32 4, <8 x i1> %355, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %wide.masked.gather176 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat175, i32 4, <8 x i1> %355, <8 x float> undef), !tbaa !12, !alias.scope !123 - %361 = add nsw i32 %356, %mul16.i.i - %362 = sext i32 %361 to i64 - %363 = getelementptr inbounds float, float* %7, i64 %362 - %364 = bitcast float* %363 to <8 x float>* - %wide.masked.load177 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %364, i32 4, <8 x i1> %355, <8 x float> undef), !tbaa !12, !alias.scope !124 - %365 = fneg <8 x float> %wide.masked.gather176 - %366 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %365, <8 x float> %wide.masked.load177, <8 x float> %wide.masked.load173) - %367 = bitcast float* %359 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %366, <8 x float>* %367, i32 4, <8 x i1> %355), !tbaa !12, !alias.scope !117, !noalias !120, !llvm.access.group !24 - %368 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %369 = or <8 x i32> %368, - %370 = add <8 x i32> %broadcast.splat170, %369 - %371 = icmp slt <8 x i32> %370, %broadcast.splat172 - %372 = extractelement <8 x i32> %370, i32 0 - %373 = add nsw i32 %372, %mul.i.i.3 - %374 = sext i32 %373 to i64 - %375 = getelementptr inbounds float, float* %7, i64 %374 - %376 = bitcast float* %375 to <8 x float>* - %wide.masked.load173.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %376, i32 4, <8 x i1> %371, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %wide.masked.gather176.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat175, i32 4, <8 x i1> %371, <8 x float> undef), !tbaa !12, !alias.scope !123 - %377 = add nsw i32 %372, %mul16.i.i - %378 = sext i32 %377 to i64 - %379 = getelementptr inbounds float, float* %7, i64 %378 - %380 = bitcast float* %379 to <8 x float>* - %wide.masked.load177.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %380, i32 4, <8 x i1> %371, <8 x float> undef), !tbaa !12, !alias.scope !124 - %381 = fneg <8 x float> %wide.masked.gather176.1 - %382 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %381, <8 x float> %wide.masked.load177.1, <8 x float> %wide.masked.load173.1) - %383 = bitcast float* %375 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %382, <8 x float>* %383, i32 4, <8 x i1> %371), !tbaa !12, !alias.scope !117, !noalias !120, !llvm.access.group !24 - %384 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %385 = or <8 x i32> %384, - %386 = add <8 x i32> %broadcast.splat170, %385 - %387 = icmp slt <8 x i32> %386, %broadcast.splat172 - %388 = extractelement <8 x i32> %386, i32 0 - %389 = add nsw i32 %388, %mul.i.i.3 - %390 = sext i32 %389 to i64 - %391 = getelementptr inbounds float, float* %7, i64 %390 - %392 = bitcast float* %391 to <8 x float>* - %wide.masked.load173.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %392, i32 4, <8 x i1> %387, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %wide.masked.gather176.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat175, i32 4, <8 x i1> %387, <8 x float> undef), !tbaa !12, !alias.scope !123 - %393 = add nsw i32 %388, %mul16.i.i - %394 = sext i32 %393 to i64 - %395 = getelementptr inbounds float, float* %7, i64 %394 - %396 = bitcast float* %395 to <8 x float>* - %wide.masked.load177.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %396, i32 4, <8 x i1> %387, <8 x float> undef), !tbaa !12, !alias.scope !124 - %397 = fneg <8 x float> %wide.masked.gather176.2 - %398 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %397, <8 x float> %wide.masked.load177.2, <8 x float> %wide.masked.load173.2) - %399 = bitcast float* %391 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %398, <8 x float>* %399, i32 4, <8 x i1> %387), !tbaa !12, !alias.scope !117, !noalias !120, !llvm.access.group !24 - %400 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %401 = or <8 x i32> %400, - %402 = add <8 x i32> %broadcast.splat170, %401 - %403 = icmp slt <8 x i32> %402, %broadcast.splat172 - %404 = extractelement <8 x i32> %402, i32 0 - %405 = add nsw i32 %404, %mul.i.i.3 - %406 = sext i32 %405 to i64 - %407 = getelementptr inbounds float, float* %7, i64 %406 - %408 = bitcast float* %407 to <8 x float>* - %wide.masked.load173.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %408, i32 4, <8 x i1> %403, <8 x float> undef), !tbaa !12, !alias.scope !117, !noalias !120 - %wide.masked.gather176.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat175, i32 4, <8 x i1> %403, <8 x float> undef), !tbaa !12, !alias.scope !123 - %409 = add nsw i32 %404, %mul16.i.i - %410 = sext i32 %409 to i64 - %411 = getelementptr inbounds float, float* %7, i64 %410 - %412 = bitcast float* %411 to <8 x float>* - %wide.masked.load177.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %412, i32 4, <8 x i1> %403, <8 x float> undef), !tbaa !12, !alias.scope !124 - %413 = fneg <8 x float> %wide.masked.gather176.3 - %414 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %413, <8 x float> %wide.masked.load177.3, <8 x float> %wide.masked.load173.3) - %415 = bitcast float* %407 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %414, <8 x float>* %415, i32 4, <8 x i1> %403), !tbaa !12, !alias.scope !117, !noalias !120, !llvm.access.group !24 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.r_exit.i.i.us.3.1, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ], [ %855, %if.end.r_exit.i.i.us.3.1 ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %416 = trunc i64 %add1.i.i.i.us.3 to i32 - %conv2.i.i.us.3 = add i32 %add.i.i, %416 - %cmp9.i.i.us.3 = icmp slt i32 %conv2.i.i.us.3, %15 - br i1 %cmp9.i.i.us.3, label %if.then.i.i.us.3, label %if.end.r_exit.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %add11.i.i.us.3 = add nsw i32 %conv2.i.i.us.3, %mul.i.i.3 - %idxprom.i.i.us.3 = sext i32 %add11.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.3 - %417 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %418 = load float, float* %arrayidx15.i.i.3, align 4, !tbaa !12 - %add17.i.i.us.3 = add nsw i32 %conv2.i.i.us.3, %mul16.i.i - %idxprom18.i.i.us.3 = sext i32 %add17.i.i.us.3 to i64 - %arrayidx19.i.i.us.3 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.3 - %419 = load float, float* %arrayidx19.i.i.us.3, align 4, !tbaa !12 - %neg.i.i.us.3 = fneg float %418 - %420 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.3, float %419, float %417) #6 - store float %420, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.3 - -if.end.r_exit.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %421 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %421, %mul.i.i.i - %422 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %conv2.i.i.us.3.1 = add i32 %add.i.i, %422 - %cmp9.i.i.us.3.1 = icmp slt i32 %conv2.i.i.us.3.1, %15 - br i1 %cmp9.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.r_exit.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.r_exit.i.i.us.3.1 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph160, %pregion_for_end.i.i.2 - %423 = trunc i64 %mul3.i.i.i to i32 - %424 = or i32 %423, 4 - %conv7.i.i.4 = add i32 %add.i.i, %424 - %cmp.i.i.4 = icmp slt i32 %conv7.i.i.4, %15 - %mul.i.i.4 = mul nsw i32 %conv7.i.i.4, %15 - %add13.i.i.4 = add nsw i32 %mul.i.i.4, %11 - %idxprom14.i.i.4 = sext i32 %add13.i.i.4 to i64 - %arrayidx15.i.i.4 = getelementptr inbounds float, float* %7, i64 %idxprom14.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck188, label %pregion_for_end.i.i.4 - -vector.scevcheck188: ; preds = %pregion_for_end.i.i.3 - %425 = mul i32 %15, %conv7.i.i.4 - %426 = add i32 %11, %425 - %427 = trunc i64 %2 to i32 - %428 = shl i32 %427, 5 - %429 = add i32 %426, %428 - %430 = add i32 %429, 1 - %431 = add i32 %429, 32 - %432 = icmp slt i32 %431, %430 - %433 = add i32 %15, 1 - %434 = mul i32 %11, %433 - %435 = add i32 %434, %428 - %436 = add i32 %435, 1 - %437 = add i32 %435, 32 - %438 = icmp slt i32 %437, %436 - %439 = or i1 %432, %438 - br i1 %439, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.memcheck210 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.memcheck210, %vector.scevcheck188 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.memcheck210: ; preds = %vector.scevcheck188 - %440 = mul i32 %15, %conv7.i.i.4 - %441 = add i32 %11, %440 - %442 = trunc i64 %2 to i32 - %443 = shl i32 %442, 5 - %444 = add i32 %441, %443 - %445 = add i32 %444, 1 - %446 = sext i32 %445 to i64 - %scevgep190 = getelementptr float, float* %7, i64 %446 - %scevgep190191 = bitcast float* %scevgep190 to i8* - %447 = add nsw i64 %446, 32 - %scevgep192 = getelementptr float, float* %7, i64 %447 - %scevgep194 = getelementptr float, float* %7, i64 %idxprom14.i.i.4 - %scevgep194195 = bitcast float* %scevgep194 to i8* - %uglygep196 = getelementptr i8, i8* %scevgep194195, i64 1 - %448 = add i32 %15, 1 - %449 = mul i32 %11, %448 - %450 = add i32 %449, %443 - %451 = add i32 %450, 1 - %452 = sext i32 %451 to i64 - %scevgep197 = getelementptr float, float* %7, i64 %452 - %453 = add nsw i64 %452, 32 - %scevgep199 = getelementptr float, float* %7, i64 %453 - %bound0202 = icmp ugt i8* %uglygep196, %scevgep190191 - %bound1203 = icmp ult float* %arrayidx15.i.i.4, %scevgep192 - %found.conflict204 = and i1 %bound0202, %bound1203 - %bound0205 = icmp ult float* %scevgep190, %scevgep199 - %bound1206 = icmp ult float* %scevgep197, %scevgep192 - %found.conflict207 = and i1 %bound0205, %bound1206 - %conflict.rdx208 = or i1 %found.conflict204, %found.conflict207 - br i1 %conflict.rdx208, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph211 - -vector.ph211: ; preds = %vector.memcheck210 - %broadcast.splatinsert218 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat219 = shufflevector <8 x i64> %broadcast.splatinsert218, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert220 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat221 = shufflevector <8 x i32> %broadcast.splatinsert220, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert222 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat223 = shufflevector <8 x i32> %broadcast.splatinsert222, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert225 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.4, i32 0 - %broadcast.splat226 = shufflevector <8 x float*> %broadcast.splatinsert225, <8 x float*> undef, <8 x i32> zeroinitializer - %454 = trunc <8 x i64> %broadcast.splat219 to <8 x i32> - %455 = or <8 x i32> %454, - %456 = add <8 x i32> %broadcast.splat221, %455 - %457 = icmp slt <8 x i32> %456, %broadcast.splat223 - %458 = extractelement <8 x i32> %456, i32 0 - %459 = add nsw i32 %458, %mul.i.i.4 - %460 = sext i32 %459 to i64 - %461 = getelementptr inbounds float, float* %7, i64 %460 - %462 = bitcast float* %461 to <8 x float>* - %wide.masked.load224 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %462, i32 4, <8 x i1> %457, <8 x float> undef), !tbaa !12, !alias.scope !125, !noalias !128 - %wide.masked.gather227 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat226, i32 4, <8 x i1> %457, <8 x float> undef), !tbaa !12, !alias.scope !131 - %463 = add nsw i32 %458, %mul16.i.i - %464 = sext i32 %463 to i64 - %465 = getelementptr inbounds float, float* %7, i64 %464 - %466 = bitcast float* %465 to <8 x float>* - %wide.masked.load228 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %466, i32 4, <8 x i1> %457, <8 x float> undef), !tbaa !12, !alias.scope !132 - %467 = fneg <8 x float> %wide.masked.gather227 - %468 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %467, <8 x float> %wide.masked.load228, <8 x float> %wide.masked.load224) - %469 = bitcast float* %461 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %468, <8 x float>* %469, i32 4, <8 x i1> %457), !tbaa !12, !alias.scope !125, !noalias !128, !llvm.access.group !24 - %470 = trunc <8 x i64> %broadcast.splat219 to <8 x i32> - %471 = or <8 x i32> %470, - %472 = add <8 x i32> %broadcast.splat221, %471 - %473 = icmp slt <8 x i32> %472, %broadcast.splat223 - %474 = extractelement <8 x i32> %472, i32 0 - %475 = add nsw i32 %474, %mul.i.i.4 - %476 = sext i32 %475 to i64 - %477 = getelementptr inbounds float, float* %7, i64 %476 - %478 = bitcast float* %477 to <8 x float>* - %wide.masked.load224.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %478, i32 4, <8 x i1> %473, <8 x float> undef), !tbaa !12, !alias.scope !125, !noalias !128 - %wide.masked.gather227.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat226, i32 4, <8 x i1> %473, <8 x float> undef), !tbaa !12, !alias.scope !131 - %479 = add nsw i32 %474, %mul16.i.i - %480 = sext i32 %479 to i64 - %481 = getelementptr inbounds float, float* %7, i64 %480 - %482 = bitcast float* %481 to <8 x float>* - %wide.masked.load228.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %482, i32 4, <8 x i1> %473, <8 x float> undef), !tbaa !12, !alias.scope !132 - %483 = fneg <8 x float> %wide.masked.gather227.1 - %484 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %483, <8 x float> %wide.masked.load228.1, <8 x float> %wide.masked.load224.1) - %485 = bitcast float* %477 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %484, <8 x float>* %485, i32 4, <8 x i1> %473), !tbaa !12, !alias.scope !125, !noalias !128, !llvm.access.group !24 - %486 = trunc <8 x i64> %broadcast.splat219 to <8 x i32> - %487 = or <8 x i32> %486, - %488 = add <8 x i32> %broadcast.splat221, %487 - %489 = icmp slt <8 x i32> %488, %broadcast.splat223 - %490 = extractelement <8 x i32> %488, i32 0 - %491 = add nsw i32 %490, %mul.i.i.4 - %492 = sext i32 %491 to i64 - %493 = getelementptr inbounds float, float* %7, i64 %492 - %494 = bitcast float* %493 to <8 x float>* - %wide.masked.load224.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %494, i32 4, <8 x i1> %489, <8 x float> undef), !tbaa !12, !alias.scope !125, !noalias !128 - %wide.masked.gather227.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat226, i32 4, <8 x i1> %489, <8 x float> undef), !tbaa !12, !alias.scope !131 - %495 = add nsw i32 %490, %mul16.i.i - %496 = sext i32 %495 to i64 - %497 = getelementptr inbounds float, float* %7, i64 %496 - %498 = bitcast float* %497 to <8 x float>* - %wide.masked.load228.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %498, i32 4, <8 x i1> %489, <8 x float> undef), !tbaa !12, !alias.scope !132 - %499 = fneg <8 x float> %wide.masked.gather227.2 - %500 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %499, <8 x float> %wide.masked.load228.2, <8 x float> %wide.masked.load224.2) - %501 = bitcast float* %493 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %500, <8 x float>* %501, i32 4, <8 x i1> %489), !tbaa !12, !alias.scope !125, !noalias !128, !llvm.access.group !24 - %502 = trunc <8 x i64> %broadcast.splat219 to <8 x i32> - %503 = or <8 x i32> %502, - %504 = add <8 x i32> %broadcast.splat221, %503 - %505 = icmp slt <8 x i32> %504, %broadcast.splat223 - %506 = extractelement <8 x i32> %504, i32 0 - %507 = add nsw i32 %506, %mul.i.i.4 - %508 = sext i32 %507 to i64 - %509 = getelementptr inbounds float, float* %7, i64 %508 - %510 = bitcast float* %509 to <8 x float>* - %wide.masked.load224.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %510, i32 4, <8 x i1> %505, <8 x float> undef), !tbaa !12, !alias.scope !125, !noalias !128 - %wide.masked.gather227.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat226, i32 4, <8 x i1> %505, <8 x float> undef), !tbaa !12, !alias.scope !131 - %511 = add nsw i32 %506, %mul16.i.i - %512 = sext i32 %511 to i64 - %513 = getelementptr inbounds float, float* %7, i64 %512 - %514 = bitcast float* %513 to <8 x float>* - %wide.masked.load228.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %514, i32 4, <8 x i1> %505, <8 x float> undef), !tbaa !12, !alias.scope !132 - %515 = fneg <8 x float> %wide.masked.gather227.3 - %516 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %515, <8 x float> %wide.masked.load228.3, <8 x float> %wide.masked.load224.3) - %517 = bitcast float* %509 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %516, <8 x float>* %517, i32 4, <8 x i1> %505), !tbaa !12, !alias.scope !125, !noalias !128, !llvm.access.group !24 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.r_exit.i.i.us.4.1, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ], [ %850, %if.end.r_exit.i.i.us.4.1 ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %518 = trunc i64 %add1.i.i.i.us.4 to i32 - %conv2.i.i.us.4 = add i32 %add.i.i, %518 - %cmp9.i.i.us.4 = icmp slt i32 %conv2.i.i.us.4, %15 - br i1 %cmp9.i.i.us.4, label %if.then.i.i.us.4, label %if.end.r_exit.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %add11.i.i.us.4 = add nsw i32 %conv2.i.i.us.4, %mul.i.i.4 - %idxprom.i.i.us.4 = sext i32 %add11.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.4 - %519 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %520 = load float, float* %arrayidx15.i.i.4, align 4, !tbaa !12 - %add17.i.i.us.4 = add nsw i32 %conv2.i.i.us.4, %mul16.i.i - %idxprom18.i.i.us.4 = sext i32 %add17.i.i.us.4 to i64 - %arrayidx19.i.i.us.4 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.4 - %521 = load float, float* %arrayidx19.i.i.us.4, align 4, !tbaa !12 - %neg.i.i.us.4 = fneg float %520 - %522 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.4, float %521, float %519) #6 - store float %522, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.4 - -if.end.r_exit.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %523 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %523, %mul.i.i.i - %524 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %conv2.i.i.us.4.1 = add i32 %add.i.i, %524 - %cmp9.i.i.us.4.1 = icmp slt i32 %conv2.i.i.us.4.1, %15 - br i1 %cmp9.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.r_exit.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.r_exit.i.i.us.4.1 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph211, %pregion_for_end.i.i.3 - %525 = trunc i64 %mul3.i.i.i to i32 - %526 = or i32 %525, 5 - %conv7.i.i.5 = add i32 %add.i.i, %526 - %cmp.i.i.5 = icmp slt i32 %conv7.i.i.5, %15 - %mul.i.i.5 = mul nsw i32 %conv7.i.i.5, %15 - %add13.i.i.5 = add nsw i32 %mul.i.i.5, %11 - %idxprom14.i.i.5 = sext i32 %add13.i.i.5 to i64 - %arrayidx15.i.i.5 = getelementptr inbounds float, float* %7, i64 %idxprom14.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck239, label %pregion_for_end.i.i.5 - -vector.scevcheck239: ; preds = %pregion_for_end.i.i.4 - %527 = mul i32 %15, %conv7.i.i.5 - %528 = add i32 %11, %527 - %529 = trunc i64 %2 to i32 - %530 = shl i32 %529, 5 - %531 = add i32 %528, %530 - %532 = add i32 %531, 1 - %533 = add i32 %531, 32 - %534 = icmp slt i32 %533, %532 - %535 = add i32 %15, 1 - %536 = mul i32 %11, %535 - %537 = add i32 %536, %530 - %538 = add i32 %537, 1 - %539 = add i32 %537, 32 - %540 = icmp slt i32 %539, %538 - %541 = or i1 %534, %540 - br i1 %541, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.memcheck261 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.memcheck261, %vector.scevcheck239 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.memcheck261: ; preds = %vector.scevcheck239 - %542 = mul i32 %15, %conv7.i.i.5 - %543 = add i32 %11, %542 - %544 = trunc i64 %2 to i32 - %545 = shl i32 %544, 5 - %546 = add i32 %543, %545 - %547 = add i32 %546, 1 - %548 = sext i32 %547 to i64 - %scevgep241 = getelementptr float, float* %7, i64 %548 - %scevgep241242 = bitcast float* %scevgep241 to i8* - %549 = add nsw i64 %548, 32 - %scevgep243 = getelementptr float, float* %7, i64 %549 - %scevgep245 = getelementptr float, float* %7, i64 %idxprom14.i.i.5 - %scevgep245246 = bitcast float* %scevgep245 to i8* - %uglygep247 = getelementptr i8, i8* %scevgep245246, i64 1 - %550 = add i32 %15, 1 - %551 = mul i32 %11, %550 - %552 = add i32 %551, %545 - %553 = add i32 %552, 1 - %554 = sext i32 %553 to i64 - %scevgep248 = getelementptr float, float* %7, i64 %554 - %555 = add nsw i64 %554, 32 - %scevgep250 = getelementptr float, float* %7, i64 %555 - %bound0253 = icmp ugt i8* %uglygep247, %scevgep241242 - %bound1254 = icmp ult float* %arrayidx15.i.i.5, %scevgep243 - %found.conflict255 = and i1 %bound0253, %bound1254 - %bound0256 = icmp ult float* %scevgep241, %scevgep250 - %bound1257 = icmp ult float* %scevgep248, %scevgep243 - %found.conflict258 = and i1 %bound0256, %bound1257 - %conflict.rdx259 = or i1 %found.conflict255, %found.conflict258 - br i1 %conflict.rdx259, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph262 - -vector.ph262: ; preds = %vector.memcheck261 - %broadcast.splatinsert269 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat270 = shufflevector <8 x i64> %broadcast.splatinsert269, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert271 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat272 = shufflevector <8 x i32> %broadcast.splatinsert271, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert273 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat274 = shufflevector <8 x i32> %broadcast.splatinsert273, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert276 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.5, i32 0 - %broadcast.splat277 = shufflevector <8 x float*> %broadcast.splatinsert276, <8 x float*> undef, <8 x i32> zeroinitializer - %556 = trunc <8 x i64> %broadcast.splat270 to <8 x i32> - %557 = or <8 x i32> %556, - %558 = add <8 x i32> %broadcast.splat272, %557 - %559 = icmp slt <8 x i32> %558, %broadcast.splat274 - %560 = extractelement <8 x i32> %558, i32 0 - %561 = add nsw i32 %560, %mul.i.i.5 - %562 = sext i32 %561 to i64 - %563 = getelementptr inbounds float, float* %7, i64 %562 - %564 = bitcast float* %563 to <8 x float>* - %wide.masked.load275 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %564, i32 4, <8 x i1> %559, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !136 - %wide.masked.gather278 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat277, i32 4, <8 x i1> %559, <8 x float> undef), !tbaa !12, !alias.scope !139 - %565 = add nsw i32 %560, %mul16.i.i - %566 = sext i32 %565 to i64 - %567 = getelementptr inbounds float, float* %7, i64 %566 - %568 = bitcast float* %567 to <8 x float>* - %wide.masked.load279 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %568, i32 4, <8 x i1> %559, <8 x float> undef), !tbaa !12, !alias.scope !140 - %569 = fneg <8 x float> %wide.masked.gather278 - %570 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %569, <8 x float> %wide.masked.load279, <8 x float> %wide.masked.load275) - %571 = bitcast float* %563 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %570, <8 x float>* %571, i32 4, <8 x i1> %559), !tbaa !12, !alias.scope !133, !noalias !136, !llvm.access.group !24 - %572 = trunc <8 x i64> %broadcast.splat270 to <8 x i32> - %573 = or <8 x i32> %572, - %574 = add <8 x i32> %broadcast.splat272, %573 - %575 = icmp slt <8 x i32> %574, %broadcast.splat274 - %576 = extractelement <8 x i32> %574, i32 0 - %577 = add nsw i32 %576, %mul.i.i.5 - %578 = sext i32 %577 to i64 - %579 = getelementptr inbounds float, float* %7, i64 %578 - %580 = bitcast float* %579 to <8 x float>* - %wide.masked.load275.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %580, i32 4, <8 x i1> %575, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !136 - %wide.masked.gather278.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat277, i32 4, <8 x i1> %575, <8 x float> undef), !tbaa !12, !alias.scope !139 - %581 = add nsw i32 %576, %mul16.i.i - %582 = sext i32 %581 to i64 - %583 = getelementptr inbounds float, float* %7, i64 %582 - %584 = bitcast float* %583 to <8 x float>* - %wide.masked.load279.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %584, i32 4, <8 x i1> %575, <8 x float> undef), !tbaa !12, !alias.scope !140 - %585 = fneg <8 x float> %wide.masked.gather278.1 - %586 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %585, <8 x float> %wide.masked.load279.1, <8 x float> %wide.masked.load275.1) - %587 = bitcast float* %579 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %586, <8 x float>* %587, i32 4, <8 x i1> %575), !tbaa !12, !alias.scope !133, !noalias !136, !llvm.access.group !24 - %588 = trunc <8 x i64> %broadcast.splat270 to <8 x i32> - %589 = or <8 x i32> %588, - %590 = add <8 x i32> %broadcast.splat272, %589 - %591 = icmp slt <8 x i32> %590, %broadcast.splat274 - %592 = extractelement <8 x i32> %590, i32 0 - %593 = add nsw i32 %592, %mul.i.i.5 - %594 = sext i32 %593 to i64 - %595 = getelementptr inbounds float, float* %7, i64 %594 - %596 = bitcast float* %595 to <8 x float>* - %wide.masked.load275.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %596, i32 4, <8 x i1> %591, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !136 - %wide.masked.gather278.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat277, i32 4, <8 x i1> %591, <8 x float> undef), !tbaa !12, !alias.scope !139 - %597 = add nsw i32 %592, %mul16.i.i - %598 = sext i32 %597 to i64 - %599 = getelementptr inbounds float, float* %7, i64 %598 - %600 = bitcast float* %599 to <8 x float>* - %wide.masked.load279.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %600, i32 4, <8 x i1> %591, <8 x float> undef), !tbaa !12, !alias.scope !140 - %601 = fneg <8 x float> %wide.masked.gather278.2 - %602 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %601, <8 x float> %wide.masked.load279.2, <8 x float> %wide.masked.load275.2) - %603 = bitcast float* %595 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %602, <8 x float>* %603, i32 4, <8 x i1> %591), !tbaa !12, !alias.scope !133, !noalias !136, !llvm.access.group !24 - %604 = trunc <8 x i64> %broadcast.splat270 to <8 x i32> - %605 = or <8 x i32> %604, - %606 = add <8 x i32> %broadcast.splat272, %605 - %607 = icmp slt <8 x i32> %606, %broadcast.splat274 - %608 = extractelement <8 x i32> %606, i32 0 - %609 = add nsw i32 %608, %mul.i.i.5 - %610 = sext i32 %609 to i64 - %611 = getelementptr inbounds float, float* %7, i64 %610 - %612 = bitcast float* %611 to <8 x float>* - %wide.masked.load275.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %612, i32 4, <8 x i1> %607, <8 x float> undef), !tbaa !12, !alias.scope !133, !noalias !136 - %wide.masked.gather278.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat277, i32 4, <8 x i1> %607, <8 x float> undef), !tbaa !12, !alias.scope !139 - %613 = add nsw i32 %608, %mul16.i.i - %614 = sext i32 %613 to i64 - %615 = getelementptr inbounds float, float* %7, i64 %614 - %616 = bitcast float* %615 to <8 x float>* - %wide.masked.load279.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %616, i32 4, <8 x i1> %607, <8 x float> undef), !tbaa !12, !alias.scope !140 - %617 = fneg <8 x float> %wide.masked.gather278.3 - %618 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %617, <8 x float> %wide.masked.load279.3, <8 x float> %wide.masked.load275.3) - %619 = bitcast float* %611 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %618, <8 x float>* %619, i32 4, <8 x i1> %607), !tbaa !12, !alias.scope !133, !noalias !136, !llvm.access.group !24 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.r_exit.i.i.us.5.1, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ], [ %845, %if.end.r_exit.i.i.us.5.1 ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %620 = trunc i64 %add1.i.i.i.us.5 to i32 - %conv2.i.i.us.5 = add i32 %add.i.i, %620 - %cmp9.i.i.us.5 = icmp slt i32 %conv2.i.i.us.5, %15 - br i1 %cmp9.i.i.us.5, label %if.then.i.i.us.5, label %if.end.r_exit.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %add11.i.i.us.5 = add nsw i32 %conv2.i.i.us.5, %mul.i.i.5 - %idxprom.i.i.us.5 = sext i32 %add11.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.5 - %621 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %622 = load float, float* %arrayidx15.i.i.5, align 4, !tbaa !12 - %add17.i.i.us.5 = add nsw i32 %conv2.i.i.us.5, %mul16.i.i - %idxprom18.i.i.us.5 = sext i32 %add17.i.i.us.5 to i64 - %arrayidx19.i.i.us.5 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.5 - %623 = load float, float* %arrayidx19.i.i.us.5, align 4, !tbaa !12 - %neg.i.i.us.5 = fneg float %622 - %624 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.5, float %623, float %621) #6 - store float %624, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.5 - -if.end.r_exit.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %625 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %625, %mul.i.i.i - %626 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %conv2.i.i.us.5.1 = add i32 %add.i.i, %626 - %cmp9.i.i.us.5.1 = icmp slt i32 %conv2.i.i.us.5.1, %15 - br i1 %cmp9.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.r_exit.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.r_exit.i.i.us.5.1 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph262, %pregion_for_end.i.i.4 - %627 = trunc i64 %mul3.i.i.i to i32 - %628 = or i32 %627, 6 - %conv7.i.i.6 = add i32 %add.i.i, %628 - %cmp.i.i.6 = icmp slt i32 %conv7.i.i.6, %15 - %mul.i.i.6 = mul nsw i32 %conv7.i.i.6, %15 - %add13.i.i.6 = add nsw i32 %mul.i.i.6, %11 - %idxprom14.i.i.6 = sext i32 %add13.i.i.6 to i64 - %arrayidx15.i.i.6 = getelementptr inbounds float, float* %7, i64 %idxprom14.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck290, label %pregion_for_end.i.i.6 - -vector.scevcheck290: ; preds = %pregion_for_end.i.i.5 - %629 = mul i32 %15, %conv7.i.i.6 - %630 = add i32 %11, %629 - %631 = trunc i64 %2 to i32 - %632 = shl i32 %631, 5 - %633 = add i32 %630, %632 - %634 = add i32 %633, 1 - %635 = add i32 %633, 32 - %636 = icmp slt i32 %635, %634 - %637 = add i32 %15, 1 - %638 = mul i32 %11, %637 - %639 = add i32 %638, %632 - %640 = add i32 %639, 1 - %641 = add i32 %639, 32 - %642 = icmp slt i32 %641, %640 - %643 = or i1 %636, %642 - br i1 %643, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.memcheck312 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.memcheck312, %vector.scevcheck290 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.memcheck312: ; preds = %vector.scevcheck290 - %644 = mul i32 %15, %conv7.i.i.6 - %645 = add i32 %11, %644 - %646 = trunc i64 %2 to i32 - %647 = shl i32 %646, 5 - %648 = add i32 %645, %647 - %649 = add i32 %648, 1 - %650 = sext i32 %649 to i64 - %scevgep292 = getelementptr float, float* %7, i64 %650 - %scevgep292293 = bitcast float* %scevgep292 to i8* - %651 = add nsw i64 %650, 32 - %scevgep294 = getelementptr float, float* %7, i64 %651 - %scevgep296 = getelementptr float, float* %7, i64 %idxprom14.i.i.6 - %scevgep296297 = bitcast float* %scevgep296 to i8* - %uglygep298 = getelementptr i8, i8* %scevgep296297, i64 1 - %652 = add i32 %15, 1 - %653 = mul i32 %11, %652 - %654 = add i32 %653, %647 - %655 = add i32 %654, 1 - %656 = sext i32 %655 to i64 - %scevgep299 = getelementptr float, float* %7, i64 %656 - %657 = add nsw i64 %656, 32 - %scevgep301 = getelementptr float, float* %7, i64 %657 - %bound0304 = icmp ugt i8* %uglygep298, %scevgep292293 - %bound1305 = icmp ult float* %arrayidx15.i.i.6, %scevgep294 - %found.conflict306 = and i1 %bound0304, %bound1305 - %bound0307 = icmp ult float* %scevgep292, %scevgep301 - %bound1308 = icmp ult float* %scevgep299, %scevgep294 - %found.conflict309 = and i1 %bound0307, %bound1308 - %conflict.rdx310 = or i1 %found.conflict306, %found.conflict309 - br i1 %conflict.rdx310, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph313 - -vector.ph313: ; preds = %vector.memcheck312 - %broadcast.splatinsert320 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat321 = shufflevector <8 x i64> %broadcast.splatinsert320, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert322 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat323 = shufflevector <8 x i32> %broadcast.splatinsert322, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert324 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat325 = shufflevector <8 x i32> %broadcast.splatinsert324, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert327 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.6, i32 0 - %broadcast.splat328 = shufflevector <8 x float*> %broadcast.splatinsert327, <8 x float*> undef, <8 x i32> zeroinitializer - %658 = trunc <8 x i64> %broadcast.splat321 to <8 x i32> - %659 = or <8 x i32> %658, - %660 = add <8 x i32> %broadcast.splat323, %659 - %661 = icmp slt <8 x i32> %660, %broadcast.splat325 - %662 = extractelement <8 x i32> %660, i32 0 - %663 = add nsw i32 %662, %mul.i.i.6 - %664 = sext i32 %663 to i64 - %665 = getelementptr inbounds float, float* %7, i64 %664 - %666 = bitcast float* %665 to <8 x float>* - %wide.masked.load326 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %666, i32 4, <8 x i1> %661, <8 x float> undef), !tbaa !12, !alias.scope !141, !noalias !144 - %wide.masked.gather329 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat328, i32 4, <8 x i1> %661, <8 x float> undef), !tbaa !12, !alias.scope !147 - %667 = add nsw i32 %662, %mul16.i.i - %668 = sext i32 %667 to i64 - %669 = getelementptr inbounds float, float* %7, i64 %668 - %670 = bitcast float* %669 to <8 x float>* - %wide.masked.load330 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %670, i32 4, <8 x i1> %661, <8 x float> undef), !tbaa !12, !alias.scope !148 - %671 = fneg <8 x float> %wide.masked.gather329 - %672 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %671, <8 x float> %wide.masked.load330, <8 x float> %wide.masked.load326) - %673 = bitcast float* %665 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %672, <8 x float>* %673, i32 4, <8 x i1> %661), !tbaa !12, !alias.scope !141, !noalias !144, !llvm.access.group !24 - %674 = trunc <8 x i64> %broadcast.splat321 to <8 x i32> - %675 = or <8 x i32> %674, - %676 = add <8 x i32> %broadcast.splat323, %675 - %677 = icmp slt <8 x i32> %676, %broadcast.splat325 - %678 = extractelement <8 x i32> %676, i32 0 - %679 = add nsw i32 %678, %mul.i.i.6 - %680 = sext i32 %679 to i64 - %681 = getelementptr inbounds float, float* %7, i64 %680 - %682 = bitcast float* %681 to <8 x float>* - %wide.masked.load326.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %682, i32 4, <8 x i1> %677, <8 x float> undef), !tbaa !12, !alias.scope !141, !noalias !144 - %wide.masked.gather329.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat328, i32 4, <8 x i1> %677, <8 x float> undef), !tbaa !12, !alias.scope !147 - %683 = add nsw i32 %678, %mul16.i.i - %684 = sext i32 %683 to i64 - %685 = getelementptr inbounds float, float* %7, i64 %684 - %686 = bitcast float* %685 to <8 x float>* - %wide.masked.load330.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %686, i32 4, <8 x i1> %677, <8 x float> undef), !tbaa !12, !alias.scope !148 - %687 = fneg <8 x float> %wide.masked.gather329.1 - %688 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %687, <8 x float> %wide.masked.load330.1, <8 x float> %wide.masked.load326.1) - %689 = bitcast float* %681 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %688, <8 x float>* %689, i32 4, <8 x i1> %677), !tbaa !12, !alias.scope !141, !noalias !144, !llvm.access.group !24 - %690 = trunc <8 x i64> %broadcast.splat321 to <8 x i32> - %691 = or <8 x i32> %690, - %692 = add <8 x i32> %broadcast.splat323, %691 - %693 = icmp slt <8 x i32> %692, %broadcast.splat325 - %694 = extractelement <8 x i32> %692, i32 0 - %695 = add nsw i32 %694, %mul.i.i.6 - %696 = sext i32 %695 to i64 - %697 = getelementptr inbounds float, float* %7, i64 %696 - %698 = bitcast float* %697 to <8 x float>* - %wide.masked.load326.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %698, i32 4, <8 x i1> %693, <8 x float> undef), !tbaa !12, !alias.scope !141, !noalias !144 - %wide.masked.gather329.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat328, i32 4, <8 x i1> %693, <8 x float> undef), !tbaa !12, !alias.scope !147 - %699 = add nsw i32 %694, %mul16.i.i - %700 = sext i32 %699 to i64 - %701 = getelementptr inbounds float, float* %7, i64 %700 - %702 = bitcast float* %701 to <8 x float>* - %wide.masked.load330.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %702, i32 4, <8 x i1> %693, <8 x float> undef), !tbaa !12, !alias.scope !148 - %703 = fneg <8 x float> %wide.masked.gather329.2 - %704 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %703, <8 x float> %wide.masked.load330.2, <8 x float> %wide.masked.load326.2) - %705 = bitcast float* %697 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %704, <8 x float>* %705, i32 4, <8 x i1> %693), !tbaa !12, !alias.scope !141, !noalias !144, !llvm.access.group !24 - %706 = trunc <8 x i64> %broadcast.splat321 to <8 x i32> - %707 = or <8 x i32> %706, - %708 = add <8 x i32> %broadcast.splat323, %707 - %709 = icmp slt <8 x i32> %708, %broadcast.splat325 - %710 = extractelement <8 x i32> %708, i32 0 - %711 = add nsw i32 %710, %mul.i.i.6 - %712 = sext i32 %711 to i64 - %713 = getelementptr inbounds float, float* %7, i64 %712 - %714 = bitcast float* %713 to <8 x float>* - %wide.masked.load326.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %714, i32 4, <8 x i1> %709, <8 x float> undef), !tbaa !12, !alias.scope !141, !noalias !144 - %wide.masked.gather329.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat328, i32 4, <8 x i1> %709, <8 x float> undef), !tbaa !12, !alias.scope !147 - %715 = add nsw i32 %710, %mul16.i.i - %716 = sext i32 %715 to i64 - %717 = getelementptr inbounds float, float* %7, i64 %716 - %718 = bitcast float* %717 to <8 x float>* - %wide.masked.load330.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %718, i32 4, <8 x i1> %709, <8 x float> undef), !tbaa !12, !alias.scope !148 - %719 = fneg <8 x float> %wide.masked.gather329.3 - %720 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %719, <8 x float> %wide.masked.load330.3, <8 x float> %wide.masked.load326.3) - %721 = bitcast float* %713 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %720, <8 x float>* %721, i32 4, <8 x i1> %709), !tbaa !12, !alias.scope !141, !noalias !144, !llvm.access.group !24 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.r_exit.i.i.us.6.1, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ], [ %840, %if.end.r_exit.i.i.us.6.1 ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %722 = trunc i64 %add1.i.i.i.us.6 to i32 - %conv2.i.i.us.6 = add i32 %add.i.i, %722 - %cmp9.i.i.us.6 = icmp slt i32 %conv2.i.i.us.6, %15 - br i1 %cmp9.i.i.us.6, label %if.then.i.i.us.6, label %if.end.r_exit.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %add11.i.i.us.6 = add nsw i32 %conv2.i.i.us.6, %mul.i.i.6 - %idxprom.i.i.us.6 = sext i32 %add11.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.6 - %723 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %724 = load float, float* %arrayidx15.i.i.6, align 4, !tbaa !12 - %add17.i.i.us.6 = add nsw i32 %conv2.i.i.us.6, %mul16.i.i - %idxprom18.i.i.us.6 = sext i32 %add17.i.i.us.6 to i64 - %arrayidx19.i.i.us.6 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.6 - %725 = load float, float* %arrayidx19.i.i.us.6, align 4, !tbaa !12 - %neg.i.i.us.6 = fneg float %724 - %726 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.6, float %725, float %723) #6 - store float %726, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.6 - -if.end.r_exit.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %727 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %727, %mul.i.i.i - %728 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %conv2.i.i.us.6.1 = add i32 %add.i.i, %728 - %cmp9.i.i.us.6.1 = icmp slt i32 %conv2.i.i.us.6.1, %15 - br i1 %cmp9.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.r_exit.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.r_exit.i.i.us.6.1 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph313, %pregion_for_end.i.i.5 - %729 = trunc i64 %mul3.i.i.i to i32 - %730 = or i32 %729, 7 - %conv7.i.i.7 = add i32 %add.i.i, %730 - %cmp.i.i.7 = icmp slt i32 %conv7.i.i.7, %15 - %mul.i.i.7 = mul nsw i32 %conv7.i.i.7, %15 - %add13.i.i.7 = add nsw i32 %mul.i.i.7, %11 - %idxprom14.i.i.7 = sext i32 %add13.i.i.7 to i64 - %arrayidx15.i.i.7 = getelementptr inbounds float, float* %7, i64 %idxprom14.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck341, label %pregion_for_end.i.i.7 - -vector.scevcheck341: ; preds = %pregion_for_end.i.i.6 - %731 = mul i32 %15, %conv7.i.i.7 - %732 = add i32 %11, %731 - %733 = trunc i64 %2 to i32 - %734 = shl i32 %733, 5 - %735 = add i32 %732, %734 - %736 = add i32 %735, 1 - %737 = add i32 %735, 32 - %738 = icmp slt i32 %737, %736 - %739 = add i32 %15, 1 - %740 = mul i32 %11, %739 - %741 = add i32 %740, %734 - %742 = add i32 %741, 1 - %743 = add i32 %741, 32 - %744 = icmp slt i32 %743, %742 - %745 = or i1 %738, %744 - br i1 %745, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.memcheck363 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.memcheck363, %vector.scevcheck341 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.memcheck363: ; preds = %vector.scevcheck341 - %746 = mul i32 %15, %conv7.i.i.7 - %747 = add i32 %11, %746 - %748 = trunc i64 %2 to i32 - %749 = shl i32 %748, 5 - %750 = add i32 %747, %749 - %751 = add i32 %750, 1 - %752 = sext i32 %751 to i64 - %scevgep343 = getelementptr float, float* %7, i64 %752 - %scevgep343344 = bitcast float* %scevgep343 to i8* - %753 = add nsw i64 %752, 32 - %scevgep345 = getelementptr float, float* %7, i64 %753 - %scevgep347 = getelementptr float, float* %7, i64 %idxprom14.i.i.7 - %scevgep347348 = bitcast float* %scevgep347 to i8* - %uglygep349 = getelementptr i8, i8* %scevgep347348, i64 1 - %754 = add i32 %15, 1 - %755 = mul i32 %11, %754 - %756 = add i32 %755, %749 - %757 = add i32 %756, 1 - %758 = sext i32 %757 to i64 - %scevgep350 = getelementptr float, float* %7, i64 %758 - %759 = add nsw i64 %758, 32 - %scevgep352 = getelementptr float, float* %7, i64 %759 - %bound0355 = icmp ugt i8* %uglygep349, %scevgep343344 - %bound1356 = icmp ult float* %arrayidx15.i.i.7, %scevgep345 - %found.conflict357 = and i1 %bound0355, %bound1356 - %bound0358 = icmp ult float* %scevgep343, %scevgep352 - %bound1359 = icmp ult float* %scevgep350, %scevgep345 - %found.conflict360 = and i1 %bound0358, %bound1359 - %conflict.rdx361 = or i1 %found.conflict357, %found.conflict360 - br i1 %conflict.rdx361, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph364 - -vector.ph364: ; preds = %vector.memcheck363 - %broadcast.splatinsert371 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat372 = shufflevector <8 x i64> %broadcast.splatinsert371, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert373 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat374 = shufflevector <8 x i32> %broadcast.splatinsert373, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert375 = insertelement <8 x i32> undef, i32 %15, i32 0 - %broadcast.splat376 = shufflevector <8 x i32> %broadcast.splatinsert375, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert378 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.7, i32 0 - %broadcast.splat379 = shufflevector <8 x float*> %broadcast.splatinsert378, <8 x float*> undef, <8 x i32> zeroinitializer - %760 = trunc <8 x i64> %broadcast.splat372 to <8 x i32> - %761 = or <8 x i32> %760, - %762 = add <8 x i32> %broadcast.splat374, %761 - %763 = icmp slt <8 x i32> %762, %broadcast.splat376 - %764 = extractelement <8 x i32> %762, i32 0 - %765 = add nsw i32 %764, %mul.i.i.7 - %766 = sext i32 %765 to i64 - %767 = getelementptr inbounds float, float* %7, i64 %766 - %768 = bitcast float* %767 to <8 x float>* - %wide.masked.load377 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %768, i32 4, <8 x i1> %763, <8 x float> undef), !tbaa !12, !alias.scope !149, !noalias !152 - %wide.masked.gather380 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat379, i32 4, <8 x i1> %763, <8 x float> undef), !tbaa !12, !alias.scope !155 - %769 = add nsw i32 %764, %mul16.i.i - %770 = sext i32 %769 to i64 - %771 = getelementptr inbounds float, float* %7, i64 %770 - %772 = bitcast float* %771 to <8 x float>* - %wide.masked.load381 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %772, i32 4, <8 x i1> %763, <8 x float> undef), !tbaa !12, !alias.scope !156 - %773 = fneg <8 x float> %wide.masked.gather380 - %774 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %773, <8 x float> %wide.masked.load381, <8 x float> %wide.masked.load377) - %775 = bitcast float* %767 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %774, <8 x float>* %775, i32 4, <8 x i1> %763), !tbaa !12, !alias.scope !149, !noalias !152, !llvm.access.group !24 - %776 = trunc <8 x i64> %broadcast.splat372 to <8 x i32> - %777 = or <8 x i32> %776, - %778 = add <8 x i32> %broadcast.splat374, %777 - %779 = icmp slt <8 x i32> %778, %broadcast.splat376 - %780 = extractelement <8 x i32> %778, i32 0 - %781 = add nsw i32 %780, %mul.i.i.7 - %782 = sext i32 %781 to i64 - %783 = getelementptr inbounds float, float* %7, i64 %782 - %784 = bitcast float* %783 to <8 x float>* - %wide.masked.load377.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %784, i32 4, <8 x i1> %779, <8 x float> undef), !tbaa !12, !alias.scope !149, !noalias !152 - %wide.masked.gather380.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat379, i32 4, <8 x i1> %779, <8 x float> undef), !tbaa !12, !alias.scope !155 - %785 = add nsw i32 %780, %mul16.i.i - %786 = sext i32 %785 to i64 - %787 = getelementptr inbounds float, float* %7, i64 %786 - %788 = bitcast float* %787 to <8 x float>* - %wide.masked.load381.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %788, i32 4, <8 x i1> %779, <8 x float> undef), !tbaa !12, !alias.scope !156 - %789 = fneg <8 x float> %wide.masked.gather380.1 - %790 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %789, <8 x float> %wide.masked.load381.1, <8 x float> %wide.masked.load377.1) - %791 = bitcast float* %783 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %790, <8 x float>* %791, i32 4, <8 x i1> %779), !tbaa !12, !alias.scope !149, !noalias !152, !llvm.access.group !24 - %792 = trunc <8 x i64> %broadcast.splat372 to <8 x i32> - %793 = or <8 x i32> %792, - %794 = add <8 x i32> %broadcast.splat374, %793 - %795 = icmp slt <8 x i32> %794, %broadcast.splat376 - %796 = extractelement <8 x i32> %794, i32 0 - %797 = add nsw i32 %796, %mul.i.i.7 - %798 = sext i32 %797 to i64 - %799 = getelementptr inbounds float, float* %7, i64 %798 - %800 = bitcast float* %799 to <8 x float>* - %wide.masked.load377.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %800, i32 4, <8 x i1> %795, <8 x float> undef), !tbaa !12, !alias.scope !149, !noalias !152 - %wide.masked.gather380.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat379, i32 4, <8 x i1> %795, <8 x float> undef), !tbaa !12, !alias.scope !155 - %801 = add nsw i32 %796, %mul16.i.i - %802 = sext i32 %801 to i64 - %803 = getelementptr inbounds float, float* %7, i64 %802 - %804 = bitcast float* %803 to <8 x float>* - %wide.masked.load381.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %804, i32 4, <8 x i1> %795, <8 x float> undef), !tbaa !12, !alias.scope !156 - %805 = fneg <8 x float> %wide.masked.gather380.2 - %806 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %805, <8 x float> %wide.masked.load381.2, <8 x float> %wide.masked.load377.2) - %807 = bitcast float* %799 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %806, <8 x float>* %807, i32 4, <8 x i1> %795), !tbaa !12, !alias.scope !149, !noalias !152, !llvm.access.group !24 - %808 = trunc <8 x i64> %broadcast.splat372 to <8 x i32> - %809 = or <8 x i32> %808, - %810 = add <8 x i32> %broadcast.splat374, %809 - %811 = icmp slt <8 x i32> %810, %broadcast.splat376 - %812 = extractelement <8 x i32> %810, i32 0 - %813 = add nsw i32 %812, %mul.i.i.7 - %814 = sext i32 %813 to i64 - %815 = getelementptr inbounds float, float* %7, i64 %814 - %816 = bitcast float* %815 to <8 x float>* - %wide.masked.load377.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %816, i32 4, <8 x i1> %811, <8 x float> undef), !tbaa !12, !alias.scope !149, !noalias !152 - %wide.masked.gather380.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat379, i32 4, <8 x i1> %811, <8 x float> undef), !tbaa !12, !alias.scope !155 - %817 = add nsw i32 %812, %mul16.i.i - %818 = sext i32 %817 to i64 - %819 = getelementptr inbounds float, float* %7, i64 %818 - %820 = bitcast float* %819 to <8 x float>* - %wide.masked.load381.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %820, i32 4, <8 x i1> %811, <8 x float> undef), !tbaa !12, !alias.scope !156 - %821 = fneg <8 x float> %wide.masked.gather380.3 - %822 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %821, <8 x float> %wide.masked.load381.3, <8 x float> %wide.masked.load377.3) - %823 = bitcast float* %815 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %822, <8 x float>* %823, i32 4, <8 x i1> %811), !tbaa !12, !alias.scope !149, !noalias !152, !llvm.access.group !24 - br label %pregion_for_end.i.i.7 - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.r_exit.i.i.us.7.1, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ], [ %835, %if.end.r_exit.i.i.us.7.1 ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %824 = trunc i64 %add1.i.i.i.us.7 to i32 - %conv2.i.i.us.7 = add i32 %add.i.i, %824 - %cmp9.i.i.us.7 = icmp slt i32 %conv2.i.i.us.7, %15 - br i1 %cmp9.i.i.us.7, label %if.then.i.i.us.7, label %if.end.r_exit.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %add11.i.i.us.7 = add nsw i32 %conv2.i.i.us.7, %mul.i.i.7 - %idxprom.i.i.us.7 = sext i32 %add11.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.7 - %825 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %826 = load float, float* %arrayidx15.i.i.7, align 4, !tbaa !12 - %add17.i.i.us.7 = add nsw i32 %conv2.i.i.us.7, %mul16.i.i - %idxprom18.i.i.us.7 = sext i32 %add17.i.i.us.7 to i64 - %arrayidx19.i.i.us.7 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.7 - %827 = load float, float* %arrayidx19.i.i.us.7, align 4, !tbaa !12 - %neg.i.i.us.7 = fneg float %826 - %828 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.7, float %827, float %825) #6 - store float %828, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.7 - -if.end.r_exit.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %829 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %829, %mul.i.i.i - %830 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %conv2.i.i.us.7.1 = add i32 %add.i.i, %830 - %cmp9.i.i.us.7.1 = icmp slt i32 %conv2.i.i.us.7.1, %15 - br i1 %cmp9.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.r_exit.i.i.us.7.1 - -pregion_for_end.i.i.7.loopexit: ; preds = %if.end.r_exit.i.i.us.7.1 - br label %pregion_for_end.i.i.7 - -pregion_for_end.i.i.7: ; preds = %pregion_for_end.i.i.7.loopexit, %vector.ph364, %pregion_for_end.i.i.6 - ret void - -if.then.i.i.us.7.1: ; preds = %if.end.r_exit.i.i.us.7 - %add11.i.i.us.7.1 = add nsw i32 %conv2.i.i.us.7.1, %mul.i.i.7 - %idxprom.i.i.us.7.1 = sext i32 %add11.i.i.us.7.1 to i64 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.7.1 - %831 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %832 = load float, float* %arrayidx15.i.i.7, align 4, !tbaa !12 - %add17.i.i.us.7.1 = add nsw i32 %conv2.i.i.us.7.1, %mul16.i.i - %idxprom18.i.i.us.7.1 = sext i32 %add17.i.i.us.7.1 to i64 - %arrayidx19.i.i.us.7.1 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.7.1 - %833 = load float, float* %arrayidx19.i.i.us.7.1, align 4, !tbaa !12 - %neg.i.i.us.7.1 = fneg float %832 - %834 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.7.1, float %833, float %831) #6 - store float %834, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.7.1 - -if.end.r_exit.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.r_exit.i.i.us.7 - %835 = add nuw nsw i64 %_local_id_x.i.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %835, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.i.7.loopexit, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !157 - -if.then.i.i.us.6.1: ; preds = %if.end.r_exit.i.i.us.6 - %add11.i.i.us.6.1 = add nsw i32 %conv2.i.i.us.6.1, %mul.i.i.6 - %idxprom.i.i.us.6.1 = sext i32 %add11.i.i.us.6.1 to i64 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.6.1 - %836 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %837 = load float, float* %arrayidx15.i.i.6, align 4, !tbaa !12 - %add17.i.i.us.6.1 = add nsw i32 %conv2.i.i.us.6.1, %mul16.i.i - %idxprom18.i.i.us.6.1 = sext i32 %add17.i.i.us.6.1 to i64 - %arrayidx19.i.i.us.6.1 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.6.1 - %838 = load float, float* %arrayidx19.i.i.us.6.1, align 4, !tbaa !12 - %neg.i.i.us.6.1 = fneg float %837 - %839 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.6.1, float %838, float %836) #6 - store float %839, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.6.1 - -if.end.r_exit.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.r_exit.i.i.us.6 - %840 = add nuw nsw i64 %_local_id_x.i.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %840, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !158 - -if.then.i.i.us.5.1: ; preds = %if.end.r_exit.i.i.us.5 - %add11.i.i.us.5.1 = add nsw i32 %conv2.i.i.us.5.1, %mul.i.i.5 - %idxprom.i.i.us.5.1 = sext i32 %add11.i.i.us.5.1 to i64 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.5.1 - %841 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %842 = load float, float* %arrayidx15.i.i.5, align 4, !tbaa !12 - %add17.i.i.us.5.1 = add nsw i32 %conv2.i.i.us.5.1, %mul16.i.i - %idxprom18.i.i.us.5.1 = sext i32 %add17.i.i.us.5.1 to i64 - %arrayidx19.i.i.us.5.1 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.5.1 - %843 = load float, float* %arrayidx19.i.i.us.5.1, align 4, !tbaa !12 - %neg.i.i.us.5.1 = fneg float %842 - %844 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.5.1, float %843, float %841) #6 - store float %844, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.5.1 - -if.end.r_exit.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.r_exit.i.i.us.5 - %845 = add nuw nsw i64 %_local_id_x.i.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %845, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !159 - -if.then.i.i.us.4.1: ; preds = %if.end.r_exit.i.i.us.4 - %add11.i.i.us.4.1 = add nsw i32 %conv2.i.i.us.4.1, %mul.i.i.4 - %idxprom.i.i.us.4.1 = sext i32 %add11.i.i.us.4.1 to i64 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.4.1 - %846 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %847 = load float, float* %arrayidx15.i.i.4, align 4, !tbaa !12 - %add17.i.i.us.4.1 = add nsw i32 %conv2.i.i.us.4.1, %mul16.i.i - %idxprom18.i.i.us.4.1 = sext i32 %add17.i.i.us.4.1 to i64 - %arrayidx19.i.i.us.4.1 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.4.1 - %848 = load float, float* %arrayidx19.i.i.us.4.1, align 4, !tbaa !12 - %neg.i.i.us.4.1 = fneg float %847 - %849 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.4.1, float %848, float %846) #6 - store float %849, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.4.1 - -if.end.r_exit.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.r_exit.i.i.us.4 - %850 = add nuw nsw i64 %_local_id_x.i.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %850, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !160 - -if.then.i.i.us.3.1: ; preds = %if.end.r_exit.i.i.us.3 - %add11.i.i.us.3.1 = add nsw i32 %conv2.i.i.us.3.1, %mul.i.i.3 - %idxprom.i.i.us.3.1 = sext i32 %add11.i.i.us.3.1 to i64 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.3.1 - %851 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %852 = load float, float* %arrayidx15.i.i.3, align 4, !tbaa !12 - %add17.i.i.us.3.1 = add nsw i32 %conv2.i.i.us.3.1, %mul16.i.i - %idxprom18.i.i.us.3.1 = sext i32 %add17.i.i.us.3.1 to i64 - %arrayidx19.i.i.us.3.1 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.3.1 - %853 = load float, float* %arrayidx19.i.i.us.3.1, align 4, !tbaa !12 - %neg.i.i.us.3.1 = fneg float %852 - %854 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.3.1, float %853, float %851) #6 - store float %854, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.3.1 - -if.end.r_exit.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.r_exit.i.i.us.3 - %855 = add nuw nsw i64 %_local_id_x.i.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %855, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !161 - -if.then.i.i.us.2.1: ; preds = %if.end.r_exit.i.i.us.2 - %add11.i.i.us.2.1 = add nsw i32 %conv2.i.i.us.2.1, %mul.i.i.2 - %idxprom.i.i.us.2.1 = sext i32 %add11.i.i.us.2.1 to i64 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.2.1 - %856 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %857 = load float, float* %arrayidx15.i.i.2, align 4, !tbaa !12 - %add17.i.i.us.2.1 = add nsw i32 %conv2.i.i.us.2.1, %mul16.i.i - %idxprom18.i.i.us.2.1 = sext i32 %add17.i.i.us.2.1 to i64 - %arrayidx19.i.i.us.2.1 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.2.1 - %858 = load float, float* %arrayidx19.i.i.us.2.1, align 4, !tbaa !12 - %neg.i.i.us.2.1 = fneg float %857 - %859 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.2.1, float %858, float %856) #6 - store float %859, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.2.1 - -if.end.r_exit.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.r_exit.i.i.us.2 - %860 = add nuw nsw i64 %_local_id_x.i.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %860, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !162 - -if.then.i.i.us.1.1: ; preds = %if.end.r_exit.i.i.us.1 - %add11.i.i.us.1.1 = add nsw i32 %conv2.i.i.us.1.1, %mul.i.i.1 - %idxprom.i.i.us.1.1 = sext i32 %add11.i.i.us.1.1 to i64 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1.1 - %861 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %862 = load float, float* %arrayidx15.i.i.1, align 4, !tbaa !12 - %add17.i.i.us.1.1 = add nsw i32 %conv2.i.i.us.1.1, %mul16.i.i - %idxprom18.i.i.us.1.1 = sext i32 %add17.i.i.us.1.1 to i64 - %arrayidx19.i.i.us.1.1 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.1.1 - %863 = load float, float* %arrayidx19.i.i.us.1.1, align 4, !tbaa !12 - %neg.i.i.us.1.1 = fneg float %862 - %864 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.1.1, float %863, float %861) #6 - store float %864, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.1.1 - -if.end.r_exit.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.r_exit.i.i.us.1 - %865 = add nuw nsw i64 %_local_id_x.i.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %865, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !163 - -if.then.i.i.us.1402: ; preds = %if.end.r_exit.i.i.us - %add11.i.i.us.1395 = add nsw i32 %conv2.i.i.us.1392, %mul.i.i - %idxprom.i.i.us.1396 = sext i32 %add11.i.i.us.1395 to i64 - %arrayidx.i.i.us.1397 = getelementptr inbounds float, float* %7, i64 %idxprom.i.i.us.1396 - %866 = load float, float* %arrayidx.i.i.us.1397, align 4, !tbaa !12 - %867 = load float, float* %arrayidx15.i.i, align 4, !tbaa !12 - %add17.i.i.us.1398 = add nsw i32 %conv2.i.i.us.1392, %mul16.i.i - %idxprom18.i.i.us.1399 = sext i32 %add17.i.i.us.1398 to i64 - %arrayidx19.i.i.us.1400 = getelementptr inbounds float, float* %7, i64 %idxprom18.i.i.us.1399 - %868 = load float, float* %arrayidx19.i.i.us.1400, align 4, !tbaa !12 - %neg.i.i.us.1401 = fneg float %867 - %869 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.1401, float %868, float %866) #6 - store float %869, float* %arrayidx.i.i.us.1397, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.1403 - -if.end.r_exit.i.i.us.1403: ; preds = %if.then.i.i.us.1402, %if.end.r_exit.i.i.us - %870 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %870, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !164 -} - -; Function Attrs: nofree nounwind -define void @_pocl_kernel_lu_kernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { -pregion_for_entry.pregion_for_init.i.i: - %5 = bitcast i8** %0 to float** - %6 = load float*, float** %5, align 8 - %7 = getelementptr i8*, i8** %0, i64 1 - %8 = bitcast i8** %7 to i32** - %9 = load i32*, i32** %8, align 8 - %10 = load i32, i32* %9, align 4 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to i32** - %13 = load i32*, i32** %12, align 8 - %14 = load i32, i32* %13, align 4 - %mul.i.i.i = shl i64 %2, 5 - %add.i.i = add nsw i32 %10, 1 - %mul3.i.i.i = shl i64 %3, 3 - %mul16.i.i = mul nsw i32 %14, %10 - %15 = trunc i64 %mul3.i.i.i to i32 - %conv7.i.i = add i32 %add.i.i, %15 - %cmp.i.i = icmp slt i32 %conv7.i.i, %14 - %mul.i.i = mul nsw i32 %conv7.i.i, %14 - %add13.i.i = add nsw i32 %mul.i.i, %10 - %idxprom14.i.i = sext i32 %add13.i.i to i64 - %arrayidx15.i.i = getelementptr inbounds float, float* %6, i64 %idxprom14.i.i - br i1 %cmp.i.i, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i - %16 = mul i32 %14, %conv7.i.i - %17 = add i32 %10, %16 - %18 = trunc i64 %2 to i32 - %19 = shl i32 %18, 5 - %20 = add i32 %17, %19 - %21 = add i32 %20, 1 - %22 = add i32 %20, 32 - %23 = icmp slt i32 %22, %21 - %24 = add i32 %14, 1 - %25 = mul i32 %10, %24 - %26 = add i32 %25, %19 - %27 = add i32 %26, 1 - %28 = add i32 %26, 32 - %29 = icmp slt i32 %28, %27 - %30 = or i1 %23, %29 - br i1 %30, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.memcheck - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.memcheck, %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.memcheck: ; preds = %vector.scevcheck - %31 = mul i32 %14, %conv7.i.i - %32 = add i32 %10, %31 - %33 = trunc i64 %2 to i32 - %34 = shl i32 %33, 5 - %35 = add i32 %32, %34 - %36 = add i32 %35, 1 - %37 = sext i32 %36 to i64 - %scevgep = getelementptr float, float* %6, i64 %37 - %scevgep6 = bitcast float* %scevgep to i8* - %38 = add nsw i64 %37, 32 - %scevgep7 = getelementptr float, float* %6, i64 %38 - %scevgep9 = getelementptr float, float* %6, i64 %idxprom14.i.i - %scevgep910 = bitcast float* %scevgep9 to i8* - %uglygep = getelementptr i8, i8* %scevgep910, i64 1 - %39 = add i32 %14, 1 - %40 = mul i32 %10, %39 - %41 = add i32 %40, %34 - %42 = add i32 %41, 1 - %43 = sext i32 %42 to i64 - %scevgep11 = getelementptr float, float* %6, i64 %43 - %44 = add nsw i64 %43, 32 - %scevgep13 = getelementptr float, float* %6, i64 %44 - %bound0 = icmp ugt i8* %uglygep, %scevgep6 - %bound1 = icmp ult float* %arrayidx15.i.i, %scevgep7 - %found.conflict = and i1 %bound0, %bound1 - %bound015 = icmp ult float* %scevgep, %scevgep13 - %bound116 = icmp ult float* %scevgep11, %scevgep7 - %found.conflict17 = and i1 %bound015, %bound116 - %conflict.rdx = or i1 %found.conflict, %found.conflict17 - br i1 %conflict.rdx, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -vector.ph: ; preds = %vector.memcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat19 = shufflevector <8 x i32> %broadcast.splatinsert18, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert20 = insertelement <8 x i32> undef, i32 %14, i32 0 - %broadcast.splat21 = shufflevector <8 x i32> %broadcast.splatinsert20, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert22 = insertelement <8 x float*> undef, float* %arrayidx15.i.i, i32 0 - %broadcast.splat23 = shufflevector <8 x float*> %broadcast.splatinsert22, <8 x float*> undef, <8 x i32> zeroinitializer - %45 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %46 = or <8 x i32> %45, - %47 = add <8 x i32> %broadcast.splat19, %46 - %48 = icmp slt <8 x i32> %47, %broadcast.splat21 - %49 = extractelement <8 x i32> %47, i32 0 - %50 = add nsw i32 %49, %mul.i.i - %51 = sext i32 %50 to i64 - %52 = getelementptr inbounds float, float* %6, i64 %51 - %53 = bitcast float* %52 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %53, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !165, !noalias !168 - %wide.masked.gather = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat23, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !171 - %54 = add nsw i32 %49, %mul16.i.i - %55 = sext i32 %54 to i64 - %56 = getelementptr inbounds float, float* %6, i64 %55 - %57 = bitcast float* %56 to <8 x float>* - %wide.masked.load24 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %57, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12, !alias.scope !172 - %58 = fneg <8 x float> %wide.masked.gather - %59 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %58, <8 x float> %wide.masked.load24, <8 x float> %wide.masked.load) - %60 = bitcast float* %52 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %59, <8 x float>* %60, i32 4, <8 x i1> %48), !tbaa !12, !alias.scope !165, !noalias !168, !llvm.access.group !24 - %61 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %62 = or <8 x i32> %61, - %63 = add <8 x i32> %broadcast.splat19, %62 - %64 = icmp slt <8 x i32> %63, %broadcast.splat21 - %65 = extractelement <8 x i32> %63, i32 0 - %66 = add nsw i32 %65, %mul.i.i - %67 = sext i32 %66 to i64 - %68 = getelementptr inbounds float, float* %6, i64 %67 - %69 = bitcast float* %68 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %69, i32 4, <8 x i1> %64, <8 x float> undef), !tbaa !12, !alias.scope !165, !noalias !168 - %wide.masked.gather.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat23, i32 4, <8 x i1> %64, <8 x float> undef), !tbaa !12, !alias.scope !171 - %70 = add nsw i32 %65, %mul16.i.i - %71 = sext i32 %70 to i64 - %72 = getelementptr inbounds float, float* %6, i64 %71 - %73 = bitcast float* %72 to <8 x float>* - %wide.masked.load24.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %73, i32 4, <8 x i1> %64, <8 x float> undef), !tbaa !12, !alias.scope !172 - %74 = fneg <8 x float> %wide.masked.gather.1 - %75 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %74, <8 x float> %wide.masked.load24.1, <8 x float> %wide.masked.load.1) - %76 = bitcast float* %68 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %75, <8 x float>* %76, i32 4, <8 x i1> %64), !tbaa !12, !alias.scope !165, !noalias !168, !llvm.access.group !24 - %77 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %78 = or <8 x i32> %77, - %79 = add <8 x i32> %broadcast.splat19, %78 - %80 = icmp slt <8 x i32> %79, %broadcast.splat21 - %81 = extractelement <8 x i32> %79, i32 0 - %82 = add nsw i32 %81, %mul.i.i - %83 = sext i32 %82 to i64 - %84 = getelementptr inbounds float, float* %6, i64 %83 - %85 = bitcast float* %84 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %85, i32 4, <8 x i1> %80, <8 x float> undef), !tbaa !12, !alias.scope !165, !noalias !168 - %wide.masked.gather.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat23, i32 4, <8 x i1> %80, <8 x float> undef), !tbaa !12, !alias.scope !171 - %86 = add nsw i32 %81, %mul16.i.i - %87 = sext i32 %86 to i64 - %88 = getelementptr inbounds float, float* %6, i64 %87 - %89 = bitcast float* %88 to <8 x float>* - %wide.masked.load24.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %89, i32 4, <8 x i1> %80, <8 x float> undef), !tbaa !12, !alias.scope !172 - %90 = fneg <8 x float> %wide.masked.gather.2 - %91 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %90, <8 x float> %wide.masked.load24.2, <8 x float> %wide.masked.load.2) - %92 = bitcast float* %84 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %91, <8 x float>* %92, i32 4, <8 x i1> %80), !tbaa !12, !alias.scope !165, !noalias !168, !llvm.access.group !24 - %93 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %94 = or <8 x i32> %93, - %95 = add <8 x i32> %broadcast.splat19, %94 - %96 = icmp slt <8 x i32> %95, %broadcast.splat21 - %97 = extractelement <8 x i32> %95, i32 0 - %98 = add nsw i32 %97, %mul.i.i - %99 = sext i32 %98 to i64 - %100 = getelementptr inbounds float, float* %6, i64 %99 - %101 = bitcast float* %100 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %101, i32 4, <8 x i1> %96, <8 x float> undef), !tbaa !12, !alias.scope !165, !noalias !168 - %wide.masked.gather.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat23, i32 4, <8 x i1> %96, <8 x float> undef), !tbaa !12, !alias.scope !171 - %102 = add nsw i32 %97, %mul16.i.i - %103 = sext i32 %102 to i64 - %104 = getelementptr inbounds float, float* %6, i64 %103 - %105 = bitcast float* %104 to <8 x float>* - %wide.masked.load24.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %105, i32 4, <8 x i1> %96, <8 x float> undef), !tbaa !12, !alias.scope !172 - %106 = fneg <8 x float> %wide.masked.gather.3 - %107 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %106, <8 x float> %wide.masked.load24.3, <8 x float> %wide.masked.load.3) - %108 = bitcast float* %100 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %107, <8 x float>* %108, i32 4, <8 x i1> %96), !tbaa !12, !alias.scope !165, !noalias !168, !llvm.access.group !24 - br label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1403, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.preheader ], [ %869, %if.end.r_exit.i.i.us.1403 ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %109 = trunc i64 %add1.i.i.i.us to i32 - %conv2.i.i.us = add i32 %add.i.i, %109 - %cmp9.i.i.us = icmp slt i32 %conv2.i.i.us, %14 - br i1 %cmp9.i.i.us, label %if.then.i.i.us, label %if.end.r_exit.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add11.i.i.us = add nsw i32 %conv2.i.i.us, %mul.i.i - %idxprom.i.i.us = sext i32 %add11.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us - %110 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %111 = load float, float* %arrayidx15.i.i, align 4, !tbaa !12 - %add17.i.i.us = add nsw i32 %conv2.i.i.us, %mul16.i.i - %idxprom18.i.i.us = sext i32 %add17.i.i.us to i64 - %arrayidx19.i.i.us = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us - %112 = load float, float* %arrayidx19.i.i.us, align 4, !tbaa !12 - %neg.i.i.us = fneg float %111 - %113 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us, float %112, float %110) #6 - store float %113, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %114 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1391 = add nuw nsw i64 %114, %mul.i.i.i - %115 = trunc i64 %add1.i.i.i.us.1391 to i32 - %conv2.i.i.us.1392 = add i32 %add.i.i, %115 - %cmp9.i.i.us.1393 = icmp slt i32 %conv2.i.i.us.1392, %14 - br i1 %cmp9.i.i.us.1393, label %if.then.i.i.us.1402, label %if.end.r_exit.i.i.us.1403 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.r_exit.i.i.us.1403 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i - %116 = trunc i64 %mul3.i.i.i to i32 - %117 = or i32 %116, 1 - %conv7.i.i.1 = add i32 %add.i.i, %117 - %cmp.i.i.1 = icmp slt i32 %conv7.i.i.1, %14 - %mul.i.i.1 = mul nsw i32 %conv7.i.i.1, %14 - %add13.i.i.1 = add nsw i32 %mul.i.i.1, %10 - %idxprom14.i.i.1 = sext i32 %add13.i.i.1 to i64 - %arrayidx15.i.i.1 = getelementptr inbounds float, float* %6, i64 %idxprom14.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck35, label %pregion_for_end.i.i.1 - -vector.scevcheck35: ; preds = %pregion_for_end.i.i - %118 = mul i32 %14, %conv7.i.i.1 - %119 = add i32 %10, %118 - %120 = trunc i64 %2 to i32 - %121 = shl i32 %120, 5 - %122 = add i32 %119, %121 - %123 = add i32 %122, 1 - %124 = add i32 %122, 32 - %125 = icmp slt i32 %124, %123 - %126 = add i32 %14, 1 - %127 = mul i32 %10, %126 - %128 = add i32 %127, %121 - %129 = add i32 %128, 1 - %130 = add i32 %128, 32 - %131 = icmp slt i32 %130, %129 - %132 = or i1 %125, %131 - br i1 %132, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.memcheck57 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.memcheck57, %vector.scevcheck35 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.memcheck57: ; preds = %vector.scevcheck35 - %133 = mul i32 %14, %conv7.i.i.1 - %134 = add i32 %10, %133 - %135 = trunc i64 %2 to i32 - %136 = shl i32 %135, 5 - %137 = add i32 %134, %136 - %138 = add i32 %137, 1 - %139 = sext i32 %138 to i64 - %scevgep37 = getelementptr float, float* %6, i64 %139 - %scevgep3738 = bitcast float* %scevgep37 to i8* - %140 = add nsw i64 %139, 32 - %scevgep39 = getelementptr float, float* %6, i64 %140 - %scevgep41 = getelementptr float, float* %6, i64 %idxprom14.i.i.1 - %scevgep4142 = bitcast float* %scevgep41 to i8* - %uglygep43 = getelementptr i8, i8* %scevgep4142, i64 1 - %141 = add i32 %14, 1 - %142 = mul i32 %10, %141 - %143 = add i32 %142, %136 - %144 = add i32 %143, 1 - %145 = sext i32 %144 to i64 - %scevgep44 = getelementptr float, float* %6, i64 %145 - %146 = add nsw i64 %145, 32 - %scevgep46 = getelementptr float, float* %6, i64 %146 - %bound049 = icmp ugt i8* %uglygep43, %scevgep3738 - %bound150 = icmp ult float* %arrayidx15.i.i.1, %scevgep39 - %found.conflict51 = and i1 %bound049, %bound150 - %bound052 = icmp ult float* %scevgep37, %scevgep46 - %bound153 = icmp ult float* %scevgep44, %scevgep39 - %found.conflict54 = and i1 %bound052, %bound153 - %conflict.rdx55 = or i1 %found.conflict51, %found.conflict54 - br i1 %conflict.rdx55, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph58 - -vector.ph58: ; preds = %vector.memcheck57 - %broadcast.splatinsert65 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat66 = shufflevector <8 x i64> %broadcast.splatinsert65, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert67 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat68 = shufflevector <8 x i32> %broadcast.splatinsert67, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert69 = insertelement <8 x i32> undef, i32 %14, i32 0 - %broadcast.splat70 = shufflevector <8 x i32> %broadcast.splatinsert69, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert72 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.1, i32 0 - %broadcast.splat73 = shufflevector <8 x float*> %broadcast.splatinsert72, <8 x float*> undef, <8 x i32> zeroinitializer - %147 = trunc <8 x i64> %broadcast.splat66 to <8 x i32> - %148 = or <8 x i32> %147, - %149 = add <8 x i32> %broadcast.splat68, %148 - %150 = icmp slt <8 x i32> %149, %broadcast.splat70 - %151 = extractelement <8 x i32> %149, i32 0 - %152 = add nsw i32 %151, %mul.i.i.1 - %153 = sext i32 %152 to i64 - %154 = getelementptr inbounds float, float* %6, i64 %153 - %155 = bitcast float* %154 to <8 x float>* - %wide.masked.load71 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %155, i32 4, <8 x i1> %150, <8 x float> undef), !tbaa !12, !alias.scope !173, !noalias !176 - %wide.masked.gather74 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat73, i32 4, <8 x i1> %150, <8 x float> undef), !tbaa !12, !alias.scope !179 - %156 = add nsw i32 %151, %mul16.i.i - %157 = sext i32 %156 to i64 - %158 = getelementptr inbounds float, float* %6, i64 %157 - %159 = bitcast float* %158 to <8 x float>* - %wide.masked.load75 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %159, i32 4, <8 x i1> %150, <8 x float> undef), !tbaa !12, !alias.scope !180 - %160 = fneg <8 x float> %wide.masked.gather74 - %161 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %160, <8 x float> %wide.masked.load75, <8 x float> %wide.masked.load71) - %162 = bitcast float* %154 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %161, <8 x float>* %162, i32 4, <8 x i1> %150), !tbaa !12, !alias.scope !173, !noalias !176, !llvm.access.group !24 - %163 = trunc <8 x i64> %broadcast.splat66 to <8 x i32> - %164 = or <8 x i32> %163, - %165 = add <8 x i32> %broadcast.splat68, %164 - %166 = icmp slt <8 x i32> %165, %broadcast.splat70 - %167 = extractelement <8 x i32> %165, i32 0 - %168 = add nsw i32 %167, %mul.i.i.1 - %169 = sext i32 %168 to i64 - %170 = getelementptr inbounds float, float* %6, i64 %169 - %171 = bitcast float* %170 to <8 x float>* - %wide.masked.load71.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %171, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12, !alias.scope !173, !noalias !176 - %wide.masked.gather74.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat73, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12, !alias.scope !179 - %172 = add nsw i32 %167, %mul16.i.i - %173 = sext i32 %172 to i64 - %174 = getelementptr inbounds float, float* %6, i64 %173 - %175 = bitcast float* %174 to <8 x float>* - %wide.masked.load75.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %175, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12, !alias.scope !180 - %176 = fneg <8 x float> %wide.masked.gather74.1 - %177 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %176, <8 x float> %wide.masked.load75.1, <8 x float> %wide.masked.load71.1) - %178 = bitcast float* %170 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %177, <8 x float>* %178, i32 4, <8 x i1> %166), !tbaa !12, !alias.scope !173, !noalias !176, !llvm.access.group !24 - %179 = trunc <8 x i64> %broadcast.splat66 to <8 x i32> - %180 = or <8 x i32> %179, - %181 = add <8 x i32> %broadcast.splat68, %180 - %182 = icmp slt <8 x i32> %181, %broadcast.splat70 - %183 = extractelement <8 x i32> %181, i32 0 - %184 = add nsw i32 %183, %mul.i.i.1 - %185 = sext i32 %184 to i64 - %186 = getelementptr inbounds float, float* %6, i64 %185 - %187 = bitcast float* %186 to <8 x float>* - %wide.masked.load71.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %187, i32 4, <8 x i1> %182, <8 x float> undef), !tbaa !12, !alias.scope !173, !noalias !176 - %wide.masked.gather74.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat73, i32 4, <8 x i1> %182, <8 x float> undef), !tbaa !12, !alias.scope !179 - %188 = add nsw i32 %183, %mul16.i.i - %189 = sext i32 %188 to i64 - %190 = getelementptr inbounds float, float* %6, i64 %189 - %191 = bitcast float* %190 to <8 x float>* - %wide.masked.load75.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %191, i32 4, <8 x i1> %182, <8 x float> undef), !tbaa !12, !alias.scope !180 - %192 = fneg <8 x float> %wide.masked.gather74.2 - %193 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %192, <8 x float> %wide.masked.load75.2, <8 x float> %wide.masked.load71.2) - %194 = bitcast float* %186 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %193, <8 x float>* %194, i32 4, <8 x i1> %182), !tbaa !12, !alias.scope !173, !noalias !176, !llvm.access.group !24 - %195 = trunc <8 x i64> %broadcast.splat66 to <8 x i32> - %196 = or <8 x i32> %195, - %197 = add <8 x i32> %broadcast.splat68, %196 - %198 = icmp slt <8 x i32> %197, %broadcast.splat70 - %199 = extractelement <8 x i32> %197, i32 0 - %200 = add nsw i32 %199, %mul.i.i.1 - %201 = sext i32 %200 to i64 - %202 = getelementptr inbounds float, float* %6, i64 %201 - %203 = bitcast float* %202 to <8 x float>* - %wide.masked.load71.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %203, i32 4, <8 x i1> %198, <8 x float> undef), !tbaa !12, !alias.scope !173, !noalias !176 - %wide.masked.gather74.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat73, i32 4, <8 x i1> %198, <8 x float> undef), !tbaa !12, !alias.scope !179 - %204 = add nsw i32 %199, %mul16.i.i - %205 = sext i32 %204 to i64 - %206 = getelementptr inbounds float, float* %6, i64 %205 - %207 = bitcast float* %206 to <8 x float>* - %wide.masked.load75.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %207, i32 4, <8 x i1> %198, <8 x float> undef), !tbaa !12, !alias.scope !180 - %208 = fneg <8 x float> %wide.masked.gather74.3 - %209 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %208, <8 x float> %wide.masked.load75.3, <8 x float> %wide.masked.load71.3) - %210 = bitcast float* %202 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %209, <8 x float>* %210, i32 4, <8 x i1> %198), !tbaa !12, !alias.scope !173, !noalias !176, !llvm.access.group !24 - br label %pregion_for_end.i.i.1 - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.1, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ], [ %864, %if.end.r_exit.i.i.us.1.1 ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %211 = trunc i64 %add1.i.i.i.us.1 to i32 - %conv2.i.i.us.1 = add i32 %add.i.i, %211 - %cmp9.i.i.us.1 = icmp slt i32 %conv2.i.i.us.1, %14 - br i1 %cmp9.i.i.us.1, label %if.then.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %add11.i.i.us.1 = add nsw i32 %conv2.i.i.us.1, %mul.i.i.1 - %idxprom.i.i.us.1 = sext i32 %add11.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.1 - %212 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %213 = load float, float* %arrayidx15.i.i.1, align 4, !tbaa !12 - %add17.i.i.us.1 = add nsw i32 %conv2.i.i.us.1, %mul16.i.i - %idxprom18.i.i.us.1 = sext i32 %add17.i.i.us.1 to i64 - %arrayidx19.i.i.us.1 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.1 - %214 = load float, float* %arrayidx19.i.i.us.1, align 4, !tbaa !12 - %neg.i.i.us.1 = fneg float %213 - %215 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.1, float %214, float %212) #6 - store float %215, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %216 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %216, %mul.i.i.i - %217 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %conv2.i.i.us.1.1 = add i32 %add.i.i, %217 - %cmp9.i.i.us.1.1 = icmp slt i32 %conv2.i.i.us.1.1, %14 - br i1 %cmp9.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.r_exit.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.r_exit.i.i.us.1.1 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph58, %pregion_for_end.i.i - %218 = trunc i64 %mul3.i.i.i to i32 - %219 = or i32 %218, 2 - %conv7.i.i.2 = add i32 %add.i.i, %219 - %cmp.i.i.2 = icmp slt i32 %conv7.i.i.2, %14 - %mul.i.i.2 = mul nsw i32 %conv7.i.i.2, %14 - %add13.i.i.2 = add nsw i32 %mul.i.i.2, %10 - %idxprom14.i.i.2 = sext i32 %add13.i.i.2 to i64 - %arrayidx15.i.i.2 = getelementptr inbounds float, float* %6, i64 %idxprom14.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck86, label %pregion_for_end.i.i.2 - -vector.scevcheck86: ; preds = %pregion_for_end.i.i.1 - %220 = mul i32 %14, %conv7.i.i.2 - %221 = add i32 %10, %220 - %222 = trunc i64 %2 to i32 - %223 = shl i32 %222, 5 - %224 = add i32 %221, %223 - %225 = add i32 %224, 1 - %226 = add i32 %224, 32 - %227 = icmp slt i32 %226, %225 - %228 = add i32 %14, 1 - %229 = mul i32 %10, %228 - %230 = add i32 %229, %223 - %231 = add i32 %230, 1 - %232 = add i32 %230, 32 - %233 = icmp slt i32 %232, %231 - %234 = or i1 %227, %233 - br i1 %234, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.memcheck108 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.memcheck108, %vector.scevcheck86 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.memcheck108: ; preds = %vector.scevcheck86 - %235 = mul i32 %14, %conv7.i.i.2 - %236 = add i32 %10, %235 - %237 = trunc i64 %2 to i32 - %238 = shl i32 %237, 5 - %239 = add i32 %236, %238 - %240 = add i32 %239, 1 - %241 = sext i32 %240 to i64 - %scevgep88 = getelementptr float, float* %6, i64 %241 - %scevgep8889 = bitcast float* %scevgep88 to i8* - %242 = add nsw i64 %241, 32 - %scevgep90 = getelementptr float, float* %6, i64 %242 - %scevgep92 = getelementptr float, float* %6, i64 %idxprom14.i.i.2 - %scevgep9293 = bitcast float* %scevgep92 to i8* - %uglygep94 = getelementptr i8, i8* %scevgep9293, i64 1 - %243 = add i32 %14, 1 - %244 = mul i32 %10, %243 - %245 = add i32 %244, %238 - %246 = add i32 %245, 1 - %247 = sext i32 %246 to i64 - %scevgep95 = getelementptr float, float* %6, i64 %247 - %248 = add nsw i64 %247, 32 - %scevgep97 = getelementptr float, float* %6, i64 %248 - %bound0100 = icmp ugt i8* %uglygep94, %scevgep8889 - %bound1101 = icmp ult float* %arrayidx15.i.i.2, %scevgep90 - %found.conflict102 = and i1 %bound0100, %bound1101 - %bound0103 = icmp ult float* %scevgep88, %scevgep97 - %bound1104 = icmp ult float* %scevgep95, %scevgep90 - %found.conflict105 = and i1 %bound0103, %bound1104 - %conflict.rdx106 = or i1 %found.conflict102, %found.conflict105 - br i1 %conflict.rdx106, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph109 - -vector.ph109: ; preds = %vector.memcheck108 - %broadcast.splatinsert116 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat117 = shufflevector <8 x i64> %broadcast.splatinsert116, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert118 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat119 = shufflevector <8 x i32> %broadcast.splatinsert118, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert120 = insertelement <8 x i32> undef, i32 %14, i32 0 - %broadcast.splat121 = shufflevector <8 x i32> %broadcast.splatinsert120, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert123 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.2, i32 0 - %broadcast.splat124 = shufflevector <8 x float*> %broadcast.splatinsert123, <8 x float*> undef, <8 x i32> zeroinitializer - %249 = trunc <8 x i64> %broadcast.splat117 to <8 x i32> - %250 = or <8 x i32> %249, - %251 = add <8 x i32> %broadcast.splat119, %250 - %252 = icmp slt <8 x i32> %251, %broadcast.splat121 - %253 = extractelement <8 x i32> %251, i32 0 - %254 = add nsw i32 %253, %mul.i.i.2 - %255 = sext i32 %254 to i64 - %256 = getelementptr inbounds float, float* %6, i64 %255 - %257 = bitcast float* %256 to <8 x float>* - %wide.masked.load122 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %257, i32 4, <8 x i1> %252, <8 x float> undef), !tbaa !12, !alias.scope !181, !noalias !184 - %wide.masked.gather125 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat124, i32 4, <8 x i1> %252, <8 x float> undef), !tbaa !12, !alias.scope !187 - %258 = add nsw i32 %253, %mul16.i.i - %259 = sext i32 %258 to i64 - %260 = getelementptr inbounds float, float* %6, i64 %259 - %261 = bitcast float* %260 to <8 x float>* - %wide.masked.load126 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %261, i32 4, <8 x i1> %252, <8 x float> undef), !tbaa !12, !alias.scope !188 - %262 = fneg <8 x float> %wide.masked.gather125 - %263 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %262, <8 x float> %wide.masked.load126, <8 x float> %wide.masked.load122) - %264 = bitcast float* %256 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %263, <8 x float>* %264, i32 4, <8 x i1> %252), !tbaa !12, !alias.scope !181, !noalias !184, !llvm.access.group !24 - %265 = trunc <8 x i64> %broadcast.splat117 to <8 x i32> - %266 = or <8 x i32> %265, - %267 = add <8 x i32> %broadcast.splat119, %266 - %268 = icmp slt <8 x i32> %267, %broadcast.splat121 - %269 = extractelement <8 x i32> %267, i32 0 - %270 = add nsw i32 %269, %mul.i.i.2 - %271 = sext i32 %270 to i64 - %272 = getelementptr inbounds float, float* %6, i64 %271 - %273 = bitcast float* %272 to <8 x float>* - %wide.masked.load122.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %273, i32 4, <8 x i1> %268, <8 x float> undef), !tbaa !12, !alias.scope !181, !noalias !184 - %wide.masked.gather125.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat124, i32 4, <8 x i1> %268, <8 x float> undef), !tbaa !12, !alias.scope !187 - %274 = add nsw i32 %269, %mul16.i.i - %275 = sext i32 %274 to i64 - %276 = getelementptr inbounds float, float* %6, i64 %275 - %277 = bitcast float* %276 to <8 x float>* - %wide.masked.load126.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %277, i32 4, <8 x i1> %268, <8 x float> undef), !tbaa !12, !alias.scope !188 - %278 = fneg <8 x float> %wide.masked.gather125.1 - %279 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %278, <8 x float> %wide.masked.load126.1, <8 x float> %wide.masked.load122.1) - %280 = bitcast float* %272 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %279, <8 x float>* %280, i32 4, <8 x i1> %268), !tbaa !12, !alias.scope !181, !noalias !184, !llvm.access.group !24 - %281 = trunc <8 x i64> %broadcast.splat117 to <8 x i32> - %282 = or <8 x i32> %281, - %283 = add <8 x i32> %broadcast.splat119, %282 - %284 = icmp slt <8 x i32> %283, %broadcast.splat121 - %285 = extractelement <8 x i32> %283, i32 0 - %286 = add nsw i32 %285, %mul.i.i.2 - %287 = sext i32 %286 to i64 - %288 = getelementptr inbounds float, float* %6, i64 %287 - %289 = bitcast float* %288 to <8 x float>* - %wide.masked.load122.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %289, i32 4, <8 x i1> %284, <8 x float> undef), !tbaa !12, !alias.scope !181, !noalias !184 - %wide.masked.gather125.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat124, i32 4, <8 x i1> %284, <8 x float> undef), !tbaa !12, !alias.scope !187 - %290 = add nsw i32 %285, %mul16.i.i - %291 = sext i32 %290 to i64 - %292 = getelementptr inbounds float, float* %6, i64 %291 - %293 = bitcast float* %292 to <8 x float>* - %wide.masked.load126.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %293, i32 4, <8 x i1> %284, <8 x float> undef), !tbaa !12, !alias.scope !188 - %294 = fneg <8 x float> %wide.masked.gather125.2 - %295 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %294, <8 x float> %wide.masked.load126.2, <8 x float> %wide.masked.load122.2) - %296 = bitcast float* %288 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %295, <8 x float>* %296, i32 4, <8 x i1> %284), !tbaa !12, !alias.scope !181, !noalias !184, !llvm.access.group !24 - %297 = trunc <8 x i64> %broadcast.splat117 to <8 x i32> - %298 = or <8 x i32> %297, - %299 = add <8 x i32> %broadcast.splat119, %298 - %300 = icmp slt <8 x i32> %299, %broadcast.splat121 - %301 = extractelement <8 x i32> %299, i32 0 - %302 = add nsw i32 %301, %mul.i.i.2 - %303 = sext i32 %302 to i64 - %304 = getelementptr inbounds float, float* %6, i64 %303 - %305 = bitcast float* %304 to <8 x float>* - %wide.masked.load122.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %305, i32 4, <8 x i1> %300, <8 x float> undef), !tbaa !12, !alias.scope !181, !noalias !184 - %wide.masked.gather125.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat124, i32 4, <8 x i1> %300, <8 x float> undef), !tbaa !12, !alias.scope !187 - %306 = add nsw i32 %301, %mul16.i.i - %307 = sext i32 %306 to i64 - %308 = getelementptr inbounds float, float* %6, i64 %307 - %309 = bitcast float* %308 to <8 x float>* - %wide.masked.load126.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %309, i32 4, <8 x i1> %300, <8 x float> undef), !tbaa !12, !alias.scope !188 - %310 = fneg <8 x float> %wide.masked.gather125.3 - %311 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %310, <8 x float> %wide.masked.load126.3, <8 x float> %wide.masked.load122.3) - %312 = bitcast float* %304 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %311, <8 x float>* %312, i32 4, <8 x i1> %300), !tbaa !12, !alias.scope !181, !noalias !184, !llvm.access.group !24 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.r_exit.i.i.us.2.1, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ], [ %859, %if.end.r_exit.i.i.us.2.1 ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %313 = trunc i64 %add1.i.i.i.us.2 to i32 - %conv2.i.i.us.2 = add i32 %add.i.i, %313 - %cmp9.i.i.us.2 = icmp slt i32 %conv2.i.i.us.2, %14 - br i1 %cmp9.i.i.us.2, label %if.then.i.i.us.2, label %if.end.r_exit.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %add11.i.i.us.2 = add nsw i32 %conv2.i.i.us.2, %mul.i.i.2 - %idxprom.i.i.us.2 = sext i32 %add11.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.2 - %314 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %315 = load float, float* %arrayidx15.i.i.2, align 4, !tbaa !12 - %add17.i.i.us.2 = add nsw i32 %conv2.i.i.us.2, %mul16.i.i - %idxprom18.i.i.us.2 = sext i32 %add17.i.i.us.2 to i64 - %arrayidx19.i.i.us.2 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.2 - %316 = load float, float* %arrayidx19.i.i.us.2, align 4, !tbaa !12 - %neg.i.i.us.2 = fneg float %315 - %317 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.2, float %316, float %314) #6 - store float %317, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.2 - -if.end.r_exit.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %318 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %318, %mul.i.i.i - %319 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %conv2.i.i.us.2.1 = add i32 %add.i.i, %319 - %cmp9.i.i.us.2.1 = icmp slt i32 %conv2.i.i.us.2.1, %14 - br i1 %cmp9.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.r_exit.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.r_exit.i.i.us.2.1 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph109, %pregion_for_end.i.i.1 - %320 = trunc i64 %mul3.i.i.i to i32 - %321 = or i32 %320, 3 - %conv7.i.i.3 = add i32 %add.i.i, %321 - %cmp.i.i.3 = icmp slt i32 %conv7.i.i.3, %14 - %mul.i.i.3 = mul nsw i32 %conv7.i.i.3, %14 - %add13.i.i.3 = add nsw i32 %mul.i.i.3, %10 - %idxprom14.i.i.3 = sext i32 %add13.i.i.3 to i64 - %arrayidx15.i.i.3 = getelementptr inbounds float, float* %6, i64 %idxprom14.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck137, label %pregion_for_end.i.i.3 - -vector.scevcheck137: ; preds = %pregion_for_end.i.i.2 - %322 = mul i32 %14, %conv7.i.i.3 - %323 = add i32 %10, %322 - %324 = trunc i64 %2 to i32 - %325 = shl i32 %324, 5 - %326 = add i32 %323, %325 - %327 = add i32 %326, 1 - %328 = add i32 %326, 32 - %329 = icmp slt i32 %328, %327 - %330 = add i32 %14, 1 - %331 = mul i32 %10, %330 - %332 = add i32 %331, %325 - %333 = add i32 %332, 1 - %334 = add i32 %332, 32 - %335 = icmp slt i32 %334, %333 - %336 = or i1 %329, %335 - br i1 %336, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.memcheck159 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.memcheck159, %vector.scevcheck137 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.memcheck159: ; preds = %vector.scevcheck137 - %337 = mul i32 %14, %conv7.i.i.3 - %338 = add i32 %10, %337 - %339 = trunc i64 %2 to i32 - %340 = shl i32 %339, 5 - %341 = add i32 %338, %340 - %342 = add i32 %341, 1 - %343 = sext i32 %342 to i64 - %scevgep139 = getelementptr float, float* %6, i64 %343 - %scevgep139140 = bitcast float* %scevgep139 to i8* - %344 = add nsw i64 %343, 32 - %scevgep141 = getelementptr float, float* %6, i64 %344 - %scevgep143 = getelementptr float, float* %6, i64 %idxprom14.i.i.3 - %scevgep143144 = bitcast float* %scevgep143 to i8* - %uglygep145 = getelementptr i8, i8* %scevgep143144, i64 1 - %345 = add i32 %14, 1 - %346 = mul i32 %10, %345 - %347 = add i32 %346, %340 - %348 = add i32 %347, 1 - %349 = sext i32 %348 to i64 - %scevgep146 = getelementptr float, float* %6, i64 %349 - %350 = add nsw i64 %349, 32 - %scevgep148 = getelementptr float, float* %6, i64 %350 - %bound0151 = icmp ugt i8* %uglygep145, %scevgep139140 - %bound1152 = icmp ult float* %arrayidx15.i.i.3, %scevgep141 - %found.conflict153 = and i1 %bound0151, %bound1152 - %bound0154 = icmp ult float* %scevgep139, %scevgep148 - %bound1155 = icmp ult float* %scevgep146, %scevgep141 - %found.conflict156 = and i1 %bound0154, %bound1155 - %conflict.rdx157 = or i1 %found.conflict153, %found.conflict156 - br i1 %conflict.rdx157, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph160 - -vector.ph160: ; preds = %vector.memcheck159 - %broadcast.splatinsert167 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat168 = shufflevector <8 x i64> %broadcast.splatinsert167, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert169 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat170 = shufflevector <8 x i32> %broadcast.splatinsert169, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert171 = insertelement <8 x i32> undef, i32 %14, i32 0 - %broadcast.splat172 = shufflevector <8 x i32> %broadcast.splatinsert171, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert174 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.3, i32 0 - %broadcast.splat175 = shufflevector <8 x float*> %broadcast.splatinsert174, <8 x float*> undef, <8 x i32> zeroinitializer - %351 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %352 = or <8 x i32> %351, - %353 = add <8 x i32> %broadcast.splat170, %352 - %354 = icmp slt <8 x i32> %353, %broadcast.splat172 - %355 = extractelement <8 x i32> %353, i32 0 - %356 = add nsw i32 %355, %mul.i.i.3 - %357 = sext i32 %356 to i64 - %358 = getelementptr inbounds float, float* %6, i64 %357 - %359 = bitcast float* %358 to <8 x float>* - %wide.masked.load173 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %359, i32 4, <8 x i1> %354, <8 x float> undef), !tbaa !12, !alias.scope !189, !noalias !192 - %wide.masked.gather176 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat175, i32 4, <8 x i1> %354, <8 x float> undef), !tbaa !12, !alias.scope !195 - %360 = add nsw i32 %355, %mul16.i.i - %361 = sext i32 %360 to i64 - %362 = getelementptr inbounds float, float* %6, i64 %361 - %363 = bitcast float* %362 to <8 x float>* - %wide.masked.load177 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %363, i32 4, <8 x i1> %354, <8 x float> undef), !tbaa !12, !alias.scope !196 - %364 = fneg <8 x float> %wide.masked.gather176 - %365 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %364, <8 x float> %wide.masked.load177, <8 x float> %wide.masked.load173) - %366 = bitcast float* %358 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %365, <8 x float>* %366, i32 4, <8 x i1> %354), !tbaa !12, !alias.scope !189, !noalias !192, !llvm.access.group !24 - %367 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %368 = or <8 x i32> %367, - %369 = add <8 x i32> %broadcast.splat170, %368 - %370 = icmp slt <8 x i32> %369, %broadcast.splat172 - %371 = extractelement <8 x i32> %369, i32 0 - %372 = add nsw i32 %371, %mul.i.i.3 - %373 = sext i32 %372 to i64 - %374 = getelementptr inbounds float, float* %6, i64 %373 - %375 = bitcast float* %374 to <8 x float>* - %wide.masked.load173.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %375, i32 4, <8 x i1> %370, <8 x float> undef), !tbaa !12, !alias.scope !189, !noalias !192 - %wide.masked.gather176.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat175, i32 4, <8 x i1> %370, <8 x float> undef), !tbaa !12, !alias.scope !195 - %376 = add nsw i32 %371, %mul16.i.i - %377 = sext i32 %376 to i64 - %378 = getelementptr inbounds float, float* %6, i64 %377 - %379 = bitcast float* %378 to <8 x float>* - %wide.masked.load177.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %379, i32 4, <8 x i1> %370, <8 x float> undef), !tbaa !12, !alias.scope !196 - %380 = fneg <8 x float> %wide.masked.gather176.1 - %381 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %380, <8 x float> %wide.masked.load177.1, <8 x float> %wide.masked.load173.1) - %382 = bitcast float* %374 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %381, <8 x float>* %382, i32 4, <8 x i1> %370), !tbaa !12, !alias.scope !189, !noalias !192, !llvm.access.group !24 - %383 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %384 = or <8 x i32> %383, - %385 = add <8 x i32> %broadcast.splat170, %384 - %386 = icmp slt <8 x i32> %385, %broadcast.splat172 - %387 = extractelement <8 x i32> %385, i32 0 - %388 = add nsw i32 %387, %mul.i.i.3 - %389 = sext i32 %388 to i64 - %390 = getelementptr inbounds float, float* %6, i64 %389 - %391 = bitcast float* %390 to <8 x float>* - %wide.masked.load173.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %391, i32 4, <8 x i1> %386, <8 x float> undef), !tbaa !12, !alias.scope !189, !noalias !192 - %wide.masked.gather176.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat175, i32 4, <8 x i1> %386, <8 x float> undef), !tbaa !12, !alias.scope !195 - %392 = add nsw i32 %387, %mul16.i.i - %393 = sext i32 %392 to i64 - %394 = getelementptr inbounds float, float* %6, i64 %393 - %395 = bitcast float* %394 to <8 x float>* - %wide.masked.load177.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %395, i32 4, <8 x i1> %386, <8 x float> undef), !tbaa !12, !alias.scope !196 - %396 = fneg <8 x float> %wide.masked.gather176.2 - %397 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %396, <8 x float> %wide.masked.load177.2, <8 x float> %wide.masked.load173.2) - %398 = bitcast float* %390 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %397, <8 x float>* %398, i32 4, <8 x i1> %386), !tbaa !12, !alias.scope !189, !noalias !192, !llvm.access.group !24 - %399 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %400 = or <8 x i32> %399, - %401 = add <8 x i32> %broadcast.splat170, %400 - %402 = icmp slt <8 x i32> %401, %broadcast.splat172 - %403 = extractelement <8 x i32> %401, i32 0 - %404 = add nsw i32 %403, %mul.i.i.3 - %405 = sext i32 %404 to i64 - %406 = getelementptr inbounds float, float* %6, i64 %405 - %407 = bitcast float* %406 to <8 x float>* - %wide.masked.load173.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %407, i32 4, <8 x i1> %402, <8 x float> undef), !tbaa !12, !alias.scope !189, !noalias !192 - %wide.masked.gather176.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat175, i32 4, <8 x i1> %402, <8 x float> undef), !tbaa !12, !alias.scope !195 - %408 = add nsw i32 %403, %mul16.i.i - %409 = sext i32 %408 to i64 - %410 = getelementptr inbounds float, float* %6, i64 %409 - %411 = bitcast float* %410 to <8 x float>* - %wide.masked.load177.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %411, i32 4, <8 x i1> %402, <8 x float> undef), !tbaa !12, !alias.scope !196 - %412 = fneg <8 x float> %wide.masked.gather176.3 - %413 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %412, <8 x float> %wide.masked.load177.3, <8 x float> %wide.masked.load173.3) - %414 = bitcast float* %406 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %413, <8 x float>* %414, i32 4, <8 x i1> %402), !tbaa !12, !alias.scope !189, !noalias !192, !llvm.access.group !24 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.r_exit.i.i.us.3.1, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ], [ %854, %if.end.r_exit.i.i.us.3.1 ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %415 = trunc i64 %add1.i.i.i.us.3 to i32 - %conv2.i.i.us.3 = add i32 %add.i.i, %415 - %cmp9.i.i.us.3 = icmp slt i32 %conv2.i.i.us.3, %14 - br i1 %cmp9.i.i.us.3, label %if.then.i.i.us.3, label %if.end.r_exit.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %add11.i.i.us.3 = add nsw i32 %conv2.i.i.us.3, %mul.i.i.3 - %idxprom.i.i.us.3 = sext i32 %add11.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.3 - %416 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %417 = load float, float* %arrayidx15.i.i.3, align 4, !tbaa !12 - %add17.i.i.us.3 = add nsw i32 %conv2.i.i.us.3, %mul16.i.i - %idxprom18.i.i.us.3 = sext i32 %add17.i.i.us.3 to i64 - %arrayidx19.i.i.us.3 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.3 - %418 = load float, float* %arrayidx19.i.i.us.3, align 4, !tbaa !12 - %neg.i.i.us.3 = fneg float %417 - %419 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.3, float %418, float %416) #6 - store float %419, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.3 - -if.end.r_exit.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %420 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %420, %mul.i.i.i - %421 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %conv2.i.i.us.3.1 = add i32 %add.i.i, %421 - %cmp9.i.i.us.3.1 = icmp slt i32 %conv2.i.i.us.3.1, %14 - br i1 %cmp9.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.r_exit.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.r_exit.i.i.us.3.1 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph160, %pregion_for_end.i.i.2 - %422 = trunc i64 %mul3.i.i.i to i32 - %423 = or i32 %422, 4 - %conv7.i.i.4 = add i32 %add.i.i, %423 - %cmp.i.i.4 = icmp slt i32 %conv7.i.i.4, %14 - %mul.i.i.4 = mul nsw i32 %conv7.i.i.4, %14 - %add13.i.i.4 = add nsw i32 %mul.i.i.4, %10 - %idxprom14.i.i.4 = sext i32 %add13.i.i.4 to i64 - %arrayidx15.i.i.4 = getelementptr inbounds float, float* %6, i64 %idxprom14.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck188, label %pregion_for_end.i.i.4 - -vector.scevcheck188: ; preds = %pregion_for_end.i.i.3 - %424 = mul i32 %14, %conv7.i.i.4 - %425 = add i32 %10, %424 - %426 = trunc i64 %2 to i32 - %427 = shl i32 %426, 5 - %428 = add i32 %425, %427 - %429 = add i32 %428, 1 - %430 = add i32 %428, 32 - %431 = icmp slt i32 %430, %429 - %432 = add i32 %14, 1 - %433 = mul i32 %10, %432 - %434 = add i32 %433, %427 - %435 = add i32 %434, 1 - %436 = add i32 %434, 32 - %437 = icmp slt i32 %436, %435 - %438 = or i1 %431, %437 - br i1 %438, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.memcheck210 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.memcheck210, %vector.scevcheck188 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.memcheck210: ; preds = %vector.scevcheck188 - %439 = mul i32 %14, %conv7.i.i.4 - %440 = add i32 %10, %439 - %441 = trunc i64 %2 to i32 - %442 = shl i32 %441, 5 - %443 = add i32 %440, %442 - %444 = add i32 %443, 1 - %445 = sext i32 %444 to i64 - %scevgep190 = getelementptr float, float* %6, i64 %445 - %scevgep190191 = bitcast float* %scevgep190 to i8* - %446 = add nsw i64 %445, 32 - %scevgep192 = getelementptr float, float* %6, i64 %446 - %scevgep194 = getelementptr float, float* %6, i64 %idxprom14.i.i.4 - %scevgep194195 = bitcast float* %scevgep194 to i8* - %uglygep196 = getelementptr i8, i8* %scevgep194195, i64 1 - %447 = add i32 %14, 1 - %448 = mul i32 %10, %447 - %449 = add i32 %448, %442 - %450 = add i32 %449, 1 - %451 = sext i32 %450 to i64 - %scevgep197 = getelementptr float, float* %6, i64 %451 - %452 = add nsw i64 %451, 32 - %scevgep199 = getelementptr float, float* %6, i64 %452 - %bound0202 = icmp ugt i8* %uglygep196, %scevgep190191 - %bound1203 = icmp ult float* %arrayidx15.i.i.4, %scevgep192 - %found.conflict204 = and i1 %bound0202, %bound1203 - %bound0205 = icmp ult float* %scevgep190, %scevgep199 - %bound1206 = icmp ult float* %scevgep197, %scevgep192 - %found.conflict207 = and i1 %bound0205, %bound1206 - %conflict.rdx208 = or i1 %found.conflict204, %found.conflict207 - br i1 %conflict.rdx208, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph211 - -vector.ph211: ; preds = %vector.memcheck210 - %broadcast.splatinsert218 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat219 = shufflevector <8 x i64> %broadcast.splatinsert218, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert220 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat221 = shufflevector <8 x i32> %broadcast.splatinsert220, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert222 = insertelement <8 x i32> undef, i32 %14, i32 0 - %broadcast.splat223 = shufflevector <8 x i32> %broadcast.splatinsert222, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert225 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.4, i32 0 - %broadcast.splat226 = shufflevector <8 x float*> %broadcast.splatinsert225, <8 x float*> undef, <8 x i32> zeroinitializer - %453 = trunc <8 x i64> %broadcast.splat219 to <8 x i32> - %454 = or <8 x i32> %453, - %455 = add <8 x i32> %broadcast.splat221, %454 - %456 = icmp slt <8 x i32> %455, %broadcast.splat223 - %457 = extractelement <8 x i32> %455, i32 0 - %458 = add nsw i32 %457, %mul.i.i.4 - %459 = sext i32 %458 to i64 - %460 = getelementptr inbounds float, float* %6, i64 %459 - %461 = bitcast float* %460 to <8 x float>* - %wide.masked.load224 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %461, i32 4, <8 x i1> %456, <8 x float> undef), !tbaa !12, !alias.scope !197, !noalias !200 - %wide.masked.gather227 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat226, i32 4, <8 x i1> %456, <8 x float> undef), !tbaa !12, !alias.scope !203 - %462 = add nsw i32 %457, %mul16.i.i - %463 = sext i32 %462 to i64 - %464 = getelementptr inbounds float, float* %6, i64 %463 - %465 = bitcast float* %464 to <8 x float>* - %wide.masked.load228 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %465, i32 4, <8 x i1> %456, <8 x float> undef), !tbaa !12, !alias.scope !204 - %466 = fneg <8 x float> %wide.masked.gather227 - %467 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %466, <8 x float> %wide.masked.load228, <8 x float> %wide.masked.load224) - %468 = bitcast float* %460 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %467, <8 x float>* %468, i32 4, <8 x i1> %456), !tbaa !12, !alias.scope !197, !noalias !200, !llvm.access.group !24 - %469 = trunc <8 x i64> %broadcast.splat219 to <8 x i32> - %470 = or <8 x i32> %469, - %471 = add <8 x i32> %broadcast.splat221, %470 - %472 = icmp slt <8 x i32> %471, %broadcast.splat223 - %473 = extractelement <8 x i32> %471, i32 0 - %474 = add nsw i32 %473, %mul.i.i.4 - %475 = sext i32 %474 to i64 - %476 = getelementptr inbounds float, float* %6, i64 %475 - %477 = bitcast float* %476 to <8 x float>* - %wide.masked.load224.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %477, i32 4, <8 x i1> %472, <8 x float> undef), !tbaa !12, !alias.scope !197, !noalias !200 - %wide.masked.gather227.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat226, i32 4, <8 x i1> %472, <8 x float> undef), !tbaa !12, !alias.scope !203 - %478 = add nsw i32 %473, %mul16.i.i - %479 = sext i32 %478 to i64 - %480 = getelementptr inbounds float, float* %6, i64 %479 - %481 = bitcast float* %480 to <8 x float>* - %wide.masked.load228.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %481, i32 4, <8 x i1> %472, <8 x float> undef), !tbaa !12, !alias.scope !204 - %482 = fneg <8 x float> %wide.masked.gather227.1 - %483 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %482, <8 x float> %wide.masked.load228.1, <8 x float> %wide.masked.load224.1) - %484 = bitcast float* %476 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %483, <8 x float>* %484, i32 4, <8 x i1> %472), !tbaa !12, !alias.scope !197, !noalias !200, !llvm.access.group !24 - %485 = trunc <8 x i64> %broadcast.splat219 to <8 x i32> - %486 = or <8 x i32> %485, - %487 = add <8 x i32> %broadcast.splat221, %486 - %488 = icmp slt <8 x i32> %487, %broadcast.splat223 - %489 = extractelement <8 x i32> %487, i32 0 - %490 = add nsw i32 %489, %mul.i.i.4 - %491 = sext i32 %490 to i64 - %492 = getelementptr inbounds float, float* %6, i64 %491 - %493 = bitcast float* %492 to <8 x float>* - %wide.masked.load224.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %493, i32 4, <8 x i1> %488, <8 x float> undef), !tbaa !12, !alias.scope !197, !noalias !200 - %wide.masked.gather227.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat226, i32 4, <8 x i1> %488, <8 x float> undef), !tbaa !12, !alias.scope !203 - %494 = add nsw i32 %489, %mul16.i.i - %495 = sext i32 %494 to i64 - %496 = getelementptr inbounds float, float* %6, i64 %495 - %497 = bitcast float* %496 to <8 x float>* - %wide.masked.load228.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %497, i32 4, <8 x i1> %488, <8 x float> undef), !tbaa !12, !alias.scope !204 - %498 = fneg <8 x float> %wide.masked.gather227.2 - %499 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %498, <8 x float> %wide.masked.load228.2, <8 x float> %wide.masked.load224.2) - %500 = bitcast float* %492 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %499, <8 x float>* %500, i32 4, <8 x i1> %488), !tbaa !12, !alias.scope !197, !noalias !200, !llvm.access.group !24 - %501 = trunc <8 x i64> %broadcast.splat219 to <8 x i32> - %502 = or <8 x i32> %501, - %503 = add <8 x i32> %broadcast.splat221, %502 - %504 = icmp slt <8 x i32> %503, %broadcast.splat223 - %505 = extractelement <8 x i32> %503, i32 0 - %506 = add nsw i32 %505, %mul.i.i.4 - %507 = sext i32 %506 to i64 - %508 = getelementptr inbounds float, float* %6, i64 %507 - %509 = bitcast float* %508 to <8 x float>* - %wide.masked.load224.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %509, i32 4, <8 x i1> %504, <8 x float> undef), !tbaa !12, !alias.scope !197, !noalias !200 - %wide.masked.gather227.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat226, i32 4, <8 x i1> %504, <8 x float> undef), !tbaa !12, !alias.scope !203 - %510 = add nsw i32 %505, %mul16.i.i - %511 = sext i32 %510 to i64 - %512 = getelementptr inbounds float, float* %6, i64 %511 - %513 = bitcast float* %512 to <8 x float>* - %wide.masked.load228.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %513, i32 4, <8 x i1> %504, <8 x float> undef), !tbaa !12, !alias.scope !204 - %514 = fneg <8 x float> %wide.masked.gather227.3 - %515 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %514, <8 x float> %wide.masked.load228.3, <8 x float> %wide.masked.load224.3) - %516 = bitcast float* %508 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %515, <8 x float>* %516, i32 4, <8 x i1> %504), !tbaa !12, !alias.scope !197, !noalias !200, !llvm.access.group !24 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.r_exit.i.i.us.4.1, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ], [ %849, %if.end.r_exit.i.i.us.4.1 ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %517 = trunc i64 %add1.i.i.i.us.4 to i32 - %conv2.i.i.us.4 = add i32 %add.i.i, %517 - %cmp9.i.i.us.4 = icmp slt i32 %conv2.i.i.us.4, %14 - br i1 %cmp9.i.i.us.4, label %if.then.i.i.us.4, label %if.end.r_exit.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %add11.i.i.us.4 = add nsw i32 %conv2.i.i.us.4, %mul.i.i.4 - %idxprom.i.i.us.4 = sext i32 %add11.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.4 - %518 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %519 = load float, float* %arrayidx15.i.i.4, align 4, !tbaa !12 - %add17.i.i.us.4 = add nsw i32 %conv2.i.i.us.4, %mul16.i.i - %idxprom18.i.i.us.4 = sext i32 %add17.i.i.us.4 to i64 - %arrayidx19.i.i.us.4 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.4 - %520 = load float, float* %arrayidx19.i.i.us.4, align 4, !tbaa !12 - %neg.i.i.us.4 = fneg float %519 - %521 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.4, float %520, float %518) #6 - store float %521, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.4 - -if.end.r_exit.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %522 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %522, %mul.i.i.i - %523 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %conv2.i.i.us.4.1 = add i32 %add.i.i, %523 - %cmp9.i.i.us.4.1 = icmp slt i32 %conv2.i.i.us.4.1, %14 - br i1 %cmp9.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.r_exit.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.r_exit.i.i.us.4.1 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph211, %pregion_for_end.i.i.3 - %524 = trunc i64 %mul3.i.i.i to i32 - %525 = or i32 %524, 5 - %conv7.i.i.5 = add i32 %add.i.i, %525 - %cmp.i.i.5 = icmp slt i32 %conv7.i.i.5, %14 - %mul.i.i.5 = mul nsw i32 %conv7.i.i.5, %14 - %add13.i.i.5 = add nsw i32 %mul.i.i.5, %10 - %idxprom14.i.i.5 = sext i32 %add13.i.i.5 to i64 - %arrayidx15.i.i.5 = getelementptr inbounds float, float* %6, i64 %idxprom14.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck239, label %pregion_for_end.i.i.5 - -vector.scevcheck239: ; preds = %pregion_for_end.i.i.4 - %526 = mul i32 %14, %conv7.i.i.5 - %527 = add i32 %10, %526 - %528 = trunc i64 %2 to i32 - %529 = shl i32 %528, 5 - %530 = add i32 %527, %529 - %531 = add i32 %530, 1 - %532 = add i32 %530, 32 - %533 = icmp slt i32 %532, %531 - %534 = add i32 %14, 1 - %535 = mul i32 %10, %534 - %536 = add i32 %535, %529 - %537 = add i32 %536, 1 - %538 = add i32 %536, 32 - %539 = icmp slt i32 %538, %537 - %540 = or i1 %533, %539 - br i1 %540, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.memcheck261 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.memcheck261, %vector.scevcheck239 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.memcheck261: ; preds = %vector.scevcheck239 - %541 = mul i32 %14, %conv7.i.i.5 - %542 = add i32 %10, %541 - %543 = trunc i64 %2 to i32 - %544 = shl i32 %543, 5 - %545 = add i32 %542, %544 - %546 = add i32 %545, 1 - %547 = sext i32 %546 to i64 - %scevgep241 = getelementptr float, float* %6, i64 %547 - %scevgep241242 = bitcast float* %scevgep241 to i8* - %548 = add nsw i64 %547, 32 - %scevgep243 = getelementptr float, float* %6, i64 %548 - %scevgep245 = getelementptr float, float* %6, i64 %idxprom14.i.i.5 - %scevgep245246 = bitcast float* %scevgep245 to i8* - %uglygep247 = getelementptr i8, i8* %scevgep245246, i64 1 - %549 = add i32 %14, 1 - %550 = mul i32 %10, %549 - %551 = add i32 %550, %544 - %552 = add i32 %551, 1 - %553 = sext i32 %552 to i64 - %scevgep248 = getelementptr float, float* %6, i64 %553 - %554 = add nsw i64 %553, 32 - %scevgep250 = getelementptr float, float* %6, i64 %554 - %bound0253 = icmp ugt i8* %uglygep247, %scevgep241242 - %bound1254 = icmp ult float* %arrayidx15.i.i.5, %scevgep243 - %found.conflict255 = and i1 %bound0253, %bound1254 - %bound0256 = icmp ult float* %scevgep241, %scevgep250 - %bound1257 = icmp ult float* %scevgep248, %scevgep243 - %found.conflict258 = and i1 %bound0256, %bound1257 - %conflict.rdx259 = or i1 %found.conflict255, %found.conflict258 - br i1 %conflict.rdx259, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph262 - -vector.ph262: ; preds = %vector.memcheck261 - %broadcast.splatinsert269 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat270 = shufflevector <8 x i64> %broadcast.splatinsert269, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert271 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat272 = shufflevector <8 x i32> %broadcast.splatinsert271, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert273 = insertelement <8 x i32> undef, i32 %14, i32 0 - %broadcast.splat274 = shufflevector <8 x i32> %broadcast.splatinsert273, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert276 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.5, i32 0 - %broadcast.splat277 = shufflevector <8 x float*> %broadcast.splatinsert276, <8 x float*> undef, <8 x i32> zeroinitializer - %555 = trunc <8 x i64> %broadcast.splat270 to <8 x i32> - %556 = or <8 x i32> %555, - %557 = add <8 x i32> %broadcast.splat272, %556 - %558 = icmp slt <8 x i32> %557, %broadcast.splat274 - %559 = extractelement <8 x i32> %557, i32 0 - %560 = add nsw i32 %559, %mul.i.i.5 - %561 = sext i32 %560 to i64 - %562 = getelementptr inbounds float, float* %6, i64 %561 - %563 = bitcast float* %562 to <8 x float>* - %wide.masked.load275 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %563, i32 4, <8 x i1> %558, <8 x float> undef), !tbaa !12, !alias.scope !205, !noalias !208 - %wide.masked.gather278 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat277, i32 4, <8 x i1> %558, <8 x float> undef), !tbaa !12, !alias.scope !211 - %564 = add nsw i32 %559, %mul16.i.i - %565 = sext i32 %564 to i64 - %566 = getelementptr inbounds float, float* %6, i64 %565 - %567 = bitcast float* %566 to <8 x float>* - %wide.masked.load279 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %567, i32 4, <8 x i1> %558, <8 x float> undef), !tbaa !12, !alias.scope !212 - %568 = fneg <8 x float> %wide.masked.gather278 - %569 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %568, <8 x float> %wide.masked.load279, <8 x float> %wide.masked.load275) - %570 = bitcast float* %562 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %569, <8 x float>* %570, i32 4, <8 x i1> %558), !tbaa !12, !alias.scope !205, !noalias !208, !llvm.access.group !24 - %571 = trunc <8 x i64> %broadcast.splat270 to <8 x i32> - %572 = or <8 x i32> %571, - %573 = add <8 x i32> %broadcast.splat272, %572 - %574 = icmp slt <8 x i32> %573, %broadcast.splat274 - %575 = extractelement <8 x i32> %573, i32 0 - %576 = add nsw i32 %575, %mul.i.i.5 - %577 = sext i32 %576 to i64 - %578 = getelementptr inbounds float, float* %6, i64 %577 - %579 = bitcast float* %578 to <8 x float>* - %wide.masked.load275.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %579, i32 4, <8 x i1> %574, <8 x float> undef), !tbaa !12, !alias.scope !205, !noalias !208 - %wide.masked.gather278.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat277, i32 4, <8 x i1> %574, <8 x float> undef), !tbaa !12, !alias.scope !211 - %580 = add nsw i32 %575, %mul16.i.i - %581 = sext i32 %580 to i64 - %582 = getelementptr inbounds float, float* %6, i64 %581 - %583 = bitcast float* %582 to <8 x float>* - %wide.masked.load279.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %583, i32 4, <8 x i1> %574, <8 x float> undef), !tbaa !12, !alias.scope !212 - %584 = fneg <8 x float> %wide.masked.gather278.1 - %585 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %584, <8 x float> %wide.masked.load279.1, <8 x float> %wide.masked.load275.1) - %586 = bitcast float* %578 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %585, <8 x float>* %586, i32 4, <8 x i1> %574), !tbaa !12, !alias.scope !205, !noalias !208, !llvm.access.group !24 - %587 = trunc <8 x i64> %broadcast.splat270 to <8 x i32> - %588 = or <8 x i32> %587, - %589 = add <8 x i32> %broadcast.splat272, %588 - %590 = icmp slt <8 x i32> %589, %broadcast.splat274 - %591 = extractelement <8 x i32> %589, i32 0 - %592 = add nsw i32 %591, %mul.i.i.5 - %593 = sext i32 %592 to i64 - %594 = getelementptr inbounds float, float* %6, i64 %593 - %595 = bitcast float* %594 to <8 x float>* - %wide.masked.load275.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %595, i32 4, <8 x i1> %590, <8 x float> undef), !tbaa !12, !alias.scope !205, !noalias !208 - %wide.masked.gather278.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat277, i32 4, <8 x i1> %590, <8 x float> undef), !tbaa !12, !alias.scope !211 - %596 = add nsw i32 %591, %mul16.i.i - %597 = sext i32 %596 to i64 - %598 = getelementptr inbounds float, float* %6, i64 %597 - %599 = bitcast float* %598 to <8 x float>* - %wide.masked.load279.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %599, i32 4, <8 x i1> %590, <8 x float> undef), !tbaa !12, !alias.scope !212 - %600 = fneg <8 x float> %wide.masked.gather278.2 - %601 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %600, <8 x float> %wide.masked.load279.2, <8 x float> %wide.masked.load275.2) - %602 = bitcast float* %594 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %601, <8 x float>* %602, i32 4, <8 x i1> %590), !tbaa !12, !alias.scope !205, !noalias !208, !llvm.access.group !24 - %603 = trunc <8 x i64> %broadcast.splat270 to <8 x i32> - %604 = or <8 x i32> %603, - %605 = add <8 x i32> %broadcast.splat272, %604 - %606 = icmp slt <8 x i32> %605, %broadcast.splat274 - %607 = extractelement <8 x i32> %605, i32 0 - %608 = add nsw i32 %607, %mul.i.i.5 - %609 = sext i32 %608 to i64 - %610 = getelementptr inbounds float, float* %6, i64 %609 - %611 = bitcast float* %610 to <8 x float>* - %wide.masked.load275.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %611, i32 4, <8 x i1> %606, <8 x float> undef), !tbaa !12, !alias.scope !205, !noalias !208 - %wide.masked.gather278.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat277, i32 4, <8 x i1> %606, <8 x float> undef), !tbaa !12, !alias.scope !211 - %612 = add nsw i32 %607, %mul16.i.i - %613 = sext i32 %612 to i64 - %614 = getelementptr inbounds float, float* %6, i64 %613 - %615 = bitcast float* %614 to <8 x float>* - %wide.masked.load279.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %615, i32 4, <8 x i1> %606, <8 x float> undef), !tbaa !12, !alias.scope !212 - %616 = fneg <8 x float> %wide.masked.gather278.3 - %617 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %616, <8 x float> %wide.masked.load279.3, <8 x float> %wide.masked.load275.3) - %618 = bitcast float* %610 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %617, <8 x float>* %618, i32 4, <8 x i1> %606), !tbaa !12, !alias.scope !205, !noalias !208, !llvm.access.group !24 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.r_exit.i.i.us.5.1, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ], [ %844, %if.end.r_exit.i.i.us.5.1 ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %619 = trunc i64 %add1.i.i.i.us.5 to i32 - %conv2.i.i.us.5 = add i32 %add.i.i, %619 - %cmp9.i.i.us.5 = icmp slt i32 %conv2.i.i.us.5, %14 - br i1 %cmp9.i.i.us.5, label %if.then.i.i.us.5, label %if.end.r_exit.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %add11.i.i.us.5 = add nsw i32 %conv2.i.i.us.5, %mul.i.i.5 - %idxprom.i.i.us.5 = sext i32 %add11.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.5 - %620 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %621 = load float, float* %arrayidx15.i.i.5, align 4, !tbaa !12 - %add17.i.i.us.5 = add nsw i32 %conv2.i.i.us.5, %mul16.i.i - %idxprom18.i.i.us.5 = sext i32 %add17.i.i.us.5 to i64 - %arrayidx19.i.i.us.5 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.5 - %622 = load float, float* %arrayidx19.i.i.us.5, align 4, !tbaa !12 - %neg.i.i.us.5 = fneg float %621 - %623 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.5, float %622, float %620) #6 - store float %623, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.5 - -if.end.r_exit.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %624 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %624, %mul.i.i.i - %625 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %conv2.i.i.us.5.1 = add i32 %add.i.i, %625 - %cmp9.i.i.us.5.1 = icmp slt i32 %conv2.i.i.us.5.1, %14 - br i1 %cmp9.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.r_exit.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.r_exit.i.i.us.5.1 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph262, %pregion_for_end.i.i.4 - %626 = trunc i64 %mul3.i.i.i to i32 - %627 = or i32 %626, 6 - %conv7.i.i.6 = add i32 %add.i.i, %627 - %cmp.i.i.6 = icmp slt i32 %conv7.i.i.6, %14 - %mul.i.i.6 = mul nsw i32 %conv7.i.i.6, %14 - %add13.i.i.6 = add nsw i32 %mul.i.i.6, %10 - %idxprom14.i.i.6 = sext i32 %add13.i.i.6 to i64 - %arrayidx15.i.i.6 = getelementptr inbounds float, float* %6, i64 %idxprom14.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck290, label %pregion_for_end.i.i.6 - -vector.scevcheck290: ; preds = %pregion_for_end.i.i.5 - %628 = mul i32 %14, %conv7.i.i.6 - %629 = add i32 %10, %628 - %630 = trunc i64 %2 to i32 - %631 = shl i32 %630, 5 - %632 = add i32 %629, %631 - %633 = add i32 %632, 1 - %634 = add i32 %632, 32 - %635 = icmp slt i32 %634, %633 - %636 = add i32 %14, 1 - %637 = mul i32 %10, %636 - %638 = add i32 %637, %631 - %639 = add i32 %638, 1 - %640 = add i32 %638, 32 - %641 = icmp slt i32 %640, %639 - %642 = or i1 %635, %641 - br i1 %642, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.memcheck312 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.memcheck312, %vector.scevcheck290 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.memcheck312: ; preds = %vector.scevcheck290 - %643 = mul i32 %14, %conv7.i.i.6 - %644 = add i32 %10, %643 - %645 = trunc i64 %2 to i32 - %646 = shl i32 %645, 5 - %647 = add i32 %644, %646 - %648 = add i32 %647, 1 - %649 = sext i32 %648 to i64 - %scevgep292 = getelementptr float, float* %6, i64 %649 - %scevgep292293 = bitcast float* %scevgep292 to i8* - %650 = add nsw i64 %649, 32 - %scevgep294 = getelementptr float, float* %6, i64 %650 - %scevgep296 = getelementptr float, float* %6, i64 %idxprom14.i.i.6 - %scevgep296297 = bitcast float* %scevgep296 to i8* - %uglygep298 = getelementptr i8, i8* %scevgep296297, i64 1 - %651 = add i32 %14, 1 - %652 = mul i32 %10, %651 - %653 = add i32 %652, %646 - %654 = add i32 %653, 1 - %655 = sext i32 %654 to i64 - %scevgep299 = getelementptr float, float* %6, i64 %655 - %656 = add nsw i64 %655, 32 - %scevgep301 = getelementptr float, float* %6, i64 %656 - %bound0304 = icmp ugt i8* %uglygep298, %scevgep292293 - %bound1305 = icmp ult float* %arrayidx15.i.i.6, %scevgep294 - %found.conflict306 = and i1 %bound0304, %bound1305 - %bound0307 = icmp ult float* %scevgep292, %scevgep301 - %bound1308 = icmp ult float* %scevgep299, %scevgep294 - %found.conflict309 = and i1 %bound0307, %bound1308 - %conflict.rdx310 = or i1 %found.conflict306, %found.conflict309 - br i1 %conflict.rdx310, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph313 - -vector.ph313: ; preds = %vector.memcheck312 - %broadcast.splatinsert320 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat321 = shufflevector <8 x i64> %broadcast.splatinsert320, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert322 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat323 = shufflevector <8 x i32> %broadcast.splatinsert322, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert324 = insertelement <8 x i32> undef, i32 %14, i32 0 - %broadcast.splat325 = shufflevector <8 x i32> %broadcast.splatinsert324, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert327 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.6, i32 0 - %broadcast.splat328 = shufflevector <8 x float*> %broadcast.splatinsert327, <8 x float*> undef, <8 x i32> zeroinitializer - %657 = trunc <8 x i64> %broadcast.splat321 to <8 x i32> - %658 = or <8 x i32> %657, - %659 = add <8 x i32> %broadcast.splat323, %658 - %660 = icmp slt <8 x i32> %659, %broadcast.splat325 - %661 = extractelement <8 x i32> %659, i32 0 - %662 = add nsw i32 %661, %mul.i.i.6 - %663 = sext i32 %662 to i64 - %664 = getelementptr inbounds float, float* %6, i64 %663 - %665 = bitcast float* %664 to <8 x float>* - %wide.masked.load326 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %665, i32 4, <8 x i1> %660, <8 x float> undef), !tbaa !12, !alias.scope !213, !noalias !216 - %wide.masked.gather329 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat328, i32 4, <8 x i1> %660, <8 x float> undef), !tbaa !12, !alias.scope !219 - %666 = add nsw i32 %661, %mul16.i.i - %667 = sext i32 %666 to i64 - %668 = getelementptr inbounds float, float* %6, i64 %667 - %669 = bitcast float* %668 to <8 x float>* - %wide.masked.load330 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %669, i32 4, <8 x i1> %660, <8 x float> undef), !tbaa !12, !alias.scope !220 - %670 = fneg <8 x float> %wide.masked.gather329 - %671 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %670, <8 x float> %wide.masked.load330, <8 x float> %wide.masked.load326) - %672 = bitcast float* %664 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %671, <8 x float>* %672, i32 4, <8 x i1> %660), !tbaa !12, !alias.scope !213, !noalias !216, !llvm.access.group !24 - %673 = trunc <8 x i64> %broadcast.splat321 to <8 x i32> - %674 = or <8 x i32> %673, - %675 = add <8 x i32> %broadcast.splat323, %674 - %676 = icmp slt <8 x i32> %675, %broadcast.splat325 - %677 = extractelement <8 x i32> %675, i32 0 - %678 = add nsw i32 %677, %mul.i.i.6 - %679 = sext i32 %678 to i64 - %680 = getelementptr inbounds float, float* %6, i64 %679 - %681 = bitcast float* %680 to <8 x float>* - %wide.masked.load326.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %681, i32 4, <8 x i1> %676, <8 x float> undef), !tbaa !12, !alias.scope !213, !noalias !216 - %wide.masked.gather329.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat328, i32 4, <8 x i1> %676, <8 x float> undef), !tbaa !12, !alias.scope !219 - %682 = add nsw i32 %677, %mul16.i.i - %683 = sext i32 %682 to i64 - %684 = getelementptr inbounds float, float* %6, i64 %683 - %685 = bitcast float* %684 to <8 x float>* - %wide.masked.load330.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %685, i32 4, <8 x i1> %676, <8 x float> undef), !tbaa !12, !alias.scope !220 - %686 = fneg <8 x float> %wide.masked.gather329.1 - %687 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %686, <8 x float> %wide.masked.load330.1, <8 x float> %wide.masked.load326.1) - %688 = bitcast float* %680 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %687, <8 x float>* %688, i32 4, <8 x i1> %676), !tbaa !12, !alias.scope !213, !noalias !216, !llvm.access.group !24 - %689 = trunc <8 x i64> %broadcast.splat321 to <8 x i32> - %690 = or <8 x i32> %689, - %691 = add <8 x i32> %broadcast.splat323, %690 - %692 = icmp slt <8 x i32> %691, %broadcast.splat325 - %693 = extractelement <8 x i32> %691, i32 0 - %694 = add nsw i32 %693, %mul.i.i.6 - %695 = sext i32 %694 to i64 - %696 = getelementptr inbounds float, float* %6, i64 %695 - %697 = bitcast float* %696 to <8 x float>* - %wide.masked.load326.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %697, i32 4, <8 x i1> %692, <8 x float> undef), !tbaa !12, !alias.scope !213, !noalias !216 - %wide.masked.gather329.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat328, i32 4, <8 x i1> %692, <8 x float> undef), !tbaa !12, !alias.scope !219 - %698 = add nsw i32 %693, %mul16.i.i - %699 = sext i32 %698 to i64 - %700 = getelementptr inbounds float, float* %6, i64 %699 - %701 = bitcast float* %700 to <8 x float>* - %wide.masked.load330.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %701, i32 4, <8 x i1> %692, <8 x float> undef), !tbaa !12, !alias.scope !220 - %702 = fneg <8 x float> %wide.masked.gather329.2 - %703 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %702, <8 x float> %wide.masked.load330.2, <8 x float> %wide.masked.load326.2) - %704 = bitcast float* %696 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %703, <8 x float>* %704, i32 4, <8 x i1> %692), !tbaa !12, !alias.scope !213, !noalias !216, !llvm.access.group !24 - %705 = trunc <8 x i64> %broadcast.splat321 to <8 x i32> - %706 = or <8 x i32> %705, - %707 = add <8 x i32> %broadcast.splat323, %706 - %708 = icmp slt <8 x i32> %707, %broadcast.splat325 - %709 = extractelement <8 x i32> %707, i32 0 - %710 = add nsw i32 %709, %mul.i.i.6 - %711 = sext i32 %710 to i64 - %712 = getelementptr inbounds float, float* %6, i64 %711 - %713 = bitcast float* %712 to <8 x float>* - %wide.masked.load326.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %713, i32 4, <8 x i1> %708, <8 x float> undef), !tbaa !12, !alias.scope !213, !noalias !216 - %wide.masked.gather329.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat328, i32 4, <8 x i1> %708, <8 x float> undef), !tbaa !12, !alias.scope !219 - %714 = add nsw i32 %709, %mul16.i.i - %715 = sext i32 %714 to i64 - %716 = getelementptr inbounds float, float* %6, i64 %715 - %717 = bitcast float* %716 to <8 x float>* - %wide.masked.load330.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %717, i32 4, <8 x i1> %708, <8 x float> undef), !tbaa !12, !alias.scope !220 - %718 = fneg <8 x float> %wide.masked.gather329.3 - %719 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %718, <8 x float> %wide.masked.load330.3, <8 x float> %wide.masked.load326.3) - %720 = bitcast float* %712 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %719, <8 x float>* %720, i32 4, <8 x i1> %708), !tbaa !12, !alias.scope !213, !noalias !216, !llvm.access.group !24 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.r_exit.i.i.us.6.1, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ], [ %839, %if.end.r_exit.i.i.us.6.1 ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %721 = trunc i64 %add1.i.i.i.us.6 to i32 - %conv2.i.i.us.6 = add i32 %add.i.i, %721 - %cmp9.i.i.us.6 = icmp slt i32 %conv2.i.i.us.6, %14 - br i1 %cmp9.i.i.us.6, label %if.then.i.i.us.6, label %if.end.r_exit.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %add11.i.i.us.6 = add nsw i32 %conv2.i.i.us.6, %mul.i.i.6 - %idxprom.i.i.us.6 = sext i32 %add11.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.6 - %722 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %723 = load float, float* %arrayidx15.i.i.6, align 4, !tbaa !12 - %add17.i.i.us.6 = add nsw i32 %conv2.i.i.us.6, %mul16.i.i - %idxprom18.i.i.us.6 = sext i32 %add17.i.i.us.6 to i64 - %arrayidx19.i.i.us.6 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.6 - %724 = load float, float* %arrayidx19.i.i.us.6, align 4, !tbaa !12 - %neg.i.i.us.6 = fneg float %723 - %725 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.6, float %724, float %722) #6 - store float %725, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.6 - -if.end.r_exit.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %726 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %726, %mul.i.i.i - %727 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %conv2.i.i.us.6.1 = add i32 %add.i.i, %727 - %cmp9.i.i.us.6.1 = icmp slt i32 %conv2.i.i.us.6.1, %14 - br i1 %cmp9.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.r_exit.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.r_exit.i.i.us.6.1 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph313, %pregion_for_end.i.i.5 - %728 = trunc i64 %mul3.i.i.i to i32 - %729 = or i32 %728, 7 - %conv7.i.i.7 = add i32 %add.i.i, %729 - %cmp.i.i.7 = icmp slt i32 %conv7.i.i.7, %14 - %mul.i.i.7 = mul nsw i32 %conv7.i.i.7, %14 - %add13.i.i.7 = add nsw i32 %mul.i.i.7, %10 - %idxprom14.i.i.7 = sext i32 %add13.i.i.7 to i64 - %arrayidx15.i.i.7 = getelementptr inbounds float, float* %6, i64 %idxprom14.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck341, label %pregion_for_end.i.i.7 - -vector.scevcheck341: ; preds = %pregion_for_end.i.i.6 - %730 = mul i32 %14, %conv7.i.i.7 - %731 = add i32 %10, %730 - %732 = trunc i64 %2 to i32 - %733 = shl i32 %732, 5 - %734 = add i32 %731, %733 - %735 = add i32 %734, 1 - %736 = add i32 %734, 32 - %737 = icmp slt i32 %736, %735 - %738 = add i32 %14, 1 - %739 = mul i32 %10, %738 - %740 = add i32 %739, %733 - %741 = add i32 %740, 1 - %742 = add i32 %740, 32 - %743 = icmp slt i32 %742, %741 - %744 = or i1 %737, %743 - br i1 %744, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.memcheck363 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.memcheck363, %vector.scevcheck341 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.memcheck363: ; preds = %vector.scevcheck341 - %745 = mul i32 %14, %conv7.i.i.7 - %746 = add i32 %10, %745 - %747 = trunc i64 %2 to i32 - %748 = shl i32 %747, 5 - %749 = add i32 %746, %748 - %750 = add i32 %749, 1 - %751 = sext i32 %750 to i64 - %scevgep343 = getelementptr float, float* %6, i64 %751 - %scevgep343344 = bitcast float* %scevgep343 to i8* - %752 = add nsw i64 %751, 32 - %scevgep345 = getelementptr float, float* %6, i64 %752 - %scevgep347 = getelementptr float, float* %6, i64 %idxprom14.i.i.7 - %scevgep347348 = bitcast float* %scevgep347 to i8* - %uglygep349 = getelementptr i8, i8* %scevgep347348, i64 1 - %753 = add i32 %14, 1 - %754 = mul i32 %10, %753 - %755 = add i32 %754, %748 - %756 = add i32 %755, 1 - %757 = sext i32 %756 to i64 - %scevgep350 = getelementptr float, float* %6, i64 %757 - %758 = add nsw i64 %757, 32 - %scevgep352 = getelementptr float, float* %6, i64 %758 - %bound0355 = icmp ugt i8* %uglygep349, %scevgep343344 - %bound1356 = icmp ult float* %arrayidx15.i.i.7, %scevgep345 - %found.conflict357 = and i1 %bound0355, %bound1356 - %bound0358 = icmp ult float* %scevgep343, %scevgep352 - %bound1359 = icmp ult float* %scevgep350, %scevgep345 - %found.conflict360 = and i1 %bound0358, %bound1359 - %conflict.rdx361 = or i1 %found.conflict357, %found.conflict360 - br i1 %conflict.rdx361, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph364 - -vector.ph364: ; preds = %vector.memcheck363 - %broadcast.splatinsert371 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat372 = shufflevector <8 x i64> %broadcast.splatinsert371, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert373 = insertelement <8 x i32> undef, i32 %add.i.i, i32 0 - %broadcast.splat374 = shufflevector <8 x i32> %broadcast.splatinsert373, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert375 = insertelement <8 x i32> undef, i32 %14, i32 0 - %broadcast.splat376 = shufflevector <8 x i32> %broadcast.splatinsert375, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert378 = insertelement <8 x float*> undef, float* %arrayidx15.i.i.7, i32 0 - %broadcast.splat379 = shufflevector <8 x float*> %broadcast.splatinsert378, <8 x float*> undef, <8 x i32> zeroinitializer - %759 = trunc <8 x i64> %broadcast.splat372 to <8 x i32> - %760 = or <8 x i32> %759, - %761 = add <8 x i32> %broadcast.splat374, %760 - %762 = icmp slt <8 x i32> %761, %broadcast.splat376 - %763 = extractelement <8 x i32> %761, i32 0 - %764 = add nsw i32 %763, %mul.i.i.7 - %765 = sext i32 %764 to i64 - %766 = getelementptr inbounds float, float* %6, i64 %765 - %767 = bitcast float* %766 to <8 x float>* - %wide.masked.load377 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %767, i32 4, <8 x i1> %762, <8 x float> undef), !tbaa !12, !alias.scope !221, !noalias !224 - %wide.masked.gather380 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat379, i32 4, <8 x i1> %762, <8 x float> undef), !tbaa !12, !alias.scope !227 - %768 = add nsw i32 %763, %mul16.i.i - %769 = sext i32 %768 to i64 - %770 = getelementptr inbounds float, float* %6, i64 %769 - %771 = bitcast float* %770 to <8 x float>* - %wide.masked.load381 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %771, i32 4, <8 x i1> %762, <8 x float> undef), !tbaa !12, !alias.scope !228 - %772 = fneg <8 x float> %wide.masked.gather380 - %773 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %772, <8 x float> %wide.masked.load381, <8 x float> %wide.masked.load377) - %774 = bitcast float* %766 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %773, <8 x float>* %774, i32 4, <8 x i1> %762), !tbaa !12, !alias.scope !221, !noalias !224, !llvm.access.group !24 - %775 = trunc <8 x i64> %broadcast.splat372 to <8 x i32> - %776 = or <8 x i32> %775, - %777 = add <8 x i32> %broadcast.splat374, %776 - %778 = icmp slt <8 x i32> %777, %broadcast.splat376 - %779 = extractelement <8 x i32> %777, i32 0 - %780 = add nsw i32 %779, %mul.i.i.7 - %781 = sext i32 %780 to i64 - %782 = getelementptr inbounds float, float* %6, i64 %781 - %783 = bitcast float* %782 to <8 x float>* - %wide.masked.load377.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %783, i32 4, <8 x i1> %778, <8 x float> undef), !tbaa !12, !alias.scope !221, !noalias !224 - %wide.masked.gather380.1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat379, i32 4, <8 x i1> %778, <8 x float> undef), !tbaa !12, !alias.scope !227 - %784 = add nsw i32 %779, %mul16.i.i - %785 = sext i32 %784 to i64 - %786 = getelementptr inbounds float, float* %6, i64 %785 - %787 = bitcast float* %786 to <8 x float>* - %wide.masked.load381.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %787, i32 4, <8 x i1> %778, <8 x float> undef), !tbaa !12, !alias.scope !228 - %788 = fneg <8 x float> %wide.masked.gather380.1 - %789 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %788, <8 x float> %wide.masked.load381.1, <8 x float> %wide.masked.load377.1) - %790 = bitcast float* %782 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %789, <8 x float>* %790, i32 4, <8 x i1> %778), !tbaa !12, !alias.scope !221, !noalias !224, !llvm.access.group !24 - %791 = trunc <8 x i64> %broadcast.splat372 to <8 x i32> - %792 = or <8 x i32> %791, - %793 = add <8 x i32> %broadcast.splat374, %792 - %794 = icmp slt <8 x i32> %793, %broadcast.splat376 - %795 = extractelement <8 x i32> %793, i32 0 - %796 = add nsw i32 %795, %mul.i.i.7 - %797 = sext i32 %796 to i64 - %798 = getelementptr inbounds float, float* %6, i64 %797 - %799 = bitcast float* %798 to <8 x float>* - %wide.masked.load377.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %799, i32 4, <8 x i1> %794, <8 x float> undef), !tbaa !12, !alias.scope !221, !noalias !224 - %wide.masked.gather380.2 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat379, i32 4, <8 x i1> %794, <8 x float> undef), !tbaa !12, !alias.scope !227 - %800 = add nsw i32 %795, %mul16.i.i - %801 = sext i32 %800 to i64 - %802 = getelementptr inbounds float, float* %6, i64 %801 - %803 = bitcast float* %802 to <8 x float>* - %wide.masked.load381.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %803, i32 4, <8 x i1> %794, <8 x float> undef), !tbaa !12, !alias.scope !228 - %804 = fneg <8 x float> %wide.masked.gather380.2 - %805 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %804, <8 x float> %wide.masked.load381.2, <8 x float> %wide.masked.load377.2) - %806 = bitcast float* %798 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %805, <8 x float>* %806, i32 4, <8 x i1> %794), !tbaa !12, !alias.scope !221, !noalias !224, !llvm.access.group !24 - %807 = trunc <8 x i64> %broadcast.splat372 to <8 x i32> - %808 = or <8 x i32> %807, - %809 = add <8 x i32> %broadcast.splat374, %808 - %810 = icmp slt <8 x i32> %809, %broadcast.splat376 - %811 = extractelement <8 x i32> %809, i32 0 - %812 = add nsw i32 %811, %mul.i.i.7 - %813 = sext i32 %812 to i64 - %814 = getelementptr inbounds float, float* %6, i64 %813 - %815 = bitcast float* %814 to <8 x float>* - %wide.masked.load377.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %815, i32 4, <8 x i1> %810, <8 x float> undef), !tbaa !12, !alias.scope !221, !noalias !224 - %wide.masked.gather380.3 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %broadcast.splat379, i32 4, <8 x i1> %810, <8 x float> undef), !tbaa !12, !alias.scope !227 - %816 = add nsw i32 %811, %mul16.i.i - %817 = sext i32 %816 to i64 - %818 = getelementptr inbounds float, float* %6, i64 %817 - %819 = bitcast float* %818 to <8 x float>* - %wide.masked.load381.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %819, i32 4, <8 x i1> %810, <8 x float> undef), !tbaa !12, !alias.scope !228 - %820 = fneg <8 x float> %wide.masked.gather380.3 - %821 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %820, <8 x float> %wide.masked.load381.3, <8 x float> %wide.masked.load377.3) - %822 = bitcast float* %814 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %821, <8 x float>* %822, i32 4, <8 x i1> %810), !tbaa !12, !alias.scope !221, !noalias !224, !llvm.access.group !24 - br label %pregion_for_end.i.i.7 - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.r_exit.i.i.us.7.1, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ], [ %834, %if.end.r_exit.i.i.us.7.1 ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %823 = trunc i64 %add1.i.i.i.us.7 to i32 - %conv2.i.i.us.7 = add i32 %add.i.i, %823 - %cmp9.i.i.us.7 = icmp slt i32 %conv2.i.i.us.7, %14 - br i1 %cmp9.i.i.us.7, label %if.then.i.i.us.7, label %if.end.r_exit.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %add11.i.i.us.7 = add nsw i32 %conv2.i.i.us.7, %mul.i.i.7 - %idxprom.i.i.us.7 = sext i32 %add11.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.7 - %824 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %825 = load float, float* %arrayidx15.i.i.7, align 4, !tbaa !12 - %add17.i.i.us.7 = add nsw i32 %conv2.i.i.us.7, %mul16.i.i - %idxprom18.i.i.us.7 = sext i32 %add17.i.i.us.7 to i64 - %arrayidx19.i.i.us.7 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.7 - %826 = load float, float* %arrayidx19.i.i.us.7, align 4, !tbaa !12 - %neg.i.i.us.7 = fneg float %825 - %827 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.7, float %826, float %824) #6 - store float %827, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.7 - -if.end.r_exit.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %828 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %828, %mul.i.i.i - %829 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %conv2.i.i.us.7.1 = add i32 %add.i.i, %829 - %cmp9.i.i.us.7.1 = icmp slt i32 %conv2.i.i.us.7.1, %14 - br i1 %cmp9.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.r_exit.i.i.us.7.1 - -pregion_for_end.i.i.7.loopexit: ; preds = %if.end.r_exit.i.i.us.7.1 - br label %pregion_for_end.i.i.7 - -pregion_for_end.i.i.7: ; preds = %pregion_for_end.i.i.7.loopexit, %vector.ph364, %pregion_for_end.i.i.6 - ret void - -if.then.i.i.us.7.1: ; preds = %if.end.r_exit.i.i.us.7 - %add11.i.i.us.7.1 = add nsw i32 %conv2.i.i.us.7.1, %mul.i.i.7 - %idxprom.i.i.us.7.1 = sext i32 %add11.i.i.us.7.1 to i64 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.7.1 - %830 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %831 = load float, float* %arrayidx15.i.i.7, align 4, !tbaa !12 - %add17.i.i.us.7.1 = add nsw i32 %conv2.i.i.us.7.1, %mul16.i.i - %idxprom18.i.i.us.7.1 = sext i32 %add17.i.i.us.7.1 to i64 - %arrayidx19.i.i.us.7.1 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.7.1 - %832 = load float, float* %arrayidx19.i.i.us.7.1, align 4, !tbaa !12 - %neg.i.i.us.7.1 = fneg float %831 - %833 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.7.1, float %832, float %830) #6 - store float %833, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.7.1 - -if.end.r_exit.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.r_exit.i.i.us.7 - %834 = add nuw nsw i64 %_local_id_x.i.0.us.7, 2 - %exitcond.7.not.1 = icmp eq i64 %834, 32 - br i1 %exitcond.7.not.1, label %pregion_for_end.i.i.7.loopexit, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !229 - -if.then.i.i.us.6.1: ; preds = %if.end.r_exit.i.i.us.6 - %add11.i.i.us.6.1 = add nsw i32 %conv2.i.i.us.6.1, %mul.i.i.6 - %idxprom.i.i.us.6.1 = sext i32 %add11.i.i.us.6.1 to i64 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.6.1 - %835 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %836 = load float, float* %arrayidx15.i.i.6, align 4, !tbaa !12 - %add17.i.i.us.6.1 = add nsw i32 %conv2.i.i.us.6.1, %mul16.i.i - %idxprom18.i.i.us.6.1 = sext i32 %add17.i.i.us.6.1 to i64 - %arrayidx19.i.i.us.6.1 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.6.1 - %837 = load float, float* %arrayidx19.i.i.us.6.1, align 4, !tbaa !12 - %neg.i.i.us.6.1 = fneg float %836 - %838 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.6.1, float %837, float %835) #6 - store float %838, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.6.1 - -if.end.r_exit.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.r_exit.i.i.us.6 - %839 = add nuw nsw i64 %_local_id_x.i.0.us.6, 2 - %exitcond.6.not.1 = icmp eq i64 %839, 32 - br i1 %exitcond.6.not.1, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !230 - -if.then.i.i.us.5.1: ; preds = %if.end.r_exit.i.i.us.5 - %add11.i.i.us.5.1 = add nsw i32 %conv2.i.i.us.5.1, %mul.i.i.5 - %idxprom.i.i.us.5.1 = sext i32 %add11.i.i.us.5.1 to i64 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.5.1 - %840 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %841 = load float, float* %arrayidx15.i.i.5, align 4, !tbaa !12 - %add17.i.i.us.5.1 = add nsw i32 %conv2.i.i.us.5.1, %mul16.i.i - %idxprom18.i.i.us.5.1 = sext i32 %add17.i.i.us.5.1 to i64 - %arrayidx19.i.i.us.5.1 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.5.1 - %842 = load float, float* %arrayidx19.i.i.us.5.1, align 4, !tbaa !12 - %neg.i.i.us.5.1 = fneg float %841 - %843 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.5.1, float %842, float %840) #6 - store float %843, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.5.1 - -if.end.r_exit.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.r_exit.i.i.us.5 - %844 = add nuw nsw i64 %_local_id_x.i.0.us.5, 2 - %exitcond.5.not.1 = icmp eq i64 %844, 32 - br i1 %exitcond.5.not.1, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !231 - -if.then.i.i.us.4.1: ; preds = %if.end.r_exit.i.i.us.4 - %add11.i.i.us.4.1 = add nsw i32 %conv2.i.i.us.4.1, %mul.i.i.4 - %idxprom.i.i.us.4.1 = sext i32 %add11.i.i.us.4.1 to i64 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.4.1 - %845 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %846 = load float, float* %arrayidx15.i.i.4, align 4, !tbaa !12 - %add17.i.i.us.4.1 = add nsw i32 %conv2.i.i.us.4.1, %mul16.i.i - %idxprom18.i.i.us.4.1 = sext i32 %add17.i.i.us.4.1 to i64 - %arrayidx19.i.i.us.4.1 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.4.1 - %847 = load float, float* %arrayidx19.i.i.us.4.1, align 4, !tbaa !12 - %neg.i.i.us.4.1 = fneg float %846 - %848 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.4.1, float %847, float %845) #6 - store float %848, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.4.1 - -if.end.r_exit.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.r_exit.i.i.us.4 - %849 = add nuw nsw i64 %_local_id_x.i.0.us.4, 2 - %exitcond.4.not.1 = icmp eq i64 %849, 32 - br i1 %exitcond.4.not.1, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !232 - -if.then.i.i.us.3.1: ; preds = %if.end.r_exit.i.i.us.3 - %add11.i.i.us.3.1 = add nsw i32 %conv2.i.i.us.3.1, %mul.i.i.3 - %idxprom.i.i.us.3.1 = sext i32 %add11.i.i.us.3.1 to i64 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.3.1 - %850 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %851 = load float, float* %arrayidx15.i.i.3, align 4, !tbaa !12 - %add17.i.i.us.3.1 = add nsw i32 %conv2.i.i.us.3.1, %mul16.i.i - %idxprom18.i.i.us.3.1 = sext i32 %add17.i.i.us.3.1 to i64 - %arrayidx19.i.i.us.3.1 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.3.1 - %852 = load float, float* %arrayidx19.i.i.us.3.1, align 4, !tbaa !12 - %neg.i.i.us.3.1 = fneg float %851 - %853 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.3.1, float %852, float %850) #6 - store float %853, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.3.1 - -if.end.r_exit.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.r_exit.i.i.us.3 - %854 = add nuw nsw i64 %_local_id_x.i.0.us.3, 2 - %exitcond.3.not.1 = icmp eq i64 %854, 32 - br i1 %exitcond.3.not.1, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !233 - -if.then.i.i.us.2.1: ; preds = %if.end.r_exit.i.i.us.2 - %add11.i.i.us.2.1 = add nsw i32 %conv2.i.i.us.2.1, %mul.i.i.2 - %idxprom.i.i.us.2.1 = sext i32 %add11.i.i.us.2.1 to i64 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.2.1 - %855 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %856 = load float, float* %arrayidx15.i.i.2, align 4, !tbaa !12 - %add17.i.i.us.2.1 = add nsw i32 %conv2.i.i.us.2.1, %mul16.i.i - %idxprom18.i.i.us.2.1 = sext i32 %add17.i.i.us.2.1 to i64 - %arrayidx19.i.i.us.2.1 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.2.1 - %857 = load float, float* %arrayidx19.i.i.us.2.1, align 4, !tbaa !12 - %neg.i.i.us.2.1 = fneg float %856 - %858 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.2.1, float %857, float %855) #6 - store float %858, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.2.1 - -if.end.r_exit.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.r_exit.i.i.us.2 - %859 = add nuw nsw i64 %_local_id_x.i.0.us.2, 2 - %exitcond.2.not.1 = icmp eq i64 %859, 32 - br i1 %exitcond.2.not.1, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !234 - -if.then.i.i.us.1.1: ; preds = %if.end.r_exit.i.i.us.1 - %add11.i.i.us.1.1 = add nsw i32 %conv2.i.i.us.1.1, %mul.i.i.1 - %idxprom.i.i.us.1.1 = sext i32 %add11.i.i.us.1.1 to i64 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.1.1 - %860 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %861 = load float, float* %arrayidx15.i.i.1, align 4, !tbaa !12 - %add17.i.i.us.1.1 = add nsw i32 %conv2.i.i.us.1.1, %mul16.i.i - %idxprom18.i.i.us.1.1 = sext i32 %add17.i.i.us.1.1 to i64 - %arrayidx19.i.i.us.1.1 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.1.1 - %862 = load float, float* %arrayidx19.i.i.us.1.1, align 4, !tbaa !12 - %neg.i.i.us.1.1 = fneg float %861 - %863 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.1.1, float %862, float %860) #6 - store float %863, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.1.1 - -if.end.r_exit.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.r_exit.i.i.us.1 - %864 = add nuw nsw i64 %_local_id_x.i.0.us.1, 2 - %exitcond.1.not.1 = icmp eq i64 %864, 32 - br i1 %exitcond.1.not.1, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !235 - -if.then.i.i.us.1402: ; preds = %if.end.r_exit.i.i.us - %add11.i.i.us.1395 = add nsw i32 %conv2.i.i.us.1392, %mul.i.i - %idxprom.i.i.us.1396 = sext i32 %add11.i.i.us.1395 to i64 - %arrayidx.i.i.us.1397 = getelementptr inbounds float, float* %6, i64 %idxprom.i.i.us.1396 - %865 = load float, float* %arrayidx.i.i.us.1397, align 4, !tbaa !12 - %866 = load float, float* %arrayidx15.i.i, align 4, !tbaa !12 - %add17.i.i.us.1398 = add nsw i32 %conv2.i.i.us.1392, %mul16.i.i - %idxprom18.i.i.us.1399 = sext i32 %add17.i.i.us.1398 to i64 - %arrayidx19.i.i.us.1400 = getelementptr inbounds float, float* %6, i64 %idxprom18.i.i.us.1399 - %867 = load float, float* %arrayidx19.i.i.us.1400, align 4, !tbaa !12 - %neg.i.i.us.1401 = fneg float %866 - %868 = tail call float @llvm.fmuladd.f32(float %neg.i.i.us.1401, float %867, float %865) #6 - store float %868, float* %arrayidx.i.i.us.1397, align 4, !tbaa !12, !llvm.access.group !24 - br label %if.end.r_exit.i.i.us.1403 - -if.end.r_exit.i.i.us.1403: ; preds = %if.then.i.i.us.1402, %if.end.r_exit.i.i.us - %869 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %869, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !236 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: nounwind readonly willreturn -declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32 immarg, <8 x i1>, <8 x float>) #4 - -; Function Attrs: nounwind readnone speculatable willreturn -declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) #0 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #5 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nofree nounwind } -attributes #3 = { argmemonly nounwind readonly willreturn } -attributes #4 = { nounwind readonly willreturn } -attributes #5 = { argmemonly nounwind willreturn } -attributes #6 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 0, i32 0} -!6 = !{!"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"int", !"int"} -!8 = !{!"float*", !"int", !"int"} -!9 = !{!"", !"", !""} -!10 = !{!"A", !"k", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{!17, !18} -!18 = distinct !{!18, !"LVerDomain"} -!19 = !{!20, !21} -!20 = distinct !{!20, !18} -!21 = distinct !{!21, !18} -!22 = !{!20} -!23 = !{!21} -!24 = !{!25, !26} -!25 = distinct !{} -!26 = distinct !{} -!27 = !{!28} -!28 = distinct !{!28, !29} -!29 = distinct !{!29, !"LVerDomain"} -!30 = !{!31, !32} -!31 = distinct !{!31, !29} -!32 = distinct !{!32, !29} -!33 = !{!31} -!34 = !{!32} -!35 = !{!36} -!36 = distinct !{!36, !37} -!37 = distinct !{!37, !"LVerDomain"} -!38 = !{!39, !40} -!39 = distinct !{!39, !37} -!40 = distinct !{!40, !37} -!41 = !{!39} -!42 = !{!40} -!43 = !{!44} -!44 = distinct !{!44, !45} -!45 = distinct !{!45, !"LVerDomain"} -!46 = !{!47, !48} -!47 = distinct !{!47, !45} -!48 = distinct !{!48, !45} -!49 = !{!47} -!50 = !{!48} -!51 = !{!52} -!52 = distinct !{!52, !53} -!53 = distinct !{!53, !"LVerDomain"} -!54 = !{!55, !56} -!55 = distinct !{!55, !53} -!56 = distinct !{!56, !53} -!57 = !{!55} -!58 = !{!56} -!59 = !{!60} -!60 = distinct !{!60, !61} -!61 = distinct !{!61, !"LVerDomain"} -!62 = !{!63, !64} -!63 = distinct !{!63, !61} -!64 = distinct !{!64, !61} -!65 = !{!63} -!66 = !{!64} -!67 = !{!68} -!68 = distinct !{!68, !69} -!69 = distinct !{!69, !"LVerDomain"} -!70 = !{!71, !72} -!71 = distinct !{!71, !69} -!72 = distinct !{!72, !69} -!73 = !{!71} -!74 = !{!72} -!75 = !{!76} -!76 = distinct !{!76, !77} -!77 = distinct !{!77, !"LVerDomain"} -!78 = !{!79, !80} -!79 = distinct !{!79, !77} -!80 = distinct !{!80, !77} -!81 = !{!79} -!82 = !{!80} -!83 = distinct !{!83, !84, !85} -!84 = !{!"llvm.loop.parallel_accesses", !25} -!85 = !{!"llvm.loop.isvectorized", i32 1} -!86 = distinct !{!86, !84, !85} -!87 = distinct !{!87, !84, !85} -!88 = distinct !{!88, !84, !85} -!89 = distinct !{!89, !84, !85} -!90 = distinct !{!90, !84, !85} -!91 = distinct !{!91, !84, !85} -!92 = distinct !{!92, !84, !85} -!93 = !{!94} -!94 = distinct !{!94, !95} -!95 = distinct !{!95, !"LVerDomain"} -!96 = !{!97, !98} -!97 = distinct !{!97, !95} -!98 = distinct !{!98, !95} -!99 = !{!97} -!100 = !{!98} -!101 = !{!102} -!102 = distinct !{!102, !103} -!103 = distinct !{!103, !"LVerDomain"} -!104 = !{!105, !106} -!105 = distinct !{!105, !103} -!106 = distinct !{!106, !103} -!107 = !{!105} -!108 = !{!106} -!109 = !{!110} -!110 = distinct !{!110, !111} -!111 = distinct !{!111, !"LVerDomain"} -!112 = !{!113, !114} -!113 = distinct !{!113, !111} -!114 = distinct !{!114, !111} -!115 = !{!113} -!116 = !{!114} -!117 = !{!118} -!118 = distinct !{!118, !119} -!119 = distinct !{!119, !"LVerDomain"} -!120 = !{!121, !122} -!121 = distinct !{!121, !119} -!122 = distinct !{!122, !119} -!123 = !{!121} -!124 = !{!122} -!125 = !{!126} -!126 = distinct !{!126, !127} -!127 = distinct !{!127, !"LVerDomain"} -!128 = !{!129, !130} -!129 = distinct !{!129, !127} -!130 = distinct !{!130, !127} -!131 = !{!129} -!132 = !{!130} -!133 = !{!134} -!134 = distinct !{!134, !135} -!135 = distinct !{!135, !"LVerDomain"} -!136 = !{!137, !138} -!137 = distinct !{!137, !135} -!138 = distinct !{!138, !135} -!139 = !{!137} -!140 = !{!138} -!141 = !{!142} -!142 = distinct !{!142, !143} -!143 = distinct !{!143, !"LVerDomain"} -!144 = !{!145, !146} -!145 = distinct !{!145, !143} -!146 = distinct !{!146, !143} -!147 = !{!145} -!148 = !{!146} -!149 = !{!150} -!150 = distinct !{!150, !151} -!151 = distinct !{!151, !"LVerDomain"} -!152 = !{!153, !154} -!153 = distinct !{!153, !151} -!154 = distinct !{!154, !151} -!155 = !{!153} -!156 = !{!154} -!157 = distinct !{!157, !84, !85} -!158 = distinct !{!158, !84, !85} -!159 = distinct !{!159, !84, !85} -!160 = distinct !{!160, !84, !85} -!161 = distinct !{!161, !84, !85} -!162 = distinct !{!162, !84, !85} -!163 = distinct !{!163, !84, !85} -!164 = distinct !{!164, !84, !85} -!165 = !{!166} -!166 = distinct !{!166, !167} -!167 = distinct !{!167, !"LVerDomain"} -!168 = !{!169, !170} -!169 = distinct !{!169, !167} -!170 = distinct !{!170, !167} -!171 = !{!169} -!172 = !{!170} -!173 = !{!174} -!174 = distinct !{!174, !175} -!175 = distinct !{!175, !"LVerDomain"} -!176 = !{!177, !178} -!177 = distinct !{!177, !175} -!178 = distinct !{!178, !175} -!179 = !{!177} -!180 = !{!178} -!181 = !{!182} -!182 = distinct !{!182, !183} -!183 = distinct !{!183, !"LVerDomain"} -!184 = !{!185, !186} -!185 = distinct !{!185, !183} -!186 = distinct !{!186, !183} -!187 = !{!185} -!188 = !{!186} -!189 = !{!190} -!190 = distinct !{!190, !191} -!191 = distinct !{!191, !"LVerDomain"} -!192 = !{!193, !194} -!193 = distinct !{!193, !191} -!194 = distinct !{!194, !191} -!195 = !{!193} -!196 = !{!194} -!197 = !{!198} -!198 = distinct !{!198, !199} -!199 = distinct !{!199, !"LVerDomain"} -!200 = !{!201, !202} -!201 = distinct !{!201, !199} -!202 = distinct !{!202, !199} -!203 = !{!201} -!204 = !{!202} -!205 = !{!206} -!206 = distinct !{!206, !207} -!207 = distinct !{!207, !"LVerDomain"} -!208 = !{!209, !210} -!209 = distinct !{!209, !207} -!210 = distinct !{!210, !207} -!211 = !{!209} -!212 = !{!210} -!213 = !{!214} -!214 = distinct !{!214, !215} -!215 = distinct !{!215, !"LVerDomain"} -!216 = !{!217, !218} -!217 = distinct !{!217, !215} -!218 = distinct !{!218, !215} -!219 = !{!217} -!220 = !{!218} -!221 = !{!222} -!222 = distinct !{!222, !223} -!223 = distinct !{!223, !"LVerDomain"} -!224 = !{!225, !226} -!225 = distinct !{!225, !223} -!226 = distinct !{!226, !223} -!227 = !{!225} -!228 = !{!226} -!229 = distinct !{!229, !84, !85} -!230 = distinct !{!230, !84, !85} -!231 = distinct !{!231, !84, !85} -!232 = distinct !{!232, !84, !85} -!233 = distinct !{!233, !84, !85} -!234 = distinct !{!234, !84, !85} -!235 = distinct !{!235, !84, !85} -!236 = distinct !{!236, !84, !85} diff --git a/pocl_irs/mvt_kernel1.ll b/pocl_irs/mvt_kernel1.ll deleted file mode 100644 index 1a681a5..0000000 --- a/pocl_irs/mvt_kernel1.ll +++ /dev/null @@ -1,330 +0,0 @@ -; ModuleID = './MC/LMFNEGBKGLDAHJJJJIGACABAGJJKPAABHFKIN/mvt_kernel1/32-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_mvt_kernel1(float* nocapture readonly %0, float* nocapture %1, float* nocapture readonly %2, i32 %3, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %4, i64 %5, i64 %6, i64 %7) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %5, 5 - %cmp218.i = icmp sgt i32 %3, 0 - %wide.trip.count.i = zext i32 %3 to i64 - br i1 %cmp218.i, label %pregion_for_entry.entry.i.us.preheader, label %mvt_kernel1.exit - -pregion_for_entry.entry.i.us.preheader: ; preds = %8 - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us.1, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %22, %if.end.r_exit.i.us.1 ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %3 - br i1 %cmp.i.us, label %for.body.lr.ph.i.us, label %if.end.r_exit.i.us - -for.body.lr.ph.i.us: ; preds = %pregion_for_entry.entry.i.us - %mul.i.us = mul nsw i32 %conv.i.us, %3 - %sext.i.us = shl i64 %add1.i.i.us, 32 - %idxprom7.i.us = ashr exact i64 %sext.i.us, 32 - %arrayidx8.i.us = getelementptr inbounds float, float* %1, i64 %idxprom7.i.us - %9 = sext i32 %mul.i.us to i64 - %.pre.i1.us4 = load float, float* %arrayidx8.i.us, align 4, !tbaa !12 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %for.body.lr.ph.i.us - %indvars.iv.next.i3.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %for.body.lr.ph.i.us ] - %10 = phi float [ %14, %for.body.i.us ], [ %.pre.i1.us4, %for.body.lr.ph.i.us ] - %11 = add nsw i64 %indvars.iv.next.i3.us, %9 - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %11 - %12 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %arrayidx5.i.us = getelementptr inbounds float, float* %2, i64 %indvars.iv.next.i3.us - %13 = load float, float* %arrayidx5.i.us, align 4, !tbaa !12 - %14 = tail call float @llvm.fmuladd.f32(float %12, float %13, float %10) #2 - store float %14, float* %arrayidx8.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i3.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %if.end.r_exit.i.us.loopexit, label %for.body.i.us, !llvm.loop !18 - -if.end.r_exit.i.us.loopexit: ; preds = %for.body.i.us - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %if.end.r_exit.i.us.loopexit, %pregion_for_entry.entry.i.us - %15 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1 = add nuw nsw i64 %15, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp.i.us.1 = icmp slt i32 %conv.i.us.1, %3 - br i1 %cmp.i.us.1, label %for.body.lr.ph.i.us.1, label %if.end.r_exit.i.us.1 - -mvt_kernel1.exit.loopexit: ; preds = %if.end.r_exit.i.us.1 - br label %mvt_kernel1.exit - -mvt_kernel1.exit: ; preds = %mvt_kernel1.exit.loopexit, %8 - ret void - -for.body.lr.ph.i.us.1: ; preds = %if.end.r_exit.i.us - %mul.i.us.1 = mul nsw i32 %conv.i.us.1, %3 - %sext.i.us.1 = shl i64 %add1.i.i.us.1, 32 - %idxprom7.i.us.1 = ashr exact i64 %sext.i.us.1, 32 - %arrayidx8.i.us.1 = getelementptr inbounds float, float* %1, i64 %idxprom7.i.us.1 - %16 = sext i32 %mul.i.us.1 to i64 - %.pre.i1.us4.1 = load float, float* %arrayidx8.i.us.1, align 4, !tbaa !12 - br label %for.body.i.us.1 - -for.body.i.us.1: ; preds = %for.body.i.us.1, %for.body.lr.ph.i.us.1 - %indvars.iv.next.i3.us.1 = phi i64 [ %indvars.iv.next.i.us.1, %for.body.i.us.1 ], [ 0, %for.body.lr.ph.i.us.1 ] - %17 = phi float [ %21, %for.body.i.us.1 ], [ %.pre.i1.us4.1, %for.body.lr.ph.i.us.1 ] - %18 = add nsw i64 %indvars.iv.next.i3.us.1, %16 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %0, i64 %18 - %19 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %arrayidx5.i.us.1 = getelementptr inbounds float, float* %2, i64 %indvars.iv.next.i3.us.1 - %20 = load float, float* %arrayidx5.i.us.1, align 4, !tbaa !12 - %21 = tail call float @llvm.fmuladd.f32(float %19, float %20, float %17) #2 - store float %21, float* %arrayidx8.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.1, 1 - %exitcond.not.i.us.1 = icmp eq i64 %indvars.iv.next.i.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.1, label %if.end.r_exit.i.us.1.loopexit, label %for.body.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.us.1.loopexit: ; preds = %for.body.i.us.1 - br label %if.end.r_exit.i.us.1 - -if.end.r_exit.i.us.1: ; preds = %if.end.r_exit.i.us.1.loopexit, %if.end.r_exit.i.us - %22 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %22, 32 - br i1 %exitcond.not.1, label %mvt_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mvt_kernel1_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %mul.i.i.i = shl i64 %2, 5 - %cmp218.i.i = icmp sgt i32 %20, 0 - %wide.trip.count.i.i = zext i32 %20 to i64 - br i1 %cmp218.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %_pocl_kernel_mvt_kernel1.exit - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %34, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %20, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.r_exit.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %20, %conv.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom7.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx8.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom7.i.i.us - %21 = sext i32 %mul.i.i.us to i64 - %.pre.i.i1.us4 = load float, float* %arrayidx8.i.i.us, align 4, !tbaa !12 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i3.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %22 = phi float [ %26, %for.body.i.i.us ], [ %.pre.i.i1.us4, %for.body.lr.ph.i.i.us ] - %23 = add nsw i64 %indvars.iv.next.i.i3.us, %21 - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %23 - %24 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %arrayidx5.i.i.us = getelementptr inbounds float, float* %16, i64 %indvars.iv.next.i.i3.us - %25 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %26 = tail call float @llvm.fmuladd.f32(float %24, float %25, float %22) #2 - store float %26, float* %arrayidx8.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i3.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.r_exit.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.r_exit.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.end.r_exit.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %27 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %27, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %20, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %for.body.lr.ph.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -_pocl_kernel_mvt_kernel1.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_mvt_kernel1.exit - -_pocl_kernel_mvt_kernel1.exit: ; preds = %_pocl_kernel_mvt_kernel1.exit.loopexit, %5 - ret void - -for.body.lr.ph.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %mul.i.i.us.1 = mul nsw i32 %20, %conv.i.i.us.1 - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom7.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx8.i.i.us.1 = getelementptr inbounds float, float* %12, i64 %idxprom7.i.i.us.1 - %28 = sext i32 %mul.i.i.us.1 to i64 - %.pre.i.i1.us4.1 = load float, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %for.body.lr.ph.i.i.us.1 - %indvars.iv.next.i.i3.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %for.body.lr.ph.i.i.us.1 ] - %29 = phi float [ %33, %for.body.i.i.us.1 ], [ %.pre.i.i1.us4.1, %for.body.lr.ph.i.i.us.1 ] - %30 = add nsw i64 %indvars.iv.next.i.i3.us.1, %28 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %8, i64 %30 - %31 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %arrayidx5.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %indvars.iv.next.i.i3.us.1 - %32 = load float, float* %arrayidx5.i.i.us.1, align 4, !tbaa !12 - %33 = tail call float @llvm.fmuladd.f32(float %31, float %32, float %29) #2 - store float %33, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.1, label %if.end.r_exit.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.loopexit, %if.end.r_exit.i.i.us - %34 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %34, 32 - br i1 %exitcond.not.1, label %_pocl_kernel_mvt_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mvt_kernel1_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %mul.i.i.i = shl i64 %2, 5 - %cmp218.i.i = icmp sgt i32 %17, 0 - %wide.trip.count.i.i = zext i32 %17 to i64 - br i1 %cmp218.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %_pocl_kernel_mvt_kernel1.exit - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %31, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %17, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.r_exit.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %mul.i.i.us = mul nsw i32 %17, %conv.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom7.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx8.i.i.us = getelementptr inbounds float, float* %10, i64 %idxprom7.i.i.us - %18 = sext i32 %mul.i.i.us to i64 - %.pre.i.i1.us4 = load float, float* %arrayidx8.i.i.us, align 4, !tbaa !12 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i3.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %19 = phi float [ %23, %for.body.i.i.us ], [ %.pre.i.i1.us4, %for.body.lr.ph.i.i.us ] - %20 = add nsw i64 %indvars.iv.next.i.i3.us, %18 - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %20 - %21 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %arrayidx5.i.i.us = getelementptr inbounds float, float* %13, i64 %indvars.iv.next.i.i3.us - %22 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %23 = tail call float @llvm.fmuladd.f32(float %21, float %22, float %19) #2 - store float %23, float* %arrayidx8.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i3.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.r_exit.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.r_exit.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.end.r_exit.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %24 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %24, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %17, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %for.body.lr.ph.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -_pocl_kernel_mvt_kernel1.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_mvt_kernel1.exit - -_pocl_kernel_mvt_kernel1.exit: ; preds = %_pocl_kernel_mvt_kernel1.exit.loopexit, %5 - ret void - -for.body.lr.ph.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %mul.i.i.us.1 = mul nsw i32 %17, %conv.i.i.us.1 - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom7.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx8.i.i.us.1 = getelementptr inbounds float, float* %10, i64 %idxprom7.i.i.us.1 - %25 = sext i32 %mul.i.i.us.1 to i64 - %.pre.i.i1.us4.1 = load float, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %for.body.lr.ph.i.i.us.1 - %indvars.iv.next.i.i3.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %for.body.lr.ph.i.i.us.1 ] - %26 = phi float [ %30, %for.body.i.i.us.1 ], [ %.pre.i.i1.us4.1, %for.body.lr.ph.i.i.us.1 ] - %27 = add nsw i64 %indvars.iv.next.i.i3.us.1, %25 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %27 - %28 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %arrayidx5.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %indvars.iv.next.i.i3.us.1 - %29 = load float, float* %arrayidx5.i.i.us.1, align 4, !tbaa !12 - %30 = tail call float @llvm.fmuladd.f32(float %28, float %29, float %26) #2 - store float %30, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.1, label %if.end.r_exit.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.loopexit, %if.end.r_exit.i.i.us - %31 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %31, 32 - br i1 %exitcond.not.1, label %_pocl_kernel_mvt_kernel1.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int"} -!9 = !{!"", !"", !"", !""} -!10 = !{!"a", !"x1", !"y1", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = distinct !{!20, !21} -!21 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/mvt_kernel2.ll b/pocl_irs/mvt_kernel2.ll deleted file mode 100644 index c8b77ad..0000000 --- a/pocl_irs/mvt_kernel2.ll +++ /dev/null @@ -1,324 +0,0 @@ -; ModuleID = './MC/LMFNEGBKGLDAHJJJJIGACABAGJJKPAABHFKIN/mvt_kernel2/32-1-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_mvt_kernel2(float* nocapture readonly %0, float* nocapture %1, float* nocapture readonly %2, i32 %3, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %4, i64 %5, i64 %6, i64 %7) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %5, 5 - %cmp218.i = icmp sgt i32 %3, 0 - %9 = zext i32 %3 to i64 - br i1 %cmp218.i, label %pregion_for_entry.entry.i.us.preheader, label %mvt_kernel2.exit - -pregion_for_entry.entry.i.us.preheader: ; preds = %8 - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.us: ; preds = %if.end.r_exit.i.us.1, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %23, %if.end.r_exit.i.us.1 ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp.i.us = icmp slt i32 %conv.i.us, %3 - br i1 %cmp.i.us, label %for.body.lr.ph.i.us, label %if.end.r_exit.i.us - -for.body.lr.ph.i.us: ; preds = %pregion_for_entry.entry.i.us - %sext.i.us = shl i64 %add1.i.i.us, 32 - %idxprom7.i.us = ashr exact i64 %sext.i.us, 32 - %arrayidx8.i.us = getelementptr inbounds float, float* %1, i64 %idxprom7.i.us - %.pre.i1.us4 = load float, float* %arrayidx8.i.us, align 4, !tbaa !12 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %for.body.lr.ph.i.us - %indvars.iv.next.i3.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %for.body.lr.ph.i.us ] - %10 = phi float [ %15, %for.body.i.us ], [ %.pre.i1.us4, %for.body.lr.ph.i.us ] - %11 = mul nuw nsw i64 %indvars.iv.next.i3.us, %9 - %12 = add nsw i64 %11, %idxprom7.i.us - %arrayidx.i.us = getelementptr inbounds float, float* %0, i64 %12 - %13 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %arrayidx5.i.us = getelementptr inbounds float, float* %2, i64 %indvars.iv.next.i3.us - %14 = load float, float* %arrayidx5.i.us, align 4, !tbaa !12 - %15 = tail call float @llvm.fmuladd.f32(float %13, float %14, float %10) #2 - store float %15, float* %arrayidx8.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i3.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %9 - br i1 %exitcond.not.i.us, label %if.end.r_exit.i.us.loopexit, label %for.body.i.us, !llvm.loop !18 - -if.end.r_exit.i.us.loopexit: ; preds = %for.body.i.us - br label %if.end.r_exit.i.us - -if.end.r_exit.i.us: ; preds = %if.end.r_exit.i.us.loopexit, %pregion_for_entry.entry.i.us - %16 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1 = add nuw nsw i64 %16, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp.i.us.1 = icmp slt i32 %conv.i.us.1, %3 - br i1 %cmp.i.us.1, label %for.body.lr.ph.i.us.1, label %if.end.r_exit.i.us.1 - -mvt_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.us.1 - br label %mvt_kernel2.exit - -mvt_kernel2.exit: ; preds = %mvt_kernel2.exit.loopexit, %8 - ret void - -for.body.lr.ph.i.us.1: ; preds = %if.end.r_exit.i.us - %sext.i.us.1 = shl i64 %add1.i.i.us.1, 32 - %idxprom7.i.us.1 = ashr exact i64 %sext.i.us.1, 32 - %arrayidx8.i.us.1 = getelementptr inbounds float, float* %1, i64 %idxprom7.i.us.1 - %.pre.i1.us4.1 = load float, float* %arrayidx8.i.us.1, align 4, !tbaa !12 - br label %for.body.i.us.1 - -for.body.i.us.1: ; preds = %for.body.i.us.1, %for.body.lr.ph.i.us.1 - %indvars.iv.next.i3.us.1 = phi i64 [ %indvars.iv.next.i.us.1, %for.body.i.us.1 ], [ 0, %for.body.lr.ph.i.us.1 ] - %17 = phi float [ %22, %for.body.i.us.1 ], [ %.pre.i1.us4.1, %for.body.lr.ph.i.us.1 ] - %18 = mul nuw nsw i64 %indvars.iv.next.i3.us.1, %9 - %19 = add nsw i64 %18, %idxprom7.i.us.1 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %0, i64 %19 - %20 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %arrayidx5.i.us.1 = getelementptr inbounds float, float* %2, i64 %indvars.iv.next.i3.us.1 - %21 = load float, float* %arrayidx5.i.us.1, align 4, !tbaa !12 - %22 = tail call float @llvm.fmuladd.f32(float %20, float %21, float %17) #2 - store float %22, float* %arrayidx8.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.1, 1 - %exitcond.not.i.us.1 = icmp eq i64 %indvars.iv.next.i.us.1, %9 - br i1 %exitcond.not.i.us.1, label %if.end.r_exit.i.us.1.loopexit, label %for.body.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.us.1.loopexit: ; preds = %for.body.i.us.1 - br label %if.end.r_exit.i.us.1 - -if.end.r_exit.i.us.1: ; preds = %if.end.r_exit.i.us.1.loopexit, %if.end.r_exit.i.us - %23 = add nuw nsw i64 %_local_id_x.0.us, 2 - %exitcond.not.1 = icmp eq i64 %23, 32 - br i1 %exitcond.not.1, label %mvt_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mvt_kernel2_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to i32** - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %19, align 4 - %mul.i.i.i = shl i64 %2, 5 - %cmp218.i.i = icmp sgt i32 %20, 0 - %21 = zext i32 %20 to i64 - br i1 %cmp218.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %_pocl_kernel_mvt_kernel2.exit - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %35, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %20, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.r_exit.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom7.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx8.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom7.i.i.us - %.pre.i.i1.us4 = load float, float* %arrayidx8.i.i.us, align 4, !tbaa !12 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i3.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %22 = phi float [ %27, %for.body.i.i.us ], [ %.pre.i.i1.us4, %for.body.lr.ph.i.i.us ] - %23 = mul nuw nsw i64 %indvars.iv.next.i.i3.us, %21 - %24 = add nsw i64 %23, %idxprom7.i.i.us - %arrayidx.i.i.us = getelementptr inbounds float, float* %8, i64 %24 - %25 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %arrayidx5.i.i.us = getelementptr inbounds float, float* %16, i64 %indvars.iv.next.i.i3.us - %26 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %27 = tail call float @llvm.fmuladd.f32(float %25, float %26, float %22) #2 - store float %27, float* %arrayidx8.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i3.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %21 - br i1 %exitcond.not.i.i.us, label %if.end.r_exit.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.r_exit.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.end.r_exit.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %28 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %28, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %20, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %for.body.lr.ph.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -_pocl_kernel_mvt_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_mvt_kernel2.exit - -_pocl_kernel_mvt_kernel2.exit: ; preds = %_pocl_kernel_mvt_kernel2.exit.loopexit, %5 - ret void - -for.body.lr.ph.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom7.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx8.i.i.us.1 = getelementptr inbounds float, float* %12, i64 %idxprom7.i.i.us.1 - %.pre.i.i1.us4.1 = load float, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %for.body.lr.ph.i.i.us.1 - %indvars.iv.next.i.i3.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %for.body.lr.ph.i.i.us.1 ] - %29 = phi float [ %34, %for.body.i.i.us.1 ], [ %.pre.i.i1.us4.1, %for.body.lr.ph.i.i.us.1 ] - %30 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.1, %21 - %31 = add nsw i64 %30, %idxprom7.i.i.us.1 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %8, i64 %31 - %32 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %arrayidx5.i.i.us.1 = getelementptr inbounds float, float* %16, i64 %indvars.iv.next.i.i3.us.1 - %33 = load float, float* %arrayidx5.i.i.us.1, align 4, !tbaa !12 - %34 = tail call float @llvm.fmuladd.f32(float %32, float %33, float %29) #2 - store float %34, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %21 - br i1 %exitcond.not.i.i.us.1, label %if.end.r_exit.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.loopexit, %if.end.r_exit.i.i.us - %35 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %35, 32 - br i1 %exitcond.not.1, label %_pocl_kernel_mvt_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_mvt_kernel2_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to i32** - %16 = load i32*, i32** %15, align 8 - %17 = load i32, i32* %16, align 4 - %mul.i.i.i = shl i64 %2, 5 - %cmp218.i.i = icmp sgt i32 %17, 0 - %18 = zext i32 %17 to i64 - br i1 %cmp218.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %_pocl_kernel_mvt_kernel2.exit - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %5 - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.r_exit.i.i.us.1, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %32, %if.end.r_exit.i.i.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp.i.i.us = icmp sgt i32 %17, %conv.i.i.us - br i1 %cmp.i.i.us, label %for.body.lr.ph.i.i.us, label %if.end.r_exit.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %sext.i.i.us = shl i64 %add1.i.i.i.us, 32 - %idxprom7.i.i.us = ashr exact i64 %sext.i.i.us, 32 - %arrayidx8.i.i.us = getelementptr inbounds float, float* %10, i64 %idxprom7.i.i.us - %.pre.i.i1.us4 = load float, float* %arrayidx8.i.i.us, align 4, !tbaa !12 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i3.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %19 = phi float [ %24, %for.body.i.i.us ], [ %.pre.i.i1.us4, %for.body.lr.ph.i.i.us ] - %20 = mul nuw nsw i64 %indvars.iv.next.i.i3.us, %18 - %21 = add nsw i64 %20, %idxprom7.i.i.us - %arrayidx.i.i.us = getelementptr inbounds float, float* %7, i64 %21 - %22 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %arrayidx5.i.i.us = getelementptr inbounds float, float* %13, i64 %indvars.iv.next.i.i3.us - %23 = load float, float* %arrayidx5.i.i.us, align 4, !tbaa !12 - %24 = tail call float @llvm.fmuladd.f32(float %22, float %23, float %19) #2 - store float %24, float* %arrayidx8.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i3.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %18 - br i1 %exitcond.not.i.i.us, label %if.end.r_exit.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !18 - -if.end.r_exit.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.r_exit.i.i.us - -if.end.r_exit.i.i.us: ; preds = %if.end.r_exit.i.i.us.loopexit, %pregion_for_entry.entry.i.i.us - %25 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1 = add nuw nsw i64 %25, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp.i.i.us.1 = icmp sgt i32 %17, %conv.i.i.us.1 - br i1 %cmp.i.i.us.1, label %for.body.lr.ph.i.i.us.1, label %if.end.r_exit.i.i.us.1 - -_pocl_kernel_mvt_kernel2.exit.loopexit: ; preds = %if.end.r_exit.i.i.us.1 - br label %_pocl_kernel_mvt_kernel2.exit - -_pocl_kernel_mvt_kernel2.exit: ; preds = %_pocl_kernel_mvt_kernel2.exit.loopexit, %5 - ret void - -for.body.lr.ph.i.i.us.1: ; preds = %if.end.r_exit.i.i.us - %sext.i.i.us.1 = shl i64 %add1.i.i.i.us.1, 32 - %idxprom7.i.i.us.1 = ashr exact i64 %sext.i.i.us.1, 32 - %arrayidx8.i.i.us.1 = getelementptr inbounds float, float* %10, i64 %idxprom7.i.i.us.1 - %.pre.i.i1.us4.1 = load float, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12 - br label %for.body.i.i.us.1 - -for.body.i.i.us.1: ; preds = %for.body.i.i.us.1, %for.body.lr.ph.i.i.us.1 - %indvars.iv.next.i.i3.us.1 = phi i64 [ %indvars.iv.next.i.i.us.1, %for.body.i.i.us.1 ], [ 0, %for.body.lr.ph.i.i.us.1 ] - %26 = phi float [ %31, %for.body.i.i.us.1 ], [ %.pre.i.i1.us4.1, %for.body.lr.ph.i.i.us.1 ] - %27 = mul nuw nsw i64 %indvars.iv.next.i.i3.us.1, %18 - %28 = add nsw i64 %27, %idxprom7.i.i.us.1 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %7, i64 %28 - %29 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %arrayidx5.i.i.us.1 = getelementptr inbounds float, float* %13, i64 %indvars.iv.next.i.i3.us.1 - %30 = load float, float* %arrayidx5.i.i.us.1, align 4, !tbaa !12 - %31 = tail call float @llvm.fmuladd.f32(float %29, float %30, float %26) #2 - store float %31, float* %arrayidx8.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.1, 1 - %exitcond.not.i.i.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.1, %18 - br i1 %exitcond.not.i.i.us.1, label %if.end.r_exit.i.i.us.1.loopexit, label %for.body.i.i.us.1, !llvm.loop !18 - -if.end.r_exit.i.i.us.1.loopexit: ; preds = %for.body.i.i.us.1 - br label %if.end.r_exit.i.i.us.1 - -if.end.r_exit.i.i.us.1: ; preds = %if.end.r_exit.i.i.us.1.loopexit, %if.end.r_exit.i.i.us - %32 = add nuw nsw i64 %_local_id_x.i.0.us, 2 - %exitcond.not.1 = icmp eq i64 %32, 32 - br i1 %exitcond.not.1, label %_pocl_kernel_mvt_kernel2.exit.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !20 -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0} -!6 = !{!"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"int"} -!9 = !{!"", !"", !"", !""} -!10 = !{!"a", !"x2", !"y2", !"n"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17} -!17 = distinct !{} -!18 = distinct !{!18, !19} -!19 = !{!"llvm.loop.unroll.disable"} -!20 = distinct !{!20, !21} -!21 = !{!"llvm.loop.parallel_accesses", !17} diff --git a/pocl_irs/syr2k.ll b/pocl_irs/syr2k.ll deleted file mode 100644 index 5a3e4ca..0000000 --- a/pocl_irs/syr2k.ll +++ /dev/null @@ -1,348 +0,0 @@ -; ModuleID = './GJ/HELFLMCCOOHFANMDMHGFFJGEBKLIOJDJFCKLI/syr2k_kernel/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_syr2k_kernel(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, float %3, float %4, i32 %5, i32 %6, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %7, i64 %8, i64 %9, i64 %10) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %8, 5 - %mul3.i.i = shl i64 %9, 3 - %cmp761.i = icmp sgt i32 %5, 0 - %wide.trip.count.i = zext i32 %5 to i64 - br label %pregion_for_entry.pregion_for_init.i - -pregion_for_entry.pregion_for_init.i: ; preds = %pregion_for_end.i, %11 - %_local_id_y.0 = phi i64 [ 0, %11 ], [ %23, %pregion_for_end.i ] - %add6.i.i = add nuw nsw i64 %_local_id_y.0, %mul3.i.i - %conv2.i = trunc i64 %add6.i.i to i32 - %cmp.i = icmp slt i32 %conv2.i, %6 - %mul.i = mul nsw i32 %conv2.i, %6 - %mul9.i = mul nsw i32 %conv2.i, %5 - %12 = sext i32 %mul9.i to i64 - br i1 %cmp.i, label %pregion_for_entry.entry.i.us.preheader, label %pregion_for_end.i - -pregion_for_entry.entry.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i - br label %pregion_for_entry.entry.i.us - -pregion_for_entry.entry.i.us: ; preds = %if.end.i.us, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %22, %if.end.i.us ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %6 - br i1 %cmp4.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %add.i.us = add nsw i32 %mul.i, %conv.i.us - %idxprom.i.us = sext i32 %add.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %2, i64 %idxprom.i.us - %13 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %mul6.i.us = fmul float %13, %4 - store float %mul6.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br i1 %cmp761.i, label %for.body.lr.ph.i.us, label %if.end.i.us - -for.body.lr.ph.i.us: ; preds = %if.then.i.us - %mul14.i.us = mul nsw i32 %conv.i.us, %5 - %14 = sext i32 %mul14.i.us to i64 - br label %for.body.i.us - -for.body.i.us: ; preds = %for.body.i.us, %for.body.lr.ph.i.us - %indvars.iv.next.i4.us = phi i64 [ %indvars.iv.next.i.us, %for.body.i.us ], [ 0, %for.body.lr.ph.i.us ] - %add33.i2.us = phi float [ %add33.i.us, %for.body.i.us ], [ %mul6.i.us, %for.body.lr.ph.i.us ] - %15 = add nsw i64 %indvars.iv.next.i4.us, %12 - %arrayidx12.i.us = getelementptr inbounds float, float* %0, i64 %15 - %16 = load float, float* %arrayidx12.i.us, align 4, !tbaa !12 - %mul13.i.us = fmul float %16, %3 - %17 = add nsw i64 %indvars.iv.next.i4.us, %14 - %arrayidx17.i.us = getelementptr inbounds float, float* %1, i64 %17 - %18 = load float, float* %arrayidx17.i.us, align 4, !tbaa !12 - %arrayidx22.i.us = getelementptr inbounds float, float* %1, i64 %15 - %19 = load float, float* %arrayidx22.i.us, align 4, !tbaa !12 - %mul23.i.us = fmul float %19, %3 - %arrayidx27.i.us = getelementptr inbounds float, float* %0, i64 %17 - %20 = load float, float* %arrayidx27.i.us, align 4, !tbaa !12 - %mul28.i.us = fmul float %mul23.i.us, %20 - %21 = tail call float @llvm.fmuladd.f32(float %mul13.i.us, float %18, float %mul28.i.us) #2 - %add33.i.us = fadd float %add33.i2.us, %21 - store float %add33.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us = add nuw nsw i64 %indvars.iv.next.i4.us, 1 - %exitcond.not.i.us = icmp eq i64 %indvars.iv.next.i.us, %wide.trip.count.i - br i1 %exitcond.not.i.us, label %if.end.i.us.loopexit, label %for.body.i.us, !llvm.loop !19 - -if.end.i.us.loopexit: ; preds = %for.body.i.us - br label %if.end.i.us - -if.end.i.us: ; preds = %if.end.i.us.loopexit, %if.then.i.us, %pregion_for_entry.entry.i.us - %22 = add nuw nsw i64 %_local_id_x.0.us, 1 - %exitcond.not = icmp eq i64 %22, 32 - br i1 %exitcond.not, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !21 - -pregion_for_end.i.loopexit: ; preds = %if.end.i.us - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %pregion_for_entry.pregion_for_init.i - %23 = add nuw nsw i64 %_local_id_y.0, 1 - %exitcond6.not = icmp eq i64 %23, 8 - br i1 %exitcond6.not, label %syr2k_kernel.exit, label %pregion_for_entry.pregion_for_init.i, !llvm.loop !23 - -syr2k_kernel.exit: ; preds = %pregion_for_end.i - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_syr2k_kernel_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float*** - %15 = load float**, float*** %14, align 8 - %16 = load float*, float** %15, align 8 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to float** - %19 = load float*, float** %18, align 8 - %20 = load float, float* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to float** - %23 = load float*, float** %22, align 8 - %24 = load float, float* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 5 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %29 = getelementptr i8*, i8** %0, i64 6 - %30 = bitcast i8** %29 to i32** - %31 = load i32*, i32** %30, align 8 - %32 = load i32, i32* %31, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp761.i.i = icmp sgt i32 %28, 0 - %wide.trip.count.i.i = zext i32 %28 to i64 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %44, %pregion_for_end.i.i ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %32, %conv2.i.i - %mul.i.i = mul nsw i32 %32, %conv2.i.i - %mul9.i.i = mul nsw i32 %28, %conv2.i.i - %33 = sext i32 %mul9.i.i to i64 - br i1 %cmp.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %43, %if.end.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %32, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %16, i64 %idxprom.i.i.us - %34 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %mul6.i.i.us = fmul float %24, %34 - store float %mul6.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br i1 %cmp761.i.i, label %for.body.lr.ph.i.i.us, label %if.end.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %if.then.i.i.us - %mul14.i.i.us = mul nsw i32 %28, %conv.i.i.us - %35 = sext i32 %mul14.i.i.us to i64 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i4.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %add33.i.i2.us = phi float [ %add33.i.i.us, %for.body.i.i.us ], [ %mul6.i.i.us, %for.body.lr.ph.i.i.us ] - %36 = add nsw i64 %indvars.iv.next.i.i4.us, %33 - %arrayidx12.i.i.us = getelementptr inbounds float, float* %8, i64 %36 - %37 = load float, float* %arrayidx12.i.i.us, align 4, !tbaa !12 - %mul13.i.i.us = fmul float %20, %37 - %38 = add nsw i64 %indvars.iv.next.i.i4.us, %35 - %arrayidx17.i.i.us = getelementptr inbounds float, float* %12, i64 %38 - %39 = load float, float* %arrayidx17.i.i.us, align 4, !tbaa !12 - %arrayidx22.i.i.us = getelementptr inbounds float, float* %12, i64 %36 - %40 = load float, float* %arrayidx22.i.i.us, align 4, !tbaa !12 - %mul23.i.i.us = fmul float %20, %40 - %arrayidx27.i.i.us = getelementptr inbounds float, float* %8, i64 %38 - %41 = load float, float* %arrayidx27.i.i.us, align 4, !tbaa !12 - %mul28.i.i.us = fmul float %mul23.i.i.us, %41 - %42 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us, float %39, float %mul28.i.i.us) #2 - %add33.i.i.us = fadd float %add33.i.i2.us, %42 - store float %add33.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i4.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !19 - -if.end.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.end.i.i.us.loopexit, %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %43 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %43, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %pregion_for_entry.pregion_for_init.i.i - %44 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond6.not = icmp eq i64 %44, 8 - br i1 %exitcond6.not, label %_pocl_kernel_syr2k_kernel.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !23 - -_pocl_kernel_syr2k_kernel.exit: ; preds = %pregion_for_end.i.i - ret void -} - -; Function Attrs: nounwind -define void @_pocl_kernel_syr2k_kernel_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = getelementptr i8*, i8** %0, i64 3 - %15 = bitcast i8** %14 to float** - %16 = load float*, float** %15, align 8 - %17 = load float, float* %16, align 4 - %18 = getelementptr i8*, i8** %0, i64 4 - %19 = bitcast i8** %18 to float** - %20 = load float*, float** %19, align 8 - %21 = load float, float* %20, align 4 - %22 = getelementptr i8*, i8** %0, i64 5 - %23 = bitcast i8** %22 to i32** - %24 = load i32*, i32** %23, align 8 - %25 = load i32, i32* %24, align 4 - %26 = getelementptr i8*, i8** %0, i64 6 - %27 = bitcast i8** %26 to i32** - %28 = load i32*, i32** %27, align 8 - %29 = load i32, i32* %28, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp761.i.i = icmp sgt i32 %25, 0 - %wide.trip.count.i.i = zext i32 %25 to i64 - br label %pregion_for_entry.pregion_for_init.i.i - -pregion_for_entry.pregion_for_init.i.i: ; preds = %pregion_for_end.i.i, %5 - %_local_id_y.i.0 = phi i64 [ 0, %5 ], [ %41, %pregion_for_end.i.i ] - %add6.i.i.i = add nuw nsw i64 %_local_id_y.i.0, %mul3.i.i.i - %conv2.i.i = trunc i64 %add6.i.i.i to i32 - %cmp.i.i = icmp sgt i32 %29, %conv2.i.i - %mul.i.i = mul nsw i32 %29, %conv2.i.i - %mul9.i.i = mul nsw i32 %25, %conv2.i.i - %30 = sext i32 %mul9.i.i to i64 - br i1 %cmp.i.i, label %pregion_for_entry.entry.i.i.us.preheader, label %pregion_for_end.i.i - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i - br label %pregion_for_entry.entry.i.i.us - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %40, %if.end.i.i.us ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %29, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add.i.i.us = add nsw i32 %mul.i.i, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %13, i64 %idxprom.i.i.us - %31 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %mul6.i.i.us = fmul float %21, %31 - store float %mul6.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br i1 %cmp761.i.i, label %for.body.lr.ph.i.i.us, label %if.end.i.i.us - -for.body.lr.ph.i.i.us: ; preds = %if.then.i.i.us - %mul14.i.i.us = mul nsw i32 %25, %conv.i.i.us - %32 = sext i32 %mul14.i.i.us to i64 - br label %for.body.i.i.us - -for.body.i.i.us: ; preds = %for.body.i.i.us, %for.body.lr.ph.i.i.us - %indvars.iv.next.i.i4.us = phi i64 [ %indvars.iv.next.i.i.us, %for.body.i.i.us ], [ 0, %for.body.lr.ph.i.i.us ] - %add33.i.i2.us = phi float [ %add33.i.i.us, %for.body.i.i.us ], [ %mul6.i.i.us, %for.body.lr.ph.i.i.us ] - %33 = add nsw i64 %indvars.iv.next.i.i4.us, %30 - %arrayidx12.i.i.us = getelementptr inbounds float, float* %7, i64 %33 - %34 = load float, float* %arrayidx12.i.i.us, align 4, !tbaa !12 - %mul13.i.i.us = fmul float %17, %34 - %35 = add nsw i64 %indvars.iv.next.i.i4.us, %32 - %arrayidx17.i.i.us = getelementptr inbounds float, float* %10, i64 %35 - %36 = load float, float* %arrayidx17.i.i.us, align 4, !tbaa !12 - %arrayidx22.i.i.us = getelementptr inbounds float, float* %10, i64 %33 - %37 = load float, float* %arrayidx22.i.i.us, align 4, !tbaa !12 - %mul23.i.i.us = fmul float %17, %37 - %arrayidx27.i.i.us = getelementptr inbounds float, float* %7, i64 %35 - %38 = load float, float* %arrayidx27.i.i.us, align 4, !tbaa !12 - %mul28.i.i.us = fmul float %mul23.i.i.us, %38 - %39 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us, float %36, float %mul28.i.i.us) #2 - %add33.i.i.us = fadd float %add33.i.i2.us, %39 - store float %add33.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us = add nuw nsw i64 %indvars.iv.next.i.i4.us, 1 - %exitcond.not.i.i.us = icmp eq i64 %indvars.iv.next.i.i.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us, label %if.end.i.i.us.loopexit, label %for.body.i.i.us, !llvm.loop !19 - -if.end.i.i.us.loopexit: ; preds = %for.body.i.i.us - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.end.i.i.us.loopexit, %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %40 = add nuw nsw i64 %_local_id_x.i.0.us, 1 - %exitcond.not = icmp eq i64 %40, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !21 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %pregion_for_entry.pregion_for_init.i.i - %41 = add nuw nsw i64 %_local_id_y.i.0, 1 - %exitcond6.not = icmp eq i64 %41, 8 - br i1 %exitcond6.not, label %_pocl_kernel_syr2k_kernel.exit, label %pregion_for_entry.pregion_for_init.i.i, !llvm.loop !23 - -_pocl_kernel_syr2k_kernel.exit: ; preds = %pregion_for_end.i.i - ret void -} - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE", !"DATA_TYPE", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float*", !"float", !"float", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !"", !""} -!10 = !{!"a", !"b", !"c", !"alpha", !"beta", !"ni", !"nj"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17, !18} -!17 = distinct !{} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.unroll.disable"} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.parallel_accesses", !17} -!23 = distinct !{!23, !24} -!24 = !{!"llvm.loop.parallel_accesses", !18} diff --git a/pocl_irs/syrk.ll b/pocl_irs/syrk.ll deleted file mode 100644 index 671357a..0000000 --- a/pocl_irs/syrk.ll +++ /dev/null @@ -1,5040 +0,0 @@ -; ModuleID = './KA/BHKEMAGMCBFLCOFGKKELDLCCFAPLFCEIFHLBN/syrk_kernel/32-8-1-goffs0-smallgrid/parallel.bc' -source_filename = "parallel_bc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fmuladd.f32(float, float, float) #0 - -; Function Attrs: alwaysinline nofree norecurse nounwind -define void @_pocl_kernel_syrk_kernel(float* nocapture readonly %0, float* nocapture %1, float %2, float %3, i32 %4, i32 %5, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %6, i64 %7, i64 %8, i64 %9) local_unnamed_addr #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !9 !kernel_arg_name !10 !pocl_generated !11 { - %mul.i.i = shl i64 %7, 5 - %mul3.i.i = shl i64 %8, 3 - %cmp742.i = icmp sgt i32 %4, 0 - %wide.trip.count.i = zext i32 %4 to i64 - %conv2.i.us = trunc i64 %mul3.i.i to i32 - %cmp.i.us = icmp slt i32 %conv2.i.us, %5 - %mul.i.us = mul nsw i32 %conv2.i.us, %5 - br i1 %cmp742.i, label %pregion_for_entry.pregion_for_init.i.us, label %pregion_for_entry.pregion_for_init.i.preheader - -pregion_for_entry.pregion_for_init.i.preheader: ; preds = %10 - br i1 %cmp.i.us, label %vector.scevcheck, label %pregion_for_end.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.preheader - %11 = trunc i64 %8 to i32 - %12 = mul i32 %11, %5 - %13 = shl i32 %12, 3 - %14 = trunc i64 %7 to i32 - %15 = shl i32 %14, 5 - %16 = add i32 %13, %15 - %17 = icmp sgt i32 %16, 2147483616 - br i1 %17, label %pregion_for_entry.entry.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.us.preheader: ; preds = %vector.scevcheck - br label %pregion_for_entry.entry.i.us - -vector.ph: ; preds = %vector.scevcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert38 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat39 = shufflevector <8 x i32> %broadcast.splatinsert38, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert40 = insertelement <8 x float> undef, float %3, i32 0 - %broadcast.splat41 = shufflevector <8 x float> %broadcast.splatinsert40, <8 x float> undef, <8 x i32> zeroinitializer - %18 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %19 = or <8 x i32> %18, - %20 = icmp sgt <8 x i32> %broadcast.splat39, %19 - %21 = extractelement <8 x i32> %19, i32 0 - %22 = add nsw i32 %mul.i.us, %21 - %23 = sext i32 %22 to i64 - %24 = getelementptr inbounds float, float* %1, i64 %23 - %25 = bitcast float* %24 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %25, i32 4, <8 x i1> %20, <8 x float> undef), !tbaa !12 - %26 = fmul <8 x float> %wide.masked.load, %broadcast.splat41 - %27 = bitcast float* %24 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %26, <8 x float>* %27, i32 4, <8 x i1> %20), !tbaa !12, !llvm.access.group !16 - %28 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %29 = or <8 x i32> %28, - %30 = icmp sgt <8 x i32> %broadcast.splat39, %29 - %31 = extractelement <8 x i32> %29, i32 0 - %32 = add nsw i32 %mul.i.us, %31 - %33 = sext i32 %32 to i64 - %34 = getelementptr inbounds float, float* %1, i64 %33 - %35 = bitcast float* %34 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %35, i32 4, <8 x i1> %30, <8 x float> undef), !tbaa !12 - %36 = fmul <8 x float> %wide.masked.load.1, %broadcast.splat41 - %37 = bitcast float* %34 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %36, <8 x float>* %37, i32 4, <8 x i1> %30), !tbaa !12, !llvm.access.group !16 - %38 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %39 = or <8 x i32> %38, - %40 = icmp sgt <8 x i32> %broadcast.splat39, %39 - %41 = extractelement <8 x i32> %39, i32 0 - %42 = add nsw i32 %mul.i.us, %41 - %43 = sext i32 %42 to i64 - %44 = getelementptr inbounds float, float* %1, i64 %43 - %45 = bitcast float* %44 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %45, i32 4, <8 x i1> %40, <8 x float> undef), !tbaa !12 - %46 = fmul <8 x float> %wide.masked.load.2, %broadcast.splat41 - %47 = bitcast float* %44 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %46, <8 x float>* %47, i32 4, <8 x i1> %40), !tbaa !12, !llvm.access.group !16 - %48 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %49 = or <8 x i32> %48, - %50 = icmp sgt <8 x i32> %broadcast.splat39, %49 - %51 = extractelement <8 x i32> %49, i32 0 - %52 = add nsw i32 %mul.i.us, %51 - %53 = sext i32 %52 to i64 - %54 = getelementptr inbounds float, float* %1, i64 %53 - %55 = bitcast float* %54 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %55, i32 4, <8 x i1> %50, <8 x float> undef), !tbaa !12 - %56 = fmul <8 x float> %wide.masked.load.3, %broadcast.splat41 - %57 = bitcast float* %54 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %56, <8 x float>* %57, i32 4, <8 x i1> %50), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i - -pregion_for_entry.pregion_for_init.i.us: ; preds = %10 - %mul9.i.us = mul nsw i32 %conv2.i.us, %4 - %58 = sext i32 %mul9.i.us to i64 - br i1 %cmp.i.us, label %pregion_for_entry.entry.i.us.us.preheader, label %pregion_for_end.i.us - -pregion_for_entry.entry.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.us - br label %pregion_for_entry.entry.i.us.us - -pregion_for_end.i.us.loopexit: ; preds = %if.end.i.us.us - br label %pregion_for_end.i.us - -pregion_for_end.i.us: ; preds = %pregion_for_end.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.us - %59 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.1 = or i32 %59, 1 - %cmp.i.us.1 = icmp slt i32 %conv2.i.us.1, %5 - %mul.i.us.1 = mul nsw i32 %conv2.i.us.1, %5 - %mul9.i.us.1 = mul nsw i32 %conv2.i.us.1, %4 - %60 = sext i32 %mul9.i.us.1 to i64 - br i1 %cmp.i.us.1, label %pregion_for_entry.entry.i.us.us.1.preheader, label %pregion_for_end.i.us.1 - -pregion_for_entry.entry.i.us.us.1.preheader: ; preds = %pregion_for_end.i.us - br label %pregion_for_entry.entry.i.us.us.1 - -pregion_for_entry.entry.i.us.us: ; preds = %if.end.i.us.us, %pregion_for_entry.entry.i.us.us.preheader - %_local_id_x.0.us.us = phi i64 [ %63, %if.end.i.us.us ], [ 0, %pregion_for_entry.entry.i.us.us.preheader ] - %add1.i.i.us.us = add nuw nsw i64 %_local_id_x.0.us.us, %mul.i.i - %conv.i.us.us = trunc i64 %add1.i.i.us.us to i32 - %cmp4.i.us.us = icmp slt i32 %conv.i.us.us, %5 - br i1 %cmp4.i.us.us, label %if.then.i.us.us, label %if.end.i.us.us - -if.then.i.us.us: ; preds = %pregion_for_entry.entry.i.us.us - %add.i.us.us = add nsw i32 %mul.i.us, %conv.i.us.us - %idxprom.i.us.us = sext i32 %add.i.us.us to i64 - %arrayidx.i.us.us = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us - %61 = load float, float* %arrayidx.i.us.us, align 4, !tbaa !12 - %mul6.i.us.us = fmul float %61, %3 - store float %mul6.i.us.us, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.us.us = mul nsw i32 %conv.i.us.us, %4 - %62 = sext i32 %mul14.i.us.us to i64 - br label %for.body.i.us.us - -if.end.i.us.us.loopexit: ; preds = %for.body.i.us.us - br label %if.end.i.us.us - -if.end.i.us.us: ; preds = %if.end.i.us.us.loopexit, %pregion_for_entry.entry.i.us.us - %63 = add nuw nsw i64 %_local_id_x.0.us.us, 1 - %exitcond.not = icmp eq i64 %63, 32 - br i1 %exitcond.not, label %pregion_for_end.i.us.loopexit, label %pregion_for_entry.entry.i.us.us, !llvm.loop !19 - -for.body.i.us.us: ; preds = %for.body.i.us.us, %if.then.i.us.us - %indvars.iv.next.i3.us.us = phi i64 [ %indvars.iv.next.i.us.us, %for.body.i.us.us ], [ 0, %if.then.i.us.us ] - %64 = phi float [ %69, %for.body.i.us.us ], [ %mul6.i.us.us, %if.then.i.us.us ] - %65 = add nsw i64 %indvars.iv.next.i3.us.us, %58 - %arrayidx12.i.us.us = getelementptr inbounds float, float* %0, i64 %65 - %66 = load float, float* %arrayidx12.i.us.us, align 4, !tbaa !12 - %mul13.i.us.us = fmul float %66, %2 - %67 = add nsw i64 %indvars.iv.next.i3.us.us, %62 - %arrayidx17.i.us.us = getelementptr inbounds float, float* %0, i64 %67 - %68 = load float, float* %arrayidx17.i.us.us, align 4, !tbaa !12 - %69 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us, float %68, float %64) #2 - store float %69, float* %arrayidx.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us = add nuw nsw i64 %indvars.iv.next.i3.us.us, 1 - %exitcond.not.i.us.us = icmp eq i64 %indvars.iv.next.i.us.us, %wide.trip.count.i - br i1 %exitcond.not.i.us.us, label %if.end.i.us.us.loopexit, label %for.body.i.us.us, !llvm.loop !21 - -pregion_for_entry.entry.i.us: ; preds = %if.end.i.us.3237, %pregion_for_entry.entry.i.us.preheader - %_local_id_x.0.us = phi i64 [ %530, %if.end.i.us.3237 ], [ 0, %pregion_for_entry.entry.i.us.preheader ] - %add1.i.i.us = add nuw nsw i64 %_local_id_x.0.us, %mul.i.i - %conv.i.us = trunc i64 %add1.i.i.us to i32 - %cmp4.i.us = icmp slt i32 %conv.i.us, %5 - br i1 %cmp4.i.us, label %if.then.i.us, label %if.end.i.us - -if.then.i.us: ; preds = %pregion_for_entry.entry.i.us - %add.i.us = add nsw i32 %mul.i.us, %conv.i.us - %idxprom.i.us = sext i32 %add.i.us to i64 - %arrayidx.i.us = getelementptr inbounds float, float* %1, i64 %idxprom.i.us - %70 = load float, float* %arrayidx.i.us, align 4, !tbaa !12 - %mul6.i.us = fmul float %70, %3 - store float %mul6.i.us, float* %arrayidx.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us - -if.end.i.us: ; preds = %if.then.i.us, %pregion_for_entry.entry.i.us - %71 = or i64 %_local_id_x.0.us, 1 - %add1.i.i.us.1206 = add nuw nsw i64 %71, %mul.i.i - %conv.i.us.1207 = trunc i64 %add1.i.i.us.1206 to i32 - %cmp4.i.us.1208 = icmp slt i32 %conv.i.us.1207, %5 - br i1 %cmp4.i.us.1208, label %if.then.i.us.1214, label %if.end.i.us.1215 - -pregion_for_end.i.loopexit: ; preds = %if.end.i.us.3237 - br label %pregion_for_end.i - -pregion_for_end.i: ; preds = %pregion_for_end.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.preheader - %72 = trunc i64 %mul3.i.i to i32 - %conv2.i.1 = or i32 %72, 1 - %cmp.i.1 = icmp slt i32 %conv2.i.1, %5 - %mul.i.1 = mul nsw i32 %conv2.i.1, %5 - br i1 %cmp.i.1, label %vector.scevcheck49, label %pregion_for_end.i.1 - -vector.scevcheck49: ; preds = %pregion_for_end.i - %73 = mul i32 %conv2.i.1, %5 - %74 = trunc i64 %7 to i32 - %75 = shl i32 %74, 5 - %76 = add i32 %73, %75 - %77 = icmp sgt i32 %76, 2147483616 - br i1 %77, label %pregion_for_entry.entry.i.us.1.preheader, label %vector.ph50 - -pregion_for_entry.entry.i.us.1.preheader: ; preds = %vector.scevcheck49 - br label %pregion_for_entry.entry.i.us.1 - -vector.ph50: ; preds = %vector.scevcheck49 - %broadcast.splatinsert57 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat58 = shufflevector <8 x i64> %broadcast.splatinsert57, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert59 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat60 = shufflevector <8 x i32> %broadcast.splatinsert59, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert62 = insertelement <8 x float> undef, float %3, i32 0 - %broadcast.splat63 = shufflevector <8 x float> %broadcast.splatinsert62, <8 x float> undef, <8 x i32> zeroinitializer - %78 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %79 = or <8 x i32> %78, - %80 = icmp sgt <8 x i32> %broadcast.splat60, %79 - %81 = extractelement <8 x i32> %79, i32 0 - %82 = add nsw i32 %mul.i.1, %81 - %83 = sext i32 %82 to i64 - %84 = getelementptr inbounds float, float* %1, i64 %83 - %85 = bitcast float* %84 to <8 x float>* - %wide.masked.load61 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %85, i32 4, <8 x i1> %80, <8 x float> undef), !tbaa !12 - %86 = fmul <8 x float> %wide.masked.load61, %broadcast.splat63 - %87 = bitcast float* %84 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %86, <8 x float>* %87, i32 4, <8 x i1> %80), !tbaa !12, !llvm.access.group !16 - %88 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %89 = or <8 x i32> %88, - %90 = icmp sgt <8 x i32> %broadcast.splat60, %89 - %91 = extractelement <8 x i32> %89, i32 0 - %92 = add nsw i32 %mul.i.1, %91 - %93 = sext i32 %92 to i64 - %94 = getelementptr inbounds float, float* %1, i64 %93 - %95 = bitcast float* %94 to <8 x float>* - %wide.masked.load61.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %95, i32 4, <8 x i1> %90, <8 x float> undef), !tbaa !12 - %96 = fmul <8 x float> %wide.masked.load61.1, %broadcast.splat63 - %97 = bitcast float* %94 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %96, <8 x float>* %97, i32 4, <8 x i1> %90), !tbaa !12, !llvm.access.group !16 - %98 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %99 = or <8 x i32> %98, - %100 = icmp sgt <8 x i32> %broadcast.splat60, %99 - %101 = extractelement <8 x i32> %99, i32 0 - %102 = add nsw i32 %mul.i.1, %101 - %103 = sext i32 %102 to i64 - %104 = getelementptr inbounds float, float* %1, i64 %103 - %105 = bitcast float* %104 to <8 x float>* - %wide.masked.load61.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %105, i32 4, <8 x i1> %100, <8 x float> undef), !tbaa !12 - %106 = fmul <8 x float> %wide.masked.load61.2, %broadcast.splat63 - %107 = bitcast float* %104 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %106, <8 x float>* %107, i32 4, <8 x i1> %100), !tbaa !12, !llvm.access.group !16 - %108 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %109 = or <8 x i32> %108, - %110 = icmp sgt <8 x i32> %broadcast.splat60, %109 - %111 = extractelement <8 x i32> %109, i32 0 - %112 = add nsw i32 %mul.i.1, %111 - %113 = sext i32 %112 to i64 - %114 = getelementptr inbounds float, float* %1, i64 %113 - %115 = bitcast float* %114 to <8 x float>* - %wide.masked.load61.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %115, i32 4, <8 x i1> %110, <8 x float> undef), !tbaa !12 - %116 = fmul <8 x float> %wide.masked.load61.3, %broadcast.splat63 - %117 = bitcast float* %114 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %116, <8 x float>* %117, i32 4, <8 x i1> %110), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.1 - -syrk_kernel.exit.loopexit: ; preds = %if.end.i.us.us.7 - br label %syrk_kernel.exit - -syrk_kernel.exit.loopexit238: ; preds = %if.end.i.us.7.3 - br label %syrk_kernel.exit - -syrk_kernel.exit: ; preds = %pregion_for_end.i.us.6, %vector.ph182, %pregion_for_end.i.6, %syrk_kernel.exit.loopexit238, %syrk_kernel.exit.loopexit - ret void - -pregion_for_entry.entry.i.us.1: ; preds = %if.end.i.us.1.3, %pregion_for_entry.entry.i.us.1.preheader - %_local_id_x.0.us.1 = phi i64 [ %524, %if.end.i.us.1.3 ], [ 0, %pregion_for_entry.entry.i.us.1.preheader ] - %add1.i.i.us.1 = add nuw nsw i64 %_local_id_x.0.us.1, %mul.i.i - %conv.i.us.1 = trunc i64 %add1.i.i.us.1 to i32 - %cmp4.i.us.1 = icmp slt i32 %conv.i.us.1, %5 - br i1 %cmp4.i.us.1, label %if.then.i.us.1, label %if.end.i.us.1 - -if.then.i.us.1: ; preds = %pregion_for_entry.entry.i.us.1 - %add.i.us.1 = add nsw i32 %mul.i.1, %conv.i.us.1 - %idxprom.i.us.1 = sext i32 %add.i.us.1 to i64 - %arrayidx.i.us.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.1 - %118 = load float, float* %arrayidx.i.us.1, align 4, !tbaa !12 - %mul6.i.us.1 = fmul float %118, %3 - store float %mul6.i.us.1, float* %arrayidx.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1 - -if.end.i.us.1: ; preds = %if.then.i.us.1, %pregion_for_entry.entry.i.us.1 - %119 = or i64 %_local_id_x.0.us.1, 1 - %add1.i.i.us.1.1 = add nuw nsw i64 %119, %mul.i.i - %conv.i.us.1.1 = trunc i64 %add1.i.i.us.1.1 to i32 - %cmp4.i.us.1.1 = icmp slt i32 %conv.i.us.1.1, %5 - br i1 %cmp4.i.us.1.1, label %if.then.i.us.1.1, label %if.end.i.us.1.1 - -pregion_for_end.i.1.loopexit: ; preds = %if.end.i.us.1.3 - br label %pregion_for_end.i.1 - -pregion_for_end.i.1: ; preds = %pregion_for_end.i.1.loopexit, %vector.ph50, %pregion_for_end.i - %120 = trunc i64 %mul3.i.i to i32 - %conv2.i.2 = or i32 %120, 2 - %cmp.i.2 = icmp slt i32 %conv2.i.2, %5 - %mul.i.2 = mul nsw i32 %conv2.i.2, %5 - br i1 %cmp.i.2, label %vector.scevcheck71, label %pregion_for_end.i.2 - -vector.scevcheck71: ; preds = %pregion_for_end.i.1 - %121 = mul i32 %conv2.i.2, %5 - %122 = trunc i64 %7 to i32 - %123 = shl i32 %122, 5 - %124 = add i32 %121, %123 - %125 = icmp sgt i32 %124, 2147483616 - br i1 %125, label %pregion_for_entry.entry.i.us.2.preheader, label %vector.ph72 - -pregion_for_entry.entry.i.us.2.preheader: ; preds = %vector.scevcheck71 - br label %pregion_for_entry.entry.i.us.2 - -vector.ph72: ; preds = %vector.scevcheck71 - %broadcast.splatinsert79 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat80 = shufflevector <8 x i64> %broadcast.splatinsert79, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert81 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat82 = shufflevector <8 x i32> %broadcast.splatinsert81, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert84 = insertelement <8 x float> undef, float %3, i32 0 - %broadcast.splat85 = shufflevector <8 x float> %broadcast.splatinsert84, <8 x float> undef, <8 x i32> zeroinitializer - %126 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %127 = or <8 x i32> %126, - %128 = icmp sgt <8 x i32> %broadcast.splat82, %127 - %129 = extractelement <8 x i32> %127, i32 0 - %130 = add nsw i32 %mul.i.2, %129 - %131 = sext i32 %130 to i64 - %132 = getelementptr inbounds float, float* %1, i64 %131 - %133 = bitcast float* %132 to <8 x float>* - %wide.masked.load83 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %133, i32 4, <8 x i1> %128, <8 x float> undef), !tbaa !12 - %134 = fmul <8 x float> %wide.masked.load83, %broadcast.splat85 - %135 = bitcast float* %132 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %134, <8 x float>* %135, i32 4, <8 x i1> %128), !tbaa !12, !llvm.access.group !16 - %136 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %137 = or <8 x i32> %136, - %138 = icmp sgt <8 x i32> %broadcast.splat82, %137 - %139 = extractelement <8 x i32> %137, i32 0 - %140 = add nsw i32 %mul.i.2, %139 - %141 = sext i32 %140 to i64 - %142 = getelementptr inbounds float, float* %1, i64 %141 - %143 = bitcast float* %142 to <8 x float>* - %wide.masked.load83.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %143, i32 4, <8 x i1> %138, <8 x float> undef), !tbaa !12 - %144 = fmul <8 x float> %wide.masked.load83.1, %broadcast.splat85 - %145 = bitcast float* %142 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %144, <8 x float>* %145, i32 4, <8 x i1> %138), !tbaa !12, !llvm.access.group !16 - %146 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %147 = or <8 x i32> %146, - %148 = icmp sgt <8 x i32> %broadcast.splat82, %147 - %149 = extractelement <8 x i32> %147, i32 0 - %150 = add nsw i32 %mul.i.2, %149 - %151 = sext i32 %150 to i64 - %152 = getelementptr inbounds float, float* %1, i64 %151 - %153 = bitcast float* %152 to <8 x float>* - %wide.masked.load83.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %153, i32 4, <8 x i1> %148, <8 x float> undef), !tbaa !12 - %154 = fmul <8 x float> %wide.masked.load83.2, %broadcast.splat85 - %155 = bitcast float* %152 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %154, <8 x float>* %155, i32 4, <8 x i1> %148), !tbaa !12, !llvm.access.group !16 - %156 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %157 = or <8 x i32> %156, - %158 = icmp sgt <8 x i32> %broadcast.splat82, %157 - %159 = extractelement <8 x i32> %157, i32 0 - %160 = add nsw i32 %mul.i.2, %159 - %161 = sext i32 %160 to i64 - %162 = getelementptr inbounds float, float* %1, i64 %161 - %163 = bitcast float* %162 to <8 x float>* - %wide.masked.load83.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %163, i32 4, <8 x i1> %158, <8 x float> undef), !tbaa !12 - %164 = fmul <8 x float> %wide.masked.load83.3, %broadcast.splat85 - %165 = bitcast float* %162 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %164, <8 x float>* %165, i32 4, <8 x i1> %158), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.2 - -pregion_for_entry.entry.i.us.2: ; preds = %if.end.i.us.2.3, %pregion_for_entry.entry.i.us.2.preheader - %_local_id_x.0.us.2 = phi i64 [ %518, %if.end.i.us.2.3 ], [ 0, %pregion_for_entry.entry.i.us.2.preheader ] - %add1.i.i.us.2 = add nuw nsw i64 %_local_id_x.0.us.2, %mul.i.i - %conv.i.us.2 = trunc i64 %add1.i.i.us.2 to i32 - %cmp4.i.us.2 = icmp slt i32 %conv.i.us.2, %5 - br i1 %cmp4.i.us.2, label %if.then.i.us.2, label %if.end.i.us.2 - -if.then.i.us.2: ; preds = %pregion_for_entry.entry.i.us.2 - %add.i.us.2 = add nsw i32 %mul.i.2, %conv.i.us.2 - %idxprom.i.us.2 = sext i32 %add.i.us.2 to i64 - %arrayidx.i.us.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.2 - %166 = load float, float* %arrayidx.i.us.2, align 4, !tbaa !12 - %mul6.i.us.2 = fmul float %166, %3 - store float %mul6.i.us.2, float* %arrayidx.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2 - -if.end.i.us.2: ; preds = %if.then.i.us.2, %pregion_for_entry.entry.i.us.2 - %167 = or i64 %_local_id_x.0.us.2, 1 - %add1.i.i.us.2.1 = add nuw nsw i64 %167, %mul.i.i - %conv.i.us.2.1 = trunc i64 %add1.i.i.us.2.1 to i32 - %cmp4.i.us.2.1 = icmp slt i32 %conv.i.us.2.1, %5 - br i1 %cmp4.i.us.2.1, label %if.then.i.us.2.1, label %if.end.i.us.2.1 - -pregion_for_end.i.2.loopexit: ; preds = %if.end.i.us.2.3 - br label %pregion_for_end.i.2 - -pregion_for_end.i.2: ; preds = %pregion_for_end.i.2.loopexit, %vector.ph72, %pregion_for_end.i.1 - %168 = trunc i64 %mul3.i.i to i32 - %conv2.i.3 = or i32 %168, 3 - %cmp.i.3 = icmp slt i32 %conv2.i.3, %5 - %mul.i.3 = mul nsw i32 %conv2.i.3, %5 - br i1 %cmp.i.3, label %vector.scevcheck93, label %pregion_for_end.i.3 - -vector.scevcheck93: ; preds = %pregion_for_end.i.2 - %169 = mul i32 %conv2.i.3, %5 - %170 = trunc i64 %7 to i32 - %171 = shl i32 %170, 5 - %172 = add i32 %169, %171 - %173 = icmp sgt i32 %172, 2147483616 - br i1 %173, label %pregion_for_entry.entry.i.us.3.preheader, label %vector.ph94 - -pregion_for_entry.entry.i.us.3.preheader: ; preds = %vector.scevcheck93 - br label %pregion_for_entry.entry.i.us.3 - -vector.ph94: ; preds = %vector.scevcheck93 - %broadcast.splatinsert101 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat102 = shufflevector <8 x i64> %broadcast.splatinsert101, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert103 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat104 = shufflevector <8 x i32> %broadcast.splatinsert103, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert106 = insertelement <8 x float> undef, float %3, i32 0 - %broadcast.splat107 = shufflevector <8 x float> %broadcast.splatinsert106, <8 x float> undef, <8 x i32> zeroinitializer - %174 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %175 = or <8 x i32> %174, - %176 = icmp sgt <8 x i32> %broadcast.splat104, %175 - %177 = extractelement <8 x i32> %175, i32 0 - %178 = add nsw i32 %mul.i.3, %177 - %179 = sext i32 %178 to i64 - %180 = getelementptr inbounds float, float* %1, i64 %179 - %181 = bitcast float* %180 to <8 x float>* - %wide.masked.load105 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %181, i32 4, <8 x i1> %176, <8 x float> undef), !tbaa !12 - %182 = fmul <8 x float> %wide.masked.load105, %broadcast.splat107 - %183 = bitcast float* %180 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %182, <8 x float>* %183, i32 4, <8 x i1> %176), !tbaa !12, !llvm.access.group !16 - %184 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %185 = or <8 x i32> %184, - %186 = icmp sgt <8 x i32> %broadcast.splat104, %185 - %187 = extractelement <8 x i32> %185, i32 0 - %188 = add nsw i32 %mul.i.3, %187 - %189 = sext i32 %188 to i64 - %190 = getelementptr inbounds float, float* %1, i64 %189 - %191 = bitcast float* %190 to <8 x float>* - %wide.masked.load105.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %191, i32 4, <8 x i1> %186, <8 x float> undef), !tbaa !12 - %192 = fmul <8 x float> %wide.masked.load105.1, %broadcast.splat107 - %193 = bitcast float* %190 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %192, <8 x float>* %193, i32 4, <8 x i1> %186), !tbaa !12, !llvm.access.group !16 - %194 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %195 = or <8 x i32> %194, - %196 = icmp sgt <8 x i32> %broadcast.splat104, %195 - %197 = extractelement <8 x i32> %195, i32 0 - %198 = add nsw i32 %mul.i.3, %197 - %199 = sext i32 %198 to i64 - %200 = getelementptr inbounds float, float* %1, i64 %199 - %201 = bitcast float* %200 to <8 x float>* - %wide.masked.load105.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %201, i32 4, <8 x i1> %196, <8 x float> undef), !tbaa !12 - %202 = fmul <8 x float> %wide.masked.load105.2, %broadcast.splat107 - %203 = bitcast float* %200 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %202, <8 x float>* %203, i32 4, <8 x i1> %196), !tbaa !12, !llvm.access.group !16 - %204 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %205 = or <8 x i32> %204, - %206 = icmp sgt <8 x i32> %broadcast.splat104, %205 - %207 = extractelement <8 x i32> %205, i32 0 - %208 = add nsw i32 %mul.i.3, %207 - %209 = sext i32 %208 to i64 - %210 = getelementptr inbounds float, float* %1, i64 %209 - %211 = bitcast float* %210 to <8 x float>* - %wide.masked.load105.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %211, i32 4, <8 x i1> %206, <8 x float> undef), !tbaa !12 - %212 = fmul <8 x float> %wide.masked.load105.3, %broadcast.splat107 - %213 = bitcast float* %210 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %212, <8 x float>* %213, i32 4, <8 x i1> %206), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.3 - -pregion_for_entry.entry.i.us.3: ; preds = %if.end.i.us.3.3, %pregion_for_entry.entry.i.us.3.preheader - %_local_id_x.0.us.3 = phi i64 [ %512, %if.end.i.us.3.3 ], [ 0, %pregion_for_entry.entry.i.us.3.preheader ] - %add1.i.i.us.3 = add nuw nsw i64 %_local_id_x.0.us.3, %mul.i.i - %conv.i.us.3 = trunc i64 %add1.i.i.us.3 to i32 - %cmp4.i.us.3 = icmp slt i32 %conv.i.us.3, %5 - br i1 %cmp4.i.us.3, label %if.then.i.us.3, label %if.end.i.us.3 - -if.then.i.us.3: ; preds = %pregion_for_entry.entry.i.us.3 - %add.i.us.3 = add nsw i32 %mul.i.3, %conv.i.us.3 - %idxprom.i.us.3 = sext i32 %add.i.us.3 to i64 - %arrayidx.i.us.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.3 - %214 = load float, float* %arrayidx.i.us.3, align 4, !tbaa !12 - %mul6.i.us.3 = fmul float %214, %3 - store float %mul6.i.us.3, float* %arrayidx.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3 - -if.end.i.us.3: ; preds = %if.then.i.us.3, %pregion_for_entry.entry.i.us.3 - %215 = or i64 %_local_id_x.0.us.3, 1 - %add1.i.i.us.3.1 = add nuw nsw i64 %215, %mul.i.i - %conv.i.us.3.1 = trunc i64 %add1.i.i.us.3.1 to i32 - %cmp4.i.us.3.1 = icmp slt i32 %conv.i.us.3.1, %5 - br i1 %cmp4.i.us.3.1, label %if.then.i.us.3.1, label %if.end.i.us.3.1 - -pregion_for_end.i.3.loopexit: ; preds = %if.end.i.us.3.3 - br label %pregion_for_end.i.3 - -pregion_for_end.i.3: ; preds = %pregion_for_end.i.3.loopexit, %vector.ph94, %pregion_for_end.i.2 - %216 = trunc i64 %mul3.i.i to i32 - %conv2.i.4 = or i32 %216, 4 - %cmp.i.4 = icmp slt i32 %conv2.i.4, %5 - %mul.i.4 = mul nsw i32 %conv2.i.4, %5 - br i1 %cmp.i.4, label %vector.scevcheck115, label %pregion_for_end.i.4 - -vector.scevcheck115: ; preds = %pregion_for_end.i.3 - %217 = mul i32 %conv2.i.4, %5 - %218 = trunc i64 %7 to i32 - %219 = shl i32 %218, 5 - %220 = add i32 %217, %219 - %221 = icmp sgt i32 %220, 2147483616 - br i1 %221, label %pregion_for_entry.entry.i.us.4.preheader, label %vector.ph116 - -pregion_for_entry.entry.i.us.4.preheader: ; preds = %vector.scevcheck115 - br label %pregion_for_entry.entry.i.us.4 - -vector.ph116: ; preds = %vector.scevcheck115 - %broadcast.splatinsert123 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat124 = shufflevector <8 x i64> %broadcast.splatinsert123, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert125 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat126 = shufflevector <8 x i32> %broadcast.splatinsert125, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert128 = insertelement <8 x float> undef, float %3, i32 0 - %broadcast.splat129 = shufflevector <8 x float> %broadcast.splatinsert128, <8 x float> undef, <8 x i32> zeroinitializer - %222 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %223 = or <8 x i32> %222, - %224 = icmp sgt <8 x i32> %broadcast.splat126, %223 - %225 = extractelement <8 x i32> %223, i32 0 - %226 = add nsw i32 %mul.i.4, %225 - %227 = sext i32 %226 to i64 - %228 = getelementptr inbounds float, float* %1, i64 %227 - %229 = bitcast float* %228 to <8 x float>* - %wide.masked.load127 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %229, i32 4, <8 x i1> %224, <8 x float> undef), !tbaa !12 - %230 = fmul <8 x float> %wide.masked.load127, %broadcast.splat129 - %231 = bitcast float* %228 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %230, <8 x float>* %231, i32 4, <8 x i1> %224), !tbaa !12, !llvm.access.group !16 - %232 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %233 = or <8 x i32> %232, - %234 = icmp sgt <8 x i32> %broadcast.splat126, %233 - %235 = extractelement <8 x i32> %233, i32 0 - %236 = add nsw i32 %mul.i.4, %235 - %237 = sext i32 %236 to i64 - %238 = getelementptr inbounds float, float* %1, i64 %237 - %239 = bitcast float* %238 to <8 x float>* - %wide.masked.load127.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %239, i32 4, <8 x i1> %234, <8 x float> undef), !tbaa !12 - %240 = fmul <8 x float> %wide.masked.load127.1, %broadcast.splat129 - %241 = bitcast float* %238 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %240, <8 x float>* %241, i32 4, <8 x i1> %234), !tbaa !12, !llvm.access.group !16 - %242 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %243 = or <8 x i32> %242, - %244 = icmp sgt <8 x i32> %broadcast.splat126, %243 - %245 = extractelement <8 x i32> %243, i32 0 - %246 = add nsw i32 %mul.i.4, %245 - %247 = sext i32 %246 to i64 - %248 = getelementptr inbounds float, float* %1, i64 %247 - %249 = bitcast float* %248 to <8 x float>* - %wide.masked.load127.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %249, i32 4, <8 x i1> %244, <8 x float> undef), !tbaa !12 - %250 = fmul <8 x float> %wide.masked.load127.2, %broadcast.splat129 - %251 = bitcast float* %248 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %250, <8 x float>* %251, i32 4, <8 x i1> %244), !tbaa !12, !llvm.access.group !16 - %252 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %253 = or <8 x i32> %252, - %254 = icmp sgt <8 x i32> %broadcast.splat126, %253 - %255 = extractelement <8 x i32> %253, i32 0 - %256 = add nsw i32 %mul.i.4, %255 - %257 = sext i32 %256 to i64 - %258 = getelementptr inbounds float, float* %1, i64 %257 - %259 = bitcast float* %258 to <8 x float>* - %wide.masked.load127.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %259, i32 4, <8 x i1> %254, <8 x float> undef), !tbaa !12 - %260 = fmul <8 x float> %wide.masked.load127.3, %broadcast.splat129 - %261 = bitcast float* %258 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %260, <8 x float>* %261, i32 4, <8 x i1> %254), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.4 - -pregion_for_entry.entry.i.us.4: ; preds = %if.end.i.us.4.3, %pregion_for_entry.entry.i.us.4.preheader - %_local_id_x.0.us.4 = phi i64 [ %506, %if.end.i.us.4.3 ], [ 0, %pregion_for_entry.entry.i.us.4.preheader ] - %add1.i.i.us.4 = add nuw nsw i64 %_local_id_x.0.us.4, %mul.i.i - %conv.i.us.4 = trunc i64 %add1.i.i.us.4 to i32 - %cmp4.i.us.4 = icmp slt i32 %conv.i.us.4, %5 - br i1 %cmp4.i.us.4, label %if.then.i.us.4, label %if.end.i.us.4 - -if.then.i.us.4: ; preds = %pregion_for_entry.entry.i.us.4 - %add.i.us.4 = add nsw i32 %mul.i.4, %conv.i.us.4 - %idxprom.i.us.4 = sext i32 %add.i.us.4 to i64 - %arrayidx.i.us.4 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.4 - %262 = load float, float* %arrayidx.i.us.4, align 4, !tbaa !12 - %mul6.i.us.4 = fmul float %262, %3 - store float %mul6.i.us.4, float* %arrayidx.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4 - -if.end.i.us.4: ; preds = %if.then.i.us.4, %pregion_for_entry.entry.i.us.4 - %263 = or i64 %_local_id_x.0.us.4, 1 - %add1.i.i.us.4.1 = add nuw nsw i64 %263, %mul.i.i - %conv.i.us.4.1 = trunc i64 %add1.i.i.us.4.1 to i32 - %cmp4.i.us.4.1 = icmp slt i32 %conv.i.us.4.1, %5 - br i1 %cmp4.i.us.4.1, label %if.then.i.us.4.1, label %if.end.i.us.4.1 - -pregion_for_end.i.4.loopexit: ; preds = %if.end.i.us.4.3 - br label %pregion_for_end.i.4 - -pregion_for_end.i.4: ; preds = %pregion_for_end.i.4.loopexit, %vector.ph116, %pregion_for_end.i.3 - %264 = trunc i64 %mul3.i.i to i32 - %conv2.i.5 = or i32 %264, 5 - %cmp.i.5 = icmp slt i32 %conv2.i.5, %5 - %mul.i.5 = mul nsw i32 %conv2.i.5, %5 - br i1 %cmp.i.5, label %vector.scevcheck137, label %pregion_for_end.i.5 - -vector.scevcheck137: ; preds = %pregion_for_end.i.4 - %265 = mul i32 %conv2.i.5, %5 - %266 = trunc i64 %7 to i32 - %267 = shl i32 %266, 5 - %268 = add i32 %265, %267 - %269 = icmp sgt i32 %268, 2147483616 - br i1 %269, label %pregion_for_entry.entry.i.us.5.preheader, label %vector.ph138 - -pregion_for_entry.entry.i.us.5.preheader: ; preds = %vector.scevcheck137 - br label %pregion_for_entry.entry.i.us.5 - -vector.ph138: ; preds = %vector.scevcheck137 - %broadcast.splatinsert145 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat146 = shufflevector <8 x i64> %broadcast.splatinsert145, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert147 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat148 = shufflevector <8 x i32> %broadcast.splatinsert147, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert150 = insertelement <8 x float> undef, float %3, i32 0 - %broadcast.splat151 = shufflevector <8 x float> %broadcast.splatinsert150, <8 x float> undef, <8 x i32> zeroinitializer - %270 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %271 = or <8 x i32> %270, - %272 = icmp sgt <8 x i32> %broadcast.splat148, %271 - %273 = extractelement <8 x i32> %271, i32 0 - %274 = add nsw i32 %mul.i.5, %273 - %275 = sext i32 %274 to i64 - %276 = getelementptr inbounds float, float* %1, i64 %275 - %277 = bitcast float* %276 to <8 x float>* - %wide.masked.load149 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %277, i32 4, <8 x i1> %272, <8 x float> undef), !tbaa !12 - %278 = fmul <8 x float> %wide.masked.load149, %broadcast.splat151 - %279 = bitcast float* %276 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %278, <8 x float>* %279, i32 4, <8 x i1> %272), !tbaa !12, !llvm.access.group !16 - %280 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %281 = or <8 x i32> %280, - %282 = icmp sgt <8 x i32> %broadcast.splat148, %281 - %283 = extractelement <8 x i32> %281, i32 0 - %284 = add nsw i32 %mul.i.5, %283 - %285 = sext i32 %284 to i64 - %286 = getelementptr inbounds float, float* %1, i64 %285 - %287 = bitcast float* %286 to <8 x float>* - %wide.masked.load149.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %287, i32 4, <8 x i1> %282, <8 x float> undef), !tbaa !12 - %288 = fmul <8 x float> %wide.masked.load149.1, %broadcast.splat151 - %289 = bitcast float* %286 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %288, <8 x float>* %289, i32 4, <8 x i1> %282), !tbaa !12, !llvm.access.group !16 - %290 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %291 = or <8 x i32> %290, - %292 = icmp sgt <8 x i32> %broadcast.splat148, %291 - %293 = extractelement <8 x i32> %291, i32 0 - %294 = add nsw i32 %mul.i.5, %293 - %295 = sext i32 %294 to i64 - %296 = getelementptr inbounds float, float* %1, i64 %295 - %297 = bitcast float* %296 to <8 x float>* - %wide.masked.load149.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %297, i32 4, <8 x i1> %292, <8 x float> undef), !tbaa !12 - %298 = fmul <8 x float> %wide.masked.load149.2, %broadcast.splat151 - %299 = bitcast float* %296 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %298, <8 x float>* %299, i32 4, <8 x i1> %292), !tbaa !12, !llvm.access.group !16 - %300 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %301 = or <8 x i32> %300, - %302 = icmp sgt <8 x i32> %broadcast.splat148, %301 - %303 = extractelement <8 x i32> %301, i32 0 - %304 = add nsw i32 %mul.i.5, %303 - %305 = sext i32 %304 to i64 - %306 = getelementptr inbounds float, float* %1, i64 %305 - %307 = bitcast float* %306 to <8 x float>* - %wide.masked.load149.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %307, i32 4, <8 x i1> %302, <8 x float> undef), !tbaa !12 - %308 = fmul <8 x float> %wide.masked.load149.3, %broadcast.splat151 - %309 = bitcast float* %306 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %308, <8 x float>* %309, i32 4, <8 x i1> %302), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.5 - -pregion_for_entry.entry.i.us.5: ; preds = %if.end.i.us.5.3, %pregion_for_entry.entry.i.us.5.preheader - %_local_id_x.0.us.5 = phi i64 [ %500, %if.end.i.us.5.3 ], [ 0, %pregion_for_entry.entry.i.us.5.preheader ] - %add1.i.i.us.5 = add nuw nsw i64 %_local_id_x.0.us.5, %mul.i.i - %conv.i.us.5 = trunc i64 %add1.i.i.us.5 to i32 - %cmp4.i.us.5 = icmp slt i32 %conv.i.us.5, %5 - br i1 %cmp4.i.us.5, label %if.then.i.us.5, label %if.end.i.us.5 - -if.then.i.us.5: ; preds = %pregion_for_entry.entry.i.us.5 - %add.i.us.5 = add nsw i32 %mul.i.5, %conv.i.us.5 - %idxprom.i.us.5 = sext i32 %add.i.us.5 to i64 - %arrayidx.i.us.5 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.5 - %310 = load float, float* %arrayidx.i.us.5, align 4, !tbaa !12 - %mul6.i.us.5 = fmul float %310, %3 - store float %mul6.i.us.5, float* %arrayidx.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5 - -if.end.i.us.5: ; preds = %if.then.i.us.5, %pregion_for_entry.entry.i.us.5 - %311 = or i64 %_local_id_x.0.us.5, 1 - %add1.i.i.us.5.1 = add nuw nsw i64 %311, %mul.i.i - %conv.i.us.5.1 = trunc i64 %add1.i.i.us.5.1 to i32 - %cmp4.i.us.5.1 = icmp slt i32 %conv.i.us.5.1, %5 - br i1 %cmp4.i.us.5.1, label %if.then.i.us.5.1, label %if.end.i.us.5.1 - -pregion_for_end.i.5.loopexit: ; preds = %if.end.i.us.5.3 - br label %pregion_for_end.i.5 - -pregion_for_end.i.5: ; preds = %pregion_for_end.i.5.loopexit, %vector.ph138, %pregion_for_end.i.4 - %312 = trunc i64 %mul3.i.i to i32 - %conv2.i.6 = or i32 %312, 6 - %cmp.i.6 = icmp slt i32 %conv2.i.6, %5 - %mul.i.6 = mul nsw i32 %conv2.i.6, %5 - br i1 %cmp.i.6, label %vector.scevcheck159, label %pregion_for_end.i.6 - -vector.scevcheck159: ; preds = %pregion_for_end.i.5 - %313 = mul i32 %conv2.i.6, %5 - %314 = trunc i64 %7 to i32 - %315 = shl i32 %314, 5 - %316 = add i32 %313, %315 - %317 = icmp sgt i32 %316, 2147483616 - br i1 %317, label %pregion_for_entry.entry.i.us.6.preheader, label %vector.ph160 - -pregion_for_entry.entry.i.us.6.preheader: ; preds = %vector.scevcheck159 - br label %pregion_for_entry.entry.i.us.6 - -vector.ph160: ; preds = %vector.scevcheck159 - %broadcast.splatinsert167 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat168 = shufflevector <8 x i64> %broadcast.splatinsert167, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert169 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat170 = shufflevector <8 x i32> %broadcast.splatinsert169, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert172 = insertelement <8 x float> undef, float %3, i32 0 - %broadcast.splat173 = shufflevector <8 x float> %broadcast.splatinsert172, <8 x float> undef, <8 x i32> zeroinitializer - %318 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %319 = or <8 x i32> %318, - %320 = icmp sgt <8 x i32> %broadcast.splat170, %319 - %321 = extractelement <8 x i32> %319, i32 0 - %322 = add nsw i32 %mul.i.6, %321 - %323 = sext i32 %322 to i64 - %324 = getelementptr inbounds float, float* %1, i64 %323 - %325 = bitcast float* %324 to <8 x float>* - %wide.masked.load171 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %325, i32 4, <8 x i1> %320, <8 x float> undef), !tbaa !12 - %326 = fmul <8 x float> %wide.masked.load171, %broadcast.splat173 - %327 = bitcast float* %324 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %326, <8 x float>* %327, i32 4, <8 x i1> %320), !tbaa !12, !llvm.access.group !16 - %328 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %329 = or <8 x i32> %328, - %330 = icmp sgt <8 x i32> %broadcast.splat170, %329 - %331 = extractelement <8 x i32> %329, i32 0 - %332 = add nsw i32 %mul.i.6, %331 - %333 = sext i32 %332 to i64 - %334 = getelementptr inbounds float, float* %1, i64 %333 - %335 = bitcast float* %334 to <8 x float>* - %wide.masked.load171.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %335, i32 4, <8 x i1> %330, <8 x float> undef), !tbaa !12 - %336 = fmul <8 x float> %wide.masked.load171.1, %broadcast.splat173 - %337 = bitcast float* %334 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %336, <8 x float>* %337, i32 4, <8 x i1> %330), !tbaa !12, !llvm.access.group !16 - %338 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %339 = or <8 x i32> %338, - %340 = icmp sgt <8 x i32> %broadcast.splat170, %339 - %341 = extractelement <8 x i32> %339, i32 0 - %342 = add nsw i32 %mul.i.6, %341 - %343 = sext i32 %342 to i64 - %344 = getelementptr inbounds float, float* %1, i64 %343 - %345 = bitcast float* %344 to <8 x float>* - %wide.masked.load171.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %345, i32 4, <8 x i1> %340, <8 x float> undef), !tbaa !12 - %346 = fmul <8 x float> %wide.masked.load171.2, %broadcast.splat173 - %347 = bitcast float* %344 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %346, <8 x float>* %347, i32 4, <8 x i1> %340), !tbaa !12, !llvm.access.group !16 - %348 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %349 = or <8 x i32> %348, - %350 = icmp sgt <8 x i32> %broadcast.splat170, %349 - %351 = extractelement <8 x i32> %349, i32 0 - %352 = add nsw i32 %mul.i.6, %351 - %353 = sext i32 %352 to i64 - %354 = getelementptr inbounds float, float* %1, i64 %353 - %355 = bitcast float* %354 to <8 x float>* - %wide.masked.load171.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %355, i32 4, <8 x i1> %350, <8 x float> undef), !tbaa !12 - %356 = fmul <8 x float> %wide.masked.load171.3, %broadcast.splat173 - %357 = bitcast float* %354 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %356, <8 x float>* %357, i32 4, <8 x i1> %350), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.6 - -pregion_for_entry.entry.i.us.6: ; preds = %if.end.i.us.6.3, %pregion_for_entry.entry.i.us.6.preheader - %_local_id_x.0.us.6 = phi i64 [ %494, %if.end.i.us.6.3 ], [ 0, %pregion_for_entry.entry.i.us.6.preheader ] - %add1.i.i.us.6 = add nuw nsw i64 %_local_id_x.0.us.6, %mul.i.i - %conv.i.us.6 = trunc i64 %add1.i.i.us.6 to i32 - %cmp4.i.us.6 = icmp slt i32 %conv.i.us.6, %5 - br i1 %cmp4.i.us.6, label %if.then.i.us.6, label %if.end.i.us.6 - -if.then.i.us.6: ; preds = %pregion_for_entry.entry.i.us.6 - %add.i.us.6 = add nsw i32 %mul.i.6, %conv.i.us.6 - %idxprom.i.us.6 = sext i32 %add.i.us.6 to i64 - %arrayidx.i.us.6 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.6 - %358 = load float, float* %arrayidx.i.us.6, align 4, !tbaa !12 - %mul6.i.us.6 = fmul float %358, %3 - store float %mul6.i.us.6, float* %arrayidx.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6 - -if.end.i.us.6: ; preds = %if.then.i.us.6, %pregion_for_entry.entry.i.us.6 - %359 = or i64 %_local_id_x.0.us.6, 1 - %add1.i.i.us.6.1 = add nuw nsw i64 %359, %mul.i.i - %conv.i.us.6.1 = trunc i64 %add1.i.i.us.6.1 to i32 - %cmp4.i.us.6.1 = icmp slt i32 %conv.i.us.6.1, %5 - br i1 %cmp4.i.us.6.1, label %if.then.i.us.6.1, label %if.end.i.us.6.1 - -pregion_for_end.i.6.loopexit: ; preds = %if.end.i.us.6.3 - br label %pregion_for_end.i.6 - -pregion_for_end.i.6: ; preds = %pregion_for_end.i.6.loopexit, %vector.ph160, %pregion_for_end.i.5 - %360 = trunc i64 %mul3.i.i to i32 - %conv2.i.7 = or i32 %360, 7 - %cmp.i.7 = icmp slt i32 %conv2.i.7, %5 - %mul.i.7 = mul nsw i32 %conv2.i.7, %5 - br i1 %cmp.i.7, label %vector.scevcheck181, label %syrk_kernel.exit - -vector.scevcheck181: ; preds = %pregion_for_end.i.6 - %361 = mul i32 %conv2.i.7, %5 - %362 = trunc i64 %7 to i32 - %363 = shl i32 %362, 5 - %364 = add i32 %361, %363 - %365 = icmp sgt i32 %364, 2147483616 - br i1 %365, label %pregion_for_entry.entry.i.us.7.preheader, label %vector.ph182 - -pregion_for_entry.entry.i.us.7.preheader: ; preds = %vector.scevcheck181 - br label %pregion_for_entry.entry.i.us.7 - -vector.ph182: ; preds = %vector.scevcheck181 - %broadcast.splatinsert189 = insertelement <8 x i64> undef, i64 %mul.i.i, i32 0 - %broadcast.splat190 = shufflevector <8 x i64> %broadcast.splatinsert189, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert191 = insertelement <8 x i32> undef, i32 %5, i32 0 - %broadcast.splat192 = shufflevector <8 x i32> %broadcast.splatinsert191, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert194 = insertelement <8 x float> undef, float %3, i32 0 - %broadcast.splat195 = shufflevector <8 x float> %broadcast.splatinsert194, <8 x float> undef, <8 x i32> zeroinitializer - %366 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %367 = or <8 x i32> %366, - %368 = icmp sgt <8 x i32> %broadcast.splat192, %367 - %369 = extractelement <8 x i32> %367, i32 0 - %370 = add nsw i32 %mul.i.7, %369 - %371 = sext i32 %370 to i64 - %372 = getelementptr inbounds float, float* %1, i64 %371 - %373 = bitcast float* %372 to <8 x float>* - %wide.masked.load193 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %373, i32 4, <8 x i1> %368, <8 x float> undef), !tbaa !12 - %374 = fmul <8 x float> %wide.masked.load193, %broadcast.splat195 - %375 = bitcast float* %372 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %374, <8 x float>* %375, i32 4, <8 x i1> %368), !tbaa !12, !llvm.access.group !16 - %376 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %377 = or <8 x i32> %376, - %378 = icmp sgt <8 x i32> %broadcast.splat192, %377 - %379 = extractelement <8 x i32> %377, i32 0 - %380 = add nsw i32 %mul.i.7, %379 - %381 = sext i32 %380 to i64 - %382 = getelementptr inbounds float, float* %1, i64 %381 - %383 = bitcast float* %382 to <8 x float>* - %wide.masked.load193.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %383, i32 4, <8 x i1> %378, <8 x float> undef), !tbaa !12 - %384 = fmul <8 x float> %wide.masked.load193.1, %broadcast.splat195 - %385 = bitcast float* %382 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %384, <8 x float>* %385, i32 4, <8 x i1> %378), !tbaa !12, !llvm.access.group !16 - %386 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %387 = or <8 x i32> %386, - %388 = icmp sgt <8 x i32> %broadcast.splat192, %387 - %389 = extractelement <8 x i32> %387, i32 0 - %390 = add nsw i32 %mul.i.7, %389 - %391 = sext i32 %390 to i64 - %392 = getelementptr inbounds float, float* %1, i64 %391 - %393 = bitcast float* %392 to <8 x float>* - %wide.masked.load193.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %393, i32 4, <8 x i1> %388, <8 x float> undef), !tbaa !12 - %394 = fmul <8 x float> %wide.masked.load193.2, %broadcast.splat195 - %395 = bitcast float* %392 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %394, <8 x float>* %395, i32 4, <8 x i1> %388), !tbaa !12, !llvm.access.group !16 - %396 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %397 = or <8 x i32> %396, - %398 = icmp sgt <8 x i32> %broadcast.splat192, %397 - %399 = extractelement <8 x i32> %397, i32 0 - %400 = add nsw i32 %mul.i.7, %399 - %401 = sext i32 %400 to i64 - %402 = getelementptr inbounds float, float* %1, i64 %401 - %403 = bitcast float* %402 to <8 x float>* - %wide.masked.load193.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %403, i32 4, <8 x i1> %398, <8 x float> undef), !tbaa !12 - %404 = fmul <8 x float> %wide.masked.load193.3, %broadcast.splat195 - %405 = bitcast float* %402 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %404, <8 x float>* %405, i32 4, <8 x i1> %398), !tbaa !12, !llvm.access.group !16 - br label %syrk_kernel.exit - -pregion_for_entry.entry.i.us.7: ; preds = %if.end.i.us.7.3, %pregion_for_entry.entry.i.us.7.preheader - %_local_id_x.0.us.7 = phi i64 [ %488, %if.end.i.us.7.3 ], [ 0, %pregion_for_entry.entry.i.us.7.preheader ] - %add1.i.i.us.7 = add nuw nsw i64 %_local_id_x.0.us.7, %mul.i.i - %conv.i.us.7 = trunc i64 %add1.i.i.us.7 to i32 - %cmp4.i.us.7 = icmp slt i32 %conv.i.us.7, %5 - br i1 %cmp4.i.us.7, label %if.then.i.us.7, label %if.end.i.us.7 - -if.then.i.us.7: ; preds = %pregion_for_entry.entry.i.us.7 - %add.i.us.7 = add nsw i32 %mul.i.7, %conv.i.us.7 - %idxprom.i.us.7 = sext i32 %add.i.us.7 to i64 - %arrayidx.i.us.7 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.7 - %406 = load float, float* %arrayidx.i.us.7, align 4, !tbaa !12 - %mul6.i.us.7 = fmul float %406, %3 - store float %mul6.i.us.7, float* %arrayidx.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7 - -if.end.i.us.7: ; preds = %if.then.i.us.7, %pregion_for_entry.entry.i.us.7 - %407 = or i64 %_local_id_x.0.us.7, 1 - %add1.i.i.us.7.1 = add nuw nsw i64 %407, %mul.i.i - %conv.i.us.7.1 = trunc i64 %add1.i.i.us.7.1 to i32 - %cmp4.i.us.7.1 = icmp slt i32 %conv.i.us.7.1, %5 - br i1 %cmp4.i.us.7.1, label %if.then.i.us.7.1, label %if.end.i.us.7.1 - -pregion_for_entry.entry.i.us.us.1: ; preds = %if.end.i.us.us.1, %pregion_for_entry.entry.i.us.us.1.preheader - %_local_id_x.0.us.us.1 = phi i64 [ %416, %if.end.i.us.us.1 ], [ 0, %pregion_for_entry.entry.i.us.us.1.preheader ] - %add1.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.0.us.us.1, %mul.i.i - %conv.i.us.us.1 = trunc i64 %add1.i.i.us.us.1 to i32 - %cmp4.i.us.us.1 = icmp slt i32 %conv.i.us.us.1, %5 - br i1 %cmp4.i.us.us.1, label %if.then.i.us.us.1, label %if.end.i.us.us.1 - -if.then.i.us.us.1: ; preds = %pregion_for_entry.entry.i.us.us.1 - %add.i.us.us.1 = add nsw i32 %mul.i.us.1, %conv.i.us.us.1 - %idxprom.i.us.us.1 = sext i32 %add.i.us.us.1 to i64 - %arrayidx.i.us.us.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.1 - %408 = load float, float* %arrayidx.i.us.us.1, align 4, !tbaa !12 - %mul6.i.us.us.1 = fmul float %408, %3 - store float %mul6.i.us.us.1, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.us.us.1 = mul nsw i32 %conv.i.us.us.1, %4 - %409 = sext i32 %mul14.i.us.us.1 to i64 - br label %for.body.i.us.us.1 - -for.body.i.us.us.1: ; preds = %for.body.i.us.us.1, %if.then.i.us.us.1 - %indvars.iv.next.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.us.us.1, %for.body.i.us.us.1 ], [ 0, %if.then.i.us.us.1 ] - %410 = phi float [ %415, %for.body.i.us.us.1 ], [ %mul6.i.us.us.1, %if.then.i.us.us.1 ] - %411 = add nsw i64 %indvars.iv.next.i3.us.us.1, %60 - %arrayidx12.i.us.us.1 = getelementptr inbounds float, float* %0, i64 %411 - %412 = load float, float* %arrayidx12.i.us.us.1, align 4, !tbaa !12 - %mul13.i.us.us.1 = fmul float %412, %2 - %413 = add nsw i64 %indvars.iv.next.i3.us.us.1, %409 - %arrayidx17.i.us.us.1 = getelementptr inbounds float, float* %0, i64 %413 - %414 = load float, float* %arrayidx17.i.us.us.1, align 4, !tbaa !12 - %415 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.1, float %414, float %410) #2 - store float %415, float* %arrayidx.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i3.us.us.1, 1 - %exitcond.not.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.us.us.1, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.1, label %if.end.i.us.us.1.loopexit, label %for.body.i.us.us.1, !llvm.loop !21 - -if.end.i.us.us.1.loopexit: ; preds = %for.body.i.us.us.1 - br label %if.end.i.us.us.1 - -if.end.i.us.us.1: ; preds = %if.end.i.us.us.1.loopexit, %pregion_for_entry.entry.i.us.us.1 - %416 = add nuw nsw i64 %_local_id_x.0.us.us.1, 1 - %exitcond.not.1 = icmp eq i64 %416, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.us.1.loopexit, label %pregion_for_entry.entry.i.us.us.1, !llvm.loop !19 - -pregion_for_end.i.us.1.loopexit: ; preds = %if.end.i.us.us.1 - br label %pregion_for_end.i.us.1 - -pregion_for_end.i.us.1: ; preds = %pregion_for_end.i.us.1.loopexit, %pregion_for_end.i.us - %417 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.2 = or i32 %417, 2 - %cmp.i.us.2 = icmp slt i32 %conv2.i.us.2, %5 - %mul.i.us.2 = mul nsw i32 %conv2.i.us.2, %5 - %mul9.i.us.2 = mul nsw i32 %conv2.i.us.2, %4 - %418 = sext i32 %mul9.i.us.2 to i64 - br i1 %cmp.i.us.2, label %pregion_for_entry.entry.i.us.us.2.preheader, label %pregion_for_end.i.us.2 - -pregion_for_entry.entry.i.us.us.2.preheader: ; preds = %pregion_for_end.i.us.1 - br label %pregion_for_entry.entry.i.us.us.2 - -pregion_for_entry.entry.i.us.us.2: ; preds = %if.end.i.us.us.2, %pregion_for_entry.entry.i.us.us.2.preheader - %_local_id_x.0.us.us.2 = phi i64 [ %427, %if.end.i.us.us.2 ], [ 0, %pregion_for_entry.entry.i.us.us.2.preheader ] - %add1.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.0.us.us.2, %mul.i.i - %conv.i.us.us.2 = trunc i64 %add1.i.i.us.us.2 to i32 - %cmp4.i.us.us.2 = icmp slt i32 %conv.i.us.us.2, %5 - br i1 %cmp4.i.us.us.2, label %if.then.i.us.us.2, label %if.end.i.us.us.2 - -if.then.i.us.us.2: ; preds = %pregion_for_entry.entry.i.us.us.2 - %add.i.us.us.2 = add nsw i32 %mul.i.us.2, %conv.i.us.us.2 - %idxprom.i.us.us.2 = sext i32 %add.i.us.us.2 to i64 - %arrayidx.i.us.us.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.2 - %419 = load float, float* %arrayidx.i.us.us.2, align 4, !tbaa !12 - %mul6.i.us.us.2 = fmul float %419, %3 - store float %mul6.i.us.us.2, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.us.us.2 = mul nsw i32 %conv.i.us.us.2, %4 - %420 = sext i32 %mul14.i.us.us.2 to i64 - br label %for.body.i.us.us.2 - -for.body.i.us.us.2: ; preds = %for.body.i.us.us.2, %if.then.i.us.us.2 - %indvars.iv.next.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.us.us.2, %for.body.i.us.us.2 ], [ 0, %if.then.i.us.us.2 ] - %421 = phi float [ %426, %for.body.i.us.us.2 ], [ %mul6.i.us.us.2, %if.then.i.us.us.2 ] - %422 = add nsw i64 %indvars.iv.next.i3.us.us.2, %418 - %arrayidx12.i.us.us.2 = getelementptr inbounds float, float* %0, i64 %422 - %423 = load float, float* %arrayidx12.i.us.us.2, align 4, !tbaa !12 - %mul13.i.us.us.2 = fmul float %423, %2 - %424 = add nsw i64 %indvars.iv.next.i3.us.us.2, %420 - %arrayidx17.i.us.us.2 = getelementptr inbounds float, float* %0, i64 %424 - %425 = load float, float* %arrayidx17.i.us.us.2, align 4, !tbaa !12 - %426 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.2, float %425, float %421) #2 - store float %426, float* %arrayidx.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i3.us.us.2, 1 - %exitcond.not.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.us.us.2, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.2, label %if.end.i.us.us.2.loopexit, label %for.body.i.us.us.2, !llvm.loop !21 - -if.end.i.us.us.2.loopexit: ; preds = %for.body.i.us.us.2 - br label %if.end.i.us.us.2 - -if.end.i.us.us.2: ; preds = %if.end.i.us.us.2.loopexit, %pregion_for_entry.entry.i.us.us.2 - %427 = add nuw nsw i64 %_local_id_x.0.us.us.2, 1 - %exitcond.not.2 = icmp eq i64 %427, 32 - br i1 %exitcond.not.2, label %pregion_for_end.i.us.2.loopexit, label %pregion_for_entry.entry.i.us.us.2, !llvm.loop !19 - -pregion_for_end.i.us.2.loopexit: ; preds = %if.end.i.us.us.2 - br label %pregion_for_end.i.us.2 - -pregion_for_end.i.us.2: ; preds = %pregion_for_end.i.us.2.loopexit, %pregion_for_end.i.us.1 - %428 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.3 = or i32 %428, 3 - %cmp.i.us.3 = icmp slt i32 %conv2.i.us.3, %5 - %mul.i.us.3 = mul nsw i32 %conv2.i.us.3, %5 - %mul9.i.us.3 = mul nsw i32 %conv2.i.us.3, %4 - %429 = sext i32 %mul9.i.us.3 to i64 - br i1 %cmp.i.us.3, label %pregion_for_entry.entry.i.us.us.3.preheader, label %pregion_for_end.i.us.3 - -pregion_for_entry.entry.i.us.us.3.preheader: ; preds = %pregion_for_end.i.us.2 - br label %pregion_for_entry.entry.i.us.us.3 - -pregion_for_entry.entry.i.us.us.3: ; preds = %if.end.i.us.us.3, %pregion_for_entry.entry.i.us.us.3.preheader - %_local_id_x.0.us.us.3 = phi i64 [ %438, %if.end.i.us.us.3 ], [ 0, %pregion_for_entry.entry.i.us.us.3.preheader ] - %add1.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.0.us.us.3, %mul.i.i - %conv.i.us.us.3 = trunc i64 %add1.i.i.us.us.3 to i32 - %cmp4.i.us.us.3 = icmp slt i32 %conv.i.us.us.3, %5 - br i1 %cmp4.i.us.us.3, label %if.then.i.us.us.3, label %if.end.i.us.us.3 - -if.then.i.us.us.3: ; preds = %pregion_for_entry.entry.i.us.us.3 - %add.i.us.us.3 = add nsw i32 %mul.i.us.3, %conv.i.us.us.3 - %idxprom.i.us.us.3 = sext i32 %add.i.us.us.3 to i64 - %arrayidx.i.us.us.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.3 - %430 = load float, float* %arrayidx.i.us.us.3, align 4, !tbaa !12 - %mul6.i.us.us.3 = fmul float %430, %3 - store float %mul6.i.us.us.3, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.us.us.3 = mul nsw i32 %conv.i.us.us.3, %4 - %431 = sext i32 %mul14.i.us.us.3 to i64 - br label %for.body.i.us.us.3 - -for.body.i.us.us.3: ; preds = %for.body.i.us.us.3, %if.then.i.us.us.3 - %indvars.iv.next.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.us.us.3, %for.body.i.us.us.3 ], [ 0, %if.then.i.us.us.3 ] - %432 = phi float [ %437, %for.body.i.us.us.3 ], [ %mul6.i.us.us.3, %if.then.i.us.us.3 ] - %433 = add nsw i64 %indvars.iv.next.i3.us.us.3, %429 - %arrayidx12.i.us.us.3 = getelementptr inbounds float, float* %0, i64 %433 - %434 = load float, float* %arrayidx12.i.us.us.3, align 4, !tbaa !12 - %mul13.i.us.us.3 = fmul float %434, %2 - %435 = add nsw i64 %indvars.iv.next.i3.us.us.3, %431 - %arrayidx17.i.us.us.3 = getelementptr inbounds float, float* %0, i64 %435 - %436 = load float, float* %arrayidx17.i.us.us.3, align 4, !tbaa !12 - %437 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.3, float %436, float %432) #2 - store float %437, float* %arrayidx.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i3.us.us.3, 1 - %exitcond.not.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.us.us.3, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.3, label %if.end.i.us.us.3.loopexit, label %for.body.i.us.us.3, !llvm.loop !21 - -if.end.i.us.us.3.loopexit: ; preds = %for.body.i.us.us.3 - br label %if.end.i.us.us.3 - -if.end.i.us.us.3: ; preds = %if.end.i.us.us.3.loopexit, %pregion_for_entry.entry.i.us.us.3 - %438 = add nuw nsw i64 %_local_id_x.0.us.us.3, 1 - %exitcond.not.3 = icmp eq i64 %438, 32 - br i1 %exitcond.not.3, label %pregion_for_end.i.us.3.loopexit, label %pregion_for_entry.entry.i.us.us.3, !llvm.loop !19 - -pregion_for_end.i.us.3.loopexit: ; preds = %if.end.i.us.us.3 - br label %pregion_for_end.i.us.3 - -pregion_for_end.i.us.3: ; preds = %pregion_for_end.i.us.3.loopexit, %pregion_for_end.i.us.2 - %439 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.4 = or i32 %439, 4 - %cmp.i.us.4 = icmp slt i32 %conv2.i.us.4, %5 - %mul.i.us.4 = mul nsw i32 %conv2.i.us.4, %5 - %mul9.i.us.4 = mul nsw i32 %conv2.i.us.4, %4 - %440 = sext i32 %mul9.i.us.4 to i64 - br i1 %cmp.i.us.4, label %pregion_for_entry.entry.i.us.us.4.preheader, label %pregion_for_end.i.us.4 - -pregion_for_entry.entry.i.us.us.4.preheader: ; preds = %pregion_for_end.i.us.3 - br label %pregion_for_entry.entry.i.us.us.4 - -pregion_for_entry.entry.i.us.us.4: ; preds = %if.end.i.us.us.4, %pregion_for_entry.entry.i.us.us.4.preheader - %_local_id_x.0.us.us.4 = phi i64 [ %449, %if.end.i.us.us.4 ], [ 0, %pregion_for_entry.entry.i.us.us.4.preheader ] - %add1.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.0.us.us.4, %mul.i.i - %conv.i.us.us.4 = trunc i64 %add1.i.i.us.us.4 to i32 - %cmp4.i.us.us.4 = icmp slt i32 %conv.i.us.us.4, %5 - br i1 %cmp4.i.us.us.4, label %if.then.i.us.us.4, label %if.end.i.us.us.4 - -if.then.i.us.us.4: ; preds = %pregion_for_entry.entry.i.us.us.4 - %add.i.us.us.4 = add nsw i32 %mul.i.us.4, %conv.i.us.us.4 - %idxprom.i.us.us.4 = sext i32 %add.i.us.us.4 to i64 - %arrayidx.i.us.us.4 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.4 - %441 = load float, float* %arrayidx.i.us.us.4, align 4, !tbaa !12 - %mul6.i.us.us.4 = fmul float %441, %3 - store float %mul6.i.us.us.4, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.us.us.4 = mul nsw i32 %conv.i.us.us.4, %4 - %442 = sext i32 %mul14.i.us.us.4 to i64 - br label %for.body.i.us.us.4 - -for.body.i.us.us.4: ; preds = %for.body.i.us.us.4, %if.then.i.us.us.4 - %indvars.iv.next.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.us.us.4, %for.body.i.us.us.4 ], [ 0, %if.then.i.us.us.4 ] - %443 = phi float [ %448, %for.body.i.us.us.4 ], [ %mul6.i.us.us.4, %if.then.i.us.us.4 ] - %444 = add nsw i64 %indvars.iv.next.i3.us.us.4, %440 - %arrayidx12.i.us.us.4 = getelementptr inbounds float, float* %0, i64 %444 - %445 = load float, float* %arrayidx12.i.us.us.4, align 4, !tbaa !12 - %mul13.i.us.us.4 = fmul float %445, %2 - %446 = add nsw i64 %indvars.iv.next.i3.us.us.4, %442 - %arrayidx17.i.us.us.4 = getelementptr inbounds float, float* %0, i64 %446 - %447 = load float, float* %arrayidx17.i.us.us.4, align 4, !tbaa !12 - %448 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.4, float %447, float %443) #2 - store float %448, float* %arrayidx.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i3.us.us.4, 1 - %exitcond.not.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.us.us.4, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.4, label %if.end.i.us.us.4.loopexit, label %for.body.i.us.us.4, !llvm.loop !21 - -if.end.i.us.us.4.loopexit: ; preds = %for.body.i.us.us.4 - br label %if.end.i.us.us.4 - -if.end.i.us.us.4: ; preds = %if.end.i.us.us.4.loopexit, %pregion_for_entry.entry.i.us.us.4 - %449 = add nuw nsw i64 %_local_id_x.0.us.us.4, 1 - %exitcond.not.4 = icmp eq i64 %449, 32 - br i1 %exitcond.not.4, label %pregion_for_end.i.us.4.loopexit, label %pregion_for_entry.entry.i.us.us.4, !llvm.loop !19 - -pregion_for_end.i.us.4.loopexit: ; preds = %if.end.i.us.us.4 - br label %pregion_for_end.i.us.4 - -pregion_for_end.i.us.4: ; preds = %pregion_for_end.i.us.4.loopexit, %pregion_for_end.i.us.3 - %450 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.5 = or i32 %450, 5 - %cmp.i.us.5 = icmp slt i32 %conv2.i.us.5, %5 - %mul.i.us.5 = mul nsw i32 %conv2.i.us.5, %5 - %mul9.i.us.5 = mul nsw i32 %conv2.i.us.5, %4 - %451 = sext i32 %mul9.i.us.5 to i64 - br i1 %cmp.i.us.5, label %pregion_for_entry.entry.i.us.us.5.preheader, label %pregion_for_end.i.us.5 - -pregion_for_entry.entry.i.us.us.5.preheader: ; preds = %pregion_for_end.i.us.4 - br label %pregion_for_entry.entry.i.us.us.5 - -pregion_for_entry.entry.i.us.us.5: ; preds = %if.end.i.us.us.5, %pregion_for_entry.entry.i.us.us.5.preheader - %_local_id_x.0.us.us.5 = phi i64 [ %460, %if.end.i.us.us.5 ], [ 0, %pregion_for_entry.entry.i.us.us.5.preheader ] - %add1.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.0.us.us.5, %mul.i.i - %conv.i.us.us.5 = trunc i64 %add1.i.i.us.us.5 to i32 - %cmp4.i.us.us.5 = icmp slt i32 %conv.i.us.us.5, %5 - br i1 %cmp4.i.us.us.5, label %if.then.i.us.us.5, label %if.end.i.us.us.5 - -if.then.i.us.us.5: ; preds = %pregion_for_entry.entry.i.us.us.5 - %add.i.us.us.5 = add nsw i32 %mul.i.us.5, %conv.i.us.us.5 - %idxprom.i.us.us.5 = sext i32 %add.i.us.us.5 to i64 - %arrayidx.i.us.us.5 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.5 - %452 = load float, float* %arrayidx.i.us.us.5, align 4, !tbaa !12 - %mul6.i.us.us.5 = fmul float %452, %3 - store float %mul6.i.us.us.5, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.us.us.5 = mul nsw i32 %conv.i.us.us.5, %4 - %453 = sext i32 %mul14.i.us.us.5 to i64 - br label %for.body.i.us.us.5 - -for.body.i.us.us.5: ; preds = %for.body.i.us.us.5, %if.then.i.us.us.5 - %indvars.iv.next.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.us.us.5, %for.body.i.us.us.5 ], [ 0, %if.then.i.us.us.5 ] - %454 = phi float [ %459, %for.body.i.us.us.5 ], [ %mul6.i.us.us.5, %if.then.i.us.us.5 ] - %455 = add nsw i64 %indvars.iv.next.i3.us.us.5, %451 - %arrayidx12.i.us.us.5 = getelementptr inbounds float, float* %0, i64 %455 - %456 = load float, float* %arrayidx12.i.us.us.5, align 4, !tbaa !12 - %mul13.i.us.us.5 = fmul float %456, %2 - %457 = add nsw i64 %indvars.iv.next.i3.us.us.5, %453 - %arrayidx17.i.us.us.5 = getelementptr inbounds float, float* %0, i64 %457 - %458 = load float, float* %arrayidx17.i.us.us.5, align 4, !tbaa !12 - %459 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.5, float %458, float %454) #2 - store float %459, float* %arrayidx.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i3.us.us.5, 1 - %exitcond.not.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.us.us.5, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.5, label %if.end.i.us.us.5.loopexit, label %for.body.i.us.us.5, !llvm.loop !21 - -if.end.i.us.us.5.loopexit: ; preds = %for.body.i.us.us.5 - br label %if.end.i.us.us.5 - -if.end.i.us.us.5: ; preds = %if.end.i.us.us.5.loopexit, %pregion_for_entry.entry.i.us.us.5 - %460 = add nuw nsw i64 %_local_id_x.0.us.us.5, 1 - %exitcond.not.5 = icmp eq i64 %460, 32 - br i1 %exitcond.not.5, label %pregion_for_end.i.us.5.loopexit, label %pregion_for_entry.entry.i.us.us.5, !llvm.loop !19 - -pregion_for_end.i.us.5.loopexit: ; preds = %if.end.i.us.us.5 - br label %pregion_for_end.i.us.5 - -pregion_for_end.i.us.5: ; preds = %pregion_for_end.i.us.5.loopexit, %pregion_for_end.i.us.4 - %461 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.6 = or i32 %461, 6 - %cmp.i.us.6 = icmp slt i32 %conv2.i.us.6, %5 - %mul.i.us.6 = mul nsw i32 %conv2.i.us.6, %5 - %mul9.i.us.6 = mul nsw i32 %conv2.i.us.6, %4 - %462 = sext i32 %mul9.i.us.6 to i64 - br i1 %cmp.i.us.6, label %pregion_for_entry.entry.i.us.us.6.preheader, label %pregion_for_end.i.us.6 - -pregion_for_entry.entry.i.us.us.6.preheader: ; preds = %pregion_for_end.i.us.5 - br label %pregion_for_entry.entry.i.us.us.6 - -pregion_for_entry.entry.i.us.us.6: ; preds = %if.end.i.us.us.6, %pregion_for_entry.entry.i.us.us.6.preheader - %_local_id_x.0.us.us.6 = phi i64 [ %471, %if.end.i.us.us.6 ], [ 0, %pregion_for_entry.entry.i.us.us.6.preheader ] - %add1.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.0.us.us.6, %mul.i.i - %conv.i.us.us.6 = trunc i64 %add1.i.i.us.us.6 to i32 - %cmp4.i.us.us.6 = icmp slt i32 %conv.i.us.us.6, %5 - br i1 %cmp4.i.us.us.6, label %if.then.i.us.us.6, label %if.end.i.us.us.6 - -if.then.i.us.us.6: ; preds = %pregion_for_entry.entry.i.us.us.6 - %add.i.us.us.6 = add nsw i32 %mul.i.us.6, %conv.i.us.us.6 - %idxprom.i.us.us.6 = sext i32 %add.i.us.us.6 to i64 - %arrayidx.i.us.us.6 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.6 - %463 = load float, float* %arrayidx.i.us.us.6, align 4, !tbaa !12 - %mul6.i.us.us.6 = fmul float %463, %3 - store float %mul6.i.us.us.6, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.us.us.6 = mul nsw i32 %conv.i.us.us.6, %4 - %464 = sext i32 %mul14.i.us.us.6 to i64 - br label %for.body.i.us.us.6 - -for.body.i.us.us.6: ; preds = %for.body.i.us.us.6, %if.then.i.us.us.6 - %indvars.iv.next.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.us.us.6, %for.body.i.us.us.6 ], [ 0, %if.then.i.us.us.6 ] - %465 = phi float [ %470, %for.body.i.us.us.6 ], [ %mul6.i.us.us.6, %if.then.i.us.us.6 ] - %466 = add nsw i64 %indvars.iv.next.i3.us.us.6, %462 - %arrayidx12.i.us.us.6 = getelementptr inbounds float, float* %0, i64 %466 - %467 = load float, float* %arrayidx12.i.us.us.6, align 4, !tbaa !12 - %mul13.i.us.us.6 = fmul float %467, %2 - %468 = add nsw i64 %indvars.iv.next.i3.us.us.6, %464 - %arrayidx17.i.us.us.6 = getelementptr inbounds float, float* %0, i64 %468 - %469 = load float, float* %arrayidx17.i.us.us.6, align 4, !tbaa !12 - %470 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.6, float %469, float %465) #2 - store float %470, float* %arrayidx.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i3.us.us.6, 1 - %exitcond.not.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.us.us.6, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.6, label %if.end.i.us.us.6.loopexit, label %for.body.i.us.us.6, !llvm.loop !21 - -if.end.i.us.us.6.loopexit: ; preds = %for.body.i.us.us.6 - br label %if.end.i.us.us.6 - -if.end.i.us.us.6: ; preds = %if.end.i.us.us.6.loopexit, %pregion_for_entry.entry.i.us.us.6 - %471 = add nuw nsw i64 %_local_id_x.0.us.us.6, 1 - %exitcond.not.6 = icmp eq i64 %471, 32 - br i1 %exitcond.not.6, label %pregion_for_end.i.us.6.loopexit, label %pregion_for_entry.entry.i.us.us.6, !llvm.loop !19 - -pregion_for_end.i.us.6.loopexit: ; preds = %if.end.i.us.us.6 - br label %pregion_for_end.i.us.6 - -pregion_for_end.i.us.6: ; preds = %pregion_for_end.i.us.6.loopexit, %pregion_for_end.i.us.5 - %472 = trunc i64 %mul3.i.i to i32 - %conv2.i.us.7 = or i32 %472, 7 - %cmp.i.us.7 = icmp slt i32 %conv2.i.us.7, %5 - %mul.i.us.7 = mul nsw i32 %conv2.i.us.7, %5 - %mul9.i.us.7 = mul nsw i32 %conv2.i.us.7, %4 - %473 = sext i32 %mul9.i.us.7 to i64 - br i1 %cmp.i.us.7, label %pregion_for_entry.entry.i.us.us.7.preheader, label %syrk_kernel.exit - -pregion_for_entry.entry.i.us.us.7.preheader: ; preds = %pregion_for_end.i.us.6 - br label %pregion_for_entry.entry.i.us.us.7 - -pregion_for_entry.entry.i.us.us.7: ; preds = %if.end.i.us.us.7, %pregion_for_entry.entry.i.us.us.7.preheader - %_local_id_x.0.us.us.7 = phi i64 [ %482, %if.end.i.us.us.7 ], [ 0, %pregion_for_entry.entry.i.us.us.7.preheader ] - %add1.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.0.us.us.7, %mul.i.i - %conv.i.us.us.7 = trunc i64 %add1.i.i.us.us.7 to i32 - %cmp4.i.us.us.7 = icmp slt i32 %conv.i.us.us.7, %5 - br i1 %cmp4.i.us.us.7, label %if.then.i.us.us.7, label %if.end.i.us.us.7 - -if.then.i.us.us.7: ; preds = %pregion_for_entry.entry.i.us.us.7 - %add.i.us.us.7 = add nsw i32 %mul.i.us.7, %conv.i.us.us.7 - %idxprom.i.us.us.7 = sext i32 %add.i.us.us.7 to i64 - %arrayidx.i.us.us.7 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.us.7 - %474 = load float, float* %arrayidx.i.us.us.7, align 4, !tbaa !12 - %mul6.i.us.us.7 = fmul float %474, %3 - store float %mul6.i.us.us.7, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.us.us.7 = mul nsw i32 %conv.i.us.us.7, %4 - %475 = sext i32 %mul14.i.us.us.7 to i64 - br label %for.body.i.us.us.7 - -for.body.i.us.us.7: ; preds = %for.body.i.us.us.7, %if.then.i.us.us.7 - %indvars.iv.next.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.us.us.7, %for.body.i.us.us.7 ], [ 0, %if.then.i.us.us.7 ] - %476 = phi float [ %481, %for.body.i.us.us.7 ], [ %mul6.i.us.us.7, %if.then.i.us.us.7 ] - %477 = add nsw i64 %indvars.iv.next.i3.us.us.7, %473 - %arrayidx12.i.us.us.7 = getelementptr inbounds float, float* %0, i64 %477 - %478 = load float, float* %arrayidx12.i.us.us.7, align 4, !tbaa !12 - %mul13.i.us.us.7 = fmul float %478, %2 - %479 = add nsw i64 %indvars.iv.next.i3.us.us.7, %475 - %arrayidx17.i.us.us.7 = getelementptr inbounds float, float* %0, i64 %479 - %480 = load float, float* %arrayidx17.i.us.us.7, align 4, !tbaa !12 - %481 = tail call float @llvm.fmuladd.f32(float %mul13.i.us.us.7, float %480, float %476) #2 - store float %481, float* %arrayidx.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i3.us.us.7, 1 - %exitcond.not.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.us.us.7, %wide.trip.count.i - br i1 %exitcond.not.i.us.us.7, label %if.end.i.us.us.7.loopexit, label %for.body.i.us.us.7, !llvm.loop !21 - -if.end.i.us.us.7.loopexit: ; preds = %for.body.i.us.us.7 - br label %if.end.i.us.us.7 - -if.end.i.us.us.7: ; preds = %if.end.i.us.us.7.loopexit, %pregion_for_entry.entry.i.us.us.7 - %482 = add nuw nsw i64 %_local_id_x.0.us.us.7, 1 - %exitcond.not.7 = icmp eq i64 %482, 32 - br i1 %exitcond.not.7, label %syrk_kernel.exit.loopexit, label %pregion_for_entry.entry.i.us.us.7, !llvm.loop !19 - -if.then.i.us.7.1: ; preds = %if.end.i.us.7 - %add.i.us.7.1 = add nsw i32 %mul.i.7, %conv.i.us.7.1 - %idxprom.i.us.7.1 = sext i32 %add.i.us.7.1 to i64 - %arrayidx.i.us.7.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.7.1 - %483 = load float, float* %arrayidx.i.us.7.1, align 4, !tbaa !12 - %mul6.i.us.7.1 = fmul float %483, %3 - store float %mul6.i.us.7.1, float* %arrayidx.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7.1 - -if.end.i.us.7.1: ; preds = %if.then.i.us.7.1, %if.end.i.us.7 - %484 = or i64 %_local_id_x.0.us.7, 2 - %add1.i.i.us.7.2 = add nuw nsw i64 %484, %mul.i.i - %conv.i.us.7.2 = trunc i64 %add1.i.i.us.7.2 to i32 - %cmp4.i.us.7.2 = icmp slt i32 %conv.i.us.7.2, %5 - br i1 %cmp4.i.us.7.2, label %if.then.i.us.7.2, label %if.end.i.us.7.2 - -if.then.i.us.7.2: ; preds = %if.end.i.us.7.1 - %add.i.us.7.2 = add nsw i32 %mul.i.7, %conv.i.us.7.2 - %idxprom.i.us.7.2 = sext i32 %add.i.us.7.2 to i64 - %arrayidx.i.us.7.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.7.2 - %485 = load float, float* %arrayidx.i.us.7.2, align 4, !tbaa !12 - %mul6.i.us.7.2 = fmul float %485, %3 - store float %mul6.i.us.7.2, float* %arrayidx.i.us.7.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7.2 - -if.end.i.us.7.2: ; preds = %if.then.i.us.7.2, %if.end.i.us.7.1 - %486 = or i64 %_local_id_x.0.us.7, 3 - %add1.i.i.us.7.3 = add nuw nsw i64 %486, %mul.i.i - %conv.i.us.7.3 = trunc i64 %add1.i.i.us.7.3 to i32 - %cmp4.i.us.7.3 = icmp slt i32 %conv.i.us.7.3, %5 - br i1 %cmp4.i.us.7.3, label %if.then.i.us.7.3, label %if.end.i.us.7.3 - -if.then.i.us.7.3: ; preds = %if.end.i.us.7.2 - %add.i.us.7.3 = add nsw i32 %mul.i.7, %conv.i.us.7.3 - %idxprom.i.us.7.3 = sext i32 %add.i.us.7.3 to i64 - %arrayidx.i.us.7.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.7.3 - %487 = load float, float* %arrayidx.i.us.7.3, align 4, !tbaa !12 - %mul6.i.us.7.3 = fmul float %487, %3 - store float %mul6.i.us.7.3, float* %arrayidx.i.us.7.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.7.3 - -if.end.i.us.7.3: ; preds = %if.then.i.us.7.3, %if.end.i.us.7.2 - %488 = add nuw nsw i64 %_local_id_x.0.us.7, 4 - %exitcond34.7.not.3 = icmp eq i64 %488, 32 - br i1 %exitcond34.7.not.3, label %syrk_kernel.exit.loopexit238, label %pregion_for_entry.entry.i.us.7, !llvm.loop !23 - -if.then.i.us.6.1: ; preds = %if.end.i.us.6 - %add.i.us.6.1 = add nsw i32 %mul.i.6, %conv.i.us.6.1 - %idxprom.i.us.6.1 = sext i32 %add.i.us.6.1 to i64 - %arrayidx.i.us.6.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.6.1 - %489 = load float, float* %arrayidx.i.us.6.1, align 4, !tbaa !12 - %mul6.i.us.6.1 = fmul float %489, %3 - store float %mul6.i.us.6.1, float* %arrayidx.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6.1 - -if.end.i.us.6.1: ; preds = %if.then.i.us.6.1, %if.end.i.us.6 - %490 = or i64 %_local_id_x.0.us.6, 2 - %add1.i.i.us.6.2 = add nuw nsw i64 %490, %mul.i.i - %conv.i.us.6.2 = trunc i64 %add1.i.i.us.6.2 to i32 - %cmp4.i.us.6.2 = icmp slt i32 %conv.i.us.6.2, %5 - br i1 %cmp4.i.us.6.2, label %if.then.i.us.6.2, label %if.end.i.us.6.2 - -if.then.i.us.6.2: ; preds = %if.end.i.us.6.1 - %add.i.us.6.2 = add nsw i32 %mul.i.6, %conv.i.us.6.2 - %idxprom.i.us.6.2 = sext i32 %add.i.us.6.2 to i64 - %arrayidx.i.us.6.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.6.2 - %491 = load float, float* %arrayidx.i.us.6.2, align 4, !tbaa !12 - %mul6.i.us.6.2 = fmul float %491, %3 - store float %mul6.i.us.6.2, float* %arrayidx.i.us.6.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6.2 - -if.end.i.us.6.2: ; preds = %if.then.i.us.6.2, %if.end.i.us.6.1 - %492 = or i64 %_local_id_x.0.us.6, 3 - %add1.i.i.us.6.3 = add nuw nsw i64 %492, %mul.i.i - %conv.i.us.6.3 = trunc i64 %add1.i.i.us.6.3 to i32 - %cmp4.i.us.6.3 = icmp slt i32 %conv.i.us.6.3, %5 - br i1 %cmp4.i.us.6.3, label %if.then.i.us.6.3, label %if.end.i.us.6.3 - -if.then.i.us.6.3: ; preds = %if.end.i.us.6.2 - %add.i.us.6.3 = add nsw i32 %mul.i.6, %conv.i.us.6.3 - %idxprom.i.us.6.3 = sext i32 %add.i.us.6.3 to i64 - %arrayidx.i.us.6.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.6.3 - %493 = load float, float* %arrayidx.i.us.6.3, align 4, !tbaa !12 - %mul6.i.us.6.3 = fmul float %493, %3 - store float %mul6.i.us.6.3, float* %arrayidx.i.us.6.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.6.3 - -if.end.i.us.6.3: ; preds = %if.then.i.us.6.3, %if.end.i.us.6.2 - %494 = add nuw nsw i64 %_local_id_x.0.us.6, 4 - %exitcond34.6.not.3 = icmp eq i64 %494, 32 - br i1 %exitcond34.6.not.3, label %pregion_for_end.i.6.loopexit, label %pregion_for_entry.entry.i.us.6, !llvm.loop !25 - -if.then.i.us.5.1: ; preds = %if.end.i.us.5 - %add.i.us.5.1 = add nsw i32 %mul.i.5, %conv.i.us.5.1 - %idxprom.i.us.5.1 = sext i32 %add.i.us.5.1 to i64 - %arrayidx.i.us.5.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.5.1 - %495 = load float, float* %arrayidx.i.us.5.1, align 4, !tbaa !12 - %mul6.i.us.5.1 = fmul float %495, %3 - store float %mul6.i.us.5.1, float* %arrayidx.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5.1 - -if.end.i.us.5.1: ; preds = %if.then.i.us.5.1, %if.end.i.us.5 - %496 = or i64 %_local_id_x.0.us.5, 2 - %add1.i.i.us.5.2 = add nuw nsw i64 %496, %mul.i.i - %conv.i.us.5.2 = trunc i64 %add1.i.i.us.5.2 to i32 - %cmp4.i.us.5.2 = icmp slt i32 %conv.i.us.5.2, %5 - br i1 %cmp4.i.us.5.2, label %if.then.i.us.5.2, label %if.end.i.us.5.2 - -if.then.i.us.5.2: ; preds = %if.end.i.us.5.1 - %add.i.us.5.2 = add nsw i32 %mul.i.5, %conv.i.us.5.2 - %idxprom.i.us.5.2 = sext i32 %add.i.us.5.2 to i64 - %arrayidx.i.us.5.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.5.2 - %497 = load float, float* %arrayidx.i.us.5.2, align 4, !tbaa !12 - %mul6.i.us.5.2 = fmul float %497, %3 - store float %mul6.i.us.5.2, float* %arrayidx.i.us.5.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5.2 - -if.end.i.us.5.2: ; preds = %if.then.i.us.5.2, %if.end.i.us.5.1 - %498 = or i64 %_local_id_x.0.us.5, 3 - %add1.i.i.us.5.3 = add nuw nsw i64 %498, %mul.i.i - %conv.i.us.5.3 = trunc i64 %add1.i.i.us.5.3 to i32 - %cmp4.i.us.5.3 = icmp slt i32 %conv.i.us.5.3, %5 - br i1 %cmp4.i.us.5.3, label %if.then.i.us.5.3, label %if.end.i.us.5.3 - -if.then.i.us.5.3: ; preds = %if.end.i.us.5.2 - %add.i.us.5.3 = add nsw i32 %mul.i.5, %conv.i.us.5.3 - %idxprom.i.us.5.3 = sext i32 %add.i.us.5.3 to i64 - %arrayidx.i.us.5.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.5.3 - %499 = load float, float* %arrayidx.i.us.5.3, align 4, !tbaa !12 - %mul6.i.us.5.3 = fmul float %499, %3 - store float %mul6.i.us.5.3, float* %arrayidx.i.us.5.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.5.3 - -if.end.i.us.5.3: ; preds = %if.then.i.us.5.3, %if.end.i.us.5.2 - %500 = add nuw nsw i64 %_local_id_x.0.us.5, 4 - %exitcond34.5.not.3 = icmp eq i64 %500, 32 - br i1 %exitcond34.5.not.3, label %pregion_for_end.i.5.loopexit, label %pregion_for_entry.entry.i.us.5, !llvm.loop !26 - -if.then.i.us.4.1: ; preds = %if.end.i.us.4 - %add.i.us.4.1 = add nsw i32 %mul.i.4, %conv.i.us.4.1 - %idxprom.i.us.4.1 = sext i32 %add.i.us.4.1 to i64 - %arrayidx.i.us.4.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.4.1 - %501 = load float, float* %arrayidx.i.us.4.1, align 4, !tbaa !12 - %mul6.i.us.4.1 = fmul float %501, %3 - store float %mul6.i.us.4.1, float* %arrayidx.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4.1 - -if.end.i.us.4.1: ; preds = %if.then.i.us.4.1, %if.end.i.us.4 - %502 = or i64 %_local_id_x.0.us.4, 2 - %add1.i.i.us.4.2 = add nuw nsw i64 %502, %mul.i.i - %conv.i.us.4.2 = trunc i64 %add1.i.i.us.4.2 to i32 - %cmp4.i.us.4.2 = icmp slt i32 %conv.i.us.4.2, %5 - br i1 %cmp4.i.us.4.2, label %if.then.i.us.4.2, label %if.end.i.us.4.2 - -if.then.i.us.4.2: ; preds = %if.end.i.us.4.1 - %add.i.us.4.2 = add nsw i32 %mul.i.4, %conv.i.us.4.2 - %idxprom.i.us.4.2 = sext i32 %add.i.us.4.2 to i64 - %arrayidx.i.us.4.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.4.2 - %503 = load float, float* %arrayidx.i.us.4.2, align 4, !tbaa !12 - %mul6.i.us.4.2 = fmul float %503, %3 - store float %mul6.i.us.4.2, float* %arrayidx.i.us.4.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4.2 - -if.end.i.us.4.2: ; preds = %if.then.i.us.4.2, %if.end.i.us.4.1 - %504 = or i64 %_local_id_x.0.us.4, 3 - %add1.i.i.us.4.3 = add nuw nsw i64 %504, %mul.i.i - %conv.i.us.4.3 = trunc i64 %add1.i.i.us.4.3 to i32 - %cmp4.i.us.4.3 = icmp slt i32 %conv.i.us.4.3, %5 - br i1 %cmp4.i.us.4.3, label %if.then.i.us.4.3, label %if.end.i.us.4.3 - -if.then.i.us.4.3: ; preds = %if.end.i.us.4.2 - %add.i.us.4.3 = add nsw i32 %mul.i.4, %conv.i.us.4.3 - %idxprom.i.us.4.3 = sext i32 %add.i.us.4.3 to i64 - %arrayidx.i.us.4.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.4.3 - %505 = load float, float* %arrayidx.i.us.4.3, align 4, !tbaa !12 - %mul6.i.us.4.3 = fmul float %505, %3 - store float %mul6.i.us.4.3, float* %arrayidx.i.us.4.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.4.3 - -if.end.i.us.4.3: ; preds = %if.then.i.us.4.3, %if.end.i.us.4.2 - %506 = add nuw nsw i64 %_local_id_x.0.us.4, 4 - %exitcond34.4.not.3 = icmp eq i64 %506, 32 - br i1 %exitcond34.4.not.3, label %pregion_for_end.i.4.loopexit, label %pregion_for_entry.entry.i.us.4, !llvm.loop !27 - -if.then.i.us.3.1: ; preds = %if.end.i.us.3 - %add.i.us.3.1 = add nsw i32 %mul.i.3, %conv.i.us.3.1 - %idxprom.i.us.3.1 = sext i32 %add.i.us.3.1 to i64 - %arrayidx.i.us.3.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.3.1 - %507 = load float, float* %arrayidx.i.us.3.1, align 4, !tbaa !12 - %mul6.i.us.3.1 = fmul float %507, %3 - store float %mul6.i.us.3.1, float* %arrayidx.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3.1 - -if.end.i.us.3.1: ; preds = %if.then.i.us.3.1, %if.end.i.us.3 - %508 = or i64 %_local_id_x.0.us.3, 2 - %add1.i.i.us.3.2 = add nuw nsw i64 %508, %mul.i.i - %conv.i.us.3.2 = trunc i64 %add1.i.i.us.3.2 to i32 - %cmp4.i.us.3.2 = icmp slt i32 %conv.i.us.3.2, %5 - br i1 %cmp4.i.us.3.2, label %if.then.i.us.3.2, label %if.end.i.us.3.2 - -if.then.i.us.3.2: ; preds = %if.end.i.us.3.1 - %add.i.us.3.2 = add nsw i32 %mul.i.3, %conv.i.us.3.2 - %idxprom.i.us.3.2 = sext i32 %add.i.us.3.2 to i64 - %arrayidx.i.us.3.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.3.2 - %509 = load float, float* %arrayidx.i.us.3.2, align 4, !tbaa !12 - %mul6.i.us.3.2 = fmul float %509, %3 - store float %mul6.i.us.3.2, float* %arrayidx.i.us.3.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3.2 - -if.end.i.us.3.2: ; preds = %if.then.i.us.3.2, %if.end.i.us.3.1 - %510 = or i64 %_local_id_x.0.us.3, 3 - %add1.i.i.us.3.3 = add nuw nsw i64 %510, %mul.i.i - %conv.i.us.3.3 = trunc i64 %add1.i.i.us.3.3 to i32 - %cmp4.i.us.3.3 = icmp slt i32 %conv.i.us.3.3, %5 - br i1 %cmp4.i.us.3.3, label %if.then.i.us.3.3, label %if.end.i.us.3.3 - -if.then.i.us.3.3: ; preds = %if.end.i.us.3.2 - %add.i.us.3.3 = add nsw i32 %mul.i.3, %conv.i.us.3.3 - %idxprom.i.us.3.3 = sext i32 %add.i.us.3.3 to i64 - %arrayidx.i.us.3.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.3.3 - %511 = load float, float* %arrayidx.i.us.3.3, align 4, !tbaa !12 - %mul6.i.us.3.3 = fmul float %511, %3 - store float %mul6.i.us.3.3, float* %arrayidx.i.us.3.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3.3 - -if.end.i.us.3.3: ; preds = %if.then.i.us.3.3, %if.end.i.us.3.2 - %512 = add nuw nsw i64 %_local_id_x.0.us.3, 4 - %exitcond34.3.not.3 = icmp eq i64 %512, 32 - br i1 %exitcond34.3.not.3, label %pregion_for_end.i.3.loopexit, label %pregion_for_entry.entry.i.us.3, !llvm.loop !28 - -if.then.i.us.2.1: ; preds = %if.end.i.us.2 - %add.i.us.2.1 = add nsw i32 %mul.i.2, %conv.i.us.2.1 - %idxprom.i.us.2.1 = sext i32 %add.i.us.2.1 to i64 - %arrayidx.i.us.2.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.2.1 - %513 = load float, float* %arrayidx.i.us.2.1, align 4, !tbaa !12 - %mul6.i.us.2.1 = fmul float %513, %3 - store float %mul6.i.us.2.1, float* %arrayidx.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2.1 - -if.end.i.us.2.1: ; preds = %if.then.i.us.2.1, %if.end.i.us.2 - %514 = or i64 %_local_id_x.0.us.2, 2 - %add1.i.i.us.2.2 = add nuw nsw i64 %514, %mul.i.i - %conv.i.us.2.2 = trunc i64 %add1.i.i.us.2.2 to i32 - %cmp4.i.us.2.2 = icmp slt i32 %conv.i.us.2.2, %5 - br i1 %cmp4.i.us.2.2, label %if.then.i.us.2.2, label %if.end.i.us.2.2 - -if.then.i.us.2.2: ; preds = %if.end.i.us.2.1 - %add.i.us.2.2 = add nsw i32 %mul.i.2, %conv.i.us.2.2 - %idxprom.i.us.2.2 = sext i32 %add.i.us.2.2 to i64 - %arrayidx.i.us.2.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.2.2 - %515 = load float, float* %arrayidx.i.us.2.2, align 4, !tbaa !12 - %mul6.i.us.2.2 = fmul float %515, %3 - store float %mul6.i.us.2.2, float* %arrayidx.i.us.2.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2.2 - -if.end.i.us.2.2: ; preds = %if.then.i.us.2.2, %if.end.i.us.2.1 - %516 = or i64 %_local_id_x.0.us.2, 3 - %add1.i.i.us.2.3 = add nuw nsw i64 %516, %mul.i.i - %conv.i.us.2.3 = trunc i64 %add1.i.i.us.2.3 to i32 - %cmp4.i.us.2.3 = icmp slt i32 %conv.i.us.2.3, %5 - br i1 %cmp4.i.us.2.3, label %if.then.i.us.2.3, label %if.end.i.us.2.3 - -if.then.i.us.2.3: ; preds = %if.end.i.us.2.2 - %add.i.us.2.3 = add nsw i32 %mul.i.2, %conv.i.us.2.3 - %idxprom.i.us.2.3 = sext i32 %add.i.us.2.3 to i64 - %arrayidx.i.us.2.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.2.3 - %517 = load float, float* %arrayidx.i.us.2.3, align 4, !tbaa !12 - %mul6.i.us.2.3 = fmul float %517, %3 - store float %mul6.i.us.2.3, float* %arrayidx.i.us.2.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2.3 - -if.end.i.us.2.3: ; preds = %if.then.i.us.2.3, %if.end.i.us.2.2 - %518 = add nuw nsw i64 %_local_id_x.0.us.2, 4 - %exitcond34.2.not.3 = icmp eq i64 %518, 32 - br i1 %exitcond34.2.not.3, label %pregion_for_end.i.2.loopexit, label %pregion_for_entry.entry.i.us.2, !llvm.loop !29 - -if.then.i.us.1.1: ; preds = %if.end.i.us.1 - %add.i.us.1.1 = add nsw i32 %mul.i.1, %conv.i.us.1.1 - %idxprom.i.us.1.1 = sext i32 %add.i.us.1.1 to i64 - %arrayidx.i.us.1.1 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.1.1 - %519 = load float, float* %arrayidx.i.us.1.1, align 4, !tbaa !12 - %mul6.i.us.1.1 = fmul float %519, %3 - store float %mul6.i.us.1.1, float* %arrayidx.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1.1 - -if.end.i.us.1.1: ; preds = %if.then.i.us.1.1, %if.end.i.us.1 - %520 = or i64 %_local_id_x.0.us.1, 2 - %add1.i.i.us.1.2 = add nuw nsw i64 %520, %mul.i.i - %conv.i.us.1.2 = trunc i64 %add1.i.i.us.1.2 to i32 - %cmp4.i.us.1.2 = icmp slt i32 %conv.i.us.1.2, %5 - br i1 %cmp4.i.us.1.2, label %if.then.i.us.1.2, label %if.end.i.us.1.2 - -if.then.i.us.1.2: ; preds = %if.end.i.us.1.1 - %add.i.us.1.2 = add nsw i32 %mul.i.1, %conv.i.us.1.2 - %idxprom.i.us.1.2 = sext i32 %add.i.us.1.2 to i64 - %arrayidx.i.us.1.2 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.1.2 - %521 = load float, float* %arrayidx.i.us.1.2, align 4, !tbaa !12 - %mul6.i.us.1.2 = fmul float %521, %3 - store float %mul6.i.us.1.2, float* %arrayidx.i.us.1.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1.2 - -if.end.i.us.1.2: ; preds = %if.then.i.us.1.2, %if.end.i.us.1.1 - %522 = or i64 %_local_id_x.0.us.1, 3 - %add1.i.i.us.1.3 = add nuw nsw i64 %522, %mul.i.i - %conv.i.us.1.3 = trunc i64 %add1.i.i.us.1.3 to i32 - %cmp4.i.us.1.3 = icmp slt i32 %conv.i.us.1.3, %5 - br i1 %cmp4.i.us.1.3, label %if.then.i.us.1.3, label %if.end.i.us.1.3 - -if.then.i.us.1.3: ; preds = %if.end.i.us.1.2 - %add.i.us.1.3 = add nsw i32 %mul.i.1, %conv.i.us.1.3 - %idxprom.i.us.1.3 = sext i32 %add.i.us.1.3 to i64 - %arrayidx.i.us.1.3 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.1.3 - %523 = load float, float* %arrayidx.i.us.1.3, align 4, !tbaa !12 - %mul6.i.us.1.3 = fmul float %523, %3 - store float %mul6.i.us.1.3, float* %arrayidx.i.us.1.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1.3 - -if.end.i.us.1.3: ; preds = %if.then.i.us.1.3, %if.end.i.us.1.2 - %524 = add nuw nsw i64 %_local_id_x.0.us.1, 4 - %exitcond34.1.not.3 = icmp eq i64 %524, 32 - br i1 %exitcond34.1.not.3, label %pregion_for_end.i.1.loopexit, label %pregion_for_entry.entry.i.us.1, !llvm.loop !30 - -if.then.i.us.1214: ; preds = %if.end.i.us - %add.i.us.1210 = add nsw i32 %mul.i.us, %conv.i.us.1207 - %idxprom.i.us.1211 = sext i32 %add.i.us.1210 to i64 - %arrayidx.i.us.1212 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.1211 - %525 = load float, float* %arrayidx.i.us.1212, align 4, !tbaa !12 - %mul6.i.us.1213 = fmul float %525, %3 - store float %mul6.i.us.1213, float* %arrayidx.i.us.1212, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.1215 - -if.end.i.us.1215: ; preds = %if.then.i.us.1214, %if.end.i.us - %526 = or i64 %_local_id_x.0.us, 2 - %add1.i.i.us.2217 = add nuw nsw i64 %526, %mul.i.i - %conv.i.us.2218 = trunc i64 %add1.i.i.us.2217 to i32 - %cmp4.i.us.2219 = icmp slt i32 %conv.i.us.2218, %5 - br i1 %cmp4.i.us.2219, label %if.then.i.us.2225, label %if.end.i.us.2226 - -if.then.i.us.2225: ; preds = %if.end.i.us.1215 - %add.i.us.2221 = add nsw i32 %mul.i.us, %conv.i.us.2218 - %idxprom.i.us.2222 = sext i32 %add.i.us.2221 to i64 - %arrayidx.i.us.2223 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.2222 - %527 = load float, float* %arrayidx.i.us.2223, align 4, !tbaa !12 - %mul6.i.us.2224 = fmul float %527, %3 - store float %mul6.i.us.2224, float* %arrayidx.i.us.2223, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.2226 - -if.end.i.us.2226: ; preds = %if.then.i.us.2225, %if.end.i.us.1215 - %528 = or i64 %_local_id_x.0.us, 3 - %add1.i.i.us.3228 = add nuw nsw i64 %528, %mul.i.i - %conv.i.us.3229 = trunc i64 %add1.i.i.us.3228 to i32 - %cmp4.i.us.3230 = icmp slt i32 %conv.i.us.3229, %5 - br i1 %cmp4.i.us.3230, label %if.then.i.us.3236, label %if.end.i.us.3237 - -if.then.i.us.3236: ; preds = %if.end.i.us.2226 - %add.i.us.3232 = add nsw i32 %mul.i.us, %conv.i.us.3229 - %idxprom.i.us.3233 = sext i32 %add.i.us.3232 to i64 - %arrayidx.i.us.3234 = getelementptr inbounds float, float* %1, i64 %idxprom.i.us.3233 - %529 = load float, float* %arrayidx.i.us.3234, align 4, !tbaa !12 - %mul6.i.us.3235 = fmul float %529, %3 - store float %mul6.i.us.3235, float* %arrayidx.i.us.3234, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.us.3237 - -if.end.i.us.3237: ; preds = %if.then.i.us.3236, %if.end.i.us.2226 - %530 = add nuw nsw i64 %_local_id_x.0.us, 4 - %exitcond34.not.3 = icmp eq i64 %530, 32 - br i1 %exitcond34.not.3, label %pregion_for_end.i.loopexit, label %pregion_for_entry.entry.i.us, !llvm.loop !31 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_syrk_kernel_workgroup(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float*** - %7 = load float**, float*** %6, align 8 - %8 = load float*, float** %7, align 8 - %9 = getelementptr i8*, i8** %0, i64 1 - %10 = bitcast i8** %9 to float*** - %11 = load float**, float*** %10, align 8 - %12 = load float*, float** %11, align 8 - %13 = getelementptr i8*, i8** %0, i64 2 - %14 = bitcast i8** %13 to float** - %15 = load float*, float** %14, align 8 - %16 = load float, float* %15, align 4 - %17 = getelementptr i8*, i8** %0, i64 3 - %18 = bitcast i8** %17 to float** - %19 = load float*, float** %18, align 8 - %20 = load float, float* %19, align 4 - %21 = getelementptr i8*, i8** %0, i64 4 - %22 = bitcast i8** %21 to i32** - %23 = load i32*, i32** %22, align 8 - %24 = load i32, i32* %23, align 4 - %25 = getelementptr i8*, i8** %0, i64 5 - %26 = bitcast i8** %25 to i32** - %27 = load i32*, i32** %26, align 8 - %28 = load i32, i32* %27, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp742.i.i = icmp sgt i32 %24, 0 - %wide.trip.count.i.i = zext i32 %24 to i64 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %28, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %28, %conv2.i.i.us - br i1 %cmp742.i.i, label %pregion_for_entry.pregion_for_init.i.i.us, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - br i1 %cmp.i.i.us, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i.preheader - %29 = trunc i64 %3 to i32 - %30 = mul i32 %28, %29 - %31 = shl i32 %30, 3 - %32 = trunc i64 %2 to i32 - %33 = shl i32 %32, 5 - %34 = add i32 %31, %33 - %35 = icmp sgt i32 %34, 2147483616 - br i1 %35, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.ph: ; preds = %vector.scevcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert38 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat39 = shufflevector <8 x i32> %broadcast.splatinsert38, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert40 = insertelement <8 x float> undef, float %20, i32 0 - %broadcast.splat41 = shufflevector <8 x float> %broadcast.splatinsert40, <8 x float> undef, <8 x i32> zeroinitializer - %36 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %37 = or <8 x i32> %36, - %38 = icmp sgt <8 x i32> %broadcast.splat39, %37 - %39 = extractelement <8 x i32> %37, i32 0 - %40 = add nsw i32 %mul.i.i.us, %39 - %41 = sext i32 %40 to i64 - %42 = getelementptr inbounds float, float* %12, i64 %41 - %43 = bitcast float* %42 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %43, i32 4, <8 x i1> %38, <8 x float> undef), !tbaa !12 - %44 = fmul <8 x float> %broadcast.splat41, %wide.masked.load - %45 = bitcast float* %42 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %44, <8 x float>* %45, i32 4, <8 x i1> %38), !tbaa !12, !llvm.access.group !16 - %46 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %47 = or <8 x i32> %46, - %48 = icmp sgt <8 x i32> %broadcast.splat39, %47 - %49 = extractelement <8 x i32> %47, i32 0 - %50 = add nsw i32 %mul.i.i.us, %49 - %51 = sext i32 %50 to i64 - %52 = getelementptr inbounds float, float* %12, i64 %51 - %53 = bitcast float* %52 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %53, i32 4, <8 x i1> %48, <8 x float> undef), !tbaa !12 - %54 = fmul <8 x float> %broadcast.splat41, %wide.masked.load.1 - %55 = bitcast float* %52 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %54, <8 x float>* %55, i32 4, <8 x i1> %48), !tbaa !12, !llvm.access.group !16 - %56 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %57 = or <8 x i32> %56, - %58 = icmp sgt <8 x i32> %broadcast.splat39, %57 - %59 = extractelement <8 x i32> %57, i32 0 - %60 = add nsw i32 %mul.i.i.us, %59 - %61 = sext i32 %60 to i64 - %62 = getelementptr inbounds float, float* %12, i64 %61 - %63 = bitcast float* %62 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %63, i32 4, <8 x i1> %58, <8 x float> undef), !tbaa !12 - %64 = fmul <8 x float> %broadcast.splat41, %wide.masked.load.2 - %65 = bitcast float* %62 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %64, <8 x float>* %65, i32 4, <8 x i1> %58), !tbaa !12, !llvm.access.group !16 - %66 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %67 = or <8 x i32> %66, - %68 = icmp sgt <8 x i32> %broadcast.splat39, %67 - %69 = extractelement <8 x i32> %67, i32 0 - %70 = add nsw i32 %mul.i.i.us, %69 - %71 = sext i32 %70 to i64 - %72 = getelementptr inbounds float, float* %12, i64 %71 - %73 = bitcast float* %72 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %73, i32 4, <8 x i1> %68, <8 x float> undef), !tbaa !12 - %74 = fmul <8 x float> %broadcast.splat41, %wide.masked.load.3 - %75 = bitcast float* %72 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %74, <8 x float>* %75, i32 4, <8 x i1> %68), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -pregion_for_entry.pregion_for_init.i.i.us: ; preds = %5 - %mul9.i.i.us = mul nsw i32 %24, %conv2.i.i.us - %76 = sext i32 %mul9.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us - %77 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %77, 1 - %cmp.i.i.us.1 = icmp sgt i32 %28, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %28, %conv2.i.i.us.1 - %mul9.i.i.us.1 = mul nsw i32 %24, %conv2.i.i.us.1 - %78 = sext i32 %mul9.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %81, %if.end.i.i.us.us ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %28, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.us - %79 = load float, float* %arrayidx.i.i.us.us, align 4, !tbaa !12 - %mul6.i.i.us.us = fmul float %20, %79 - store float %mul6.i.i.us.us, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us = mul nsw i32 %24, %conv.i.i.us.us - %80 = sext i32 %mul14.i.i.us.us to i64 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %81 = add nuw nsw i64 %_local_id_x.i.0.us.us, 1 - %exitcond.not = icmp eq i64 %81, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !19 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %82 = phi float [ %87, %for.body.i.i.us.us ], [ %mul6.i.i.us.us, %if.then.i.i.us.us ] - %83 = add nsw i64 %indvars.iv.next.i.i3.us.us, %76 - %arrayidx12.i.i.us.us = getelementptr inbounds float, float* %8, i64 %83 - %84 = load float, float* %arrayidx12.i.i.us.us, align 4, !tbaa !12 - %mul13.i.i.us.us = fmul float %16, %84 - %85 = add nsw i64 %indvars.iv.next.i.i3.us.us, %80 - %arrayidx17.i.i.us.us = getelementptr inbounds float, float* %8, i64 %85 - %86 = load float, float* %arrayidx17.i.i.us.us, align 4, !tbaa !12 - %87 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us, float %86, float %82) #2 - store float %87, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !21 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us.3237, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %548, %if.end.i.i.us.3237 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %28, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add.i.i.us = add nsw i32 %mul.i.i.us, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us - %88 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %mul6.i.i.us = fmul float %20, %88 - store float %mul6.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %89 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1206 = add nuw nsw i64 %89, %mul.i.i.i - %conv.i.i.us.1207 = trunc i64 %add1.i.i.i.us.1206 to i32 - %cmp4.i.i.us.1208 = icmp sgt i32 %28, %conv.i.i.us.1207 - br i1 %cmp4.i.i.us.1208, label %if.then.i.i.us.1214, label %if.end.i.i.us.1215 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us.3237 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i.preheader - %90 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.1 = or i32 %90, 1 - %cmp.i.i.1 = icmp sgt i32 %28, %conv2.i.i.1 - %mul.i.i.1 = mul nsw i32 %28, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck49, label %pregion_for_end.i.i.1 - -vector.scevcheck49: ; preds = %pregion_for_end.i.i - %91 = mul i32 %28, %conv2.i.i.1 - %92 = trunc i64 %2 to i32 - %93 = shl i32 %92, 5 - %94 = add i32 %91, %93 - %95 = icmp sgt i32 %94, 2147483616 - br i1 %95, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph50 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.scevcheck49 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.ph50: ; preds = %vector.scevcheck49 - %broadcast.splatinsert57 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat58 = shufflevector <8 x i64> %broadcast.splatinsert57, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert59 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat60 = shufflevector <8 x i32> %broadcast.splatinsert59, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert62 = insertelement <8 x float> undef, float %20, i32 0 - %broadcast.splat63 = shufflevector <8 x float> %broadcast.splatinsert62, <8 x float> undef, <8 x i32> zeroinitializer - %96 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %97 = or <8 x i32> %96, - %98 = icmp sgt <8 x i32> %broadcast.splat60, %97 - %99 = extractelement <8 x i32> %97, i32 0 - %100 = add nsw i32 %mul.i.i.1, %99 - %101 = sext i32 %100 to i64 - %102 = getelementptr inbounds float, float* %12, i64 %101 - %103 = bitcast float* %102 to <8 x float>* - %wide.masked.load61 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %103, i32 4, <8 x i1> %98, <8 x float> undef), !tbaa !12 - %104 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61 - %105 = bitcast float* %102 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %104, <8 x float>* %105, i32 4, <8 x i1> %98), !tbaa !12, !llvm.access.group !16 - %106 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %107 = or <8 x i32> %106, - %108 = icmp sgt <8 x i32> %broadcast.splat60, %107 - %109 = extractelement <8 x i32> %107, i32 0 - %110 = add nsw i32 %mul.i.i.1, %109 - %111 = sext i32 %110 to i64 - %112 = getelementptr inbounds float, float* %12, i64 %111 - %113 = bitcast float* %112 to <8 x float>* - %wide.masked.load61.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %113, i32 4, <8 x i1> %108, <8 x float> undef), !tbaa !12 - %114 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61.1 - %115 = bitcast float* %112 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %114, <8 x float>* %115, i32 4, <8 x i1> %108), !tbaa !12, !llvm.access.group !16 - %116 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %117 = or <8 x i32> %116, - %118 = icmp sgt <8 x i32> %broadcast.splat60, %117 - %119 = extractelement <8 x i32> %117, i32 0 - %120 = add nsw i32 %mul.i.i.1, %119 - %121 = sext i32 %120 to i64 - %122 = getelementptr inbounds float, float* %12, i64 %121 - %123 = bitcast float* %122 to <8 x float>* - %wide.masked.load61.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %123, i32 4, <8 x i1> %118, <8 x float> undef), !tbaa !12 - %124 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61.2 - %125 = bitcast float* %122 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %124, <8 x float>* %125, i32 4, <8 x i1> %118), !tbaa !12, !llvm.access.group !16 - %126 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %127 = or <8 x i32> %126, - %128 = icmp sgt <8 x i32> %broadcast.splat60, %127 - %129 = extractelement <8 x i32> %127, i32 0 - %130 = add nsw i32 %mul.i.i.1, %129 - %131 = sext i32 %130 to i64 - %132 = getelementptr inbounds float, float* %12, i64 %131 - %133 = bitcast float* %132 to <8 x float>* - %wide.masked.load61.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %133, i32 4, <8 x i1> %128, <8 x float> undef), !tbaa !12 - %134 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61.3 - %135 = bitcast float* %132 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %134, <8 x float>* %135, i32 4, <8 x i1> %128), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.1 - -_pocl_kernel_syrk_kernel.exit.loopexit: ; preds = %if.end.i.i.us.us.7 - br label %_pocl_kernel_syrk_kernel.exit - -_pocl_kernel_syrk_kernel.exit.loopexit238: ; preds = %if.end.i.i.us.7.3 - br label %_pocl_kernel_syrk_kernel.exit - -_pocl_kernel_syrk_kernel.exit: ; preds = %pregion_for_end.i.i.us.6, %vector.ph182, %pregion_for_end.i.i.6, %_pocl_kernel_syrk_kernel.exit.loopexit238, %_pocl_kernel_syrk_kernel.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.i.i.us.1.3, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ %542, %if.end.i.i.us.1.3 ], [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %28, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %add.i.i.us.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.1 - %136 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %mul6.i.i.us.1 = fmul float %20, %136 - store float %mul6.i.i.us.1, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %137 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %137, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %28, %conv.i.i.us.1.1 - br i1 %cmp4.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.i.i.us.1.3 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph50, %pregion_for_end.i.i - %138 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.2 = or i32 %138, 2 - %cmp.i.i.2 = icmp sgt i32 %28, %conv2.i.i.2 - %mul.i.i.2 = mul nsw i32 %28, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck71, label %pregion_for_end.i.i.2 - -vector.scevcheck71: ; preds = %pregion_for_end.i.i.1 - %139 = mul i32 %28, %conv2.i.i.2 - %140 = trunc i64 %2 to i32 - %141 = shl i32 %140, 5 - %142 = add i32 %139, %141 - %143 = icmp sgt i32 %142, 2147483616 - br i1 %143, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph72 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.scevcheck71 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.ph72: ; preds = %vector.scevcheck71 - %broadcast.splatinsert79 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat80 = shufflevector <8 x i64> %broadcast.splatinsert79, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert81 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat82 = shufflevector <8 x i32> %broadcast.splatinsert81, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert84 = insertelement <8 x float> undef, float %20, i32 0 - %broadcast.splat85 = shufflevector <8 x float> %broadcast.splatinsert84, <8 x float> undef, <8 x i32> zeroinitializer - %144 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %145 = or <8 x i32> %144, - %146 = icmp sgt <8 x i32> %broadcast.splat82, %145 - %147 = extractelement <8 x i32> %145, i32 0 - %148 = add nsw i32 %mul.i.i.2, %147 - %149 = sext i32 %148 to i64 - %150 = getelementptr inbounds float, float* %12, i64 %149 - %151 = bitcast float* %150 to <8 x float>* - %wide.masked.load83 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %151, i32 4, <8 x i1> %146, <8 x float> undef), !tbaa !12 - %152 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83 - %153 = bitcast float* %150 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %152, <8 x float>* %153, i32 4, <8 x i1> %146), !tbaa !12, !llvm.access.group !16 - %154 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %155 = or <8 x i32> %154, - %156 = icmp sgt <8 x i32> %broadcast.splat82, %155 - %157 = extractelement <8 x i32> %155, i32 0 - %158 = add nsw i32 %mul.i.i.2, %157 - %159 = sext i32 %158 to i64 - %160 = getelementptr inbounds float, float* %12, i64 %159 - %161 = bitcast float* %160 to <8 x float>* - %wide.masked.load83.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %161, i32 4, <8 x i1> %156, <8 x float> undef), !tbaa !12 - %162 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83.1 - %163 = bitcast float* %160 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %162, <8 x float>* %163, i32 4, <8 x i1> %156), !tbaa !12, !llvm.access.group !16 - %164 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %165 = or <8 x i32> %164, - %166 = icmp sgt <8 x i32> %broadcast.splat82, %165 - %167 = extractelement <8 x i32> %165, i32 0 - %168 = add nsw i32 %mul.i.i.2, %167 - %169 = sext i32 %168 to i64 - %170 = getelementptr inbounds float, float* %12, i64 %169 - %171 = bitcast float* %170 to <8 x float>* - %wide.masked.load83.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %171, i32 4, <8 x i1> %166, <8 x float> undef), !tbaa !12 - %172 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83.2 - %173 = bitcast float* %170 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %172, <8 x float>* %173, i32 4, <8 x i1> %166), !tbaa !12, !llvm.access.group !16 - %174 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %175 = or <8 x i32> %174, - %176 = icmp sgt <8 x i32> %broadcast.splat82, %175 - %177 = extractelement <8 x i32> %175, i32 0 - %178 = add nsw i32 %mul.i.i.2, %177 - %179 = sext i32 %178 to i64 - %180 = getelementptr inbounds float, float* %12, i64 %179 - %181 = bitcast float* %180 to <8 x float>* - %wide.masked.load83.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %181, i32 4, <8 x i1> %176, <8 x float> undef), !tbaa !12 - %182 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83.3 - %183 = bitcast float* %180 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %182, <8 x float>* %183, i32 4, <8 x i1> %176), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.i.i.us.2.3, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ %536, %if.end.i.i.us.2.3 ], [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %28, %conv.i.i.us.2 - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %add.i.i.us.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.2 - %184 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %mul6.i.i.us.2 = fmul float %20, %184 - store float %mul6.i.i.us.2, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %185 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %185, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %28, %conv.i.i.us.2.1 - br i1 %cmp4.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.i.i.us.2.3 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph72, %pregion_for_end.i.i.1 - %186 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.3 = or i32 %186, 3 - %cmp.i.i.3 = icmp sgt i32 %28, %conv2.i.i.3 - %mul.i.i.3 = mul nsw i32 %28, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck93, label %pregion_for_end.i.i.3 - -vector.scevcheck93: ; preds = %pregion_for_end.i.i.2 - %187 = mul i32 %28, %conv2.i.i.3 - %188 = trunc i64 %2 to i32 - %189 = shl i32 %188, 5 - %190 = add i32 %187, %189 - %191 = icmp sgt i32 %190, 2147483616 - br i1 %191, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph94 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.scevcheck93 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.ph94: ; preds = %vector.scevcheck93 - %broadcast.splatinsert101 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat102 = shufflevector <8 x i64> %broadcast.splatinsert101, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert103 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat104 = shufflevector <8 x i32> %broadcast.splatinsert103, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert106 = insertelement <8 x float> undef, float %20, i32 0 - %broadcast.splat107 = shufflevector <8 x float> %broadcast.splatinsert106, <8 x float> undef, <8 x i32> zeroinitializer - %192 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %193 = or <8 x i32> %192, - %194 = icmp sgt <8 x i32> %broadcast.splat104, %193 - %195 = extractelement <8 x i32> %193, i32 0 - %196 = add nsw i32 %mul.i.i.3, %195 - %197 = sext i32 %196 to i64 - %198 = getelementptr inbounds float, float* %12, i64 %197 - %199 = bitcast float* %198 to <8 x float>* - %wide.masked.load105 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %199, i32 4, <8 x i1> %194, <8 x float> undef), !tbaa !12 - %200 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105 - %201 = bitcast float* %198 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %200, <8 x float>* %201, i32 4, <8 x i1> %194), !tbaa !12, !llvm.access.group !16 - %202 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %203 = or <8 x i32> %202, - %204 = icmp sgt <8 x i32> %broadcast.splat104, %203 - %205 = extractelement <8 x i32> %203, i32 0 - %206 = add nsw i32 %mul.i.i.3, %205 - %207 = sext i32 %206 to i64 - %208 = getelementptr inbounds float, float* %12, i64 %207 - %209 = bitcast float* %208 to <8 x float>* - %wide.masked.load105.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %209, i32 4, <8 x i1> %204, <8 x float> undef), !tbaa !12 - %210 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105.1 - %211 = bitcast float* %208 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %210, <8 x float>* %211, i32 4, <8 x i1> %204), !tbaa !12, !llvm.access.group !16 - %212 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %213 = or <8 x i32> %212, - %214 = icmp sgt <8 x i32> %broadcast.splat104, %213 - %215 = extractelement <8 x i32> %213, i32 0 - %216 = add nsw i32 %mul.i.i.3, %215 - %217 = sext i32 %216 to i64 - %218 = getelementptr inbounds float, float* %12, i64 %217 - %219 = bitcast float* %218 to <8 x float>* - %wide.masked.load105.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %219, i32 4, <8 x i1> %214, <8 x float> undef), !tbaa !12 - %220 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105.2 - %221 = bitcast float* %218 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %220, <8 x float>* %221, i32 4, <8 x i1> %214), !tbaa !12, !llvm.access.group !16 - %222 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %223 = or <8 x i32> %222, - %224 = icmp sgt <8 x i32> %broadcast.splat104, %223 - %225 = extractelement <8 x i32> %223, i32 0 - %226 = add nsw i32 %mul.i.i.3, %225 - %227 = sext i32 %226 to i64 - %228 = getelementptr inbounds float, float* %12, i64 %227 - %229 = bitcast float* %228 to <8 x float>* - %wide.masked.load105.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %229, i32 4, <8 x i1> %224, <8 x float> undef), !tbaa !12 - %230 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105.3 - %231 = bitcast float* %228 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %230, <8 x float>* %231, i32 4, <8 x i1> %224), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.i.i.us.3.3, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ %530, %if.end.i.i.us.3.3 ], [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %28, %conv.i.i.us.3 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %add.i.i.us.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.3 - %232 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %mul6.i.i.us.3 = fmul float %20, %232 - store float %mul6.i.i.us.3, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %233 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %233, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %28, %conv.i.i.us.3.1 - br i1 %cmp4.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.i.i.us.3.3 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph94, %pregion_for_end.i.i.2 - %234 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.4 = or i32 %234, 4 - %cmp.i.i.4 = icmp sgt i32 %28, %conv2.i.i.4 - %mul.i.i.4 = mul nsw i32 %28, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck115, label %pregion_for_end.i.i.4 - -vector.scevcheck115: ; preds = %pregion_for_end.i.i.3 - %235 = mul i32 %28, %conv2.i.i.4 - %236 = trunc i64 %2 to i32 - %237 = shl i32 %236, 5 - %238 = add i32 %235, %237 - %239 = icmp sgt i32 %238, 2147483616 - br i1 %239, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph116 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.scevcheck115 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.ph116: ; preds = %vector.scevcheck115 - %broadcast.splatinsert123 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat124 = shufflevector <8 x i64> %broadcast.splatinsert123, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert125 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat126 = shufflevector <8 x i32> %broadcast.splatinsert125, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert128 = insertelement <8 x float> undef, float %20, i32 0 - %broadcast.splat129 = shufflevector <8 x float> %broadcast.splatinsert128, <8 x float> undef, <8 x i32> zeroinitializer - %240 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %241 = or <8 x i32> %240, - %242 = icmp sgt <8 x i32> %broadcast.splat126, %241 - %243 = extractelement <8 x i32> %241, i32 0 - %244 = add nsw i32 %mul.i.i.4, %243 - %245 = sext i32 %244 to i64 - %246 = getelementptr inbounds float, float* %12, i64 %245 - %247 = bitcast float* %246 to <8 x float>* - %wide.masked.load127 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %247, i32 4, <8 x i1> %242, <8 x float> undef), !tbaa !12 - %248 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127 - %249 = bitcast float* %246 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %248, <8 x float>* %249, i32 4, <8 x i1> %242), !tbaa !12, !llvm.access.group !16 - %250 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %251 = or <8 x i32> %250, - %252 = icmp sgt <8 x i32> %broadcast.splat126, %251 - %253 = extractelement <8 x i32> %251, i32 0 - %254 = add nsw i32 %mul.i.i.4, %253 - %255 = sext i32 %254 to i64 - %256 = getelementptr inbounds float, float* %12, i64 %255 - %257 = bitcast float* %256 to <8 x float>* - %wide.masked.load127.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %257, i32 4, <8 x i1> %252, <8 x float> undef), !tbaa !12 - %258 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127.1 - %259 = bitcast float* %256 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %258, <8 x float>* %259, i32 4, <8 x i1> %252), !tbaa !12, !llvm.access.group !16 - %260 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %261 = or <8 x i32> %260, - %262 = icmp sgt <8 x i32> %broadcast.splat126, %261 - %263 = extractelement <8 x i32> %261, i32 0 - %264 = add nsw i32 %mul.i.i.4, %263 - %265 = sext i32 %264 to i64 - %266 = getelementptr inbounds float, float* %12, i64 %265 - %267 = bitcast float* %266 to <8 x float>* - %wide.masked.load127.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %267, i32 4, <8 x i1> %262, <8 x float> undef), !tbaa !12 - %268 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127.2 - %269 = bitcast float* %266 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %268, <8 x float>* %269, i32 4, <8 x i1> %262), !tbaa !12, !llvm.access.group !16 - %270 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %271 = or <8 x i32> %270, - %272 = icmp sgt <8 x i32> %broadcast.splat126, %271 - %273 = extractelement <8 x i32> %271, i32 0 - %274 = add nsw i32 %mul.i.i.4, %273 - %275 = sext i32 %274 to i64 - %276 = getelementptr inbounds float, float* %12, i64 %275 - %277 = bitcast float* %276 to <8 x float>* - %wide.masked.load127.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %277, i32 4, <8 x i1> %272, <8 x float> undef), !tbaa !12 - %278 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127.3 - %279 = bitcast float* %276 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %278, <8 x float>* %279, i32 4, <8 x i1> %272), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.i.i.us.4.3, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ %524, %if.end.i.i.us.4.3 ], [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %28, %conv.i.i.us.4 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %add.i.i.us.4 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.4 - %280 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %mul6.i.i.us.4 = fmul float %20, %280 - store float %mul6.i.i.us.4, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %281 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %281, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %28, %conv.i.i.us.4.1 - br i1 %cmp4.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.i.i.us.4.3 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph116, %pregion_for_end.i.i.3 - %282 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.5 = or i32 %282, 5 - %cmp.i.i.5 = icmp sgt i32 %28, %conv2.i.i.5 - %mul.i.i.5 = mul nsw i32 %28, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck137, label %pregion_for_end.i.i.5 - -vector.scevcheck137: ; preds = %pregion_for_end.i.i.4 - %283 = mul i32 %28, %conv2.i.i.5 - %284 = trunc i64 %2 to i32 - %285 = shl i32 %284, 5 - %286 = add i32 %283, %285 - %287 = icmp sgt i32 %286, 2147483616 - br i1 %287, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph138 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.scevcheck137 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.ph138: ; preds = %vector.scevcheck137 - %broadcast.splatinsert145 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat146 = shufflevector <8 x i64> %broadcast.splatinsert145, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert147 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat148 = shufflevector <8 x i32> %broadcast.splatinsert147, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert150 = insertelement <8 x float> undef, float %20, i32 0 - %broadcast.splat151 = shufflevector <8 x float> %broadcast.splatinsert150, <8 x float> undef, <8 x i32> zeroinitializer - %288 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %289 = or <8 x i32> %288, - %290 = icmp sgt <8 x i32> %broadcast.splat148, %289 - %291 = extractelement <8 x i32> %289, i32 0 - %292 = add nsw i32 %mul.i.i.5, %291 - %293 = sext i32 %292 to i64 - %294 = getelementptr inbounds float, float* %12, i64 %293 - %295 = bitcast float* %294 to <8 x float>* - %wide.masked.load149 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %295, i32 4, <8 x i1> %290, <8 x float> undef), !tbaa !12 - %296 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149 - %297 = bitcast float* %294 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %296, <8 x float>* %297, i32 4, <8 x i1> %290), !tbaa !12, !llvm.access.group !16 - %298 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %299 = or <8 x i32> %298, - %300 = icmp sgt <8 x i32> %broadcast.splat148, %299 - %301 = extractelement <8 x i32> %299, i32 0 - %302 = add nsw i32 %mul.i.i.5, %301 - %303 = sext i32 %302 to i64 - %304 = getelementptr inbounds float, float* %12, i64 %303 - %305 = bitcast float* %304 to <8 x float>* - %wide.masked.load149.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %305, i32 4, <8 x i1> %300, <8 x float> undef), !tbaa !12 - %306 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149.1 - %307 = bitcast float* %304 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %306, <8 x float>* %307, i32 4, <8 x i1> %300), !tbaa !12, !llvm.access.group !16 - %308 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %309 = or <8 x i32> %308, - %310 = icmp sgt <8 x i32> %broadcast.splat148, %309 - %311 = extractelement <8 x i32> %309, i32 0 - %312 = add nsw i32 %mul.i.i.5, %311 - %313 = sext i32 %312 to i64 - %314 = getelementptr inbounds float, float* %12, i64 %313 - %315 = bitcast float* %314 to <8 x float>* - %wide.masked.load149.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %315, i32 4, <8 x i1> %310, <8 x float> undef), !tbaa !12 - %316 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149.2 - %317 = bitcast float* %314 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %316, <8 x float>* %317, i32 4, <8 x i1> %310), !tbaa !12, !llvm.access.group !16 - %318 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %319 = or <8 x i32> %318, - %320 = icmp sgt <8 x i32> %broadcast.splat148, %319 - %321 = extractelement <8 x i32> %319, i32 0 - %322 = add nsw i32 %mul.i.i.5, %321 - %323 = sext i32 %322 to i64 - %324 = getelementptr inbounds float, float* %12, i64 %323 - %325 = bitcast float* %324 to <8 x float>* - %wide.masked.load149.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %325, i32 4, <8 x i1> %320, <8 x float> undef), !tbaa !12 - %326 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149.3 - %327 = bitcast float* %324 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %326, <8 x float>* %327, i32 4, <8 x i1> %320), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.i.i.us.5.3, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ %518, %if.end.i.i.us.5.3 ], [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %28, %conv.i.i.us.5 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %add.i.i.us.5 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.5 - %328 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %mul6.i.i.us.5 = fmul float %20, %328 - store float %mul6.i.i.us.5, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %329 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %329, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %28, %conv.i.i.us.5.1 - br i1 %cmp4.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.i.i.us.5.3 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph138, %pregion_for_end.i.i.4 - %330 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.6 = or i32 %330, 6 - %cmp.i.i.6 = icmp sgt i32 %28, %conv2.i.i.6 - %mul.i.i.6 = mul nsw i32 %28, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck159, label %pregion_for_end.i.i.6 - -vector.scevcheck159: ; preds = %pregion_for_end.i.i.5 - %331 = mul i32 %28, %conv2.i.i.6 - %332 = trunc i64 %2 to i32 - %333 = shl i32 %332, 5 - %334 = add i32 %331, %333 - %335 = icmp sgt i32 %334, 2147483616 - br i1 %335, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph160 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.scevcheck159 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.ph160: ; preds = %vector.scevcheck159 - %broadcast.splatinsert167 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat168 = shufflevector <8 x i64> %broadcast.splatinsert167, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert169 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat170 = shufflevector <8 x i32> %broadcast.splatinsert169, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert172 = insertelement <8 x float> undef, float %20, i32 0 - %broadcast.splat173 = shufflevector <8 x float> %broadcast.splatinsert172, <8 x float> undef, <8 x i32> zeroinitializer - %336 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %337 = or <8 x i32> %336, - %338 = icmp sgt <8 x i32> %broadcast.splat170, %337 - %339 = extractelement <8 x i32> %337, i32 0 - %340 = add nsw i32 %mul.i.i.6, %339 - %341 = sext i32 %340 to i64 - %342 = getelementptr inbounds float, float* %12, i64 %341 - %343 = bitcast float* %342 to <8 x float>* - %wide.masked.load171 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %343, i32 4, <8 x i1> %338, <8 x float> undef), !tbaa !12 - %344 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171 - %345 = bitcast float* %342 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %344, <8 x float>* %345, i32 4, <8 x i1> %338), !tbaa !12, !llvm.access.group !16 - %346 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %347 = or <8 x i32> %346, - %348 = icmp sgt <8 x i32> %broadcast.splat170, %347 - %349 = extractelement <8 x i32> %347, i32 0 - %350 = add nsw i32 %mul.i.i.6, %349 - %351 = sext i32 %350 to i64 - %352 = getelementptr inbounds float, float* %12, i64 %351 - %353 = bitcast float* %352 to <8 x float>* - %wide.masked.load171.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %353, i32 4, <8 x i1> %348, <8 x float> undef), !tbaa !12 - %354 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171.1 - %355 = bitcast float* %352 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %354, <8 x float>* %355, i32 4, <8 x i1> %348), !tbaa !12, !llvm.access.group !16 - %356 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %357 = or <8 x i32> %356, - %358 = icmp sgt <8 x i32> %broadcast.splat170, %357 - %359 = extractelement <8 x i32> %357, i32 0 - %360 = add nsw i32 %mul.i.i.6, %359 - %361 = sext i32 %360 to i64 - %362 = getelementptr inbounds float, float* %12, i64 %361 - %363 = bitcast float* %362 to <8 x float>* - %wide.masked.load171.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %363, i32 4, <8 x i1> %358, <8 x float> undef), !tbaa !12 - %364 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171.2 - %365 = bitcast float* %362 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %364, <8 x float>* %365, i32 4, <8 x i1> %358), !tbaa !12, !llvm.access.group !16 - %366 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %367 = or <8 x i32> %366, - %368 = icmp sgt <8 x i32> %broadcast.splat170, %367 - %369 = extractelement <8 x i32> %367, i32 0 - %370 = add nsw i32 %mul.i.i.6, %369 - %371 = sext i32 %370 to i64 - %372 = getelementptr inbounds float, float* %12, i64 %371 - %373 = bitcast float* %372 to <8 x float>* - %wide.masked.load171.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %373, i32 4, <8 x i1> %368, <8 x float> undef), !tbaa !12 - %374 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171.3 - %375 = bitcast float* %372 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %374, <8 x float>* %375, i32 4, <8 x i1> %368), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.i.i.us.6.3, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ %512, %if.end.i.i.us.6.3 ], [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %28, %conv.i.i.us.6 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %add.i.i.us.6 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.6 - %376 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %mul6.i.i.us.6 = fmul float %20, %376 - store float %mul6.i.i.us.6, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %377 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %377, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %28, %conv.i.i.us.6.1 - br i1 %cmp4.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.i.i.us.6.3 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph160, %pregion_for_end.i.i.5 - %378 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.7 = or i32 %378, 7 - %cmp.i.i.7 = icmp sgt i32 %28, %conv2.i.i.7 - %mul.i.i.7 = mul nsw i32 %28, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck181, label %_pocl_kernel_syrk_kernel.exit - -vector.scevcheck181: ; preds = %pregion_for_end.i.i.6 - %379 = mul i32 %28, %conv2.i.i.7 - %380 = trunc i64 %2 to i32 - %381 = shl i32 %380, 5 - %382 = add i32 %379, %381 - %383 = icmp sgt i32 %382, 2147483616 - br i1 %383, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph182 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.scevcheck181 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.ph182: ; preds = %vector.scevcheck181 - %broadcast.splatinsert189 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat190 = shufflevector <8 x i64> %broadcast.splatinsert189, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert191 = insertelement <8 x i32> undef, i32 %28, i32 0 - %broadcast.splat192 = shufflevector <8 x i32> %broadcast.splatinsert191, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert194 = insertelement <8 x float> undef, float %20, i32 0 - %broadcast.splat195 = shufflevector <8 x float> %broadcast.splatinsert194, <8 x float> undef, <8 x i32> zeroinitializer - %384 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %385 = or <8 x i32> %384, - %386 = icmp sgt <8 x i32> %broadcast.splat192, %385 - %387 = extractelement <8 x i32> %385, i32 0 - %388 = add nsw i32 %mul.i.i.7, %387 - %389 = sext i32 %388 to i64 - %390 = getelementptr inbounds float, float* %12, i64 %389 - %391 = bitcast float* %390 to <8 x float>* - %wide.masked.load193 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %391, i32 4, <8 x i1> %386, <8 x float> undef), !tbaa !12 - %392 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193 - %393 = bitcast float* %390 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %392, <8 x float>* %393, i32 4, <8 x i1> %386), !tbaa !12, !llvm.access.group !16 - %394 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %395 = or <8 x i32> %394, - %396 = icmp sgt <8 x i32> %broadcast.splat192, %395 - %397 = extractelement <8 x i32> %395, i32 0 - %398 = add nsw i32 %mul.i.i.7, %397 - %399 = sext i32 %398 to i64 - %400 = getelementptr inbounds float, float* %12, i64 %399 - %401 = bitcast float* %400 to <8 x float>* - %wide.masked.load193.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %401, i32 4, <8 x i1> %396, <8 x float> undef), !tbaa !12 - %402 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193.1 - %403 = bitcast float* %400 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %402, <8 x float>* %403, i32 4, <8 x i1> %396), !tbaa !12, !llvm.access.group !16 - %404 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %405 = or <8 x i32> %404, - %406 = icmp sgt <8 x i32> %broadcast.splat192, %405 - %407 = extractelement <8 x i32> %405, i32 0 - %408 = add nsw i32 %mul.i.i.7, %407 - %409 = sext i32 %408 to i64 - %410 = getelementptr inbounds float, float* %12, i64 %409 - %411 = bitcast float* %410 to <8 x float>* - %wide.masked.load193.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %411, i32 4, <8 x i1> %406, <8 x float> undef), !tbaa !12 - %412 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193.2 - %413 = bitcast float* %410 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %412, <8 x float>* %413, i32 4, <8 x i1> %406), !tbaa !12, !llvm.access.group !16 - %414 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %415 = or <8 x i32> %414, - %416 = icmp sgt <8 x i32> %broadcast.splat192, %415 - %417 = extractelement <8 x i32> %415, i32 0 - %418 = add nsw i32 %mul.i.i.7, %417 - %419 = sext i32 %418 to i64 - %420 = getelementptr inbounds float, float* %12, i64 %419 - %421 = bitcast float* %420 to <8 x float>* - %wide.masked.load193.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %421, i32 4, <8 x i1> %416, <8 x float> undef), !tbaa !12 - %422 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193.3 - %423 = bitcast float* %420 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %422, <8 x float>* %423, i32 4, <8 x i1> %416), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_syrk_kernel.exit - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.i.i.us.7.3, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ %506, %if.end.i.i.us.7.3 ], [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %28, %conv.i.i.us.7 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %add.i.i.us.7 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.7 - %424 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %mul6.i.i.us.7 = fmul float %20, %424 - store float %mul6.i.i.us.7, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %425 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %425, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %28, %conv.i.i.us.7.1 - br i1 %cmp4.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.i.i.us.7.1 - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %434, %if.end.i.i.us.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %28, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.us.1 - %426 = load float, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12 - %mul6.i.i.us.us.1 = fmul float %20, %426 - store float %mul6.i.i.us.us.1, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.1 = mul nsw i32 %24, %conv.i.i.us.us.1 - %427 = sext i32 %mul14.i.i.us.us.1 to i64 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %428 = phi float [ %433, %for.body.i.i.us.us.1 ], [ %mul6.i.i.us.us.1, %if.then.i.i.us.us.1 ] - %429 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %78 - %arrayidx12.i.i.us.us.1 = getelementptr inbounds float, float* %8, i64 %429 - %430 = load float, float* %arrayidx12.i.i.us.us.1, align 4, !tbaa !12 - %mul13.i.i.us.us.1 = fmul float %16, %430 - %431 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %427 - %arrayidx17.i.i.us.us.1 = getelementptr inbounds float, float* %8, i64 %431 - %432 = load float, float* %arrayidx17.i.i.us.us.1, align 4, !tbaa !12 - %433 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.1, float %432, float %428) #2 - store float %433, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !21 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %434 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 1 - %exitcond.not.1 = icmp eq i64 %434, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !19 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %435 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %435, 2 - %cmp.i.i.us.2 = icmp sgt i32 %28, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %28, %conv2.i.i.us.2 - %mul9.i.i.us.2 = mul nsw i32 %24, %conv2.i.i.us.2 - %436 = sext i32 %mul9.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %445, %if.end.i.i.us.us.2 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %28, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.us.2 - %437 = load float, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12 - %mul6.i.i.us.us.2 = fmul float %20, %437 - store float %mul6.i.i.us.us.2, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.2 = mul nsw i32 %24, %conv.i.i.us.us.2 - %438 = sext i32 %mul14.i.i.us.us.2 to i64 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %439 = phi float [ %444, %for.body.i.i.us.us.2 ], [ %mul6.i.i.us.us.2, %if.then.i.i.us.us.2 ] - %440 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %436 - %arrayidx12.i.i.us.us.2 = getelementptr inbounds float, float* %8, i64 %440 - %441 = load float, float* %arrayidx12.i.i.us.us.2, align 4, !tbaa !12 - %mul13.i.i.us.us.2 = fmul float %16, %441 - %442 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %438 - %arrayidx17.i.i.us.us.2 = getelementptr inbounds float, float* %8, i64 %442 - %443 = load float, float* %arrayidx17.i.i.us.us.2, align 4, !tbaa !12 - %444 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.2, float %443, float %439) #2 - store float %444, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !21 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %445 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 1 - %exitcond.not.2 = icmp eq i64 %445, 32 - br i1 %exitcond.not.2, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !19 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %446 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %446, 3 - %cmp.i.i.us.3 = icmp sgt i32 %28, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %28, %conv2.i.i.us.3 - %mul9.i.i.us.3 = mul nsw i32 %24, %conv2.i.i.us.3 - %447 = sext i32 %mul9.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %456, %if.end.i.i.us.us.3 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %28, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.us.3 - %448 = load float, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12 - %mul6.i.i.us.us.3 = fmul float %20, %448 - store float %mul6.i.i.us.us.3, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.3 = mul nsw i32 %24, %conv.i.i.us.us.3 - %449 = sext i32 %mul14.i.i.us.us.3 to i64 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %450 = phi float [ %455, %for.body.i.i.us.us.3 ], [ %mul6.i.i.us.us.3, %if.then.i.i.us.us.3 ] - %451 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %447 - %arrayidx12.i.i.us.us.3 = getelementptr inbounds float, float* %8, i64 %451 - %452 = load float, float* %arrayidx12.i.i.us.us.3, align 4, !tbaa !12 - %mul13.i.i.us.us.3 = fmul float %16, %452 - %453 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %449 - %arrayidx17.i.i.us.us.3 = getelementptr inbounds float, float* %8, i64 %453 - %454 = load float, float* %arrayidx17.i.i.us.us.3, align 4, !tbaa !12 - %455 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.3, float %454, float %450) #2 - store float %455, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !21 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %456 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 1 - %exitcond.not.3 = icmp eq i64 %456, 32 - br i1 %exitcond.not.3, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !19 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %457 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %457, 4 - %cmp.i.i.us.4 = icmp sgt i32 %28, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %28, %conv2.i.i.us.4 - %mul9.i.i.us.4 = mul nsw i32 %24, %conv2.i.i.us.4 - %458 = sext i32 %mul9.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %467, %if.end.i.i.us.us.4 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %28, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.us.4 - %459 = load float, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12 - %mul6.i.i.us.us.4 = fmul float %20, %459 - store float %mul6.i.i.us.us.4, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.4 = mul nsw i32 %24, %conv.i.i.us.us.4 - %460 = sext i32 %mul14.i.i.us.us.4 to i64 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %461 = phi float [ %466, %for.body.i.i.us.us.4 ], [ %mul6.i.i.us.us.4, %if.then.i.i.us.us.4 ] - %462 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %458 - %arrayidx12.i.i.us.us.4 = getelementptr inbounds float, float* %8, i64 %462 - %463 = load float, float* %arrayidx12.i.i.us.us.4, align 4, !tbaa !12 - %mul13.i.i.us.us.4 = fmul float %16, %463 - %464 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %460 - %arrayidx17.i.i.us.us.4 = getelementptr inbounds float, float* %8, i64 %464 - %465 = load float, float* %arrayidx17.i.i.us.us.4, align 4, !tbaa !12 - %466 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.4, float %465, float %461) #2 - store float %466, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !21 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %467 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 1 - %exitcond.not.4 = icmp eq i64 %467, 32 - br i1 %exitcond.not.4, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !19 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %468 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %468, 5 - %cmp.i.i.us.5 = icmp sgt i32 %28, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %28, %conv2.i.i.us.5 - %mul9.i.i.us.5 = mul nsw i32 %24, %conv2.i.i.us.5 - %469 = sext i32 %mul9.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %478, %if.end.i.i.us.us.5 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %28, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.us.5 - %470 = load float, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12 - %mul6.i.i.us.us.5 = fmul float %20, %470 - store float %mul6.i.i.us.us.5, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.5 = mul nsw i32 %24, %conv.i.i.us.us.5 - %471 = sext i32 %mul14.i.i.us.us.5 to i64 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %472 = phi float [ %477, %for.body.i.i.us.us.5 ], [ %mul6.i.i.us.us.5, %if.then.i.i.us.us.5 ] - %473 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %469 - %arrayidx12.i.i.us.us.5 = getelementptr inbounds float, float* %8, i64 %473 - %474 = load float, float* %arrayidx12.i.i.us.us.5, align 4, !tbaa !12 - %mul13.i.i.us.us.5 = fmul float %16, %474 - %475 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %471 - %arrayidx17.i.i.us.us.5 = getelementptr inbounds float, float* %8, i64 %475 - %476 = load float, float* %arrayidx17.i.i.us.us.5, align 4, !tbaa !12 - %477 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.5, float %476, float %472) #2 - store float %477, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !21 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %478 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 1 - %exitcond.not.5 = icmp eq i64 %478, 32 - br i1 %exitcond.not.5, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !19 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %479 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %479, 6 - %cmp.i.i.us.6 = icmp sgt i32 %28, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %28, %conv2.i.i.us.6 - %mul9.i.i.us.6 = mul nsw i32 %24, %conv2.i.i.us.6 - %480 = sext i32 %mul9.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %489, %if.end.i.i.us.us.6 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %28, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.us.6 - %481 = load float, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12 - %mul6.i.i.us.us.6 = fmul float %20, %481 - store float %mul6.i.i.us.us.6, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.6 = mul nsw i32 %24, %conv.i.i.us.us.6 - %482 = sext i32 %mul14.i.i.us.us.6 to i64 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %483 = phi float [ %488, %for.body.i.i.us.us.6 ], [ %mul6.i.i.us.us.6, %if.then.i.i.us.us.6 ] - %484 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %480 - %arrayidx12.i.i.us.us.6 = getelementptr inbounds float, float* %8, i64 %484 - %485 = load float, float* %arrayidx12.i.i.us.us.6, align 4, !tbaa !12 - %mul13.i.i.us.us.6 = fmul float %16, %485 - %486 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %482 - %arrayidx17.i.i.us.us.6 = getelementptr inbounds float, float* %8, i64 %486 - %487 = load float, float* %arrayidx17.i.i.us.us.6, align 4, !tbaa !12 - %488 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.6, float %487, float %483) #2 - store float %488, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !21 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %489 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 1 - %exitcond.not.6 = icmp eq i64 %489, 32 - br i1 %exitcond.not.6, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !19 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %490 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %490, 7 - %cmp.i.i.us.7 = icmp sgt i32 %28, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %28, %conv2.i.i.us.7 - %mul9.i.i.us.7 = mul nsw i32 %24, %conv2.i.i.us.7 - %491 = sext i32 %mul9.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_syrk_kernel.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %500, %if.end.i.i.us.us.7 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %28, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.us.7 - %492 = load float, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12 - %mul6.i.i.us.us.7 = fmul float %20, %492 - store float %mul6.i.i.us.us.7, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.7 = mul nsw i32 %24, %conv.i.i.us.us.7 - %493 = sext i32 %mul14.i.i.us.us.7 to i64 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %494 = phi float [ %499, %for.body.i.i.us.us.7 ], [ %mul6.i.i.us.us.7, %if.then.i.i.us.us.7 ] - %495 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %491 - %arrayidx12.i.i.us.us.7 = getelementptr inbounds float, float* %8, i64 %495 - %496 = load float, float* %arrayidx12.i.i.us.us.7, align 4, !tbaa !12 - %mul13.i.i.us.us.7 = fmul float %16, %496 - %497 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %493 - %arrayidx17.i.i.us.us.7 = getelementptr inbounds float, float* %8, i64 %497 - %498 = load float, float* %arrayidx17.i.i.us.us.7, align 4, !tbaa !12 - %499 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.7, float %498, float %494) #2 - store float %499, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !21 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %500 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 1 - %exitcond.not.7 = icmp eq i64 %500, 32 - br i1 %exitcond.not.7, label %_pocl_kernel_syrk_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !19 - -if.then.i.i.us.7.1: ; preds = %if.end.i.i.us.7 - %add.i.i.us.7.1 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.7.1 - %501 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %mul6.i.i.us.7.1 = fmul float %20, %501 - store float %mul6.i.i.us.7.1, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.1 - -if.end.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.i.i.us.7 - %502 = or i64 %_local_id_x.i.0.us.7, 2 - %add1.i.i.i.us.7.2 = add nuw nsw i64 %502, %mul.i.i.i - %conv.i.i.us.7.2 = trunc i64 %add1.i.i.i.us.7.2 to i32 - %cmp4.i.i.us.7.2 = icmp sgt i32 %28, %conv.i.i.us.7.2 - br i1 %cmp4.i.i.us.7.2, label %if.then.i.i.us.7.2, label %if.end.i.i.us.7.2 - -if.then.i.i.us.7.2: ; preds = %if.end.i.i.us.7.1 - %add.i.i.us.7.2 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.2 - %idxprom.i.i.us.7.2 = sext i32 %add.i.i.us.7.2 to i64 - %arrayidx.i.i.us.7.2 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.7.2 - %503 = load float, float* %arrayidx.i.i.us.7.2, align 4, !tbaa !12 - %mul6.i.i.us.7.2 = fmul float %20, %503 - store float %mul6.i.i.us.7.2, float* %arrayidx.i.i.us.7.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.2 - -if.end.i.i.us.7.2: ; preds = %if.then.i.i.us.7.2, %if.end.i.i.us.7.1 - %504 = or i64 %_local_id_x.i.0.us.7, 3 - %add1.i.i.i.us.7.3 = add nuw nsw i64 %504, %mul.i.i.i - %conv.i.i.us.7.3 = trunc i64 %add1.i.i.i.us.7.3 to i32 - %cmp4.i.i.us.7.3 = icmp sgt i32 %28, %conv.i.i.us.7.3 - br i1 %cmp4.i.i.us.7.3, label %if.then.i.i.us.7.3, label %if.end.i.i.us.7.3 - -if.then.i.i.us.7.3: ; preds = %if.end.i.i.us.7.2 - %add.i.i.us.7.3 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.3 - %idxprom.i.i.us.7.3 = sext i32 %add.i.i.us.7.3 to i64 - %arrayidx.i.i.us.7.3 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.7.3 - %505 = load float, float* %arrayidx.i.i.us.7.3, align 4, !tbaa !12 - %mul6.i.i.us.7.3 = fmul float %20, %505 - store float %mul6.i.i.us.7.3, float* %arrayidx.i.i.us.7.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.3 - -if.end.i.i.us.7.3: ; preds = %if.then.i.i.us.7.3, %if.end.i.i.us.7.2 - %506 = add nuw nsw i64 %_local_id_x.i.0.us.7, 4 - %exitcond34.7.not.3 = icmp eq i64 %506, 32 - br i1 %exitcond34.7.not.3, label %_pocl_kernel_syrk_kernel.exit.loopexit238, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !32 - -if.then.i.i.us.6.1: ; preds = %if.end.i.i.us.6 - %add.i.i.us.6.1 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.6.1 - %507 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %mul6.i.i.us.6.1 = fmul float %20, %507 - store float %mul6.i.i.us.6.1, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.1 - -if.end.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.i.i.us.6 - %508 = or i64 %_local_id_x.i.0.us.6, 2 - %add1.i.i.i.us.6.2 = add nuw nsw i64 %508, %mul.i.i.i - %conv.i.i.us.6.2 = trunc i64 %add1.i.i.i.us.6.2 to i32 - %cmp4.i.i.us.6.2 = icmp sgt i32 %28, %conv.i.i.us.6.2 - br i1 %cmp4.i.i.us.6.2, label %if.then.i.i.us.6.2, label %if.end.i.i.us.6.2 - -if.then.i.i.us.6.2: ; preds = %if.end.i.i.us.6.1 - %add.i.i.us.6.2 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.2 - %idxprom.i.i.us.6.2 = sext i32 %add.i.i.us.6.2 to i64 - %arrayidx.i.i.us.6.2 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.6.2 - %509 = load float, float* %arrayidx.i.i.us.6.2, align 4, !tbaa !12 - %mul6.i.i.us.6.2 = fmul float %20, %509 - store float %mul6.i.i.us.6.2, float* %arrayidx.i.i.us.6.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.2 - -if.end.i.i.us.6.2: ; preds = %if.then.i.i.us.6.2, %if.end.i.i.us.6.1 - %510 = or i64 %_local_id_x.i.0.us.6, 3 - %add1.i.i.i.us.6.3 = add nuw nsw i64 %510, %mul.i.i.i - %conv.i.i.us.6.3 = trunc i64 %add1.i.i.i.us.6.3 to i32 - %cmp4.i.i.us.6.3 = icmp sgt i32 %28, %conv.i.i.us.6.3 - br i1 %cmp4.i.i.us.6.3, label %if.then.i.i.us.6.3, label %if.end.i.i.us.6.3 - -if.then.i.i.us.6.3: ; preds = %if.end.i.i.us.6.2 - %add.i.i.us.6.3 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.3 - %idxprom.i.i.us.6.3 = sext i32 %add.i.i.us.6.3 to i64 - %arrayidx.i.i.us.6.3 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.6.3 - %511 = load float, float* %arrayidx.i.i.us.6.3, align 4, !tbaa !12 - %mul6.i.i.us.6.3 = fmul float %20, %511 - store float %mul6.i.i.us.6.3, float* %arrayidx.i.i.us.6.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.3 - -if.end.i.i.us.6.3: ; preds = %if.then.i.i.us.6.3, %if.end.i.i.us.6.2 - %512 = add nuw nsw i64 %_local_id_x.i.0.us.6, 4 - %exitcond34.6.not.3 = icmp eq i64 %512, 32 - br i1 %exitcond34.6.not.3, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !33 - -if.then.i.i.us.5.1: ; preds = %if.end.i.i.us.5 - %add.i.i.us.5.1 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.5.1 - %513 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %mul6.i.i.us.5.1 = fmul float %20, %513 - store float %mul6.i.i.us.5.1, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.1 - -if.end.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.i.i.us.5 - %514 = or i64 %_local_id_x.i.0.us.5, 2 - %add1.i.i.i.us.5.2 = add nuw nsw i64 %514, %mul.i.i.i - %conv.i.i.us.5.2 = trunc i64 %add1.i.i.i.us.5.2 to i32 - %cmp4.i.i.us.5.2 = icmp sgt i32 %28, %conv.i.i.us.5.2 - br i1 %cmp4.i.i.us.5.2, label %if.then.i.i.us.5.2, label %if.end.i.i.us.5.2 - -if.then.i.i.us.5.2: ; preds = %if.end.i.i.us.5.1 - %add.i.i.us.5.2 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.2 - %idxprom.i.i.us.5.2 = sext i32 %add.i.i.us.5.2 to i64 - %arrayidx.i.i.us.5.2 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.5.2 - %515 = load float, float* %arrayidx.i.i.us.5.2, align 4, !tbaa !12 - %mul6.i.i.us.5.2 = fmul float %20, %515 - store float %mul6.i.i.us.5.2, float* %arrayidx.i.i.us.5.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.2 - -if.end.i.i.us.5.2: ; preds = %if.then.i.i.us.5.2, %if.end.i.i.us.5.1 - %516 = or i64 %_local_id_x.i.0.us.5, 3 - %add1.i.i.i.us.5.3 = add nuw nsw i64 %516, %mul.i.i.i - %conv.i.i.us.5.3 = trunc i64 %add1.i.i.i.us.5.3 to i32 - %cmp4.i.i.us.5.3 = icmp sgt i32 %28, %conv.i.i.us.5.3 - br i1 %cmp4.i.i.us.5.3, label %if.then.i.i.us.5.3, label %if.end.i.i.us.5.3 - -if.then.i.i.us.5.3: ; preds = %if.end.i.i.us.5.2 - %add.i.i.us.5.3 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.3 - %idxprom.i.i.us.5.3 = sext i32 %add.i.i.us.5.3 to i64 - %arrayidx.i.i.us.5.3 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.5.3 - %517 = load float, float* %arrayidx.i.i.us.5.3, align 4, !tbaa !12 - %mul6.i.i.us.5.3 = fmul float %20, %517 - store float %mul6.i.i.us.5.3, float* %arrayidx.i.i.us.5.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.3 - -if.end.i.i.us.5.3: ; preds = %if.then.i.i.us.5.3, %if.end.i.i.us.5.2 - %518 = add nuw nsw i64 %_local_id_x.i.0.us.5, 4 - %exitcond34.5.not.3 = icmp eq i64 %518, 32 - br i1 %exitcond34.5.not.3, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !34 - -if.then.i.i.us.4.1: ; preds = %if.end.i.i.us.4 - %add.i.i.us.4.1 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.4.1 - %519 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %mul6.i.i.us.4.1 = fmul float %20, %519 - store float %mul6.i.i.us.4.1, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.1 - -if.end.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.i.i.us.4 - %520 = or i64 %_local_id_x.i.0.us.4, 2 - %add1.i.i.i.us.4.2 = add nuw nsw i64 %520, %mul.i.i.i - %conv.i.i.us.4.2 = trunc i64 %add1.i.i.i.us.4.2 to i32 - %cmp4.i.i.us.4.2 = icmp sgt i32 %28, %conv.i.i.us.4.2 - br i1 %cmp4.i.i.us.4.2, label %if.then.i.i.us.4.2, label %if.end.i.i.us.4.2 - -if.then.i.i.us.4.2: ; preds = %if.end.i.i.us.4.1 - %add.i.i.us.4.2 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.2 - %idxprom.i.i.us.4.2 = sext i32 %add.i.i.us.4.2 to i64 - %arrayidx.i.i.us.4.2 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.4.2 - %521 = load float, float* %arrayidx.i.i.us.4.2, align 4, !tbaa !12 - %mul6.i.i.us.4.2 = fmul float %20, %521 - store float %mul6.i.i.us.4.2, float* %arrayidx.i.i.us.4.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.2 - -if.end.i.i.us.4.2: ; preds = %if.then.i.i.us.4.2, %if.end.i.i.us.4.1 - %522 = or i64 %_local_id_x.i.0.us.4, 3 - %add1.i.i.i.us.4.3 = add nuw nsw i64 %522, %mul.i.i.i - %conv.i.i.us.4.3 = trunc i64 %add1.i.i.i.us.4.3 to i32 - %cmp4.i.i.us.4.3 = icmp sgt i32 %28, %conv.i.i.us.4.3 - br i1 %cmp4.i.i.us.4.3, label %if.then.i.i.us.4.3, label %if.end.i.i.us.4.3 - -if.then.i.i.us.4.3: ; preds = %if.end.i.i.us.4.2 - %add.i.i.us.4.3 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.3 - %idxprom.i.i.us.4.3 = sext i32 %add.i.i.us.4.3 to i64 - %arrayidx.i.i.us.4.3 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.4.3 - %523 = load float, float* %arrayidx.i.i.us.4.3, align 4, !tbaa !12 - %mul6.i.i.us.4.3 = fmul float %20, %523 - store float %mul6.i.i.us.4.3, float* %arrayidx.i.i.us.4.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.3 - -if.end.i.i.us.4.3: ; preds = %if.then.i.i.us.4.3, %if.end.i.i.us.4.2 - %524 = add nuw nsw i64 %_local_id_x.i.0.us.4, 4 - %exitcond34.4.not.3 = icmp eq i64 %524, 32 - br i1 %exitcond34.4.not.3, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !35 - -if.then.i.i.us.3.1: ; preds = %if.end.i.i.us.3 - %add.i.i.us.3.1 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.3.1 - %525 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %mul6.i.i.us.3.1 = fmul float %20, %525 - store float %mul6.i.i.us.3.1, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.1 - -if.end.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.i.i.us.3 - %526 = or i64 %_local_id_x.i.0.us.3, 2 - %add1.i.i.i.us.3.2 = add nuw nsw i64 %526, %mul.i.i.i - %conv.i.i.us.3.2 = trunc i64 %add1.i.i.i.us.3.2 to i32 - %cmp4.i.i.us.3.2 = icmp sgt i32 %28, %conv.i.i.us.3.2 - br i1 %cmp4.i.i.us.3.2, label %if.then.i.i.us.3.2, label %if.end.i.i.us.3.2 - -if.then.i.i.us.3.2: ; preds = %if.end.i.i.us.3.1 - %add.i.i.us.3.2 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.2 - %idxprom.i.i.us.3.2 = sext i32 %add.i.i.us.3.2 to i64 - %arrayidx.i.i.us.3.2 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.3.2 - %527 = load float, float* %arrayidx.i.i.us.3.2, align 4, !tbaa !12 - %mul6.i.i.us.3.2 = fmul float %20, %527 - store float %mul6.i.i.us.3.2, float* %arrayidx.i.i.us.3.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.2 - -if.end.i.i.us.3.2: ; preds = %if.then.i.i.us.3.2, %if.end.i.i.us.3.1 - %528 = or i64 %_local_id_x.i.0.us.3, 3 - %add1.i.i.i.us.3.3 = add nuw nsw i64 %528, %mul.i.i.i - %conv.i.i.us.3.3 = trunc i64 %add1.i.i.i.us.3.3 to i32 - %cmp4.i.i.us.3.3 = icmp sgt i32 %28, %conv.i.i.us.3.3 - br i1 %cmp4.i.i.us.3.3, label %if.then.i.i.us.3.3, label %if.end.i.i.us.3.3 - -if.then.i.i.us.3.3: ; preds = %if.end.i.i.us.3.2 - %add.i.i.us.3.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.3 - %idxprom.i.i.us.3.3 = sext i32 %add.i.i.us.3.3 to i64 - %arrayidx.i.i.us.3.3 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.3.3 - %529 = load float, float* %arrayidx.i.i.us.3.3, align 4, !tbaa !12 - %mul6.i.i.us.3.3 = fmul float %20, %529 - store float %mul6.i.i.us.3.3, float* %arrayidx.i.i.us.3.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.3 - -if.end.i.i.us.3.3: ; preds = %if.then.i.i.us.3.3, %if.end.i.i.us.3.2 - %530 = add nuw nsw i64 %_local_id_x.i.0.us.3, 4 - %exitcond34.3.not.3 = icmp eq i64 %530, 32 - br i1 %exitcond34.3.not.3, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !36 - -if.then.i.i.us.2.1: ; preds = %if.end.i.i.us.2 - %add.i.i.us.2.1 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.2.1 - %531 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %mul6.i.i.us.2.1 = fmul float %20, %531 - store float %mul6.i.i.us.2.1, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.1 - -if.end.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.i.i.us.2 - %532 = or i64 %_local_id_x.i.0.us.2, 2 - %add1.i.i.i.us.2.2 = add nuw nsw i64 %532, %mul.i.i.i - %conv.i.i.us.2.2 = trunc i64 %add1.i.i.i.us.2.2 to i32 - %cmp4.i.i.us.2.2 = icmp sgt i32 %28, %conv.i.i.us.2.2 - br i1 %cmp4.i.i.us.2.2, label %if.then.i.i.us.2.2, label %if.end.i.i.us.2.2 - -if.then.i.i.us.2.2: ; preds = %if.end.i.i.us.2.1 - %add.i.i.us.2.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.2 - %idxprom.i.i.us.2.2 = sext i32 %add.i.i.us.2.2 to i64 - %arrayidx.i.i.us.2.2 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.2.2 - %533 = load float, float* %arrayidx.i.i.us.2.2, align 4, !tbaa !12 - %mul6.i.i.us.2.2 = fmul float %20, %533 - store float %mul6.i.i.us.2.2, float* %arrayidx.i.i.us.2.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.2 - -if.end.i.i.us.2.2: ; preds = %if.then.i.i.us.2.2, %if.end.i.i.us.2.1 - %534 = or i64 %_local_id_x.i.0.us.2, 3 - %add1.i.i.i.us.2.3 = add nuw nsw i64 %534, %mul.i.i.i - %conv.i.i.us.2.3 = trunc i64 %add1.i.i.i.us.2.3 to i32 - %cmp4.i.i.us.2.3 = icmp sgt i32 %28, %conv.i.i.us.2.3 - br i1 %cmp4.i.i.us.2.3, label %if.then.i.i.us.2.3, label %if.end.i.i.us.2.3 - -if.then.i.i.us.2.3: ; preds = %if.end.i.i.us.2.2 - %add.i.i.us.2.3 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.3 - %idxprom.i.i.us.2.3 = sext i32 %add.i.i.us.2.3 to i64 - %arrayidx.i.i.us.2.3 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.2.3 - %535 = load float, float* %arrayidx.i.i.us.2.3, align 4, !tbaa !12 - %mul6.i.i.us.2.3 = fmul float %20, %535 - store float %mul6.i.i.us.2.3, float* %arrayidx.i.i.us.2.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.3 - -if.end.i.i.us.2.3: ; preds = %if.then.i.i.us.2.3, %if.end.i.i.us.2.2 - %536 = add nuw nsw i64 %_local_id_x.i.0.us.2, 4 - %exitcond34.2.not.3 = icmp eq i64 %536, 32 - br i1 %exitcond34.2.not.3, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !37 - -if.then.i.i.us.1.1: ; preds = %if.end.i.i.us.1 - %add.i.i.us.1.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.1.1 - %537 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %mul6.i.i.us.1.1 = fmul float %20, %537 - store float %mul6.i.i.us.1.1, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.1 - -if.end.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.i.i.us.1 - %538 = or i64 %_local_id_x.i.0.us.1, 2 - %add1.i.i.i.us.1.2 = add nuw nsw i64 %538, %mul.i.i.i - %conv.i.i.us.1.2 = trunc i64 %add1.i.i.i.us.1.2 to i32 - %cmp4.i.i.us.1.2 = icmp sgt i32 %28, %conv.i.i.us.1.2 - br i1 %cmp4.i.i.us.1.2, label %if.then.i.i.us.1.2, label %if.end.i.i.us.1.2 - -if.then.i.i.us.1.2: ; preds = %if.end.i.i.us.1.1 - %add.i.i.us.1.2 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.2 - %idxprom.i.i.us.1.2 = sext i32 %add.i.i.us.1.2 to i64 - %arrayidx.i.i.us.1.2 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.1.2 - %539 = load float, float* %arrayidx.i.i.us.1.2, align 4, !tbaa !12 - %mul6.i.i.us.1.2 = fmul float %20, %539 - store float %mul6.i.i.us.1.2, float* %arrayidx.i.i.us.1.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.2 - -if.end.i.i.us.1.2: ; preds = %if.then.i.i.us.1.2, %if.end.i.i.us.1.1 - %540 = or i64 %_local_id_x.i.0.us.1, 3 - %add1.i.i.i.us.1.3 = add nuw nsw i64 %540, %mul.i.i.i - %conv.i.i.us.1.3 = trunc i64 %add1.i.i.i.us.1.3 to i32 - %cmp4.i.i.us.1.3 = icmp sgt i32 %28, %conv.i.i.us.1.3 - br i1 %cmp4.i.i.us.1.3, label %if.then.i.i.us.1.3, label %if.end.i.i.us.1.3 - -if.then.i.i.us.1.3: ; preds = %if.end.i.i.us.1.2 - %add.i.i.us.1.3 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.3 - %idxprom.i.i.us.1.3 = sext i32 %add.i.i.us.1.3 to i64 - %arrayidx.i.i.us.1.3 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.1.3 - %541 = load float, float* %arrayidx.i.i.us.1.3, align 4, !tbaa !12 - %mul6.i.i.us.1.3 = fmul float %20, %541 - store float %mul6.i.i.us.1.3, float* %arrayidx.i.i.us.1.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.3 - -if.end.i.i.us.1.3: ; preds = %if.then.i.i.us.1.3, %if.end.i.i.us.1.2 - %542 = add nuw nsw i64 %_local_id_x.i.0.us.1, 4 - %exitcond34.1.not.3 = icmp eq i64 %542, 32 - br i1 %exitcond34.1.not.3, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !38 - -if.then.i.i.us.1214: ; preds = %if.end.i.i.us - %add.i.i.us.1210 = add nsw i32 %mul.i.i.us, %conv.i.i.us.1207 - %idxprom.i.i.us.1211 = sext i32 %add.i.i.us.1210 to i64 - %arrayidx.i.i.us.1212 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.1211 - %543 = load float, float* %arrayidx.i.i.us.1212, align 4, !tbaa !12 - %mul6.i.i.us.1213 = fmul float %20, %543 - store float %mul6.i.i.us.1213, float* %arrayidx.i.i.us.1212, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1215 - -if.end.i.i.us.1215: ; preds = %if.then.i.i.us.1214, %if.end.i.i.us - %544 = or i64 %_local_id_x.i.0.us, 2 - %add1.i.i.i.us.2217 = add nuw nsw i64 %544, %mul.i.i.i - %conv.i.i.us.2218 = trunc i64 %add1.i.i.i.us.2217 to i32 - %cmp4.i.i.us.2219 = icmp sgt i32 %28, %conv.i.i.us.2218 - br i1 %cmp4.i.i.us.2219, label %if.then.i.i.us.2225, label %if.end.i.i.us.2226 - -if.then.i.i.us.2225: ; preds = %if.end.i.i.us.1215 - %add.i.i.us.2221 = add nsw i32 %mul.i.i.us, %conv.i.i.us.2218 - %idxprom.i.i.us.2222 = sext i32 %add.i.i.us.2221 to i64 - %arrayidx.i.i.us.2223 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.2222 - %545 = load float, float* %arrayidx.i.i.us.2223, align 4, !tbaa !12 - %mul6.i.i.us.2224 = fmul float %20, %545 - store float %mul6.i.i.us.2224, float* %arrayidx.i.i.us.2223, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2226 - -if.end.i.i.us.2226: ; preds = %if.then.i.i.us.2225, %if.end.i.i.us.1215 - %546 = or i64 %_local_id_x.i.0.us, 3 - %add1.i.i.i.us.3228 = add nuw nsw i64 %546, %mul.i.i.i - %conv.i.i.us.3229 = trunc i64 %add1.i.i.i.us.3228 to i32 - %cmp4.i.i.us.3230 = icmp sgt i32 %28, %conv.i.i.us.3229 - br i1 %cmp4.i.i.us.3230, label %if.then.i.i.us.3236, label %if.end.i.i.us.3237 - -if.then.i.i.us.3236: ; preds = %if.end.i.i.us.2226 - %add.i.i.us.3232 = add nsw i32 %mul.i.i.us, %conv.i.i.us.3229 - %idxprom.i.i.us.3233 = sext i32 %add.i.i.us.3232 to i64 - %arrayidx.i.i.us.3234 = getelementptr inbounds float, float* %12, i64 %idxprom.i.i.us.3233 - %547 = load float, float* %arrayidx.i.i.us.3234, align 4, !tbaa !12 - %mul6.i.i.us.3235 = fmul float %20, %547 - store float %mul6.i.i.us.3235, float* %arrayidx.i.i.us.3234, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3237 - -if.end.i.i.us.3237: ; preds = %if.then.i.i.us.3236, %if.end.i.i.us.2226 - %548 = add nuw nsw i64 %_local_id_x.i.0.us, 4 - %exitcond34.not.3 = icmp eq i64 %548, 32 - br i1 %exitcond34.not.3, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !39 -} - -; Function Attrs: nounwind -define void @_pocl_kernel_syrk_kernel_workgroup_fast(i8** nocapture readonly %0, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone %1, i64 %2, i64 %3, i64 %4) local_unnamed_addr #2 { - %6 = bitcast i8** %0 to float** - %7 = load float*, float** %6, align 8 - %8 = getelementptr i8*, i8** %0, i64 1 - %9 = bitcast i8** %8 to float** - %10 = load float*, float** %9, align 8 - %11 = getelementptr i8*, i8** %0, i64 2 - %12 = bitcast i8** %11 to float** - %13 = load float*, float** %12, align 8 - %14 = load float, float* %13, align 4 - %15 = getelementptr i8*, i8** %0, i64 3 - %16 = bitcast i8** %15 to float** - %17 = load float*, float** %16, align 8 - %18 = load float, float* %17, align 4 - %19 = getelementptr i8*, i8** %0, i64 4 - %20 = bitcast i8** %19 to i32** - %21 = load i32*, i32** %20, align 8 - %22 = load i32, i32* %21, align 4 - %23 = getelementptr i8*, i8** %0, i64 5 - %24 = bitcast i8** %23 to i32** - %25 = load i32*, i32** %24, align 8 - %26 = load i32, i32* %25, align 4 - %mul.i.i.i = shl i64 %2, 5 - %mul3.i.i.i = shl i64 %3, 3 - %cmp742.i.i = icmp sgt i32 %22, 0 - %wide.trip.count.i.i = zext i32 %22 to i64 - %conv2.i.i.us = trunc i64 %mul3.i.i.i to i32 - %cmp.i.i.us = icmp sgt i32 %26, %conv2.i.i.us - %mul.i.i.us = mul nsw i32 %26, %conv2.i.i.us - br i1 %cmp742.i.i, label %pregion_for_entry.pregion_for_init.i.i.us, label %pregion_for_entry.pregion_for_init.i.i.preheader - -pregion_for_entry.pregion_for_init.i.i.preheader: ; preds = %5 - br i1 %cmp.i.i.us, label %vector.scevcheck, label %pregion_for_end.i.i - -vector.scevcheck: ; preds = %pregion_for_entry.pregion_for_init.i.i.preheader - %27 = trunc i64 %3 to i32 - %28 = mul i32 %26, %27 - %29 = shl i32 %28, 3 - %30 = trunc i64 %2 to i32 - %31 = shl i32 %30, 5 - %32 = add i32 %29, %31 - %33 = icmp sgt i32 %32, 2147483616 - br i1 %33, label %pregion_for_entry.entry.i.i.us.preheader, label %vector.ph - -pregion_for_entry.entry.i.i.us.preheader: ; preds = %vector.scevcheck - br label %pregion_for_entry.entry.i.i.us - -vector.ph: ; preds = %vector.scevcheck - %broadcast.splatinsert = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat = shufflevector <8 x i64> %broadcast.splatinsert, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert38 = insertelement <8 x i32> undef, i32 %26, i32 0 - %broadcast.splat39 = shufflevector <8 x i32> %broadcast.splatinsert38, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert40 = insertelement <8 x float> undef, float %18, i32 0 - %broadcast.splat41 = shufflevector <8 x float> %broadcast.splatinsert40, <8 x float> undef, <8 x i32> zeroinitializer - %34 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %35 = or <8 x i32> %34, - %36 = icmp sgt <8 x i32> %broadcast.splat39, %35 - %37 = extractelement <8 x i32> %35, i32 0 - %38 = add nsw i32 %mul.i.i.us, %37 - %39 = sext i32 %38 to i64 - %40 = getelementptr inbounds float, float* %10, i64 %39 - %41 = bitcast float* %40 to <8 x float>* - %wide.masked.load = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %41, i32 4, <8 x i1> %36, <8 x float> undef), !tbaa !12 - %42 = fmul <8 x float> %broadcast.splat41, %wide.masked.load - %43 = bitcast float* %40 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %42, <8 x float>* %43, i32 4, <8 x i1> %36), !tbaa !12, !llvm.access.group !16 - %44 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %45 = or <8 x i32> %44, - %46 = icmp sgt <8 x i32> %broadcast.splat39, %45 - %47 = extractelement <8 x i32> %45, i32 0 - %48 = add nsw i32 %mul.i.i.us, %47 - %49 = sext i32 %48 to i64 - %50 = getelementptr inbounds float, float* %10, i64 %49 - %51 = bitcast float* %50 to <8 x float>* - %wide.masked.load.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %51, i32 4, <8 x i1> %46, <8 x float> undef), !tbaa !12 - %52 = fmul <8 x float> %broadcast.splat41, %wide.masked.load.1 - %53 = bitcast float* %50 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %52, <8 x float>* %53, i32 4, <8 x i1> %46), !tbaa !12, !llvm.access.group !16 - %54 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %55 = or <8 x i32> %54, - %56 = icmp sgt <8 x i32> %broadcast.splat39, %55 - %57 = extractelement <8 x i32> %55, i32 0 - %58 = add nsw i32 %mul.i.i.us, %57 - %59 = sext i32 %58 to i64 - %60 = getelementptr inbounds float, float* %10, i64 %59 - %61 = bitcast float* %60 to <8 x float>* - %wide.masked.load.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %61, i32 4, <8 x i1> %56, <8 x float> undef), !tbaa !12 - %62 = fmul <8 x float> %broadcast.splat41, %wide.masked.load.2 - %63 = bitcast float* %60 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %62, <8 x float>* %63, i32 4, <8 x i1> %56), !tbaa !12, !llvm.access.group !16 - %64 = trunc <8 x i64> %broadcast.splat to <8 x i32> - %65 = or <8 x i32> %64, - %66 = icmp sgt <8 x i32> %broadcast.splat39, %65 - %67 = extractelement <8 x i32> %65, i32 0 - %68 = add nsw i32 %mul.i.i.us, %67 - %69 = sext i32 %68 to i64 - %70 = getelementptr inbounds float, float* %10, i64 %69 - %71 = bitcast float* %70 to <8 x float>* - %wide.masked.load.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %71, i32 4, <8 x i1> %66, <8 x float> undef), !tbaa !12 - %72 = fmul <8 x float> %broadcast.splat41, %wide.masked.load.3 - %73 = bitcast float* %70 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %72, <8 x float>* %73, i32 4, <8 x i1> %66), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i - -pregion_for_entry.pregion_for_init.i.i.us: ; preds = %5 - %mul9.i.i.us = mul nsw i32 %22, %conv2.i.i.us - %74 = sext i32 %mul9.i.i.us to i64 - br i1 %cmp.i.i.us, label %pregion_for_entry.entry.i.i.us.us.preheader, label %pregion_for_end.i.i.us - -pregion_for_entry.entry.i.i.us.us.preheader: ; preds = %pregion_for_entry.pregion_for_init.i.i.us - br label %pregion_for_entry.entry.i.i.us.us - -pregion_for_end.i.i.us.loopexit: ; preds = %if.end.i.i.us.us - br label %pregion_for_end.i.i.us - -pregion_for_end.i.i.us: ; preds = %pregion_for_end.i.i.us.loopexit, %pregion_for_entry.pregion_for_init.i.i.us - %75 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.1 = or i32 %75, 1 - %cmp.i.i.us.1 = icmp sgt i32 %26, %conv2.i.i.us.1 - %mul.i.i.us.1 = mul nsw i32 %26, %conv2.i.i.us.1 - %mul9.i.i.us.1 = mul nsw i32 %22, %conv2.i.i.us.1 - %76 = sext i32 %mul9.i.i.us.1 to i64 - br i1 %cmp.i.i.us.1, label %pregion_for_entry.entry.i.i.us.us.1.preheader, label %pregion_for_end.i.i.us.1 - -pregion_for_entry.entry.i.i.us.us.1.preheader: ; preds = %pregion_for_end.i.i.us - br label %pregion_for_entry.entry.i.i.us.us.1 - -pregion_for_entry.entry.i.i.us.us: ; preds = %if.end.i.i.us.us, %pregion_for_entry.entry.i.i.us.us.preheader - %_local_id_x.i.0.us.us = phi i64 [ %79, %if.end.i.i.us.us ], [ 0, %pregion_for_entry.entry.i.i.us.us.preheader ] - %add1.i.i.i.us.us = add nuw nsw i64 %_local_id_x.i.0.us.us, %mul.i.i.i - %conv.i.i.us.us = trunc i64 %add1.i.i.i.us.us to i32 - %cmp4.i.i.us.us = icmp sgt i32 %26, %conv.i.i.us.us - br i1 %cmp4.i.i.us.us, label %if.then.i.i.us.us, label %if.end.i.i.us.us - -if.then.i.i.us.us: ; preds = %pregion_for_entry.entry.i.i.us.us - %add.i.i.us.us = add nsw i32 %mul.i.i.us, %conv.i.i.us.us - %idxprom.i.i.us.us = sext i32 %add.i.i.us.us to i64 - %arrayidx.i.i.us.us = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.us - %77 = load float, float* %arrayidx.i.i.us.us, align 4, !tbaa !12 - %mul6.i.i.us.us = fmul float %18, %77 - store float %mul6.i.i.us.us, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us = mul nsw i32 %22, %conv.i.i.us.us - %78 = sext i32 %mul14.i.i.us.us to i64 - br label %for.body.i.i.us.us - -if.end.i.i.us.us.loopexit: ; preds = %for.body.i.i.us.us - br label %if.end.i.i.us.us - -if.end.i.i.us.us: ; preds = %if.end.i.i.us.us.loopexit, %pregion_for_entry.entry.i.i.us.us - %79 = add nuw nsw i64 %_local_id_x.i.0.us.us, 1 - %exitcond.not = icmp eq i64 %79, 32 - br i1 %exitcond.not, label %pregion_for_end.i.i.us.loopexit, label %pregion_for_entry.entry.i.i.us.us, !llvm.loop !19 - -for.body.i.i.us.us: ; preds = %for.body.i.i.us.us, %if.then.i.i.us.us - %indvars.iv.next.i.i3.us.us = phi i64 [ %indvars.iv.next.i.i.us.us, %for.body.i.i.us.us ], [ 0, %if.then.i.i.us.us ] - %80 = phi float [ %85, %for.body.i.i.us.us ], [ %mul6.i.i.us.us, %if.then.i.i.us.us ] - %81 = add nsw i64 %indvars.iv.next.i.i3.us.us, %74 - %arrayidx12.i.i.us.us = getelementptr inbounds float, float* %7, i64 %81 - %82 = load float, float* %arrayidx12.i.i.us.us, align 4, !tbaa !12 - %mul13.i.i.us.us = fmul float %14, %82 - %83 = add nsw i64 %indvars.iv.next.i.i3.us.us, %78 - %arrayidx17.i.i.us.us = getelementptr inbounds float, float* %7, i64 %83 - %84 = load float, float* %arrayidx17.i.i.us.us, align 4, !tbaa !12 - %85 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us, float %84, float %80) #2 - store float %85, float* %arrayidx.i.i.us.us, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us = add nuw nsw i64 %indvars.iv.next.i.i3.us.us, 1 - %exitcond.not.i.i.us.us = icmp eq i64 %indvars.iv.next.i.i.us.us, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us, label %if.end.i.i.us.us.loopexit, label %for.body.i.i.us.us, !llvm.loop !21 - -pregion_for_entry.entry.i.i.us: ; preds = %if.end.i.i.us.3237, %pregion_for_entry.entry.i.i.us.preheader - %_local_id_x.i.0.us = phi i64 [ %546, %if.end.i.i.us.3237 ], [ 0, %pregion_for_entry.entry.i.i.us.preheader ] - %add1.i.i.i.us = add nuw nsw i64 %_local_id_x.i.0.us, %mul.i.i.i - %conv.i.i.us = trunc i64 %add1.i.i.i.us to i32 - %cmp4.i.i.us = icmp sgt i32 %26, %conv.i.i.us - br i1 %cmp4.i.i.us, label %if.then.i.i.us, label %if.end.i.i.us - -if.then.i.i.us: ; preds = %pregion_for_entry.entry.i.i.us - %add.i.i.us = add nsw i32 %mul.i.i.us, %conv.i.i.us - %idxprom.i.i.us = sext i32 %add.i.i.us to i64 - %arrayidx.i.i.us = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us - %86 = load float, float* %arrayidx.i.i.us, align 4, !tbaa !12 - %mul6.i.i.us = fmul float %18, %86 - store float %mul6.i.i.us, float* %arrayidx.i.i.us, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us - -if.end.i.i.us: ; preds = %if.then.i.i.us, %pregion_for_entry.entry.i.i.us - %87 = or i64 %_local_id_x.i.0.us, 1 - %add1.i.i.i.us.1206 = add nuw nsw i64 %87, %mul.i.i.i - %conv.i.i.us.1207 = trunc i64 %add1.i.i.i.us.1206 to i32 - %cmp4.i.i.us.1208 = icmp sgt i32 %26, %conv.i.i.us.1207 - br i1 %cmp4.i.i.us.1208, label %if.then.i.i.us.1214, label %if.end.i.i.us.1215 - -pregion_for_end.i.i.loopexit: ; preds = %if.end.i.i.us.3237 - br label %pregion_for_end.i.i - -pregion_for_end.i.i: ; preds = %pregion_for_end.i.i.loopexit, %vector.ph, %pregion_for_entry.pregion_for_init.i.i.preheader - %88 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.1 = or i32 %88, 1 - %cmp.i.i.1 = icmp sgt i32 %26, %conv2.i.i.1 - %mul.i.i.1 = mul nsw i32 %26, %conv2.i.i.1 - br i1 %cmp.i.i.1, label %vector.scevcheck49, label %pregion_for_end.i.i.1 - -vector.scevcheck49: ; preds = %pregion_for_end.i.i - %89 = mul i32 %26, %conv2.i.i.1 - %90 = trunc i64 %2 to i32 - %91 = shl i32 %90, 5 - %92 = add i32 %89, %91 - %93 = icmp sgt i32 %92, 2147483616 - br i1 %93, label %pregion_for_entry.entry.i.i.us.1.preheader, label %vector.ph50 - -pregion_for_entry.entry.i.i.us.1.preheader: ; preds = %vector.scevcheck49 - br label %pregion_for_entry.entry.i.i.us.1 - -vector.ph50: ; preds = %vector.scevcheck49 - %broadcast.splatinsert57 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat58 = shufflevector <8 x i64> %broadcast.splatinsert57, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert59 = insertelement <8 x i32> undef, i32 %26, i32 0 - %broadcast.splat60 = shufflevector <8 x i32> %broadcast.splatinsert59, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert62 = insertelement <8 x float> undef, float %18, i32 0 - %broadcast.splat63 = shufflevector <8 x float> %broadcast.splatinsert62, <8 x float> undef, <8 x i32> zeroinitializer - %94 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %95 = or <8 x i32> %94, - %96 = icmp sgt <8 x i32> %broadcast.splat60, %95 - %97 = extractelement <8 x i32> %95, i32 0 - %98 = add nsw i32 %mul.i.i.1, %97 - %99 = sext i32 %98 to i64 - %100 = getelementptr inbounds float, float* %10, i64 %99 - %101 = bitcast float* %100 to <8 x float>* - %wide.masked.load61 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %101, i32 4, <8 x i1> %96, <8 x float> undef), !tbaa !12 - %102 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61 - %103 = bitcast float* %100 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %102, <8 x float>* %103, i32 4, <8 x i1> %96), !tbaa !12, !llvm.access.group !16 - %104 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %105 = or <8 x i32> %104, - %106 = icmp sgt <8 x i32> %broadcast.splat60, %105 - %107 = extractelement <8 x i32> %105, i32 0 - %108 = add nsw i32 %mul.i.i.1, %107 - %109 = sext i32 %108 to i64 - %110 = getelementptr inbounds float, float* %10, i64 %109 - %111 = bitcast float* %110 to <8 x float>* - %wide.masked.load61.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %111, i32 4, <8 x i1> %106, <8 x float> undef), !tbaa !12 - %112 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61.1 - %113 = bitcast float* %110 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %112, <8 x float>* %113, i32 4, <8 x i1> %106), !tbaa !12, !llvm.access.group !16 - %114 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %115 = or <8 x i32> %114, - %116 = icmp sgt <8 x i32> %broadcast.splat60, %115 - %117 = extractelement <8 x i32> %115, i32 0 - %118 = add nsw i32 %mul.i.i.1, %117 - %119 = sext i32 %118 to i64 - %120 = getelementptr inbounds float, float* %10, i64 %119 - %121 = bitcast float* %120 to <8 x float>* - %wide.masked.load61.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %121, i32 4, <8 x i1> %116, <8 x float> undef), !tbaa !12 - %122 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61.2 - %123 = bitcast float* %120 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %122, <8 x float>* %123, i32 4, <8 x i1> %116), !tbaa !12, !llvm.access.group !16 - %124 = trunc <8 x i64> %broadcast.splat58 to <8 x i32> - %125 = or <8 x i32> %124, - %126 = icmp sgt <8 x i32> %broadcast.splat60, %125 - %127 = extractelement <8 x i32> %125, i32 0 - %128 = add nsw i32 %mul.i.i.1, %127 - %129 = sext i32 %128 to i64 - %130 = getelementptr inbounds float, float* %10, i64 %129 - %131 = bitcast float* %130 to <8 x float>* - %wide.masked.load61.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %131, i32 4, <8 x i1> %126, <8 x float> undef), !tbaa !12 - %132 = fmul <8 x float> %broadcast.splat63, %wide.masked.load61.3 - %133 = bitcast float* %130 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %132, <8 x float>* %133, i32 4, <8 x i1> %126), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.1 - -_pocl_kernel_syrk_kernel.exit.loopexit: ; preds = %if.end.i.i.us.us.7 - br label %_pocl_kernel_syrk_kernel.exit - -_pocl_kernel_syrk_kernel.exit.loopexit238: ; preds = %if.end.i.i.us.7.3 - br label %_pocl_kernel_syrk_kernel.exit - -_pocl_kernel_syrk_kernel.exit: ; preds = %pregion_for_end.i.i.us.6, %vector.ph182, %pregion_for_end.i.i.6, %_pocl_kernel_syrk_kernel.exit.loopexit238, %_pocl_kernel_syrk_kernel.exit.loopexit - ret void - -pregion_for_entry.entry.i.i.us.1: ; preds = %if.end.i.i.us.1.3, %pregion_for_entry.entry.i.i.us.1.preheader - %_local_id_x.i.0.us.1 = phi i64 [ %540, %if.end.i.i.us.1.3 ], [ 0, %pregion_for_entry.entry.i.i.us.1.preheader ] - %add1.i.i.i.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.1, %mul.i.i.i - %conv.i.i.us.1 = trunc i64 %add1.i.i.i.us.1 to i32 - %cmp4.i.i.us.1 = icmp sgt i32 %26, %conv.i.i.us.1 - br i1 %cmp4.i.i.us.1, label %if.then.i.i.us.1, label %if.end.i.i.us.1 - -if.then.i.i.us.1: ; preds = %pregion_for_entry.entry.i.i.us.1 - %add.i.i.us.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1 - %idxprom.i.i.us.1 = sext i32 %add.i.i.us.1 to i64 - %arrayidx.i.i.us.1 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.1 - %134 = load float, float* %arrayidx.i.i.us.1, align 4, !tbaa !12 - %mul6.i.i.us.1 = fmul float %18, %134 - store float %mul6.i.i.us.1, float* %arrayidx.i.i.us.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1 - -if.end.i.i.us.1: ; preds = %if.then.i.i.us.1, %pregion_for_entry.entry.i.i.us.1 - %135 = or i64 %_local_id_x.i.0.us.1, 1 - %add1.i.i.i.us.1.1 = add nuw nsw i64 %135, %mul.i.i.i - %conv.i.i.us.1.1 = trunc i64 %add1.i.i.i.us.1.1 to i32 - %cmp4.i.i.us.1.1 = icmp sgt i32 %26, %conv.i.i.us.1.1 - br i1 %cmp4.i.i.us.1.1, label %if.then.i.i.us.1.1, label %if.end.i.i.us.1.1 - -pregion_for_end.i.i.1.loopexit: ; preds = %if.end.i.i.us.1.3 - br label %pregion_for_end.i.i.1 - -pregion_for_end.i.i.1: ; preds = %pregion_for_end.i.i.1.loopexit, %vector.ph50, %pregion_for_end.i.i - %136 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.2 = or i32 %136, 2 - %cmp.i.i.2 = icmp sgt i32 %26, %conv2.i.i.2 - %mul.i.i.2 = mul nsw i32 %26, %conv2.i.i.2 - br i1 %cmp.i.i.2, label %vector.scevcheck71, label %pregion_for_end.i.i.2 - -vector.scevcheck71: ; preds = %pregion_for_end.i.i.1 - %137 = mul i32 %26, %conv2.i.i.2 - %138 = trunc i64 %2 to i32 - %139 = shl i32 %138, 5 - %140 = add i32 %137, %139 - %141 = icmp sgt i32 %140, 2147483616 - br i1 %141, label %pregion_for_entry.entry.i.i.us.2.preheader, label %vector.ph72 - -pregion_for_entry.entry.i.i.us.2.preheader: ; preds = %vector.scevcheck71 - br label %pregion_for_entry.entry.i.i.us.2 - -vector.ph72: ; preds = %vector.scevcheck71 - %broadcast.splatinsert79 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat80 = shufflevector <8 x i64> %broadcast.splatinsert79, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert81 = insertelement <8 x i32> undef, i32 %26, i32 0 - %broadcast.splat82 = shufflevector <8 x i32> %broadcast.splatinsert81, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert84 = insertelement <8 x float> undef, float %18, i32 0 - %broadcast.splat85 = shufflevector <8 x float> %broadcast.splatinsert84, <8 x float> undef, <8 x i32> zeroinitializer - %142 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %143 = or <8 x i32> %142, - %144 = icmp sgt <8 x i32> %broadcast.splat82, %143 - %145 = extractelement <8 x i32> %143, i32 0 - %146 = add nsw i32 %mul.i.i.2, %145 - %147 = sext i32 %146 to i64 - %148 = getelementptr inbounds float, float* %10, i64 %147 - %149 = bitcast float* %148 to <8 x float>* - %wide.masked.load83 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %149, i32 4, <8 x i1> %144, <8 x float> undef), !tbaa !12 - %150 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83 - %151 = bitcast float* %148 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %150, <8 x float>* %151, i32 4, <8 x i1> %144), !tbaa !12, !llvm.access.group !16 - %152 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %153 = or <8 x i32> %152, - %154 = icmp sgt <8 x i32> %broadcast.splat82, %153 - %155 = extractelement <8 x i32> %153, i32 0 - %156 = add nsw i32 %mul.i.i.2, %155 - %157 = sext i32 %156 to i64 - %158 = getelementptr inbounds float, float* %10, i64 %157 - %159 = bitcast float* %158 to <8 x float>* - %wide.masked.load83.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %159, i32 4, <8 x i1> %154, <8 x float> undef), !tbaa !12 - %160 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83.1 - %161 = bitcast float* %158 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %160, <8 x float>* %161, i32 4, <8 x i1> %154), !tbaa !12, !llvm.access.group !16 - %162 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %163 = or <8 x i32> %162, - %164 = icmp sgt <8 x i32> %broadcast.splat82, %163 - %165 = extractelement <8 x i32> %163, i32 0 - %166 = add nsw i32 %mul.i.i.2, %165 - %167 = sext i32 %166 to i64 - %168 = getelementptr inbounds float, float* %10, i64 %167 - %169 = bitcast float* %168 to <8 x float>* - %wide.masked.load83.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %169, i32 4, <8 x i1> %164, <8 x float> undef), !tbaa !12 - %170 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83.2 - %171 = bitcast float* %168 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %170, <8 x float>* %171, i32 4, <8 x i1> %164), !tbaa !12, !llvm.access.group !16 - %172 = trunc <8 x i64> %broadcast.splat80 to <8 x i32> - %173 = or <8 x i32> %172, - %174 = icmp sgt <8 x i32> %broadcast.splat82, %173 - %175 = extractelement <8 x i32> %173, i32 0 - %176 = add nsw i32 %mul.i.i.2, %175 - %177 = sext i32 %176 to i64 - %178 = getelementptr inbounds float, float* %10, i64 %177 - %179 = bitcast float* %178 to <8 x float>* - %wide.masked.load83.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %179, i32 4, <8 x i1> %174, <8 x float> undef), !tbaa !12 - %180 = fmul <8 x float> %broadcast.splat85, %wide.masked.load83.3 - %181 = bitcast float* %178 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %180, <8 x float>* %181, i32 4, <8 x i1> %174), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.2 - -pregion_for_entry.entry.i.i.us.2: ; preds = %if.end.i.i.us.2.3, %pregion_for_entry.entry.i.i.us.2.preheader - %_local_id_x.i.0.us.2 = phi i64 [ %534, %if.end.i.i.us.2.3 ], [ 0, %pregion_for_entry.entry.i.i.us.2.preheader ] - %add1.i.i.i.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.2, %mul.i.i.i - %conv.i.i.us.2 = trunc i64 %add1.i.i.i.us.2 to i32 - %cmp4.i.i.us.2 = icmp sgt i32 %26, %conv.i.i.us.2 - br i1 %cmp4.i.i.us.2, label %if.then.i.i.us.2, label %if.end.i.i.us.2 - -if.then.i.i.us.2: ; preds = %pregion_for_entry.entry.i.i.us.2 - %add.i.i.us.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2 - %idxprom.i.i.us.2 = sext i32 %add.i.i.us.2 to i64 - %arrayidx.i.i.us.2 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.2 - %182 = load float, float* %arrayidx.i.i.us.2, align 4, !tbaa !12 - %mul6.i.i.us.2 = fmul float %18, %182 - store float %mul6.i.i.us.2, float* %arrayidx.i.i.us.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2 - -if.end.i.i.us.2: ; preds = %if.then.i.i.us.2, %pregion_for_entry.entry.i.i.us.2 - %183 = or i64 %_local_id_x.i.0.us.2, 1 - %add1.i.i.i.us.2.1 = add nuw nsw i64 %183, %mul.i.i.i - %conv.i.i.us.2.1 = trunc i64 %add1.i.i.i.us.2.1 to i32 - %cmp4.i.i.us.2.1 = icmp sgt i32 %26, %conv.i.i.us.2.1 - br i1 %cmp4.i.i.us.2.1, label %if.then.i.i.us.2.1, label %if.end.i.i.us.2.1 - -pregion_for_end.i.i.2.loopexit: ; preds = %if.end.i.i.us.2.3 - br label %pregion_for_end.i.i.2 - -pregion_for_end.i.i.2: ; preds = %pregion_for_end.i.i.2.loopexit, %vector.ph72, %pregion_for_end.i.i.1 - %184 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.3 = or i32 %184, 3 - %cmp.i.i.3 = icmp sgt i32 %26, %conv2.i.i.3 - %mul.i.i.3 = mul nsw i32 %26, %conv2.i.i.3 - br i1 %cmp.i.i.3, label %vector.scevcheck93, label %pregion_for_end.i.i.3 - -vector.scevcheck93: ; preds = %pregion_for_end.i.i.2 - %185 = mul i32 %26, %conv2.i.i.3 - %186 = trunc i64 %2 to i32 - %187 = shl i32 %186, 5 - %188 = add i32 %185, %187 - %189 = icmp sgt i32 %188, 2147483616 - br i1 %189, label %pregion_for_entry.entry.i.i.us.3.preheader, label %vector.ph94 - -pregion_for_entry.entry.i.i.us.3.preheader: ; preds = %vector.scevcheck93 - br label %pregion_for_entry.entry.i.i.us.3 - -vector.ph94: ; preds = %vector.scevcheck93 - %broadcast.splatinsert101 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat102 = shufflevector <8 x i64> %broadcast.splatinsert101, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert103 = insertelement <8 x i32> undef, i32 %26, i32 0 - %broadcast.splat104 = shufflevector <8 x i32> %broadcast.splatinsert103, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert106 = insertelement <8 x float> undef, float %18, i32 0 - %broadcast.splat107 = shufflevector <8 x float> %broadcast.splatinsert106, <8 x float> undef, <8 x i32> zeroinitializer - %190 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %191 = or <8 x i32> %190, - %192 = icmp sgt <8 x i32> %broadcast.splat104, %191 - %193 = extractelement <8 x i32> %191, i32 0 - %194 = add nsw i32 %mul.i.i.3, %193 - %195 = sext i32 %194 to i64 - %196 = getelementptr inbounds float, float* %10, i64 %195 - %197 = bitcast float* %196 to <8 x float>* - %wide.masked.load105 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %197, i32 4, <8 x i1> %192, <8 x float> undef), !tbaa !12 - %198 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105 - %199 = bitcast float* %196 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %198, <8 x float>* %199, i32 4, <8 x i1> %192), !tbaa !12, !llvm.access.group !16 - %200 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %201 = or <8 x i32> %200, - %202 = icmp sgt <8 x i32> %broadcast.splat104, %201 - %203 = extractelement <8 x i32> %201, i32 0 - %204 = add nsw i32 %mul.i.i.3, %203 - %205 = sext i32 %204 to i64 - %206 = getelementptr inbounds float, float* %10, i64 %205 - %207 = bitcast float* %206 to <8 x float>* - %wide.masked.load105.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %207, i32 4, <8 x i1> %202, <8 x float> undef), !tbaa !12 - %208 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105.1 - %209 = bitcast float* %206 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %208, <8 x float>* %209, i32 4, <8 x i1> %202), !tbaa !12, !llvm.access.group !16 - %210 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %211 = or <8 x i32> %210, - %212 = icmp sgt <8 x i32> %broadcast.splat104, %211 - %213 = extractelement <8 x i32> %211, i32 0 - %214 = add nsw i32 %mul.i.i.3, %213 - %215 = sext i32 %214 to i64 - %216 = getelementptr inbounds float, float* %10, i64 %215 - %217 = bitcast float* %216 to <8 x float>* - %wide.masked.load105.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %217, i32 4, <8 x i1> %212, <8 x float> undef), !tbaa !12 - %218 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105.2 - %219 = bitcast float* %216 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %218, <8 x float>* %219, i32 4, <8 x i1> %212), !tbaa !12, !llvm.access.group !16 - %220 = trunc <8 x i64> %broadcast.splat102 to <8 x i32> - %221 = or <8 x i32> %220, - %222 = icmp sgt <8 x i32> %broadcast.splat104, %221 - %223 = extractelement <8 x i32> %221, i32 0 - %224 = add nsw i32 %mul.i.i.3, %223 - %225 = sext i32 %224 to i64 - %226 = getelementptr inbounds float, float* %10, i64 %225 - %227 = bitcast float* %226 to <8 x float>* - %wide.masked.load105.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %227, i32 4, <8 x i1> %222, <8 x float> undef), !tbaa !12 - %228 = fmul <8 x float> %broadcast.splat107, %wide.masked.load105.3 - %229 = bitcast float* %226 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %228, <8 x float>* %229, i32 4, <8 x i1> %222), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.3 - -pregion_for_entry.entry.i.i.us.3: ; preds = %if.end.i.i.us.3.3, %pregion_for_entry.entry.i.i.us.3.preheader - %_local_id_x.i.0.us.3 = phi i64 [ %528, %if.end.i.i.us.3.3 ], [ 0, %pregion_for_entry.entry.i.i.us.3.preheader ] - %add1.i.i.i.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.3, %mul.i.i.i - %conv.i.i.us.3 = trunc i64 %add1.i.i.i.us.3 to i32 - %cmp4.i.i.us.3 = icmp sgt i32 %26, %conv.i.i.us.3 - br i1 %cmp4.i.i.us.3, label %if.then.i.i.us.3, label %if.end.i.i.us.3 - -if.then.i.i.us.3: ; preds = %pregion_for_entry.entry.i.i.us.3 - %add.i.i.us.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3 - %idxprom.i.i.us.3 = sext i32 %add.i.i.us.3 to i64 - %arrayidx.i.i.us.3 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.3 - %230 = load float, float* %arrayidx.i.i.us.3, align 4, !tbaa !12 - %mul6.i.i.us.3 = fmul float %18, %230 - store float %mul6.i.i.us.3, float* %arrayidx.i.i.us.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3 - -if.end.i.i.us.3: ; preds = %if.then.i.i.us.3, %pregion_for_entry.entry.i.i.us.3 - %231 = or i64 %_local_id_x.i.0.us.3, 1 - %add1.i.i.i.us.3.1 = add nuw nsw i64 %231, %mul.i.i.i - %conv.i.i.us.3.1 = trunc i64 %add1.i.i.i.us.3.1 to i32 - %cmp4.i.i.us.3.1 = icmp sgt i32 %26, %conv.i.i.us.3.1 - br i1 %cmp4.i.i.us.3.1, label %if.then.i.i.us.3.1, label %if.end.i.i.us.3.1 - -pregion_for_end.i.i.3.loopexit: ; preds = %if.end.i.i.us.3.3 - br label %pregion_for_end.i.i.3 - -pregion_for_end.i.i.3: ; preds = %pregion_for_end.i.i.3.loopexit, %vector.ph94, %pregion_for_end.i.i.2 - %232 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.4 = or i32 %232, 4 - %cmp.i.i.4 = icmp sgt i32 %26, %conv2.i.i.4 - %mul.i.i.4 = mul nsw i32 %26, %conv2.i.i.4 - br i1 %cmp.i.i.4, label %vector.scevcheck115, label %pregion_for_end.i.i.4 - -vector.scevcheck115: ; preds = %pregion_for_end.i.i.3 - %233 = mul i32 %26, %conv2.i.i.4 - %234 = trunc i64 %2 to i32 - %235 = shl i32 %234, 5 - %236 = add i32 %233, %235 - %237 = icmp sgt i32 %236, 2147483616 - br i1 %237, label %pregion_for_entry.entry.i.i.us.4.preheader, label %vector.ph116 - -pregion_for_entry.entry.i.i.us.4.preheader: ; preds = %vector.scevcheck115 - br label %pregion_for_entry.entry.i.i.us.4 - -vector.ph116: ; preds = %vector.scevcheck115 - %broadcast.splatinsert123 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat124 = shufflevector <8 x i64> %broadcast.splatinsert123, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert125 = insertelement <8 x i32> undef, i32 %26, i32 0 - %broadcast.splat126 = shufflevector <8 x i32> %broadcast.splatinsert125, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert128 = insertelement <8 x float> undef, float %18, i32 0 - %broadcast.splat129 = shufflevector <8 x float> %broadcast.splatinsert128, <8 x float> undef, <8 x i32> zeroinitializer - %238 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %239 = or <8 x i32> %238, - %240 = icmp sgt <8 x i32> %broadcast.splat126, %239 - %241 = extractelement <8 x i32> %239, i32 0 - %242 = add nsw i32 %mul.i.i.4, %241 - %243 = sext i32 %242 to i64 - %244 = getelementptr inbounds float, float* %10, i64 %243 - %245 = bitcast float* %244 to <8 x float>* - %wide.masked.load127 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %245, i32 4, <8 x i1> %240, <8 x float> undef), !tbaa !12 - %246 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127 - %247 = bitcast float* %244 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %246, <8 x float>* %247, i32 4, <8 x i1> %240), !tbaa !12, !llvm.access.group !16 - %248 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %249 = or <8 x i32> %248, - %250 = icmp sgt <8 x i32> %broadcast.splat126, %249 - %251 = extractelement <8 x i32> %249, i32 0 - %252 = add nsw i32 %mul.i.i.4, %251 - %253 = sext i32 %252 to i64 - %254 = getelementptr inbounds float, float* %10, i64 %253 - %255 = bitcast float* %254 to <8 x float>* - %wide.masked.load127.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %255, i32 4, <8 x i1> %250, <8 x float> undef), !tbaa !12 - %256 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127.1 - %257 = bitcast float* %254 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %256, <8 x float>* %257, i32 4, <8 x i1> %250), !tbaa !12, !llvm.access.group !16 - %258 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %259 = or <8 x i32> %258, - %260 = icmp sgt <8 x i32> %broadcast.splat126, %259 - %261 = extractelement <8 x i32> %259, i32 0 - %262 = add nsw i32 %mul.i.i.4, %261 - %263 = sext i32 %262 to i64 - %264 = getelementptr inbounds float, float* %10, i64 %263 - %265 = bitcast float* %264 to <8 x float>* - %wide.masked.load127.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %265, i32 4, <8 x i1> %260, <8 x float> undef), !tbaa !12 - %266 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127.2 - %267 = bitcast float* %264 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %266, <8 x float>* %267, i32 4, <8 x i1> %260), !tbaa !12, !llvm.access.group !16 - %268 = trunc <8 x i64> %broadcast.splat124 to <8 x i32> - %269 = or <8 x i32> %268, - %270 = icmp sgt <8 x i32> %broadcast.splat126, %269 - %271 = extractelement <8 x i32> %269, i32 0 - %272 = add nsw i32 %mul.i.i.4, %271 - %273 = sext i32 %272 to i64 - %274 = getelementptr inbounds float, float* %10, i64 %273 - %275 = bitcast float* %274 to <8 x float>* - %wide.masked.load127.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %275, i32 4, <8 x i1> %270, <8 x float> undef), !tbaa !12 - %276 = fmul <8 x float> %broadcast.splat129, %wide.masked.load127.3 - %277 = bitcast float* %274 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %276, <8 x float>* %277, i32 4, <8 x i1> %270), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.4 - -pregion_for_entry.entry.i.i.us.4: ; preds = %if.end.i.i.us.4.3, %pregion_for_entry.entry.i.i.us.4.preheader - %_local_id_x.i.0.us.4 = phi i64 [ %522, %if.end.i.i.us.4.3 ], [ 0, %pregion_for_entry.entry.i.i.us.4.preheader ] - %add1.i.i.i.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.4, %mul.i.i.i - %conv.i.i.us.4 = trunc i64 %add1.i.i.i.us.4 to i32 - %cmp4.i.i.us.4 = icmp sgt i32 %26, %conv.i.i.us.4 - br i1 %cmp4.i.i.us.4, label %if.then.i.i.us.4, label %if.end.i.i.us.4 - -if.then.i.i.us.4: ; preds = %pregion_for_entry.entry.i.i.us.4 - %add.i.i.us.4 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4 - %idxprom.i.i.us.4 = sext i32 %add.i.i.us.4 to i64 - %arrayidx.i.i.us.4 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.4 - %278 = load float, float* %arrayidx.i.i.us.4, align 4, !tbaa !12 - %mul6.i.i.us.4 = fmul float %18, %278 - store float %mul6.i.i.us.4, float* %arrayidx.i.i.us.4, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4 - -if.end.i.i.us.4: ; preds = %if.then.i.i.us.4, %pregion_for_entry.entry.i.i.us.4 - %279 = or i64 %_local_id_x.i.0.us.4, 1 - %add1.i.i.i.us.4.1 = add nuw nsw i64 %279, %mul.i.i.i - %conv.i.i.us.4.1 = trunc i64 %add1.i.i.i.us.4.1 to i32 - %cmp4.i.i.us.4.1 = icmp sgt i32 %26, %conv.i.i.us.4.1 - br i1 %cmp4.i.i.us.4.1, label %if.then.i.i.us.4.1, label %if.end.i.i.us.4.1 - -pregion_for_end.i.i.4.loopexit: ; preds = %if.end.i.i.us.4.3 - br label %pregion_for_end.i.i.4 - -pregion_for_end.i.i.4: ; preds = %pregion_for_end.i.i.4.loopexit, %vector.ph116, %pregion_for_end.i.i.3 - %280 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.5 = or i32 %280, 5 - %cmp.i.i.5 = icmp sgt i32 %26, %conv2.i.i.5 - %mul.i.i.5 = mul nsw i32 %26, %conv2.i.i.5 - br i1 %cmp.i.i.5, label %vector.scevcheck137, label %pregion_for_end.i.i.5 - -vector.scevcheck137: ; preds = %pregion_for_end.i.i.4 - %281 = mul i32 %26, %conv2.i.i.5 - %282 = trunc i64 %2 to i32 - %283 = shl i32 %282, 5 - %284 = add i32 %281, %283 - %285 = icmp sgt i32 %284, 2147483616 - br i1 %285, label %pregion_for_entry.entry.i.i.us.5.preheader, label %vector.ph138 - -pregion_for_entry.entry.i.i.us.5.preheader: ; preds = %vector.scevcheck137 - br label %pregion_for_entry.entry.i.i.us.5 - -vector.ph138: ; preds = %vector.scevcheck137 - %broadcast.splatinsert145 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat146 = shufflevector <8 x i64> %broadcast.splatinsert145, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert147 = insertelement <8 x i32> undef, i32 %26, i32 0 - %broadcast.splat148 = shufflevector <8 x i32> %broadcast.splatinsert147, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert150 = insertelement <8 x float> undef, float %18, i32 0 - %broadcast.splat151 = shufflevector <8 x float> %broadcast.splatinsert150, <8 x float> undef, <8 x i32> zeroinitializer - %286 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %287 = or <8 x i32> %286, - %288 = icmp sgt <8 x i32> %broadcast.splat148, %287 - %289 = extractelement <8 x i32> %287, i32 0 - %290 = add nsw i32 %mul.i.i.5, %289 - %291 = sext i32 %290 to i64 - %292 = getelementptr inbounds float, float* %10, i64 %291 - %293 = bitcast float* %292 to <8 x float>* - %wide.masked.load149 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %293, i32 4, <8 x i1> %288, <8 x float> undef), !tbaa !12 - %294 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149 - %295 = bitcast float* %292 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %294, <8 x float>* %295, i32 4, <8 x i1> %288), !tbaa !12, !llvm.access.group !16 - %296 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %297 = or <8 x i32> %296, - %298 = icmp sgt <8 x i32> %broadcast.splat148, %297 - %299 = extractelement <8 x i32> %297, i32 0 - %300 = add nsw i32 %mul.i.i.5, %299 - %301 = sext i32 %300 to i64 - %302 = getelementptr inbounds float, float* %10, i64 %301 - %303 = bitcast float* %302 to <8 x float>* - %wide.masked.load149.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %303, i32 4, <8 x i1> %298, <8 x float> undef), !tbaa !12 - %304 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149.1 - %305 = bitcast float* %302 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %304, <8 x float>* %305, i32 4, <8 x i1> %298), !tbaa !12, !llvm.access.group !16 - %306 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %307 = or <8 x i32> %306, - %308 = icmp sgt <8 x i32> %broadcast.splat148, %307 - %309 = extractelement <8 x i32> %307, i32 0 - %310 = add nsw i32 %mul.i.i.5, %309 - %311 = sext i32 %310 to i64 - %312 = getelementptr inbounds float, float* %10, i64 %311 - %313 = bitcast float* %312 to <8 x float>* - %wide.masked.load149.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %313, i32 4, <8 x i1> %308, <8 x float> undef), !tbaa !12 - %314 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149.2 - %315 = bitcast float* %312 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %314, <8 x float>* %315, i32 4, <8 x i1> %308), !tbaa !12, !llvm.access.group !16 - %316 = trunc <8 x i64> %broadcast.splat146 to <8 x i32> - %317 = or <8 x i32> %316, - %318 = icmp sgt <8 x i32> %broadcast.splat148, %317 - %319 = extractelement <8 x i32> %317, i32 0 - %320 = add nsw i32 %mul.i.i.5, %319 - %321 = sext i32 %320 to i64 - %322 = getelementptr inbounds float, float* %10, i64 %321 - %323 = bitcast float* %322 to <8 x float>* - %wide.masked.load149.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %323, i32 4, <8 x i1> %318, <8 x float> undef), !tbaa !12 - %324 = fmul <8 x float> %broadcast.splat151, %wide.masked.load149.3 - %325 = bitcast float* %322 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %324, <8 x float>* %325, i32 4, <8 x i1> %318), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.5 - -pregion_for_entry.entry.i.i.us.5: ; preds = %if.end.i.i.us.5.3, %pregion_for_entry.entry.i.i.us.5.preheader - %_local_id_x.i.0.us.5 = phi i64 [ %516, %if.end.i.i.us.5.3 ], [ 0, %pregion_for_entry.entry.i.i.us.5.preheader ] - %add1.i.i.i.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.5, %mul.i.i.i - %conv.i.i.us.5 = trunc i64 %add1.i.i.i.us.5 to i32 - %cmp4.i.i.us.5 = icmp sgt i32 %26, %conv.i.i.us.5 - br i1 %cmp4.i.i.us.5, label %if.then.i.i.us.5, label %if.end.i.i.us.5 - -if.then.i.i.us.5: ; preds = %pregion_for_entry.entry.i.i.us.5 - %add.i.i.us.5 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5 - %idxprom.i.i.us.5 = sext i32 %add.i.i.us.5 to i64 - %arrayidx.i.i.us.5 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.5 - %326 = load float, float* %arrayidx.i.i.us.5, align 4, !tbaa !12 - %mul6.i.i.us.5 = fmul float %18, %326 - store float %mul6.i.i.us.5, float* %arrayidx.i.i.us.5, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5 - -if.end.i.i.us.5: ; preds = %if.then.i.i.us.5, %pregion_for_entry.entry.i.i.us.5 - %327 = or i64 %_local_id_x.i.0.us.5, 1 - %add1.i.i.i.us.5.1 = add nuw nsw i64 %327, %mul.i.i.i - %conv.i.i.us.5.1 = trunc i64 %add1.i.i.i.us.5.1 to i32 - %cmp4.i.i.us.5.1 = icmp sgt i32 %26, %conv.i.i.us.5.1 - br i1 %cmp4.i.i.us.5.1, label %if.then.i.i.us.5.1, label %if.end.i.i.us.5.1 - -pregion_for_end.i.i.5.loopexit: ; preds = %if.end.i.i.us.5.3 - br label %pregion_for_end.i.i.5 - -pregion_for_end.i.i.5: ; preds = %pregion_for_end.i.i.5.loopexit, %vector.ph138, %pregion_for_end.i.i.4 - %328 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.6 = or i32 %328, 6 - %cmp.i.i.6 = icmp sgt i32 %26, %conv2.i.i.6 - %mul.i.i.6 = mul nsw i32 %26, %conv2.i.i.6 - br i1 %cmp.i.i.6, label %vector.scevcheck159, label %pregion_for_end.i.i.6 - -vector.scevcheck159: ; preds = %pregion_for_end.i.i.5 - %329 = mul i32 %26, %conv2.i.i.6 - %330 = trunc i64 %2 to i32 - %331 = shl i32 %330, 5 - %332 = add i32 %329, %331 - %333 = icmp sgt i32 %332, 2147483616 - br i1 %333, label %pregion_for_entry.entry.i.i.us.6.preheader, label %vector.ph160 - -pregion_for_entry.entry.i.i.us.6.preheader: ; preds = %vector.scevcheck159 - br label %pregion_for_entry.entry.i.i.us.6 - -vector.ph160: ; preds = %vector.scevcheck159 - %broadcast.splatinsert167 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat168 = shufflevector <8 x i64> %broadcast.splatinsert167, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert169 = insertelement <8 x i32> undef, i32 %26, i32 0 - %broadcast.splat170 = shufflevector <8 x i32> %broadcast.splatinsert169, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert172 = insertelement <8 x float> undef, float %18, i32 0 - %broadcast.splat173 = shufflevector <8 x float> %broadcast.splatinsert172, <8 x float> undef, <8 x i32> zeroinitializer - %334 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %335 = or <8 x i32> %334, - %336 = icmp sgt <8 x i32> %broadcast.splat170, %335 - %337 = extractelement <8 x i32> %335, i32 0 - %338 = add nsw i32 %mul.i.i.6, %337 - %339 = sext i32 %338 to i64 - %340 = getelementptr inbounds float, float* %10, i64 %339 - %341 = bitcast float* %340 to <8 x float>* - %wide.masked.load171 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %341, i32 4, <8 x i1> %336, <8 x float> undef), !tbaa !12 - %342 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171 - %343 = bitcast float* %340 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %342, <8 x float>* %343, i32 4, <8 x i1> %336), !tbaa !12, !llvm.access.group !16 - %344 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %345 = or <8 x i32> %344, - %346 = icmp sgt <8 x i32> %broadcast.splat170, %345 - %347 = extractelement <8 x i32> %345, i32 0 - %348 = add nsw i32 %mul.i.i.6, %347 - %349 = sext i32 %348 to i64 - %350 = getelementptr inbounds float, float* %10, i64 %349 - %351 = bitcast float* %350 to <8 x float>* - %wide.masked.load171.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %351, i32 4, <8 x i1> %346, <8 x float> undef), !tbaa !12 - %352 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171.1 - %353 = bitcast float* %350 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %352, <8 x float>* %353, i32 4, <8 x i1> %346), !tbaa !12, !llvm.access.group !16 - %354 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %355 = or <8 x i32> %354, - %356 = icmp sgt <8 x i32> %broadcast.splat170, %355 - %357 = extractelement <8 x i32> %355, i32 0 - %358 = add nsw i32 %mul.i.i.6, %357 - %359 = sext i32 %358 to i64 - %360 = getelementptr inbounds float, float* %10, i64 %359 - %361 = bitcast float* %360 to <8 x float>* - %wide.masked.load171.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %361, i32 4, <8 x i1> %356, <8 x float> undef), !tbaa !12 - %362 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171.2 - %363 = bitcast float* %360 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %362, <8 x float>* %363, i32 4, <8 x i1> %356), !tbaa !12, !llvm.access.group !16 - %364 = trunc <8 x i64> %broadcast.splat168 to <8 x i32> - %365 = or <8 x i32> %364, - %366 = icmp sgt <8 x i32> %broadcast.splat170, %365 - %367 = extractelement <8 x i32> %365, i32 0 - %368 = add nsw i32 %mul.i.i.6, %367 - %369 = sext i32 %368 to i64 - %370 = getelementptr inbounds float, float* %10, i64 %369 - %371 = bitcast float* %370 to <8 x float>* - %wide.masked.load171.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %371, i32 4, <8 x i1> %366, <8 x float> undef), !tbaa !12 - %372 = fmul <8 x float> %broadcast.splat173, %wide.masked.load171.3 - %373 = bitcast float* %370 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %372, <8 x float>* %373, i32 4, <8 x i1> %366), !tbaa !12, !llvm.access.group !16 - br label %pregion_for_end.i.i.6 - -pregion_for_entry.entry.i.i.us.6: ; preds = %if.end.i.i.us.6.3, %pregion_for_entry.entry.i.i.us.6.preheader - %_local_id_x.i.0.us.6 = phi i64 [ %510, %if.end.i.i.us.6.3 ], [ 0, %pregion_for_entry.entry.i.i.us.6.preheader ] - %add1.i.i.i.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.6, %mul.i.i.i - %conv.i.i.us.6 = trunc i64 %add1.i.i.i.us.6 to i32 - %cmp4.i.i.us.6 = icmp sgt i32 %26, %conv.i.i.us.6 - br i1 %cmp4.i.i.us.6, label %if.then.i.i.us.6, label %if.end.i.i.us.6 - -if.then.i.i.us.6: ; preds = %pregion_for_entry.entry.i.i.us.6 - %add.i.i.us.6 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6 - %idxprom.i.i.us.6 = sext i32 %add.i.i.us.6 to i64 - %arrayidx.i.i.us.6 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.6 - %374 = load float, float* %arrayidx.i.i.us.6, align 4, !tbaa !12 - %mul6.i.i.us.6 = fmul float %18, %374 - store float %mul6.i.i.us.6, float* %arrayidx.i.i.us.6, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6 - -if.end.i.i.us.6: ; preds = %if.then.i.i.us.6, %pregion_for_entry.entry.i.i.us.6 - %375 = or i64 %_local_id_x.i.0.us.6, 1 - %add1.i.i.i.us.6.1 = add nuw nsw i64 %375, %mul.i.i.i - %conv.i.i.us.6.1 = trunc i64 %add1.i.i.i.us.6.1 to i32 - %cmp4.i.i.us.6.1 = icmp sgt i32 %26, %conv.i.i.us.6.1 - br i1 %cmp4.i.i.us.6.1, label %if.then.i.i.us.6.1, label %if.end.i.i.us.6.1 - -pregion_for_end.i.i.6.loopexit: ; preds = %if.end.i.i.us.6.3 - br label %pregion_for_end.i.i.6 - -pregion_for_end.i.i.6: ; preds = %pregion_for_end.i.i.6.loopexit, %vector.ph160, %pregion_for_end.i.i.5 - %376 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.7 = or i32 %376, 7 - %cmp.i.i.7 = icmp sgt i32 %26, %conv2.i.i.7 - %mul.i.i.7 = mul nsw i32 %26, %conv2.i.i.7 - br i1 %cmp.i.i.7, label %vector.scevcheck181, label %_pocl_kernel_syrk_kernel.exit - -vector.scevcheck181: ; preds = %pregion_for_end.i.i.6 - %377 = mul i32 %26, %conv2.i.i.7 - %378 = trunc i64 %2 to i32 - %379 = shl i32 %378, 5 - %380 = add i32 %377, %379 - %381 = icmp sgt i32 %380, 2147483616 - br i1 %381, label %pregion_for_entry.entry.i.i.us.7.preheader, label %vector.ph182 - -pregion_for_entry.entry.i.i.us.7.preheader: ; preds = %vector.scevcheck181 - br label %pregion_for_entry.entry.i.i.us.7 - -vector.ph182: ; preds = %vector.scevcheck181 - %broadcast.splatinsert189 = insertelement <8 x i64> undef, i64 %mul.i.i.i, i32 0 - %broadcast.splat190 = shufflevector <8 x i64> %broadcast.splatinsert189, <8 x i64> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert191 = insertelement <8 x i32> undef, i32 %26, i32 0 - %broadcast.splat192 = shufflevector <8 x i32> %broadcast.splatinsert191, <8 x i32> undef, <8 x i32> zeroinitializer - %broadcast.splatinsert194 = insertelement <8 x float> undef, float %18, i32 0 - %broadcast.splat195 = shufflevector <8 x float> %broadcast.splatinsert194, <8 x float> undef, <8 x i32> zeroinitializer - %382 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %383 = or <8 x i32> %382, - %384 = icmp sgt <8 x i32> %broadcast.splat192, %383 - %385 = extractelement <8 x i32> %383, i32 0 - %386 = add nsw i32 %mul.i.i.7, %385 - %387 = sext i32 %386 to i64 - %388 = getelementptr inbounds float, float* %10, i64 %387 - %389 = bitcast float* %388 to <8 x float>* - %wide.masked.load193 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %389, i32 4, <8 x i1> %384, <8 x float> undef), !tbaa !12 - %390 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193 - %391 = bitcast float* %388 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %390, <8 x float>* %391, i32 4, <8 x i1> %384), !tbaa !12, !llvm.access.group !16 - %392 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %393 = or <8 x i32> %392, - %394 = icmp sgt <8 x i32> %broadcast.splat192, %393 - %395 = extractelement <8 x i32> %393, i32 0 - %396 = add nsw i32 %mul.i.i.7, %395 - %397 = sext i32 %396 to i64 - %398 = getelementptr inbounds float, float* %10, i64 %397 - %399 = bitcast float* %398 to <8 x float>* - %wide.masked.load193.1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %399, i32 4, <8 x i1> %394, <8 x float> undef), !tbaa !12 - %400 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193.1 - %401 = bitcast float* %398 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %400, <8 x float>* %401, i32 4, <8 x i1> %394), !tbaa !12, !llvm.access.group !16 - %402 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %403 = or <8 x i32> %402, - %404 = icmp sgt <8 x i32> %broadcast.splat192, %403 - %405 = extractelement <8 x i32> %403, i32 0 - %406 = add nsw i32 %mul.i.i.7, %405 - %407 = sext i32 %406 to i64 - %408 = getelementptr inbounds float, float* %10, i64 %407 - %409 = bitcast float* %408 to <8 x float>* - %wide.masked.load193.2 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %409, i32 4, <8 x i1> %404, <8 x float> undef), !tbaa !12 - %410 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193.2 - %411 = bitcast float* %408 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %410, <8 x float>* %411, i32 4, <8 x i1> %404), !tbaa !12, !llvm.access.group !16 - %412 = trunc <8 x i64> %broadcast.splat190 to <8 x i32> - %413 = or <8 x i32> %412, - %414 = icmp sgt <8 x i32> %broadcast.splat192, %413 - %415 = extractelement <8 x i32> %413, i32 0 - %416 = add nsw i32 %mul.i.i.7, %415 - %417 = sext i32 %416 to i64 - %418 = getelementptr inbounds float, float* %10, i64 %417 - %419 = bitcast float* %418 to <8 x float>* - %wide.masked.load193.3 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %419, i32 4, <8 x i1> %414, <8 x float> undef), !tbaa !12 - %420 = fmul <8 x float> %broadcast.splat195, %wide.masked.load193.3 - %421 = bitcast float* %418 to <8 x float>* - call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %420, <8 x float>* %421, i32 4, <8 x i1> %414), !tbaa !12, !llvm.access.group !16 - br label %_pocl_kernel_syrk_kernel.exit - -pregion_for_entry.entry.i.i.us.7: ; preds = %if.end.i.i.us.7.3, %pregion_for_entry.entry.i.i.us.7.preheader - %_local_id_x.i.0.us.7 = phi i64 [ %504, %if.end.i.i.us.7.3 ], [ 0, %pregion_for_entry.entry.i.i.us.7.preheader ] - %add1.i.i.i.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.7, %mul.i.i.i - %conv.i.i.us.7 = trunc i64 %add1.i.i.i.us.7 to i32 - %cmp4.i.i.us.7 = icmp sgt i32 %26, %conv.i.i.us.7 - br i1 %cmp4.i.i.us.7, label %if.then.i.i.us.7, label %if.end.i.i.us.7 - -if.then.i.i.us.7: ; preds = %pregion_for_entry.entry.i.i.us.7 - %add.i.i.us.7 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7 - %idxprom.i.i.us.7 = sext i32 %add.i.i.us.7 to i64 - %arrayidx.i.i.us.7 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.7 - %422 = load float, float* %arrayidx.i.i.us.7, align 4, !tbaa !12 - %mul6.i.i.us.7 = fmul float %18, %422 - store float %mul6.i.i.us.7, float* %arrayidx.i.i.us.7, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7 - -if.end.i.i.us.7: ; preds = %if.then.i.i.us.7, %pregion_for_entry.entry.i.i.us.7 - %423 = or i64 %_local_id_x.i.0.us.7, 1 - %add1.i.i.i.us.7.1 = add nuw nsw i64 %423, %mul.i.i.i - %conv.i.i.us.7.1 = trunc i64 %add1.i.i.i.us.7.1 to i32 - %cmp4.i.i.us.7.1 = icmp sgt i32 %26, %conv.i.i.us.7.1 - br i1 %cmp4.i.i.us.7.1, label %if.then.i.i.us.7.1, label %if.end.i.i.us.7.1 - -pregion_for_entry.entry.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1, %pregion_for_entry.entry.i.i.us.us.1.preheader - %_local_id_x.i.0.us.us.1 = phi i64 [ %432, %if.end.i.i.us.us.1 ], [ 0, %pregion_for_entry.entry.i.i.us.us.1.preheader ] - %add1.i.i.i.us.us.1 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, %mul.i.i.i - %conv.i.i.us.us.1 = trunc i64 %add1.i.i.i.us.us.1 to i32 - %cmp4.i.i.us.us.1 = icmp sgt i32 %26, %conv.i.i.us.us.1 - br i1 %cmp4.i.i.us.us.1, label %if.then.i.i.us.us.1, label %if.end.i.i.us.us.1 - -if.then.i.i.us.us.1: ; preds = %pregion_for_entry.entry.i.i.us.us.1 - %add.i.i.us.us.1 = add nsw i32 %mul.i.i.us.1, %conv.i.i.us.us.1 - %idxprom.i.i.us.us.1 = sext i32 %add.i.i.us.us.1 to i64 - %arrayidx.i.i.us.us.1 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.us.1 - %424 = load float, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12 - %mul6.i.i.us.us.1 = fmul float %18, %424 - store float %mul6.i.i.us.us.1, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.1 = mul nsw i32 %22, %conv.i.i.us.us.1 - %425 = sext i32 %mul14.i.i.us.us.1 to i64 - br label %for.body.i.i.us.us.1 - -for.body.i.i.us.us.1: ; preds = %for.body.i.i.us.us.1, %if.then.i.i.us.us.1 - %indvars.iv.next.i.i3.us.us.1 = phi i64 [ %indvars.iv.next.i.i.us.us.1, %for.body.i.i.us.us.1 ], [ 0, %if.then.i.i.us.us.1 ] - %426 = phi float [ %431, %for.body.i.i.us.us.1 ], [ %mul6.i.i.us.us.1, %if.then.i.i.us.us.1 ] - %427 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %76 - %arrayidx12.i.i.us.us.1 = getelementptr inbounds float, float* %7, i64 %427 - %428 = load float, float* %arrayidx12.i.i.us.us.1, align 4, !tbaa !12 - %mul13.i.i.us.us.1 = fmul float %14, %428 - %429 = add nsw i64 %indvars.iv.next.i.i3.us.us.1, %425 - %arrayidx17.i.i.us.us.1 = getelementptr inbounds float, float* %7, i64 %429 - %430 = load float, float* %arrayidx17.i.i.us.us.1, align 4, !tbaa !12 - %431 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.1, float %430, float %426) #2 - store float %431, float* %arrayidx.i.i.us.us.1, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.1 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.1, 1 - %exitcond.not.i.i.us.us.1 = icmp eq i64 %indvars.iv.next.i.i.us.us.1, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.1, label %if.end.i.i.us.us.1.loopexit, label %for.body.i.i.us.us.1, !llvm.loop !21 - -if.end.i.i.us.us.1.loopexit: ; preds = %for.body.i.i.us.us.1 - br label %if.end.i.i.us.us.1 - -if.end.i.i.us.us.1: ; preds = %if.end.i.i.us.us.1.loopexit, %pregion_for_entry.entry.i.i.us.us.1 - %432 = add nuw nsw i64 %_local_id_x.i.0.us.us.1, 1 - %exitcond.not.1 = icmp eq i64 %432, 32 - br i1 %exitcond.not.1, label %pregion_for_end.i.i.us.1.loopexit, label %pregion_for_entry.entry.i.i.us.us.1, !llvm.loop !19 - -pregion_for_end.i.i.us.1.loopexit: ; preds = %if.end.i.i.us.us.1 - br label %pregion_for_end.i.i.us.1 - -pregion_for_end.i.i.us.1: ; preds = %pregion_for_end.i.i.us.1.loopexit, %pregion_for_end.i.i.us - %433 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.2 = or i32 %433, 2 - %cmp.i.i.us.2 = icmp sgt i32 %26, %conv2.i.i.us.2 - %mul.i.i.us.2 = mul nsw i32 %26, %conv2.i.i.us.2 - %mul9.i.i.us.2 = mul nsw i32 %22, %conv2.i.i.us.2 - %434 = sext i32 %mul9.i.i.us.2 to i64 - br i1 %cmp.i.i.us.2, label %pregion_for_entry.entry.i.i.us.us.2.preheader, label %pregion_for_end.i.i.us.2 - -pregion_for_entry.entry.i.i.us.us.2.preheader: ; preds = %pregion_for_end.i.i.us.1 - br label %pregion_for_entry.entry.i.i.us.us.2 - -pregion_for_entry.entry.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2, %pregion_for_entry.entry.i.i.us.us.2.preheader - %_local_id_x.i.0.us.us.2 = phi i64 [ %443, %if.end.i.i.us.us.2 ], [ 0, %pregion_for_entry.entry.i.i.us.us.2.preheader ] - %add1.i.i.i.us.us.2 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, %mul.i.i.i - %conv.i.i.us.us.2 = trunc i64 %add1.i.i.i.us.us.2 to i32 - %cmp4.i.i.us.us.2 = icmp sgt i32 %26, %conv.i.i.us.us.2 - br i1 %cmp4.i.i.us.us.2, label %if.then.i.i.us.us.2, label %if.end.i.i.us.us.2 - -if.then.i.i.us.us.2: ; preds = %pregion_for_entry.entry.i.i.us.us.2 - %add.i.i.us.us.2 = add nsw i32 %mul.i.i.us.2, %conv.i.i.us.us.2 - %idxprom.i.i.us.us.2 = sext i32 %add.i.i.us.us.2 to i64 - %arrayidx.i.i.us.us.2 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.us.2 - %435 = load float, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12 - %mul6.i.i.us.us.2 = fmul float %18, %435 - store float %mul6.i.i.us.us.2, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.2 = mul nsw i32 %22, %conv.i.i.us.us.2 - %436 = sext i32 %mul14.i.i.us.us.2 to i64 - br label %for.body.i.i.us.us.2 - -for.body.i.i.us.us.2: ; preds = %for.body.i.i.us.us.2, %if.then.i.i.us.us.2 - %indvars.iv.next.i.i3.us.us.2 = phi i64 [ %indvars.iv.next.i.i.us.us.2, %for.body.i.i.us.us.2 ], [ 0, %if.then.i.i.us.us.2 ] - %437 = phi float [ %442, %for.body.i.i.us.us.2 ], [ %mul6.i.i.us.us.2, %if.then.i.i.us.us.2 ] - %438 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %434 - %arrayidx12.i.i.us.us.2 = getelementptr inbounds float, float* %7, i64 %438 - %439 = load float, float* %arrayidx12.i.i.us.us.2, align 4, !tbaa !12 - %mul13.i.i.us.us.2 = fmul float %14, %439 - %440 = add nsw i64 %indvars.iv.next.i.i3.us.us.2, %436 - %arrayidx17.i.i.us.us.2 = getelementptr inbounds float, float* %7, i64 %440 - %441 = load float, float* %arrayidx17.i.i.us.us.2, align 4, !tbaa !12 - %442 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.2, float %441, float %437) #2 - store float %442, float* %arrayidx.i.i.us.us.2, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.2 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.2, 1 - %exitcond.not.i.i.us.us.2 = icmp eq i64 %indvars.iv.next.i.i.us.us.2, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.2, label %if.end.i.i.us.us.2.loopexit, label %for.body.i.i.us.us.2, !llvm.loop !21 - -if.end.i.i.us.us.2.loopexit: ; preds = %for.body.i.i.us.us.2 - br label %if.end.i.i.us.us.2 - -if.end.i.i.us.us.2: ; preds = %if.end.i.i.us.us.2.loopexit, %pregion_for_entry.entry.i.i.us.us.2 - %443 = add nuw nsw i64 %_local_id_x.i.0.us.us.2, 1 - %exitcond.not.2 = icmp eq i64 %443, 32 - br i1 %exitcond.not.2, label %pregion_for_end.i.i.us.2.loopexit, label %pregion_for_entry.entry.i.i.us.us.2, !llvm.loop !19 - -pregion_for_end.i.i.us.2.loopexit: ; preds = %if.end.i.i.us.us.2 - br label %pregion_for_end.i.i.us.2 - -pregion_for_end.i.i.us.2: ; preds = %pregion_for_end.i.i.us.2.loopexit, %pregion_for_end.i.i.us.1 - %444 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.3 = or i32 %444, 3 - %cmp.i.i.us.3 = icmp sgt i32 %26, %conv2.i.i.us.3 - %mul.i.i.us.3 = mul nsw i32 %26, %conv2.i.i.us.3 - %mul9.i.i.us.3 = mul nsw i32 %22, %conv2.i.i.us.3 - %445 = sext i32 %mul9.i.i.us.3 to i64 - br i1 %cmp.i.i.us.3, label %pregion_for_entry.entry.i.i.us.us.3.preheader, label %pregion_for_end.i.i.us.3 - -pregion_for_entry.entry.i.i.us.us.3.preheader: ; preds = %pregion_for_end.i.i.us.2 - br label %pregion_for_entry.entry.i.i.us.us.3 - -pregion_for_entry.entry.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3, %pregion_for_entry.entry.i.i.us.us.3.preheader - %_local_id_x.i.0.us.us.3 = phi i64 [ %454, %if.end.i.i.us.us.3 ], [ 0, %pregion_for_entry.entry.i.i.us.us.3.preheader ] - %add1.i.i.i.us.us.3 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, %mul.i.i.i - %conv.i.i.us.us.3 = trunc i64 %add1.i.i.i.us.us.3 to i32 - %cmp4.i.i.us.us.3 = icmp sgt i32 %26, %conv.i.i.us.us.3 - br i1 %cmp4.i.i.us.us.3, label %if.then.i.i.us.us.3, label %if.end.i.i.us.us.3 - -if.then.i.i.us.us.3: ; preds = %pregion_for_entry.entry.i.i.us.us.3 - %add.i.i.us.us.3 = add nsw i32 %mul.i.i.us.3, %conv.i.i.us.us.3 - %idxprom.i.i.us.us.3 = sext i32 %add.i.i.us.us.3 to i64 - %arrayidx.i.i.us.us.3 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.us.3 - %446 = load float, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12 - %mul6.i.i.us.us.3 = fmul float %18, %446 - store float %mul6.i.i.us.us.3, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.3 = mul nsw i32 %22, %conv.i.i.us.us.3 - %447 = sext i32 %mul14.i.i.us.us.3 to i64 - br label %for.body.i.i.us.us.3 - -for.body.i.i.us.us.3: ; preds = %for.body.i.i.us.us.3, %if.then.i.i.us.us.3 - %indvars.iv.next.i.i3.us.us.3 = phi i64 [ %indvars.iv.next.i.i.us.us.3, %for.body.i.i.us.us.3 ], [ 0, %if.then.i.i.us.us.3 ] - %448 = phi float [ %453, %for.body.i.i.us.us.3 ], [ %mul6.i.i.us.us.3, %if.then.i.i.us.us.3 ] - %449 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %445 - %arrayidx12.i.i.us.us.3 = getelementptr inbounds float, float* %7, i64 %449 - %450 = load float, float* %arrayidx12.i.i.us.us.3, align 4, !tbaa !12 - %mul13.i.i.us.us.3 = fmul float %14, %450 - %451 = add nsw i64 %indvars.iv.next.i.i3.us.us.3, %447 - %arrayidx17.i.i.us.us.3 = getelementptr inbounds float, float* %7, i64 %451 - %452 = load float, float* %arrayidx17.i.i.us.us.3, align 4, !tbaa !12 - %453 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.3, float %452, float %448) #2 - store float %453, float* %arrayidx.i.i.us.us.3, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.3 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.3, 1 - %exitcond.not.i.i.us.us.3 = icmp eq i64 %indvars.iv.next.i.i.us.us.3, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.3, label %if.end.i.i.us.us.3.loopexit, label %for.body.i.i.us.us.3, !llvm.loop !21 - -if.end.i.i.us.us.3.loopexit: ; preds = %for.body.i.i.us.us.3 - br label %if.end.i.i.us.us.3 - -if.end.i.i.us.us.3: ; preds = %if.end.i.i.us.us.3.loopexit, %pregion_for_entry.entry.i.i.us.us.3 - %454 = add nuw nsw i64 %_local_id_x.i.0.us.us.3, 1 - %exitcond.not.3 = icmp eq i64 %454, 32 - br i1 %exitcond.not.3, label %pregion_for_end.i.i.us.3.loopexit, label %pregion_for_entry.entry.i.i.us.us.3, !llvm.loop !19 - -pregion_for_end.i.i.us.3.loopexit: ; preds = %if.end.i.i.us.us.3 - br label %pregion_for_end.i.i.us.3 - -pregion_for_end.i.i.us.3: ; preds = %pregion_for_end.i.i.us.3.loopexit, %pregion_for_end.i.i.us.2 - %455 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.4 = or i32 %455, 4 - %cmp.i.i.us.4 = icmp sgt i32 %26, %conv2.i.i.us.4 - %mul.i.i.us.4 = mul nsw i32 %26, %conv2.i.i.us.4 - %mul9.i.i.us.4 = mul nsw i32 %22, %conv2.i.i.us.4 - %456 = sext i32 %mul9.i.i.us.4 to i64 - br i1 %cmp.i.i.us.4, label %pregion_for_entry.entry.i.i.us.us.4.preheader, label %pregion_for_end.i.i.us.4 - -pregion_for_entry.entry.i.i.us.us.4.preheader: ; preds = %pregion_for_end.i.i.us.3 - br label %pregion_for_entry.entry.i.i.us.us.4 - -pregion_for_entry.entry.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4, %pregion_for_entry.entry.i.i.us.us.4.preheader - %_local_id_x.i.0.us.us.4 = phi i64 [ %465, %if.end.i.i.us.us.4 ], [ 0, %pregion_for_entry.entry.i.i.us.us.4.preheader ] - %add1.i.i.i.us.us.4 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, %mul.i.i.i - %conv.i.i.us.us.4 = trunc i64 %add1.i.i.i.us.us.4 to i32 - %cmp4.i.i.us.us.4 = icmp sgt i32 %26, %conv.i.i.us.us.4 - br i1 %cmp4.i.i.us.us.4, label %if.then.i.i.us.us.4, label %if.end.i.i.us.us.4 - -if.then.i.i.us.us.4: ; preds = %pregion_for_entry.entry.i.i.us.us.4 - %add.i.i.us.us.4 = add nsw i32 %mul.i.i.us.4, %conv.i.i.us.us.4 - %idxprom.i.i.us.us.4 = sext i32 %add.i.i.us.us.4 to i64 - %arrayidx.i.i.us.us.4 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.us.4 - %457 = load float, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12 - %mul6.i.i.us.us.4 = fmul float %18, %457 - store float %mul6.i.i.us.us.4, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.4 = mul nsw i32 %22, %conv.i.i.us.us.4 - %458 = sext i32 %mul14.i.i.us.us.4 to i64 - br label %for.body.i.i.us.us.4 - -for.body.i.i.us.us.4: ; preds = %for.body.i.i.us.us.4, %if.then.i.i.us.us.4 - %indvars.iv.next.i.i3.us.us.4 = phi i64 [ %indvars.iv.next.i.i.us.us.4, %for.body.i.i.us.us.4 ], [ 0, %if.then.i.i.us.us.4 ] - %459 = phi float [ %464, %for.body.i.i.us.us.4 ], [ %mul6.i.i.us.us.4, %if.then.i.i.us.us.4 ] - %460 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %456 - %arrayidx12.i.i.us.us.4 = getelementptr inbounds float, float* %7, i64 %460 - %461 = load float, float* %arrayidx12.i.i.us.us.4, align 4, !tbaa !12 - %mul13.i.i.us.us.4 = fmul float %14, %461 - %462 = add nsw i64 %indvars.iv.next.i.i3.us.us.4, %458 - %arrayidx17.i.i.us.us.4 = getelementptr inbounds float, float* %7, i64 %462 - %463 = load float, float* %arrayidx17.i.i.us.us.4, align 4, !tbaa !12 - %464 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.4, float %463, float %459) #2 - store float %464, float* %arrayidx.i.i.us.us.4, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.4 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.4, 1 - %exitcond.not.i.i.us.us.4 = icmp eq i64 %indvars.iv.next.i.i.us.us.4, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.4, label %if.end.i.i.us.us.4.loopexit, label %for.body.i.i.us.us.4, !llvm.loop !21 - -if.end.i.i.us.us.4.loopexit: ; preds = %for.body.i.i.us.us.4 - br label %if.end.i.i.us.us.4 - -if.end.i.i.us.us.4: ; preds = %if.end.i.i.us.us.4.loopexit, %pregion_for_entry.entry.i.i.us.us.4 - %465 = add nuw nsw i64 %_local_id_x.i.0.us.us.4, 1 - %exitcond.not.4 = icmp eq i64 %465, 32 - br i1 %exitcond.not.4, label %pregion_for_end.i.i.us.4.loopexit, label %pregion_for_entry.entry.i.i.us.us.4, !llvm.loop !19 - -pregion_for_end.i.i.us.4.loopexit: ; preds = %if.end.i.i.us.us.4 - br label %pregion_for_end.i.i.us.4 - -pregion_for_end.i.i.us.4: ; preds = %pregion_for_end.i.i.us.4.loopexit, %pregion_for_end.i.i.us.3 - %466 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.5 = or i32 %466, 5 - %cmp.i.i.us.5 = icmp sgt i32 %26, %conv2.i.i.us.5 - %mul.i.i.us.5 = mul nsw i32 %26, %conv2.i.i.us.5 - %mul9.i.i.us.5 = mul nsw i32 %22, %conv2.i.i.us.5 - %467 = sext i32 %mul9.i.i.us.5 to i64 - br i1 %cmp.i.i.us.5, label %pregion_for_entry.entry.i.i.us.us.5.preheader, label %pregion_for_end.i.i.us.5 - -pregion_for_entry.entry.i.i.us.us.5.preheader: ; preds = %pregion_for_end.i.i.us.4 - br label %pregion_for_entry.entry.i.i.us.us.5 - -pregion_for_entry.entry.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5, %pregion_for_entry.entry.i.i.us.us.5.preheader - %_local_id_x.i.0.us.us.5 = phi i64 [ %476, %if.end.i.i.us.us.5 ], [ 0, %pregion_for_entry.entry.i.i.us.us.5.preheader ] - %add1.i.i.i.us.us.5 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, %mul.i.i.i - %conv.i.i.us.us.5 = trunc i64 %add1.i.i.i.us.us.5 to i32 - %cmp4.i.i.us.us.5 = icmp sgt i32 %26, %conv.i.i.us.us.5 - br i1 %cmp4.i.i.us.us.5, label %if.then.i.i.us.us.5, label %if.end.i.i.us.us.5 - -if.then.i.i.us.us.5: ; preds = %pregion_for_entry.entry.i.i.us.us.5 - %add.i.i.us.us.5 = add nsw i32 %mul.i.i.us.5, %conv.i.i.us.us.5 - %idxprom.i.i.us.us.5 = sext i32 %add.i.i.us.us.5 to i64 - %arrayidx.i.i.us.us.5 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.us.5 - %468 = load float, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12 - %mul6.i.i.us.us.5 = fmul float %18, %468 - store float %mul6.i.i.us.us.5, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.5 = mul nsw i32 %22, %conv.i.i.us.us.5 - %469 = sext i32 %mul14.i.i.us.us.5 to i64 - br label %for.body.i.i.us.us.5 - -for.body.i.i.us.us.5: ; preds = %for.body.i.i.us.us.5, %if.then.i.i.us.us.5 - %indvars.iv.next.i.i3.us.us.5 = phi i64 [ %indvars.iv.next.i.i.us.us.5, %for.body.i.i.us.us.5 ], [ 0, %if.then.i.i.us.us.5 ] - %470 = phi float [ %475, %for.body.i.i.us.us.5 ], [ %mul6.i.i.us.us.5, %if.then.i.i.us.us.5 ] - %471 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %467 - %arrayidx12.i.i.us.us.5 = getelementptr inbounds float, float* %7, i64 %471 - %472 = load float, float* %arrayidx12.i.i.us.us.5, align 4, !tbaa !12 - %mul13.i.i.us.us.5 = fmul float %14, %472 - %473 = add nsw i64 %indvars.iv.next.i.i3.us.us.5, %469 - %arrayidx17.i.i.us.us.5 = getelementptr inbounds float, float* %7, i64 %473 - %474 = load float, float* %arrayidx17.i.i.us.us.5, align 4, !tbaa !12 - %475 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.5, float %474, float %470) #2 - store float %475, float* %arrayidx.i.i.us.us.5, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.5 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.5, 1 - %exitcond.not.i.i.us.us.5 = icmp eq i64 %indvars.iv.next.i.i.us.us.5, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.5, label %if.end.i.i.us.us.5.loopexit, label %for.body.i.i.us.us.5, !llvm.loop !21 - -if.end.i.i.us.us.5.loopexit: ; preds = %for.body.i.i.us.us.5 - br label %if.end.i.i.us.us.5 - -if.end.i.i.us.us.5: ; preds = %if.end.i.i.us.us.5.loopexit, %pregion_for_entry.entry.i.i.us.us.5 - %476 = add nuw nsw i64 %_local_id_x.i.0.us.us.5, 1 - %exitcond.not.5 = icmp eq i64 %476, 32 - br i1 %exitcond.not.5, label %pregion_for_end.i.i.us.5.loopexit, label %pregion_for_entry.entry.i.i.us.us.5, !llvm.loop !19 - -pregion_for_end.i.i.us.5.loopexit: ; preds = %if.end.i.i.us.us.5 - br label %pregion_for_end.i.i.us.5 - -pregion_for_end.i.i.us.5: ; preds = %pregion_for_end.i.i.us.5.loopexit, %pregion_for_end.i.i.us.4 - %477 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.6 = or i32 %477, 6 - %cmp.i.i.us.6 = icmp sgt i32 %26, %conv2.i.i.us.6 - %mul.i.i.us.6 = mul nsw i32 %26, %conv2.i.i.us.6 - %mul9.i.i.us.6 = mul nsw i32 %22, %conv2.i.i.us.6 - %478 = sext i32 %mul9.i.i.us.6 to i64 - br i1 %cmp.i.i.us.6, label %pregion_for_entry.entry.i.i.us.us.6.preheader, label %pregion_for_end.i.i.us.6 - -pregion_for_entry.entry.i.i.us.us.6.preheader: ; preds = %pregion_for_end.i.i.us.5 - br label %pregion_for_entry.entry.i.i.us.us.6 - -pregion_for_entry.entry.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6, %pregion_for_entry.entry.i.i.us.us.6.preheader - %_local_id_x.i.0.us.us.6 = phi i64 [ %487, %if.end.i.i.us.us.6 ], [ 0, %pregion_for_entry.entry.i.i.us.us.6.preheader ] - %add1.i.i.i.us.us.6 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, %mul.i.i.i - %conv.i.i.us.us.6 = trunc i64 %add1.i.i.i.us.us.6 to i32 - %cmp4.i.i.us.us.6 = icmp sgt i32 %26, %conv.i.i.us.us.6 - br i1 %cmp4.i.i.us.us.6, label %if.then.i.i.us.us.6, label %if.end.i.i.us.us.6 - -if.then.i.i.us.us.6: ; preds = %pregion_for_entry.entry.i.i.us.us.6 - %add.i.i.us.us.6 = add nsw i32 %mul.i.i.us.6, %conv.i.i.us.us.6 - %idxprom.i.i.us.us.6 = sext i32 %add.i.i.us.us.6 to i64 - %arrayidx.i.i.us.us.6 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.us.6 - %479 = load float, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12 - %mul6.i.i.us.us.6 = fmul float %18, %479 - store float %mul6.i.i.us.us.6, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.6 = mul nsw i32 %22, %conv.i.i.us.us.6 - %480 = sext i32 %mul14.i.i.us.us.6 to i64 - br label %for.body.i.i.us.us.6 - -for.body.i.i.us.us.6: ; preds = %for.body.i.i.us.us.6, %if.then.i.i.us.us.6 - %indvars.iv.next.i.i3.us.us.6 = phi i64 [ %indvars.iv.next.i.i.us.us.6, %for.body.i.i.us.us.6 ], [ 0, %if.then.i.i.us.us.6 ] - %481 = phi float [ %486, %for.body.i.i.us.us.6 ], [ %mul6.i.i.us.us.6, %if.then.i.i.us.us.6 ] - %482 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %478 - %arrayidx12.i.i.us.us.6 = getelementptr inbounds float, float* %7, i64 %482 - %483 = load float, float* %arrayidx12.i.i.us.us.6, align 4, !tbaa !12 - %mul13.i.i.us.us.6 = fmul float %14, %483 - %484 = add nsw i64 %indvars.iv.next.i.i3.us.us.6, %480 - %arrayidx17.i.i.us.us.6 = getelementptr inbounds float, float* %7, i64 %484 - %485 = load float, float* %arrayidx17.i.i.us.us.6, align 4, !tbaa !12 - %486 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.6, float %485, float %481) #2 - store float %486, float* %arrayidx.i.i.us.us.6, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.6 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.6, 1 - %exitcond.not.i.i.us.us.6 = icmp eq i64 %indvars.iv.next.i.i.us.us.6, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.6, label %if.end.i.i.us.us.6.loopexit, label %for.body.i.i.us.us.6, !llvm.loop !21 - -if.end.i.i.us.us.6.loopexit: ; preds = %for.body.i.i.us.us.6 - br label %if.end.i.i.us.us.6 - -if.end.i.i.us.us.6: ; preds = %if.end.i.i.us.us.6.loopexit, %pregion_for_entry.entry.i.i.us.us.6 - %487 = add nuw nsw i64 %_local_id_x.i.0.us.us.6, 1 - %exitcond.not.6 = icmp eq i64 %487, 32 - br i1 %exitcond.not.6, label %pregion_for_end.i.i.us.6.loopexit, label %pregion_for_entry.entry.i.i.us.us.6, !llvm.loop !19 - -pregion_for_end.i.i.us.6.loopexit: ; preds = %if.end.i.i.us.us.6 - br label %pregion_for_end.i.i.us.6 - -pregion_for_end.i.i.us.6: ; preds = %pregion_for_end.i.i.us.6.loopexit, %pregion_for_end.i.i.us.5 - %488 = trunc i64 %mul3.i.i.i to i32 - %conv2.i.i.us.7 = or i32 %488, 7 - %cmp.i.i.us.7 = icmp sgt i32 %26, %conv2.i.i.us.7 - %mul.i.i.us.7 = mul nsw i32 %26, %conv2.i.i.us.7 - %mul9.i.i.us.7 = mul nsw i32 %22, %conv2.i.i.us.7 - %489 = sext i32 %mul9.i.i.us.7 to i64 - br i1 %cmp.i.i.us.7, label %pregion_for_entry.entry.i.i.us.us.7.preheader, label %_pocl_kernel_syrk_kernel.exit - -pregion_for_entry.entry.i.i.us.us.7.preheader: ; preds = %pregion_for_end.i.i.us.6 - br label %pregion_for_entry.entry.i.i.us.us.7 - -pregion_for_entry.entry.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7, %pregion_for_entry.entry.i.i.us.us.7.preheader - %_local_id_x.i.0.us.us.7 = phi i64 [ %498, %if.end.i.i.us.us.7 ], [ 0, %pregion_for_entry.entry.i.i.us.us.7.preheader ] - %add1.i.i.i.us.us.7 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, %mul.i.i.i - %conv.i.i.us.us.7 = trunc i64 %add1.i.i.i.us.us.7 to i32 - %cmp4.i.i.us.us.7 = icmp sgt i32 %26, %conv.i.i.us.us.7 - br i1 %cmp4.i.i.us.us.7, label %if.then.i.i.us.us.7, label %if.end.i.i.us.us.7 - -if.then.i.i.us.us.7: ; preds = %pregion_for_entry.entry.i.i.us.us.7 - %add.i.i.us.us.7 = add nsw i32 %mul.i.i.us.7, %conv.i.i.us.us.7 - %idxprom.i.i.us.us.7 = sext i32 %add.i.i.us.us.7 to i64 - %arrayidx.i.i.us.us.7 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.us.7 - %490 = load float, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12 - %mul6.i.i.us.us.7 = fmul float %18, %490 - store float %mul6.i.i.us.us.7, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %mul14.i.i.us.us.7 = mul nsw i32 %22, %conv.i.i.us.us.7 - %491 = sext i32 %mul14.i.i.us.us.7 to i64 - br label %for.body.i.i.us.us.7 - -for.body.i.i.us.us.7: ; preds = %for.body.i.i.us.us.7, %if.then.i.i.us.us.7 - %indvars.iv.next.i.i3.us.us.7 = phi i64 [ %indvars.iv.next.i.i.us.us.7, %for.body.i.i.us.us.7 ], [ 0, %if.then.i.i.us.us.7 ] - %492 = phi float [ %497, %for.body.i.i.us.us.7 ], [ %mul6.i.i.us.us.7, %if.then.i.i.us.us.7 ] - %493 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %489 - %arrayidx12.i.i.us.us.7 = getelementptr inbounds float, float* %7, i64 %493 - %494 = load float, float* %arrayidx12.i.i.us.us.7, align 4, !tbaa !12 - %mul13.i.i.us.us.7 = fmul float %14, %494 - %495 = add nsw i64 %indvars.iv.next.i.i3.us.us.7, %491 - %arrayidx17.i.i.us.us.7 = getelementptr inbounds float, float* %7, i64 %495 - %496 = load float, float* %arrayidx17.i.i.us.us.7, align 4, !tbaa !12 - %497 = tail call float @llvm.fmuladd.f32(float %mul13.i.i.us.us.7, float %496, float %492) #2 - store float %497, float* %arrayidx.i.i.us.us.7, align 4, !tbaa !12, !llvm.access.group !16 - %indvars.iv.next.i.i.us.us.7 = add nuw nsw i64 %indvars.iv.next.i.i3.us.us.7, 1 - %exitcond.not.i.i.us.us.7 = icmp eq i64 %indvars.iv.next.i.i.us.us.7, %wide.trip.count.i.i - br i1 %exitcond.not.i.i.us.us.7, label %if.end.i.i.us.us.7.loopexit, label %for.body.i.i.us.us.7, !llvm.loop !21 - -if.end.i.i.us.us.7.loopexit: ; preds = %for.body.i.i.us.us.7 - br label %if.end.i.i.us.us.7 - -if.end.i.i.us.us.7: ; preds = %if.end.i.i.us.us.7.loopexit, %pregion_for_entry.entry.i.i.us.us.7 - %498 = add nuw nsw i64 %_local_id_x.i.0.us.us.7, 1 - %exitcond.not.7 = icmp eq i64 %498, 32 - br i1 %exitcond.not.7, label %_pocl_kernel_syrk_kernel.exit.loopexit, label %pregion_for_entry.entry.i.i.us.us.7, !llvm.loop !19 - -if.then.i.i.us.7.1: ; preds = %if.end.i.i.us.7 - %add.i.i.us.7.1 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.1 - %idxprom.i.i.us.7.1 = sext i32 %add.i.i.us.7.1 to i64 - %arrayidx.i.i.us.7.1 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.7.1 - %499 = load float, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12 - %mul6.i.i.us.7.1 = fmul float %18, %499 - store float %mul6.i.i.us.7.1, float* %arrayidx.i.i.us.7.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.1 - -if.end.i.i.us.7.1: ; preds = %if.then.i.i.us.7.1, %if.end.i.i.us.7 - %500 = or i64 %_local_id_x.i.0.us.7, 2 - %add1.i.i.i.us.7.2 = add nuw nsw i64 %500, %mul.i.i.i - %conv.i.i.us.7.2 = trunc i64 %add1.i.i.i.us.7.2 to i32 - %cmp4.i.i.us.7.2 = icmp sgt i32 %26, %conv.i.i.us.7.2 - br i1 %cmp4.i.i.us.7.2, label %if.then.i.i.us.7.2, label %if.end.i.i.us.7.2 - -if.then.i.i.us.7.2: ; preds = %if.end.i.i.us.7.1 - %add.i.i.us.7.2 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.2 - %idxprom.i.i.us.7.2 = sext i32 %add.i.i.us.7.2 to i64 - %arrayidx.i.i.us.7.2 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.7.2 - %501 = load float, float* %arrayidx.i.i.us.7.2, align 4, !tbaa !12 - %mul6.i.i.us.7.2 = fmul float %18, %501 - store float %mul6.i.i.us.7.2, float* %arrayidx.i.i.us.7.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.2 - -if.end.i.i.us.7.2: ; preds = %if.then.i.i.us.7.2, %if.end.i.i.us.7.1 - %502 = or i64 %_local_id_x.i.0.us.7, 3 - %add1.i.i.i.us.7.3 = add nuw nsw i64 %502, %mul.i.i.i - %conv.i.i.us.7.3 = trunc i64 %add1.i.i.i.us.7.3 to i32 - %cmp4.i.i.us.7.3 = icmp sgt i32 %26, %conv.i.i.us.7.3 - br i1 %cmp4.i.i.us.7.3, label %if.then.i.i.us.7.3, label %if.end.i.i.us.7.3 - -if.then.i.i.us.7.3: ; preds = %if.end.i.i.us.7.2 - %add.i.i.us.7.3 = add nsw i32 %mul.i.i.7, %conv.i.i.us.7.3 - %idxprom.i.i.us.7.3 = sext i32 %add.i.i.us.7.3 to i64 - %arrayidx.i.i.us.7.3 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.7.3 - %503 = load float, float* %arrayidx.i.i.us.7.3, align 4, !tbaa !12 - %mul6.i.i.us.7.3 = fmul float %18, %503 - store float %mul6.i.i.us.7.3, float* %arrayidx.i.i.us.7.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.7.3 - -if.end.i.i.us.7.3: ; preds = %if.then.i.i.us.7.3, %if.end.i.i.us.7.2 - %504 = add nuw nsw i64 %_local_id_x.i.0.us.7, 4 - %exitcond34.7.not.3 = icmp eq i64 %504, 32 - br i1 %exitcond34.7.not.3, label %_pocl_kernel_syrk_kernel.exit.loopexit238, label %pregion_for_entry.entry.i.i.us.7, !llvm.loop !40 - -if.then.i.i.us.6.1: ; preds = %if.end.i.i.us.6 - %add.i.i.us.6.1 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.1 - %idxprom.i.i.us.6.1 = sext i32 %add.i.i.us.6.1 to i64 - %arrayidx.i.i.us.6.1 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.6.1 - %505 = load float, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12 - %mul6.i.i.us.6.1 = fmul float %18, %505 - store float %mul6.i.i.us.6.1, float* %arrayidx.i.i.us.6.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.1 - -if.end.i.i.us.6.1: ; preds = %if.then.i.i.us.6.1, %if.end.i.i.us.6 - %506 = or i64 %_local_id_x.i.0.us.6, 2 - %add1.i.i.i.us.6.2 = add nuw nsw i64 %506, %mul.i.i.i - %conv.i.i.us.6.2 = trunc i64 %add1.i.i.i.us.6.2 to i32 - %cmp4.i.i.us.6.2 = icmp sgt i32 %26, %conv.i.i.us.6.2 - br i1 %cmp4.i.i.us.6.2, label %if.then.i.i.us.6.2, label %if.end.i.i.us.6.2 - -if.then.i.i.us.6.2: ; preds = %if.end.i.i.us.6.1 - %add.i.i.us.6.2 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.2 - %idxprom.i.i.us.6.2 = sext i32 %add.i.i.us.6.2 to i64 - %arrayidx.i.i.us.6.2 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.6.2 - %507 = load float, float* %arrayidx.i.i.us.6.2, align 4, !tbaa !12 - %mul6.i.i.us.6.2 = fmul float %18, %507 - store float %mul6.i.i.us.6.2, float* %arrayidx.i.i.us.6.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.2 - -if.end.i.i.us.6.2: ; preds = %if.then.i.i.us.6.2, %if.end.i.i.us.6.1 - %508 = or i64 %_local_id_x.i.0.us.6, 3 - %add1.i.i.i.us.6.3 = add nuw nsw i64 %508, %mul.i.i.i - %conv.i.i.us.6.3 = trunc i64 %add1.i.i.i.us.6.3 to i32 - %cmp4.i.i.us.6.3 = icmp sgt i32 %26, %conv.i.i.us.6.3 - br i1 %cmp4.i.i.us.6.3, label %if.then.i.i.us.6.3, label %if.end.i.i.us.6.3 - -if.then.i.i.us.6.3: ; preds = %if.end.i.i.us.6.2 - %add.i.i.us.6.3 = add nsw i32 %mul.i.i.6, %conv.i.i.us.6.3 - %idxprom.i.i.us.6.3 = sext i32 %add.i.i.us.6.3 to i64 - %arrayidx.i.i.us.6.3 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.6.3 - %509 = load float, float* %arrayidx.i.i.us.6.3, align 4, !tbaa !12 - %mul6.i.i.us.6.3 = fmul float %18, %509 - store float %mul6.i.i.us.6.3, float* %arrayidx.i.i.us.6.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.6.3 - -if.end.i.i.us.6.3: ; preds = %if.then.i.i.us.6.3, %if.end.i.i.us.6.2 - %510 = add nuw nsw i64 %_local_id_x.i.0.us.6, 4 - %exitcond34.6.not.3 = icmp eq i64 %510, 32 - br i1 %exitcond34.6.not.3, label %pregion_for_end.i.i.6.loopexit, label %pregion_for_entry.entry.i.i.us.6, !llvm.loop !41 - -if.then.i.i.us.5.1: ; preds = %if.end.i.i.us.5 - %add.i.i.us.5.1 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.1 - %idxprom.i.i.us.5.1 = sext i32 %add.i.i.us.5.1 to i64 - %arrayidx.i.i.us.5.1 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.5.1 - %511 = load float, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12 - %mul6.i.i.us.5.1 = fmul float %18, %511 - store float %mul6.i.i.us.5.1, float* %arrayidx.i.i.us.5.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.1 - -if.end.i.i.us.5.1: ; preds = %if.then.i.i.us.5.1, %if.end.i.i.us.5 - %512 = or i64 %_local_id_x.i.0.us.5, 2 - %add1.i.i.i.us.5.2 = add nuw nsw i64 %512, %mul.i.i.i - %conv.i.i.us.5.2 = trunc i64 %add1.i.i.i.us.5.2 to i32 - %cmp4.i.i.us.5.2 = icmp sgt i32 %26, %conv.i.i.us.5.2 - br i1 %cmp4.i.i.us.5.2, label %if.then.i.i.us.5.2, label %if.end.i.i.us.5.2 - -if.then.i.i.us.5.2: ; preds = %if.end.i.i.us.5.1 - %add.i.i.us.5.2 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.2 - %idxprom.i.i.us.5.2 = sext i32 %add.i.i.us.5.2 to i64 - %arrayidx.i.i.us.5.2 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.5.2 - %513 = load float, float* %arrayidx.i.i.us.5.2, align 4, !tbaa !12 - %mul6.i.i.us.5.2 = fmul float %18, %513 - store float %mul6.i.i.us.5.2, float* %arrayidx.i.i.us.5.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.2 - -if.end.i.i.us.5.2: ; preds = %if.then.i.i.us.5.2, %if.end.i.i.us.5.1 - %514 = or i64 %_local_id_x.i.0.us.5, 3 - %add1.i.i.i.us.5.3 = add nuw nsw i64 %514, %mul.i.i.i - %conv.i.i.us.5.3 = trunc i64 %add1.i.i.i.us.5.3 to i32 - %cmp4.i.i.us.5.3 = icmp sgt i32 %26, %conv.i.i.us.5.3 - br i1 %cmp4.i.i.us.5.3, label %if.then.i.i.us.5.3, label %if.end.i.i.us.5.3 - -if.then.i.i.us.5.3: ; preds = %if.end.i.i.us.5.2 - %add.i.i.us.5.3 = add nsw i32 %mul.i.i.5, %conv.i.i.us.5.3 - %idxprom.i.i.us.5.3 = sext i32 %add.i.i.us.5.3 to i64 - %arrayidx.i.i.us.5.3 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.5.3 - %515 = load float, float* %arrayidx.i.i.us.5.3, align 4, !tbaa !12 - %mul6.i.i.us.5.3 = fmul float %18, %515 - store float %mul6.i.i.us.5.3, float* %arrayidx.i.i.us.5.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.5.3 - -if.end.i.i.us.5.3: ; preds = %if.then.i.i.us.5.3, %if.end.i.i.us.5.2 - %516 = add nuw nsw i64 %_local_id_x.i.0.us.5, 4 - %exitcond34.5.not.3 = icmp eq i64 %516, 32 - br i1 %exitcond34.5.not.3, label %pregion_for_end.i.i.5.loopexit, label %pregion_for_entry.entry.i.i.us.5, !llvm.loop !42 - -if.then.i.i.us.4.1: ; preds = %if.end.i.i.us.4 - %add.i.i.us.4.1 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.1 - %idxprom.i.i.us.4.1 = sext i32 %add.i.i.us.4.1 to i64 - %arrayidx.i.i.us.4.1 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.4.1 - %517 = load float, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12 - %mul6.i.i.us.4.1 = fmul float %18, %517 - store float %mul6.i.i.us.4.1, float* %arrayidx.i.i.us.4.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.1 - -if.end.i.i.us.4.1: ; preds = %if.then.i.i.us.4.1, %if.end.i.i.us.4 - %518 = or i64 %_local_id_x.i.0.us.4, 2 - %add1.i.i.i.us.4.2 = add nuw nsw i64 %518, %mul.i.i.i - %conv.i.i.us.4.2 = trunc i64 %add1.i.i.i.us.4.2 to i32 - %cmp4.i.i.us.4.2 = icmp sgt i32 %26, %conv.i.i.us.4.2 - br i1 %cmp4.i.i.us.4.2, label %if.then.i.i.us.4.2, label %if.end.i.i.us.4.2 - -if.then.i.i.us.4.2: ; preds = %if.end.i.i.us.4.1 - %add.i.i.us.4.2 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.2 - %idxprom.i.i.us.4.2 = sext i32 %add.i.i.us.4.2 to i64 - %arrayidx.i.i.us.4.2 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.4.2 - %519 = load float, float* %arrayidx.i.i.us.4.2, align 4, !tbaa !12 - %mul6.i.i.us.4.2 = fmul float %18, %519 - store float %mul6.i.i.us.4.2, float* %arrayidx.i.i.us.4.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.2 - -if.end.i.i.us.4.2: ; preds = %if.then.i.i.us.4.2, %if.end.i.i.us.4.1 - %520 = or i64 %_local_id_x.i.0.us.4, 3 - %add1.i.i.i.us.4.3 = add nuw nsw i64 %520, %mul.i.i.i - %conv.i.i.us.4.3 = trunc i64 %add1.i.i.i.us.4.3 to i32 - %cmp4.i.i.us.4.3 = icmp sgt i32 %26, %conv.i.i.us.4.3 - br i1 %cmp4.i.i.us.4.3, label %if.then.i.i.us.4.3, label %if.end.i.i.us.4.3 - -if.then.i.i.us.4.3: ; preds = %if.end.i.i.us.4.2 - %add.i.i.us.4.3 = add nsw i32 %mul.i.i.4, %conv.i.i.us.4.3 - %idxprom.i.i.us.4.3 = sext i32 %add.i.i.us.4.3 to i64 - %arrayidx.i.i.us.4.3 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.4.3 - %521 = load float, float* %arrayidx.i.i.us.4.3, align 4, !tbaa !12 - %mul6.i.i.us.4.3 = fmul float %18, %521 - store float %mul6.i.i.us.4.3, float* %arrayidx.i.i.us.4.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.4.3 - -if.end.i.i.us.4.3: ; preds = %if.then.i.i.us.4.3, %if.end.i.i.us.4.2 - %522 = add nuw nsw i64 %_local_id_x.i.0.us.4, 4 - %exitcond34.4.not.3 = icmp eq i64 %522, 32 - br i1 %exitcond34.4.not.3, label %pregion_for_end.i.i.4.loopexit, label %pregion_for_entry.entry.i.i.us.4, !llvm.loop !43 - -if.then.i.i.us.3.1: ; preds = %if.end.i.i.us.3 - %add.i.i.us.3.1 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.1 - %idxprom.i.i.us.3.1 = sext i32 %add.i.i.us.3.1 to i64 - %arrayidx.i.i.us.3.1 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.3.1 - %523 = load float, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12 - %mul6.i.i.us.3.1 = fmul float %18, %523 - store float %mul6.i.i.us.3.1, float* %arrayidx.i.i.us.3.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.1 - -if.end.i.i.us.3.1: ; preds = %if.then.i.i.us.3.1, %if.end.i.i.us.3 - %524 = or i64 %_local_id_x.i.0.us.3, 2 - %add1.i.i.i.us.3.2 = add nuw nsw i64 %524, %mul.i.i.i - %conv.i.i.us.3.2 = trunc i64 %add1.i.i.i.us.3.2 to i32 - %cmp4.i.i.us.3.2 = icmp sgt i32 %26, %conv.i.i.us.3.2 - br i1 %cmp4.i.i.us.3.2, label %if.then.i.i.us.3.2, label %if.end.i.i.us.3.2 - -if.then.i.i.us.3.2: ; preds = %if.end.i.i.us.3.1 - %add.i.i.us.3.2 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.2 - %idxprom.i.i.us.3.2 = sext i32 %add.i.i.us.3.2 to i64 - %arrayidx.i.i.us.3.2 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.3.2 - %525 = load float, float* %arrayidx.i.i.us.3.2, align 4, !tbaa !12 - %mul6.i.i.us.3.2 = fmul float %18, %525 - store float %mul6.i.i.us.3.2, float* %arrayidx.i.i.us.3.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.2 - -if.end.i.i.us.3.2: ; preds = %if.then.i.i.us.3.2, %if.end.i.i.us.3.1 - %526 = or i64 %_local_id_x.i.0.us.3, 3 - %add1.i.i.i.us.3.3 = add nuw nsw i64 %526, %mul.i.i.i - %conv.i.i.us.3.3 = trunc i64 %add1.i.i.i.us.3.3 to i32 - %cmp4.i.i.us.3.3 = icmp sgt i32 %26, %conv.i.i.us.3.3 - br i1 %cmp4.i.i.us.3.3, label %if.then.i.i.us.3.3, label %if.end.i.i.us.3.3 - -if.then.i.i.us.3.3: ; preds = %if.end.i.i.us.3.2 - %add.i.i.us.3.3 = add nsw i32 %mul.i.i.3, %conv.i.i.us.3.3 - %idxprom.i.i.us.3.3 = sext i32 %add.i.i.us.3.3 to i64 - %arrayidx.i.i.us.3.3 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.3.3 - %527 = load float, float* %arrayidx.i.i.us.3.3, align 4, !tbaa !12 - %mul6.i.i.us.3.3 = fmul float %18, %527 - store float %mul6.i.i.us.3.3, float* %arrayidx.i.i.us.3.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3.3 - -if.end.i.i.us.3.3: ; preds = %if.then.i.i.us.3.3, %if.end.i.i.us.3.2 - %528 = add nuw nsw i64 %_local_id_x.i.0.us.3, 4 - %exitcond34.3.not.3 = icmp eq i64 %528, 32 - br i1 %exitcond34.3.not.3, label %pregion_for_end.i.i.3.loopexit, label %pregion_for_entry.entry.i.i.us.3, !llvm.loop !44 - -if.then.i.i.us.2.1: ; preds = %if.end.i.i.us.2 - %add.i.i.us.2.1 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.1 - %idxprom.i.i.us.2.1 = sext i32 %add.i.i.us.2.1 to i64 - %arrayidx.i.i.us.2.1 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.2.1 - %529 = load float, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12 - %mul6.i.i.us.2.1 = fmul float %18, %529 - store float %mul6.i.i.us.2.1, float* %arrayidx.i.i.us.2.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.1 - -if.end.i.i.us.2.1: ; preds = %if.then.i.i.us.2.1, %if.end.i.i.us.2 - %530 = or i64 %_local_id_x.i.0.us.2, 2 - %add1.i.i.i.us.2.2 = add nuw nsw i64 %530, %mul.i.i.i - %conv.i.i.us.2.2 = trunc i64 %add1.i.i.i.us.2.2 to i32 - %cmp4.i.i.us.2.2 = icmp sgt i32 %26, %conv.i.i.us.2.2 - br i1 %cmp4.i.i.us.2.2, label %if.then.i.i.us.2.2, label %if.end.i.i.us.2.2 - -if.then.i.i.us.2.2: ; preds = %if.end.i.i.us.2.1 - %add.i.i.us.2.2 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.2 - %idxprom.i.i.us.2.2 = sext i32 %add.i.i.us.2.2 to i64 - %arrayidx.i.i.us.2.2 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.2.2 - %531 = load float, float* %arrayidx.i.i.us.2.2, align 4, !tbaa !12 - %mul6.i.i.us.2.2 = fmul float %18, %531 - store float %mul6.i.i.us.2.2, float* %arrayidx.i.i.us.2.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.2 - -if.end.i.i.us.2.2: ; preds = %if.then.i.i.us.2.2, %if.end.i.i.us.2.1 - %532 = or i64 %_local_id_x.i.0.us.2, 3 - %add1.i.i.i.us.2.3 = add nuw nsw i64 %532, %mul.i.i.i - %conv.i.i.us.2.3 = trunc i64 %add1.i.i.i.us.2.3 to i32 - %cmp4.i.i.us.2.3 = icmp sgt i32 %26, %conv.i.i.us.2.3 - br i1 %cmp4.i.i.us.2.3, label %if.then.i.i.us.2.3, label %if.end.i.i.us.2.3 - -if.then.i.i.us.2.3: ; preds = %if.end.i.i.us.2.2 - %add.i.i.us.2.3 = add nsw i32 %mul.i.i.2, %conv.i.i.us.2.3 - %idxprom.i.i.us.2.3 = sext i32 %add.i.i.us.2.3 to i64 - %arrayidx.i.i.us.2.3 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.2.3 - %533 = load float, float* %arrayidx.i.i.us.2.3, align 4, !tbaa !12 - %mul6.i.i.us.2.3 = fmul float %18, %533 - store float %mul6.i.i.us.2.3, float* %arrayidx.i.i.us.2.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2.3 - -if.end.i.i.us.2.3: ; preds = %if.then.i.i.us.2.3, %if.end.i.i.us.2.2 - %534 = add nuw nsw i64 %_local_id_x.i.0.us.2, 4 - %exitcond34.2.not.3 = icmp eq i64 %534, 32 - br i1 %exitcond34.2.not.3, label %pregion_for_end.i.i.2.loopexit, label %pregion_for_entry.entry.i.i.us.2, !llvm.loop !45 - -if.then.i.i.us.1.1: ; preds = %if.end.i.i.us.1 - %add.i.i.us.1.1 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.1 - %idxprom.i.i.us.1.1 = sext i32 %add.i.i.us.1.1 to i64 - %arrayidx.i.i.us.1.1 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.1.1 - %535 = load float, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12 - %mul6.i.i.us.1.1 = fmul float %18, %535 - store float %mul6.i.i.us.1.1, float* %arrayidx.i.i.us.1.1, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.1 - -if.end.i.i.us.1.1: ; preds = %if.then.i.i.us.1.1, %if.end.i.i.us.1 - %536 = or i64 %_local_id_x.i.0.us.1, 2 - %add1.i.i.i.us.1.2 = add nuw nsw i64 %536, %mul.i.i.i - %conv.i.i.us.1.2 = trunc i64 %add1.i.i.i.us.1.2 to i32 - %cmp4.i.i.us.1.2 = icmp sgt i32 %26, %conv.i.i.us.1.2 - br i1 %cmp4.i.i.us.1.2, label %if.then.i.i.us.1.2, label %if.end.i.i.us.1.2 - -if.then.i.i.us.1.2: ; preds = %if.end.i.i.us.1.1 - %add.i.i.us.1.2 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.2 - %idxprom.i.i.us.1.2 = sext i32 %add.i.i.us.1.2 to i64 - %arrayidx.i.i.us.1.2 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.1.2 - %537 = load float, float* %arrayidx.i.i.us.1.2, align 4, !tbaa !12 - %mul6.i.i.us.1.2 = fmul float %18, %537 - store float %mul6.i.i.us.1.2, float* %arrayidx.i.i.us.1.2, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.2 - -if.end.i.i.us.1.2: ; preds = %if.then.i.i.us.1.2, %if.end.i.i.us.1.1 - %538 = or i64 %_local_id_x.i.0.us.1, 3 - %add1.i.i.i.us.1.3 = add nuw nsw i64 %538, %mul.i.i.i - %conv.i.i.us.1.3 = trunc i64 %add1.i.i.i.us.1.3 to i32 - %cmp4.i.i.us.1.3 = icmp sgt i32 %26, %conv.i.i.us.1.3 - br i1 %cmp4.i.i.us.1.3, label %if.then.i.i.us.1.3, label %if.end.i.i.us.1.3 - -if.then.i.i.us.1.3: ; preds = %if.end.i.i.us.1.2 - %add.i.i.us.1.3 = add nsw i32 %mul.i.i.1, %conv.i.i.us.1.3 - %idxprom.i.i.us.1.3 = sext i32 %add.i.i.us.1.3 to i64 - %arrayidx.i.i.us.1.3 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.1.3 - %539 = load float, float* %arrayidx.i.i.us.1.3, align 4, !tbaa !12 - %mul6.i.i.us.1.3 = fmul float %18, %539 - store float %mul6.i.i.us.1.3, float* %arrayidx.i.i.us.1.3, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1.3 - -if.end.i.i.us.1.3: ; preds = %if.then.i.i.us.1.3, %if.end.i.i.us.1.2 - %540 = add nuw nsw i64 %_local_id_x.i.0.us.1, 4 - %exitcond34.1.not.3 = icmp eq i64 %540, 32 - br i1 %exitcond34.1.not.3, label %pregion_for_end.i.i.1.loopexit, label %pregion_for_entry.entry.i.i.us.1, !llvm.loop !46 - -if.then.i.i.us.1214: ; preds = %if.end.i.i.us - %add.i.i.us.1210 = add nsw i32 %mul.i.i.us, %conv.i.i.us.1207 - %idxprom.i.i.us.1211 = sext i32 %add.i.i.us.1210 to i64 - %arrayidx.i.i.us.1212 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.1211 - %541 = load float, float* %arrayidx.i.i.us.1212, align 4, !tbaa !12 - %mul6.i.i.us.1213 = fmul float %18, %541 - store float %mul6.i.i.us.1213, float* %arrayidx.i.i.us.1212, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.1215 - -if.end.i.i.us.1215: ; preds = %if.then.i.i.us.1214, %if.end.i.i.us - %542 = or i64 %_local_id_x.i.0.us, 2 - %add1.i.i.i.us.2217 = add nuw nsw i64 %542, %mul.i.i.i - %conv.i.i.us.2218 = trunc i64 %add1.i.i.i.us.2217 to i32 - %cmp4.i.i.us.2219 = icmp sgt i32 %26, %conv.i.i.us.2218 - br i1 %cmp4.i.i.us.2219, label %if.then.i.i.us.2225, label %if.end.i.i.us.2226 - -if.then.i.i.us.2225: ; preds = %if.end.i.i.us.1215 - %add.i.i.us.2221 = add nsw i32 %mul.i.i.us, %conv.i.i.us.2218 - %idxprom.i.i.us.2222 = sext i32 %add.i.i.us.2221 to i64 - %arrayidx.i.i.us.2223 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.2222 - %543 = load float, float* %arrayidx.i.i.us.2223, align 4, !tbaa !12 - %mul6.i.i.us.2224 = fmul float %18, %543 - store float %mul6.i.i.us.2224, float* %arrayidx.i.i.us.2223, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.2226 - -if.end.i.i.us.2226: ; preds = %if.then.i.i.us.2225, %if.end.i.i.us.1215 - %544 = or i64 %_local_id_x.i.0.us, 3 - %add1.i.i.i.us.3228 = add nuw nsw i64 %544, %mul.i.i.i - %conv.i.i.us.3229 = trunc i64 %add1.i.i.i.us.3228 to i32 - %cmp4.i.i.us.3230 = icmp sgt i32 %26, %conv.i.i.us.3229 - br i1 %cmp4.i.i.us.3230, label %if.then.i.i.us.3236, label %if.end.i.i.us.3237 - -if.then.i.i.us.3236: ; preds = %if.end.i.i.us.2226 - %add.i.i.us.3232 = add nsw i32 %mul.i.i.us, %conv.i.i.us.3229 - %idxprom.i.i.us.3233 = sext i32 %add.i.i.us.3232 to i64 - %arrayidx.i.i.us.3234 = getelementptr inbounds float, float* %10, i64 %idxprom.i.i.us.3233 - %545 = load float, float* %arrayidx.i.i.us.3234, align 4, !tbaa !12 - %mul6.i.i.us.3235 = fmul float %18, %545 - store float %mul6.i.i.us.3235, float* %arrayidx.i.i.us.3234, align 4, !tbaa !12, !llvm.access.group !16 - br label %if.end.i.i.us.3237 - -if.end.i.i.us.3237: ; preds = %if.then.i.i.us.3236, %if.end.i.i.us.2226 - %546 = add nuw nsw i64 %_local_id_x.i.0.us, 4 - %exitcond34.not.3 = icmp eq i64 %546, 32 - br i1 %exitcond34.not.3, label %pregion_for_end.i.i.loopexit, label %pregion_for_entry.entry.i.i.us, !llvm.loop !47 -} - -; Function Attrs: argmemonly nounwind readonly willreturn -declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>) #3 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>) #4 - -attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { alwaysinline nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-builtins" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } -attributes #3 = { argmemonly nounwind readonly willreturn } -attributes #4 = { argmemonly nounwind willreturn } - -!llvm.module.flags = !{!0, !1, !2} -!opencl.ocl.version = !{!3} -!llvm.ident = !{!4} -!opencl.spir.version = !{!3} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"PIC Level", i32 2} -!2 = !{i32 7, !"PIE Level", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 91e89f9a5115b0f83b8f026e1ad0e6d1f885fa9b)"} -!5 = !{i32 1, i32 1, i32 0, i32 0, i32 0, i32 0} -!6 = !{!"none", !"none", !"none", !"none", !"none", !"none"} -!7 = !{!"DATA_TYPE*", !"DATA_TYPE*", !"DATA_TYPE", !"DATA_TYPE", !"int", !"int"} -!8 = !{!"float*", !"float*", !"float", !"float", !"int", !"int"} -!9 = !{!"", !"", !"", !"", !"", !""} -!10 = !{!"a", !"c", !"alpha", !"beta", !"ni", !"nj"} -!11 = !{i32 1} -!12 = !{!13, !13, i64 0} -!13 = !{!"float", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !{!17, !18} -!17 = distinct !{} -!18 = distinct !{} -!19 = distinct !{!19, !20} -!20 = !{!"llvm.loop.parallel_accesses", !17} -!21 = distinct !{!21, !22} -!22 = !{!"llvm.loop.unroll.disable"} -!23 = distinct !{!23, !20, !24} -!24 = !{!"llvm.loop.isvectorized", i32 1} -!25 = distinct !{!25, !20, !24} -!26 = distinct !{!26, !20, !24} -!27 = distinct !{!27, !20, !24} -!28 = distinct !{!28, !20, !24} -!29 = distinct !{!29, !20, !24} -!30 = distinct !{!30, !20, !24} -!31 = distinct !{!31, !20, !24} -!32 = distinct !{!32, !20, !24} -!33 = distinct !{!33, !20, !24} -!34 = distinct !{!34, !20, !24} -!35 = distinct !{!35, !20, !24} -!36 = distinct !{!36, !20, !24} -!37 = distinct !{!37, !20, !24} -!38 = distinct !{!38, !20, !24} -!39 = distinct !{!39, !20, !24} -!40 = distinct !{!40, !20, !24} -!41 = distinct !{!41, !20, !24} -!42 = distinct !{!42, !20, !24} -!43 = distinct !{!43, !20, !24} -!44 = distinct !{!44, !20, !24} -!45 = distinct !{!45, !20, !24} -!46 = distinct !{!46, !20, !24} -!47 = distinct !{!47, !20, !24} diff --git a/unroll-cfgs/2DConvolution.pdf b/unroll-cfgs/2DConvolution.pdf deleted file mode 100644 index 13af03e..0000000 Binary files a/unroll-cfgs/2DConvolution.pdf and /dev/null differ diff --git a/unroll-cfgs/2mm_kernel1.pdf b/unroll-cfgs/2mm_kernel1.pdf deleted file mode 100644 index 04b0133..0000000 Binary files a/unroll-cfgs/2mm_kernel1.pdf and /dev/null differ diff --git a/unroll-cfgs/2mm_kernel2.pdf b/unroll-cfgs/2mm_kernel2.pdf deleted file mode 100644 index 46c6d01..0000000 Binary files a/unroll-cfgs/2mm_kernel2.pdf and /dev/null differ diff --git a/unroll-cfgs/3DConvolution.pdf b/unroll-cfgs/3DConvolution.pdf deleted file mode 100644 index 9af819f..0000000 Binary files a/unroll-cfgs/3DConvolution.pdf and /dev/null differ diff --git a/unroll-cfgs/3mm_kernel1.pdf b/unroll-cfgs/3mm_kernel1.pdf deleted file mode 100644 index 2070f29..0000000 Binary files a/unroll-cfgs/3mm_kernel1.pdf and /dev/null differ diff --git a/unroll-cfgs/3mm_kernel2.pdf b/unroll-cfgs/3mm_kernel2.pdf deleted file mode 100644 index 82d9865..0000000 Binary files a/unroll-cfgs/3mm_kernel2.pdf and /dev/null differ diff --git a/unroll-cfgs/3mm_kernel3.pdf b/unroll-cfgs/3mm_kernel3.pdf deleted file mode 100644 index 05d5bb8..0000000 Binary files a/unroll-cfgs/3mm_kernel3.pdf and /dev/null differ diff --git a/unroll-cfgs/adi_kernel1.pdf b/unroll-cfgs/adi_kernel1.pdf deleted file mode 100644 index 1727a8c..0000000 Binary files a/unroll-cfgs/adi_kernel1.pdf and /dev/null differ diff --git a/unroll-cfgs/adi_kernel2.pdf b/unroll-cfgs/adi_kernel2.pdf deleted file mode 100644 index e8fc2ae..0000000 Binary files a/unroll-cfgs/adi_kernel2.pdf and /dev/null differ diff --git a/unroll-cfgs/adi_kernel3.pdf b/unroll-cfgs/adi_kernel3.pdf deleted file mode 100644 index 7d64815..0000000 Binary files a/unroll-cfgs/adi_kernel3.pdf and /dev/null differ diff --git a/unroll-cfgs/adi_kernel4.pdf b/unroll-cfgs/adi_kernel4.pdf deleted file mode 100644 index c231094..0000000 Binary files a/unroll-cfgs/adi_kernel4.pdf and /dev/null differ diff --git a/unroll-cfgs/adi_kernel5.pdf b/unroll-cfgs/adi_kernel5.pdf deleted file mode 100644 index 8c7d98d..0000000 Binary files a/unroll-cfgs/adi_kernel5.pdf and /dev/null differ diff --git a/unroll-cfgs/adi_kernel6.pdf b/unroll-cfgs/adi_kernel6.pdf deleted file mode 100644 index 34d0e3f..0000000 Binary files a/unroll-cfgs/adi_kernel6.pdf and /dev/null differ diff --git a/unroll-cfgs/atax_kernel1.pdf b/unroll-cfgs/atax_kernel1.pdf deleted file mode 100644 index 6672d35..0000000 Binary files a/unroll-cfgs/atax_kernel1.pdf and /dev/null differ diff --git a/unroll-cfgs/atax_kernel2.pdf b/unroll-cfgs/atax_kernel2.pdf deleted file mode 100644 index e9cd6e4..0000000 Binary files a/unroll-cfgs/atax_kernel2.pdf and /dev/null differ diff --git a/unroll-cfgs/bicg_kernel1.pdf b/unroll-cfgs/bicg_kernel1.pdf deleted file mode 100644 index 2874a05..0000000 Binary files a/unroll-cfgs/bicg_kernel1.pdf and /dev/null differ diff --git a/unroll-cfgs/bicg_kernel2.pdf b/unroll-cfgs/bicg_kernel2.pdf deleted file mode 100644 index 9d7b166..0000000 Binary files a/unroll-cfgs/bicg_kernel2.pdf and /dev/null differ diff --git a/unroll-cfgs/correlation_corr.pdf b/unroll-cfgs/correlation_corr.pdf deleted file mode 100644 index 2b53a95..0000000 Binary files a/unroll-cfgs/correlation_corr.pdf and /dev/null differ diff --git a/unroll-cfgs/correlation_mean.pdf b/unroll-cfgs/correlation_mean.pdf deleted file mode 100644 index 23c6bb6..0000000 Binary files a/unroll-cfgs/correlation_mean.pdf and /dev/null differ diff --git a/unroll-cfgs/correlation_reduce.pdf b/unroll-cfgs/correlation_reduce.pdf deleted file mode 100644 index 73b4bba..0000000 Binary files a/unroll-cfgs/correlation_reduce.pdf and /dev/null differ diff --git a/unroll-cfgs/correlation_std.pdf b/unroll-cfgs/correlation_std.pdf deleted file mode 100644 index fe248da..0000000 Binary files a/unroll-cfgs/correlation_std.pdf and /dev/null differ diff --git a/unroll-cfgs/covariance_covar.pdf b/unroll-cfgs/covariance_covar.pdf deleted file mode 100644 index d516fbb..0000000 Binary files a/unroll-cfgs/covariance_covar.pdf and /dev/null differ diff --git a/unroll-cfgs/covariance_mean.pdf b/unroll-cfgs/covariance_mean.pdf deleted file mode 100644 index 9adc528..0000000 Binary files a/unroll-cfgs/covariance_mean.pdf and /dev/null differ diff --git a/unroll-cfgs/covariance_reduce.pdf b/unroll-cfgs/covariance_reduce.pdf deleted file mode 100644 index 66d955a..0000000 Binary files a/unroll-cfgs/covariance_reduce.pdf and /dev/null differ diff --git a/unroll-cfgs/doitgen_kernel1.pdf b/unroll-cfgs/doitgen_kernel1.pdf deleted file mode 100644 index 0f0db22..0000000 Binary files a/unroll-cfgs/doitgen_kernel1.pdf and /dev/null differ diff --git a/unroll-cfgs/doitgen_kernel2.pdf b/unroll-cfgs/doitgen_kernel2.pdf deleted file mode 100644 index 7e4dec0..0000000 Binary files a/unroll-cfgs/doitgen_kernel2.pdf and /dev/null differ diff --git a/unroll-cfgs/fdtd2d_kernel1.pdf b/unroll-cfgs/fdtd2d_kernel1.pdf deleted file mode 100644 index 67423eb..0000000 Binary files a/unroll-cfgs/fdtd2d_kernel1.pdf and /dev/null differ diff --git a/unroll-cfgs/fdtd2d_kernel2.pdf b/unroll-cfgs/fdtd2d_kernel2.pdf deleted file mode 100644 index e9fa5a7..0000000 Binary files a/unroll-cfgs/fdtd2d_kernel2.pdf and /dev/null differ diff --git a/unroll-cfgs/fdtd2d_kernel3.pdf b/unroll-cfgs/fdtd2d_kernel3.pdf deleted file mode 100644 index a0de704..0000000 Binary files a/unroll-cfgs/fdtd2d_kernel3.pdf and /dev/null differ diff --git a/unroll-cfgs/gemm.pdf b/unroll-cfgs/gemm.pdf deleted file mode 100644 index bb9eb35..0000000 Binary files a/unroll-cfgs/gemm.pdf and /dev/null differ diff --git a/unroll-cfgs/gemver_kernel1.pdf b/unroll-cfgs/gemver_kernel1.pdf deleted file mode 100644 index 3a849d5..0000000 Binary files a/unroll-cfgs/gemver_kernel1.pdf and /dev/null differ diff --git a/unroll-cfgs/gemver_kernel2.pdf b/unroll-cfgs/gemver_kernel2.pdf deleted file mode 100644 index 1f394ce..0000000 Binary files a/unroll-cfgs/gemver_kernel2.pdf and /dev/null differ diff --git a/unroll-cfgs/gemver_kernel3.pdf b/unroll-cfgs/gemver_kernel3.pdf deleted file mode 100644 index f1488b7..0000000 Binary files a/unroll-cfgs/gemver_kernel3.pdf and /dev/null differ diff --git a/unroll-cfgs/gesummv.pdf b/unroll-cfgs/gesummv.pdf deleted file mode 100644 index 83a586d..0000000 Binary files a/unroll-cfgs/gesummv.pdf and /dev/null differ diff --git a/unroll-cfgs/gramschmidt_kernel2.pdf b/unroll-cfgs/gramschmidt_kernel2.pdf deleted file mode 100644 index 0dddba4..0000000 Binary files a/unroll-cfgs/gramschmidt_kernel2.pdf and /dev/null differ diff --git a/unroll-cfgs/jacobi2D_kernel1.pdf b/unroll-cfgs/jacobi2D_kernel1.pdf deleted file mode 100644 index d530c59..0000000 Binary files a/unroll-cfgs/jacobi2D_kernel1.pdf and /dev/null differ diff --git a/unroll-cfgs/jacobi2D_kernel2.pdf b/unroll-cfgs/jacobi2D_kernel2.pdf deleted file mode 100644 index 6bdf5b8..0000000 Binary files a/unroll-cfgs/jacobi2D_kernel2.pdf and /dev/null differ diff --git a/unroll-cfgs/lu_kernel2.pdf b/unroll-cfgs/lu_kernel2.pdf deleted file mode 100644 index 6e86611..0000000 Binary files a/unroll-cfgs/lu_kernel2.pdf and /dev/null differ diff --git a/unroll-cfgs/mvt_kernel1.pdf b/unroll-cfgs/mvt_kernel1.pdf deleted file mode 100644 index 6177f26..0000000 Binary files a/unroll-cfgs/mvt_kernel1.pdf and /dev/null differ diff --git a/unroll-cfgs/mvt_kernel2.pdf b/unroll-cfgs/mvt_kernel2.pdf deleted file mode 100644 index 0ccfc51..0000000 Binary files a/unroll-cfgs/mvt_kernel2.pdf and /dev/null differ diff --git a/unroll-cfgs/syr2k.pdf b/unroll-cfgs/syr2k.pdf deleted file mode 100644 index 99e17a2..0000000 Binary files a/unroll-cfgs/syr2k.pdf and /dev/null differ diff --git a/unroll-cfgs/syrk.pdf b/unroll-cfgs/syrk.pdf deleted file mode 100644 index d92d89e..0000000 Binary files a/unroll-cfgs/syrk.pdf and /dev/null differ