Option --parallel-jobs is only supported by amdclang++ Upstream bug: https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT/issues/21 --- a/contrib/Orochi/scripts/kernelCompile.py +++ b/contrib/Orochi/scripts/kernelCompile.py @@ -17,7 +17,7 @@ def compile( index ): if index == 0 : command = [ "hipcc", - "-x", "hip", "..\ParallelPrimitives\RadixSortKernels.h", "-O3", "-std=c++17", "-ffast-math", "--cuda-device-only", "--genco", "-I../", "-include", "hip/hip_runtime.h", "-parallel-jobs=15"] + "-x", "hip", "..\ParallelPrimitives\RadixSortKernels.h", "-O3", "-std=c++17", "-ffast-math", "--cuda-device-only", "--genco", "-I../", "-include", "hip/hip_runtime.h"] #command.append( "--offload-arch=gfx1100" ) for i in enumArch( "gfx900" ): command.append( "--offload-arch=" + i ) --- a/scripts/bitcodes/compile.py +++ b/scripts/bitcodes/compile.py @@ -117,15 +117,15 @@ def compileAmd(): parallel_jobs = 15 dst = 'hiprt' + hiprt_ver + '_' + hip_version + '_amd_lib' + postfix - cmd = hipccpath + ' -x hip ../../hiprt/impl/hiprt_kernels_bitcode.h -O3 -std=c++17 ' + targets + ' -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm -I../../contrib/Orochi/ -I../../ -DHIPRT_BITCODE_LINKING -ffast-math -parallel-jobs=' + str(parallel_jobs) + ' -o ' + dst + cmd = hipccpath + ' -x hip ../../hiprt/impl/hiprt_kernels_bitcode.h -O3 -std=c++17 ' + targets + ' -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm -I../../contrib/Orochi/ -I../../ -DHIPRT_BITCODE_LINKING -ffast-math -o ' + dst compileScript(cmd, dst) dst = 'hiprt' + hiprt_ver + '_' + hip_version + '_amd.hipfb' - cmd = hipccpath + ' -x hip ../../hiprt/impl/hiprt_kernels.h -O3 -std=c++17 ' + targets + ' -mllvm -amdgpu-early-inline-all=false -mllvm -amdgpu-function-calls=true --genco -I../../ -DHIPRT_BITCODE_LINKING -ffast-math -parallel-jobs=' + str(parallel_jobs) + ' -o ' + dst + cmd = hipccpath + ' -x hip ../../hiprt/impl/hiprt_kernels.h -O3 -std=c++17 ' + targets + ' -mllvm -amdgpu-early-inline-all=false -mllvm -amdgpu-function-calls=true --genco -I../../ -DHIPRT_BITCODE_LINKING -ffast-math -o ' + dst compileScript(cmd, dst) dst = 'oro_compiled_kernels.hipfb' - cmd = hipccpath + ' -x hip ../../contrib/Orochi/ParallelPrimitives/RadixSortKernels.h -O3 -std=c++17 ' + targets + ' --genco -I../../contrib/Orochi/ -include hip/hip_runtime.h -DHIPRT_BITCODE_LINKING -ffast-math -parallel-jobs=' + str(parallel_jobs) + ' -o ' + dst + cmd = hipccpath + ' -x hip ../../contrib/Orochi/ParallelPrimitives/RadixSortKernels.h -O3 -std=c++17 ' + targets + ' --genco -I../../contrib/Orochi/ -include hip/hip_runtime.h -DHIPRT_BITCODE_LINKING -ffast-math -o ' + dst compileScript(cmd, dst) --- a/scripts/bitcodes/precompile_bitcode.py +++ b/scripts/bitcodes/precompile_bitcode.py @@ -127,12 +127,12 @@ def compileAmd(): # compile custom function table hiprt_custom_func = 'hiprt' + hiprt_ver + '_' + hip_version + '_custom_func_table.bc' - cmd = hipccpath + ' -O3 -std=c++17 ' + targets + ' -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm -I../../ -ffast-math ../../test/bitcodes/custom_func_table.cpp -parallel-jobs=15 -o ' + hiprt_custom_func + cmd = hipccpath + ' -O3 -std=c++17 ' + targets + ' -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm -I../../ -ffast-math ../../test/bitcodes/custom_func_table.cpp -o ' + hiprt_custom_func compileScript('compiling ', cmd, hiprt_custom_func) # compiling unit test hiprt_unit_test = 'hiprt' + hiprt_ver + '_' + hip_version + '_unit_test'+ postfix - cmd = hipccpath + ' -O3 -std=c++17 ' + targets + ' -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm -I../../ -ffast-math -D BLOCK_SIZE=64 -D SHARED_STACK_SIZE=16 ../../test/bitcodes/unit_test.cpp -parallel-jobs=15 -o ' + hiprt_unit_test + cmd = hipccpath + ' -O3 -std=c++17 ' + targets + ' -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm -I../../ -ffast-math -D BLOCK_SIZE=64 -D SHARED_STACK_SIZE=16 ../../test/bitcodes/unit_test.cpp -o ' + hiprt_unit_test compileScript('compiling ', cmd, hiprt_unit_test) # linking