#!/usr/bin/make -f
# Compiler used for the whole build.
CXX := /usr/bin/hipcc

# dpkg-buildflags tuning: request every hardening feature, prepend -gz to the
# C++ compiler flags and -v to the linker flags.
DEB_BUILD_MAINT_OPTIONS := hardening=+all
DEB_CXXFLAGS_MAINT_PREPEND := -gz
DEB_LDFLAGS_MAINT_PREPEND := -v

# Ask the build for verbose output.
VERBOSE := 1

# Debugging aid, left disabled.
#export AMD_LOG_LEVEL=4

# Everything above has to reach dh and the tools it spawns via the environment.
export CXX DEB_BUILD_MAINT_OPTIONS DEB_CXXFLAGS_MAINT_PREPEND DEB_LDFLAGS_MAINT_PREPEND VERBOSE

# Keep options that are incompatible with device code away from it: every
# occurrence of such a flag in CXXFLAGS is prefixed with -Xarch_host.
#   $(1) - the flag to restrict
#   $(2) - the flag string to rewrite
xarch_host_only = $(subst $(1),-Xarch_host $(1),$(2))

CXXFLAGS := $(call xarch_host_only,-fstack-protector-strong,$(CXXFLAGS))
CXXFLAGS := $(call xarch_host_only,-fcf-protection,$(CXXFLAGS))

# rccl doesn't compile for all of the default ISAs, so limit the ISAs to the ones that work.
# Filtered out:
# - gfx1151 - unsupported upstream
# - gfx1100, gfx1101 - broken (lld: error: <unknown>:0: branch size exceeds simm16)
# - gfx90a, gfx1030, gfx1200, gfx942 - cause LP amd64 and amd64v3 builders to reset and fail without a build log
#   (probably OOM), while LP arm64 local builds are ok. At present we can only build for up to 2 ISAs.
#   Adding them back has to wait for an upgrade of the LP builders or for upstream to reduce build memory
#   usage (https://github.com/ROCm/rocm-systems/issues/3086); even rccl-launchpad-build-fix.patch does not help.
#
# NOTE: the double quotes are part of the value. It is pasted verbatim into shell
# command lines (-DGPU_TARGETS=... and, with ";" replaced by a space, the
# rocm:GPU-Architecture substvar), where the quotes keep the list a single word.
ROCM_ISAS := "gfx908;gfx1201"

# Options handed to CMake by dh_auto_configure.  The variable is deliberately
# recursively expanded ("=" / "+="), so the dpkg-parsechangelog call below only
# runs when the flags are actually used, not each time this file is parsed.
CMAKE_FLAGS  = -DCMAKE_BUILD_TYPE=Release
CMAKE_FLAGS += -DCMAKE_SKIP_RPATH=ON
CMAKE_FLAGS += -DGPU_TARGETS=$(ROCM_ISAS)
CMAKE_FLAGS += -DROCM_SYMLINK_LIBS=OFF
# Changelog version with everything from the first "+" or "-" onwards removed.
CMAKE_FLAGS += -DEXPLICIT_ROCM_VERSION=$(shell dpkg-parsechangelog -S Version | sed 's/[+-].*//')
CMAKE_FLAGS += -DRCCL_ROCPROFILER_REGISTER=OFF
CMAKE_FLAGS += -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF

# Build the test suite unless the "nocheck" build profile is active.
ifeq (,$(filter nocheck,$(DEB_BUILD_PROFILES)))
CMAKE_FLAGS += -DBUILD_TESTS=ON
endif

# Catch-all rule: hand every target to the dh sequencer and select the CMake
# build system for all dh_auto_* steps.
%:
	dh $@ --buildsystem=cmake

# Configure the architecture-dependent build, passing the CMake options
# collected in CMAKE_FLAGS after the "--" separator.
override_dh_auto_configure-arch:
	dh_auto_configure -- $(CMAKE_FLAGS)

# debhelper's default out-of-tree build directory is named after
# DEB_HOST_GNU_TYPE, not DEB_HOST_MULTIARCH.  The two coincide on most
# architectures but not on all of them (e.g. i386), so use the name debhelper
# actually uses.  The "?=" fallback keeps the path valid when the variable has
# not been exported by the caller (e.g. debian/rules invoked by hand).
DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
RCCL_BUILD_DIR = obj-$(DEB_HOST_GNU_TYPE)

# Run the UnitTests binary from the build directory, but only when /dev/kfd is
# readable; otherwise print a warning and skip the tests without failing the
# build.  The recipe is one shell command: LD_LIBRARY_PATH points at the build
# directory and the remaining variables are set for the test process only.
# Nothing is run at all when "nocheck" is in DEB_BUILD_OPTIONS.
override_dh_auto_test-arch:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
	set -e \
	; if [ -r /dev/kfd ] \
	; then \
	  LD_LIBRARY_PATH=$(RCCL_BUILD_DIR) \
	  RCCL_ENABLE_SIGNALHANDLER=1 \
	  NCCL_DEBUG=INFO \
	  HSA_FORCE_FINE_GRAIN_PCIE=1 \
	  $(RCCL_BUILD_DIR)/test/UnitTests \
	; else echo "W: /dev/kfd unreadable: no available AMD GPU." \
	;      echo "W: tests skipped." \
	; fi
endif

# Nothing to configure for the architecture-independent build: the recipe is
# the shell no-op ":".
override_dh_auto_configure-indep:
	:

# Linking for at least gfx908 takes longer than the LP inactivity timeout of 150m, so a
# heartbeat is needed to keep the build from being killed.
#
# How it works (everything after the first echo is ONE shell invocation, joined
# by the trailing backslashes, so HB_PID stays visible throughout):
#  - a background subshell prints "." every 300 seconds; its PID is saved in HB_PID;
#  - a trap on EXIT/INT/TERM kills that subshell and prints a "stopped" banner;
#    the trap string is double-quoted, so the PID is substituted when the trap
#    is installed;
#  - "set -e" makes a failing dh_auto_build abort the recipe (the EXIT trap
#    still runs and stops the heartbeat);
#  - on success the heartbeat is killed explicitly before the final message.
# "$$" is make's escape for a literal "$" passed to the shell.
override_dh_auto_build-arch:
	@echo "=== Starting build with heartbeat monitoring ==="
	@(while true; do echo "."; sleep 300; done) & HB_PID=$$!; \
	trap "kill $$HB_PID 2>/dev/null || true; echo ''; echo '=== Heartbeat stopped ==='" EXIT INT TERM; \
	set -e; \
	dh_auto_build; \
	kill $$HB_PID 2>/dev/null || true; \
	echo "=== Build completed successfully ==="

# Architecture-independent build: generate the HTML documentation, unless
# "nodoc" is in DEB_BUILD_OPTIONS.  The fonts and vendor directories under
# _static are removed from the generated tree (presumably bundled third-party
# assets that should not be shipped - confirm), then the result is moved to
# ./html at the top of the source tree.
override_dh_auto_build-indep:
ifeq (,$(filter nodoc,$(DEB_BUILD_OPTIONS)))
	rocm-docs-build
	rm -rf build/html/_static/fonts build/html/_static/vendor
	mv build/html html
endif

# No tests for the architecture-independent build: the recipe is the shell
# no-op ":".
override_dh_auto_test-indep:
	:

# No dh_auto_install step for the architecture-independent build either.
override_dh_auto_install-indep:
	:

# dwz doesn't fully support DWARF-5 yet, see #1016936.
# The recipe is the shell no-op ":", so dh_dwz is never run.
override_dh_dwz:
	:

# Set the rocm:GPU-Architecture substitution variable to the list of ISAs the
# package was built for: ROCM_ISAS with each ";" replaced by a space.  The
# double quotes embedded in ROCM_ISAS survive the substitution, so the shell
# passes the whole -V option as a single argument.
override_dh_gencontrol:
	dh_gencontrol -- -Vrocm:GPU-Architecture=$(subst ;, ,$(ROCM_ISAS))
