# Targets that name actions rather than files. stage0, stage1 and stage2 are
# deliberately not listed: their recipes `touch $@` to leave stamp files.
.PHONY: all top clean clean-profiles

# Julia source tree: two levels above the directory make runs in.
JULIA_ROOT := $(CURDIR)/../..

# One build directory per stage, all placed in the directory make runs in.
STAGE0_BUILD := $(CURDIR)/stage0.build
STAGE1_BUILD := $(CURDIR)/stage1.build
STAGE2_BUILD := $(CURDIR)/stage2.build

# Raw profiles are collected in PROFILE_DIR; PROFILE_FILE is the merged result.
PROFILE_DIR  := $(CURDIR)/profiles
PROFILE_FILE := $(PROFILE_DIR)/merged.prof

# Tool directory inside the stage 0 build. The trailing slash is part of the
# value: the tool names below are appended to it without a separator.
STAGE0_TOOLS := $(STAGE0_BUILD)/usr/tools/

LLVM_CXXFILT  := $(STAGE0_TOOLS)llvm-cxxfilt
LLVM_PROFDATA := $(STAGE0_TOOLS)llvm-profdata
LLVM_OBJCOPY  := $(STAGE0_TOOLS)llvm-objcopy

# Value handed to clang's -vp-counters-per-site option in the stage 1 compile
# flags. It has to be raised significantly when building a single libLLVM.so.
#
# Note that profile counters are not atomic by default, see
# https://discourse.llvm.org/t/profile-guided-optimization-pgo-related-questions-and-suggestions/75232/5
COUNTERS_PER_SITE := 6

# Hint echoed at the end of the stage 1 recipe. The text is wrapped in single
# quotes so the shell running `echo` prints the backticks literally instead of
# treating them as command substitution. Lines are continued with `$\`.
#
# The message must name the real target (`clean-profiles`) and the real
# variable (PROFILE_DIR): an undefined variable would expand to nothing and
# leave the directory out of the printed text.
AFTER_STAGE1_MESSAGE:='You can now optionally collect more profiling data for use in PGO by running Julia $\
	with an appropriate workload. If you wish, run `make clean-profiles` before doing so to remove any profiling data $\
	generated by building Julia. You should end up with about 15MB of data in $(PROFILE_DIR). $\
	Note that running extensive scripts may result in counter overflows, which can be detected by running $\
	`make top`. Afterwards run `make stage2`.'

# Environment assignments placed in front of the stage 1 sub-make: link with
# lld and ThinLTO, and pass -fprofile-generate so profiles go to PROFILE_DIR.
STAGE1_FLAGS := \
    LDFLAGS="-fuse-ld=lld -flto=thin -Wl,--undefined-version -fprofile-generate=$(PROFILE_DIR)" \
    CFLAGS="-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)" \
    CXXFLAGS="-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)"

# Environment assignments placed in front of the stage 2 sub-make: same linker
# setup plus --icf=safe, and -fprofile-use pointing at the merged profile.
STAGE2_FLAGS := \
    LDFLAGS="-fuse-ld=lld -flto=thin -Wl,--undefined-version -fprofile-use=$(PROFILE_FILE) -Wl,--icf=safe" \
    CFLAGS="-fprofile-use=$(PROFILE_FILE)" \
    CXXFLAGS="-fprofile-use=$(PROFILE_FILE)"

# Make variables passed on the command line of the stage 1 and stage 2
# sub-makes.
COMMON_FLAGS := USECLANG=1 USE_BINARYBUILDER_LLVM=0

# Default goal: `all` is the first ordinary target in this file, so a bare
# `make` runs the full pipeline up to stage 2.
all: stage2

# Set up a stage's build directory by running the `configure` target of the
# Makefile in JULIA_ROOT with O pointing at that directory. The same recipe
# serves all three stages; $@ is whichever directory is being made.
$(STAGE0_BUILD) \
$(STAGE1_BUILD) \
$(STAGE2_BUILD):
	$(MAKE) -C $(JULIA_ROOT) O=$@ configure

# Stage 0: install clang, LLVM, lld and the LLVM tools from the stage 0 build
# directory's deps, then leave a `stage0` stamp file. USE_BINARYBUILDER_LLVM=1
# is exported into the environment of this recipe only.
#
# After installing, turn [cd]tors into init/fini_array sections in
# libclang_rt, since lld doesn't do that, and otherwise the profile
# constructor is not executed.
#
# The search root is spelled out as $(STAGE0_BUILD) rather than `$<`: this rule
# has only an order-only prerequisite, so `$<` expands to nothing and `find`
# would be run without a starting directory.
stage0: export USE_BINARYBUILDER_LLVM=1
stage0: | $(STAGE0_BUILD)
	$(MAKE) -C $(STAGE0_BUILD)/deps install-clang install-llvm install-lld install-llvm-tools && \
	find $(STAGE0_BUILD) -name 'libclang_rt.profile-*.a' -exec $(LLVM_OBJCOPY) --rename-section .ctors=.init_array --rename-section .dtors=.fini_array {} + && \
	touch $@

# The stage 1 build directory is only set up once stage 0 has been built.
$(STAGE1_BUILD): stage0

# Stage 1: build Julia with profiling instrumentation. The stage 0 tools are
# put first on PATH, STAGE1_FLAGS go into the sub-make's environment, and a
# `stage1` stamp file is written only if the build succeeds. The closing
# message tells the user what to do next.
stage1: | $(STAGE1_BUILD)
	@echo "--- Build Julia Stage 1 - with instrumentation"
	PATH=$(STAGE0_TOOLS):$$PATH $(STAGE1_FLAGS) \
		$(MAKE) -C $(STAGE1_BUILD) $(COMMON_FLAGS) \
		&& touch $@
	@echo $(AFTER_STAGE1_MESSAGE)

# Stage 2: rebuild Julia using the merged profile. The stage 0 tools are put
# first on PATH, STAGE2_FLAGS go into the sub-make's environment, and a
# `stage2` stamp file is written only if the build succeeds.
stage2: $(PROFILE_FILE) | $(STAGE2_BUILD)
	@echo "--- Build Julia Stage 2 - PGO + LTO optimised"
	PATH=$(STAGE0_TOOLS):$$PATH $(STAGE2_FLAGS) \
		$(MAKE) -C $(STAGE2_BUILD) $(COMMON_FLAGS) \
		&& touch $@

# Fallback for any goal this file has no rule for: run that goal ($@) in the
# stage 2 build directory, with the same PATH, environment flags and make
# variables as the stage 2 build itself.
# NOTE(review): `stage2` is listed as a prerequisite of .DEFAULT itself;
# confirm that make actually builds it before a forwarded goal.
.DEFAULT: stage2
	PATH=$(STAGE0_TOOLS):$$PATH $(STAGE2_FLAGS) $(MAKE) -C $(STAGE2_BUILD) $(COMMON_FLAGS) $@

# Merge all raw profiles in PROFILE_DIR into PROFILE_FILE.
# The prerequisite list only contains the .profraw files that exist when this
# makefile is read ($(wildcard ...)); the recipe globs again in the shell, so
# it also picks up files written since then.
$(PROFILE_FILE): \
		stage1 \
		$(wildcard $(PROFILE_DIR)/*.profraw)
	$(LLVM_PROFDATA) merge -output=$@ $(PROFILE_DIR)/*.profraw

# Show the top 50 functions of the merged profile, with the listing piped
# through llvm-cxxfilt.
top: $(PROFILE_FILE)
	$(LLVM_PROFDATA) show --topn=50 $(PROFILE_FILE) | $(LLVM_CXXFILT)

# Delete the whole profile directory, raw and merged profiles alike.
clean-profiles:
	rm -rf $(PROFILE_DIR)

# Delete the merged profile and the stage stamp files so the stages run again.
# The stage build directories and the raw profiles are left in place.
clean:
	rm -f $(PROFILE_FILE) stage0 stage1 stage2
