| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| From: Sami Tolvanen <samitolvanen@google.com> |
| Date: Tue, 28 Nov 2017 08:48:49 -0800 |
| Subject: ANDROID: kbuild: add support for Clang LTO |
| |
| This change adds the configuration option CONFIG_LTO_CLANG, and |
| build system support for Clang's Link Time Optimization (LTO). In |
| preparation for LTO support with other compilers, potentially common |
| parts of the changes are gated behind CONFIG_LTO instead. |
| |
| With -flto, instead of object files, Clang produces LLVM bitcode, |
| which is compiled into a native object at link time, allowing the |
| final binary to be optimized globally. For more details, see: |
| |
| https://llvm.org/docs/LinkTimeOptimization.html |
| |
| While the kernel normally uses GNU ld for linking, LLVM supports LTO |
| only with LLD or GNU gold linkers. This change assumes LLD is used. |
| |
| Bug: 145210207 |
| Change-Id: If1164ff33d073358ee7d4bba84cbb06c349c4a88 |
| Signed-off-by: Sami Tolvanen <samitolvanen@google.com> |
| --- |
| Makefile | 29 ++++++++++++++- |
| arch/Kconfig | 47 ++++++++++++++++++++++++ |
| include/asm-generic/vmlinux.lds.h | 5 ++- |
| scripts/Makefile.build | 54 +++++++++++++++++++++++++--- |
| scripts/Makefile.modfinal | 13 +++++++ |
| scripts/Makefile.modpost | 24 +++++++++++-- |
| scripts/link-vmlinux.sh | 59 ++++++++++++++++++++++++++----- |
| scripts/mod/modpost.c | 7 ++++ |
| 8 files changed, 221 insertions(+), 17 deletions(-) |
| |
| diff --git a/Makefile b/Makefile |
| index fa80ee470e4d..e19c72e56d3b 100644 |
| --- a/Makefile |
| +++ b/Makefile |
| @@ -654,6 +654,16 @@ RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc |
| export RETPOLINE_CFLAGS |
| export RETPOLINE_VDSO_CFLAGS |
| |
| +# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure |
| +# ar/cc/ld-* macros return correct values. |
| +ifdef CONFIG_LTO_CLANG |
| +# LTO produces LLVM IR instead of object files. Use llvm-ar and llvm-nm, so we |
| +# can process these. |
| +AR := llvm-ar |
| +LLVM_NM := llvm-nm |
| +export LLVM_NM |
| +endif |
| + |
| include arch/$(SRCARCH)/Makefile |
| |
| ifdef need-config |
| @@ -856,6 +866,22 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS) |
| export CC_FLAGS_SCS |
| endif |
| |
| +ifdef CONFIG_LTO_CLANG |
| +ifdef CONFIG_THINLTO |
| +CC_FLAGS_LTO_CLANG := -flto=thin $(call cc-option, -fsplit-lto-unit) |
| +KBUILD_LDFLAGS += --thinlto-cache-dir=.thinlto-cache |
| +else |
| +CC_FLAGS_LTO_CLANG := -flto |
| +endif |
| +CC_FLAGS_LTO_CLANG += -fvisibility=default |
| +endif |
| + |
| +ifdef CONFIG_LTO |
| +CC_FLAGS_LTO := $(CC_FLAGS_LTO_CLANG) |
| +KBUILD_CFLAGS += $(CC_FLAGS_LTO) |
| +export CC_FLAGS_LTO |
| +endif |
| + |
| # arch Makefile may override CC so keep this after arch Makefile is included |
| NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) |
| |
| @@ -1682,7 +1708,8 @@ clean: $(clean-dirs) |
| -o -name modules.builtin -o -name '.tmp_*.o.*' \ |
| -o -name '*.c.[012]*.*' \ |
| -o -name '*.ll' \ |
| - -o -name '*.gcno' \) -type f -print | xargs rm -f |
| + -o -name '*.gcno' \ |
| + -o -name '*.*.symversions' \) -type f -print | xargs rm -f |
| |
| # Generate tags for editors |
| # --------------------------------------------------------------------------- |
| diff --git a/arch/Kconfig b/arch/Kconfig |
| index 90a8c5ef4487..1766abf2f5c2 100644 |
| --- a/arch/Kconfig |
| +++ b/arch/Kconfig |
| @@ -554,6 +554,53 @@ config SHADOW_CALL_STACK_VMAP |
| provides better stack exhaustion protection, but increases per-thread |
| memory consumption as a full page is allocated for each shadow stack. |
| |
| +config LTO |
| + bool |
| + |
| +config ARCH_SUPPORTS_LTO_CLANG |
| + bool |
| + help |
| + An architecture should select this option if it supports: |
| + - compiling with Clang, |
| + - compiling inline assembly with Clang's integrated assembler, |
| + - and linking with LLD. |
| + |
| +config ARCH_SUPPORTS_THINLTO |
| + bool |
| + help |
| + An architecture should select this if it supports Clang ThinLTO. |
| + |
| +config THINLTO |
| + bool "Use Clang's ThinLTO (EXPERIMENTAL)" |
| + depends on LTO_CLANG && ARCH_SUPPORTS_THINLTO |
| + default y |
| + help |
| + Use ThinLTO to speed up Link Time Optimization. |
| + |
| +choice |
| + prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)" |
| + default LTO_NONE |
| + help |
| + This option turns on Link-Time Optimization (LTO). |
| + |
| +config LTO_NONE |
| + bool "None" |
| + |
| +config LTO_CLANG |
| + bool "Use Clang's Link Time Optimization (LTO) (EXPERIMENTAL)" |
| + depends on ARCH_SUPPORTS_LTO_CLANG |
| + depends on !KASAN |
| + depends on !FTRACE_MCOUNT_RECORD |
| + depends on CC_IS_CLANG && CLANG_VERSION >= 100000 && LD_IS_LLD |
| + select LTO |
| + help |
| + This option enables Clang's Link Time Optimization (LTO), which allows |
| + the compiler to optimize the kernel globally at link time. If you |
| + enable this option, the compiler generates LLVM IR instead of object |
| + files, and the actual compilation from IR occurs at the LTO link step, |
| + which may take several minutes. |
| + |
| +endchoice |
| |
| config HAVE_ARCH_WITHIN_STACK_FRAMES |
| bool |
| diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h |
| index dae64600ccbf..ec4827773569 100644 |
| --- a/include/asm-generic/vmlinux.lds.h |
| +++ b/include/asm-generic/vmlinux.lds.h |
| @@ -63,10 +63,13 @@ |
| * .data. We don't want to pull in .data..other sections, which Linux |
| * has defined. Same for text and bss. |
| * |
| + * With LTO_CLANG, the linker also splits sections by default, so we need |
| + * these macros to combine the sections during the final link. |
| + * |
| * RODATA_MAIN is not used because existing code already defines .rodata.x |
| * sections to be brought in with rodata. |
| */ |
| -#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION |
| +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) |
| #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* |
| #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX* |
| #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* |
| diff --git a/scripts/Makefile.build b/scripts/Makefile.build |
| index a9e47953ca53..7aefd2a1347e 100644 |
| --- a/scripts/Makefile.build |
| +++ b/scripts/Makefile.build |
| @@ -93,7 +93,7 @@ endif |
| # --------------------------------------------------------------------------- |
| |
| quiet_cmd_cc_s_c = CC $(quiet_modtag) $@ |
| - cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) $(DISABLE_LTO) -fverbose-asm -S -o $@ $< |
| + cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) $(DISABLE_LTO) -fverbose-asm -S -o $@ $< |
| |
| $(obj)/%.s: $(src)/%.c FORCE |
| $(call if_changed_dep,cc_s_c) |
| @@ -148,6 +148,15 @@ ifdef CONFIG_MODVERSIONS |
| # the actual value of the checksum generated by genksyms |
| # o remove .tmp_<file>.o to <file>.o |
| |
| +ifdef CONFIG_LTO_CLANG |
| +# Generate .o.symversions files for each .o with exported symbols, and link these |
| +# to the kernel and/or modules at the end. |
| +cmd_modversions_c = \ |
| + if $(LLVM_NM) $@ | grep -q __ksymtab; then \ |
| + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ |
| + > $@.symversions; \ |
| + fi; |
| +else |
| cmd_modversions_c = \ |
| if $(OBJDUMP) -h $@ | grep -q __ksymtab; then \ |
| $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ |
| @@ -159,6 +168,7 @@ cmd_modversions_c = \ |
| rm -f $(@D)/.tmp_$(@F:.o=.ver); \ |
| fi |
| endif |
| +endif |
| |
| ifdef CONFIG_FTRACE_MCOUNT_RECORD |
| ifndef CC_USING_RECORD_MCOUNT |
| @@ -383,6 +393,21 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler |
| # To build objects in subdirs, we need to descend into the directories |
| $(sort $(subdir-obj-y)): $(subdir-ym) ; |
| |
| +# Rebuild $@.symversions by concatenating the .symversions of each prerequisite |
| +quiet_cmd_update_lto_symversions = SYMVER $@ |
| +ifeq ($(CONFIG_LTO_CLANG) $(CONFIG_MODVERSIONS),y y) |
| + cmd_update_lto_symversions = \ |
| + rm -f $@.symversions; \ |
| + for i in $(filter-out FORCE,$^); do \ |
| + if [ -f $$i.symversions ]; then \ |
| + cat $$i.symversions \ |
| + >> $@.symversions; \ |
| + fi; \ |
| + done |
| +else |
| + cmd_update_lto_symversions = echo >/dev/null |
| +endif |
| + |
| # |
| # Rule to compile a set of .o files into one .a file (without symbol table) |
| # |
| @@ -391,8 +416,11 @@ ifdef builtin-target |
| quiet_cmd_ar_builtin = AR $@ |
| cmd_ar_builtin = rm -f $@; $(AR) cDPrST $@ $(real-prereqs) |
| |
| +quiet_cmd_ar_and_symver = AR $@ |
| + cmd_ar_and_symver = $(cmd_update_lto_symversions); $(cmd_ar_builtin) |
| + |
| $(builtin-target): $(real-obj-y) FORCE |
| - $(call if_changed,ar_builtin) |
| + $(call if_changed,ar_and_symver) |
| |
| targets += $(builtin-target) |
| endif # builtin-target |
| @@ -412,16 +440,26 @@ $(modorder-target): $(subdir-ym) FORCE |
| # |
| ifdef lib-target |
| |
| +quiet_cmd_ar_lib = AR $@ |
| + cmd_ar_lib = $(cmd_update_lto_symversions); $(cmd_ar) |
| + |
| $(lib-target): $(lib-y) FORCE |
| - $(call if_changed,ar) |
| + $(call if_changed,ar_lib) |
| |
| targets += $(lib-target) |
| |
| dummy-object = $(obj)/.lib_exports.o |
| ksyms-lds = $(dot-target).lds |
| |
| +ifdef CONFIG_LTO_CLANG |
| +# Objdump doesn't understand LLVM IR. Use llvm-nm to dump symbols. |
| +dump_export_list = $(LLVM_NM) |
| +else |
| +dump_export_list = $(OBJDUMP) -h |
| +endif |
| + |
| quiet_cmd_export_list = EXPORTS $@ |
| -cmd_export_list = $(OBJDUMP) -h $< | \ |
| +cmd_export_list = $(dump_export_list) $< | \ |
| sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p' >$(ksyms-lds);\ |
| rm -f $(dummy-object);\ |
| echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\ |
| @@ -439,8 +477,16 @@ endif |
| # Do not replace $(filter %.o,^) with $(real-prereqs). When a single object |
| # module is turned into a multi object module, $^ will contain header file |
| # dependencies recorded in the .*.cmd file. |
| +ifdef CONFIG_LTO_CLANG |
| +quiet_cmd_link_multi-m = AR [M] $@ |
| +cmd_link_multi-m = \ |
| + $(cmd_update_lto_symversions); \ |
| + rm -f $@; \ |
| + $(AR) cDPrsT $@ $(filter %.o,$^) |
| +else |
| quiet_cmd_link_multi-m = LD [M] $@ |
| cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^) |
| +endif |
| |
| $(multi-used-m): FORCE |
| $(call if_changed,link_multi-m) |
| diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal |
| index 411c1e600e7d..2164a84a45a2 100644 |
| --- a/scripts/Makefile.modfinal |
| +++ b/scripts/Makefile.modfinal |
| @@ -6,6 +6,7 @@ |
| PHONY := __modfinal |
| __modfinal: |
| |
| +include $(objtree)/include/config/auto.conf |
| include $(srctree)/scripts/Kbuild.include |
| |
| # for c_flags |
| @@ -30,12 +31,24 @@ quiet_cmd_cc_o_c = CC [M] $@ |
| ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) |
| |
| quiet_cmd_ld_ko_o = LD [M] $@ |
| +# LTO: %.o may be an archive of LLVM bitcode; link all members plus any symversions |
| +ifdef CONFIG_LTO_CLANG |
| + cmd_ld_ko_o = \ |
| + $(LD) -r $(KBUILD_LDFLAGS) \ |
| + $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ |
| + $(addprefix -T , $(KBUILD_LDS_MODULE)) \ |
| + $(shell [ -s $(@:.ko=.o.symversions) ] && \ |
| + echo -T $(@:.ko=.o.symversions)) \ |
| + -o $@ --whole-archive $(filter %.o, $^); \ |
| + $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) |
| +else |
| cmd_ld_ko_o = \ |
| $(LD) -r $(KBUILD_LDFLAGS) \ |
| $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \ |
| $(addprefix -T , $(KBUILD_LDS_MODULE)) \ |
| -o $@ $(filter %.o, $^); \ |
| $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true) |
| +endif |
| |
| $(modules): %.ko: %.o %.mod.o $(KBUILD_LDS_MODULE) FORCE |
| +$(call if_changed,ld_ko_o) |
| diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost |
| index 952fff485546..b8b447b62283 100644 |
| --- a/scripts/Makefile.modpost |
| +++ b/scripts/Makefile.modpost |
| @@ -84,12 +84,32 @@ MODPOST += $(subst -i,-n,$(filter -i,$(MAKEFLAGS))) -s -T - $(wildcard vmlinux) |
| # find all modules listed in modules.order |
| modules := $(sort $(shell cat $(MODORDER))) |
| |
| +# With CONFIG_LTO_CLANG, .o files might be LLVM IR, so we need to link them |
| +# into actual objects before passing them to modpost |
| +modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,) |
| + |
| +ifdef CONFIG_LTO_CLANG |
| +quiet_cmd_cc_lto_link_modules = LTO [M] $@ |
| +cmd_cc_lto_link_modules = \ |
| + $(LD) $(ld_flags) -r -o $(@) \ |
| + $(shell [ -s $(@:$(modpost-ext).o=.o.symversions) ] && \ |
| + echo -T $(@:$(modpost-ext).o=.o.symversions)) \ |
| + --whole-archive $(filter-out FORCE,$^) |
| + |
| +$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE |
| + $(call if_changed,cc_lto_link_modules) |
| + |
| +PHONY += FORCE |
| +FORCE: |
| + |
| +endif |
| + |
| # Read out modules.order instead of expanding $(modules) to pass in modpost. |
| # Otherwise, allmodconfig would fail with "Argument list too long". |
| quiet_cmd_modpost = MODPOST $(words $(modules)) modules |
| - cmd_modpost = sed 's/ko$$/o/' $(MODORDER) | $(MODPOST) |
| + cmd_modpost = sed 's/\.ko$$/$(modpost-ext)\.o/' $(MODORDER) | $(MODPOST) |
| |
| -__modpost: |
| +__modpost: $(modules:.ko=$(modpost-ext).o) |
| @$(kecho) ' Building modules, stage 2.' |
| $(call cmd,modpost) |
| ifneq ($(KBUILD_MODPOST_NOFINAL),1) |
| diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh |
| index 06495379fcd8..e589ce9b0acd 100755 |
| --- a/scripts/link-vmlinux.sh |
| +++ b/scripts/link-vmlinux.sh |
| @@ -39,6 +39,30 @@ info() |
| fi |
| } |
| |
| +# With CONFIG_LTO_CLANG and CONFIG_MODVERSIONS, gather the .symversions files of |
| +# all archive members into .tmp_symversions and print "-T .tmp_symversions" |
| +modversions() |
| +{ |
| + if [ -z "${CONFIG_LTO_CLANG}" ]; then |
| + return |
| + fi |
| + if [ -z "${CONFIG_MODVERSIONS}" ]; then |
| + return |
| + fi |
| + |
| + rm -f .tmp_symversions |
| + |
| + for a in ${KBUILD_VMLINUX_OBJS} ${KBUILD_VMLINUX_LIBS}; do |
| + for o in $(${AR} t $a 2>/dev/null); do |
| + if [ -f ${o}.symversions ]; then |
| + cat ${o}.symversions >> .tmp_symversions |
| + fi |
| + done |
| + done |
| + |
| + echo "-T .tmp_symversions" |
| +} |
| + |
| # Link of vmlinux.o used for section mismatch analysis |
| # ${1} output file |
| modpost_link() |
| @@ -52,7 +76,15 @@ modpost_link() |
| ${KBUILD_VMLINUX_LIBS} \ |
| --end-group" |
| |
| - ${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects} |
| + if [ -n "${CONFIG_LTO_CLANG}" ]; then |
| + # This might take a while, so indicate that we're doing |
| + # an LTO link |
| + info LTO ${1} |
| + else |
| + info LD ${1} |
| + fi |
| + |
| + ${LD} ${KBUILD_LDFLAGS} -r -o ${1} $(modversions) ${objects} |
| } |
| |
| # Link of vmlinux |
| @@ -70,13 +102,22 @@ vmlinux_link() |
| shift |
| |
| if [ "${SRCARCH}" != "um" ]; then |
| - objects="--whole-archive \ |
| - ${KBUILD_VMLINUX_OBJS} \ |
| - --no-whole-archive \ |
| - --start-group \ |
| - ${KBUILD_VMLINUX_LIBS} \ |
| - --end-group \ |
| - ${@}" |
| + if [ -n "${CONFIG_LTO_CLANG}" ]; then |
| + # Use vmlinux.o instead of performing the slow LTO |
| + # link again. |
| + objects="--whole-archive \ |
| + vmlinux.o \ |
| + --no-whole-archive \ |
| + ${@}" |
| + else |
| + objects="--whole-archive \ |
| + ${KBUILD_VMLINUX_OBJS} \ |
| + --no-whole-archive \ |
| + --start-group \ |
| + ${KBUILD_VMLINUX_LIBS} \ |
| + --end-group \ |
| + ${@}" |
| + fi |
| |
| ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \ |
| -o ${output} \ |
| @@ -189,6 +230,7 @@ cleanup() |
| rm -f .btf.* |
| rm -f .tmp_System.map |
| rm -f .tmp_kallsyms* |
| + rm -f .tmp_symversions |
| rm -f .tmp_vmlinux* |
| rm -f System.map |
| rm -f vmlinux |
| @@ -240,7 +282,6 @@ fi; |
| ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init |
| |
| #link vmlinux.o |
| -info LD vmlinux.o |
| modpost_link vmlinux.o |
| |
| # modpost vmlinux.o to check for section mismatches |
| diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c |
| index d2a30a7b3f07..6ef098430b86 100644 |
| --- a/scripts/mod/modpost.c |
| +++ b/scripts/mod/modpost.c |
| @@ -146,6 +146,9 @@ static struct module *new_module(const char *modname) |
| p[strlen(p) - 2] = '\0'; |
| mod->is_dot_o = 1; |
| } |
| + /* strip trailing .lto */ |
| + if (strends(p, ".lto")) |
| + p[strlen(p) - 4] = '\0'; |
| |
| /* add to list */ |
| mod->name = p; |
| @@ -2000,6 +2003,10 @@ static char *remove_dot(char *s) |
| size_t m = strspn(s + n + 1, "0123456789"); |
| if (m && (s[n + m] == '.' || s[n + m] == 0)) |
| s[n] = 0; |
| + |
| + /* strip trailing .lto */ |
| + if (strends(s, ".lto")) |
| + s[strlen(s) - 4] = '\0'; |
| } |
| return s; |
| } |