blob: f514982a7ce351b4d02491161638c7912915a83e [file] [log] [blame]
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Tue, 28 Nov 2017 08:48:49 -0800
Subject: ANDROID: kbuild: add support for Clang LTO
This change adds the configuration option CONFIG_LTO_CLANG, and
build system support for Clang's Link Time Optimization (LTO). In
preparation for LTO support with other compilers, potentially common
parts of the changes are gated behind CONFIG_LTO instead.
With -flto, instead of object files, Clang produces LLVM bitcode,
which is compiled into a native object at link time, allowing the
final binary to be optimized globally. For more details, see:
https://llvm.org/docs/LinkTimeOptimization.html
While the kernel normally uses GNU ld for linking, LLVM supports LTO
only with LLD or GNU gold linkers. This change assumes LLD is used.
Bug: 145210207
Change-Id: If1164ff33d073358ee7d4bba84cbb06c349c4a88
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
Makefile | 29 ++++++++++++++-
arch/Kconfig | 47 ++++++++++++++++++++++++
include/asm-generic/vmlinux.lds.h | 5 ++-
scripts/Makefile.build | 54 +++++++++++++++++++++++++---
scripts/Makefile.modfinal | 13 +++++++
scripts/Makefile.modpost | 24 +++++++++++--
scripts/link-vmlinux.sh | 59 ++++++++++++++++++++++++++-----
scripts/mod/modpost.c | 7 ++++
8 files changed, 221 insertions(+), 17 deletions(-)
diff --git a/Makefile b/Makefile
index fa80ee470e4d..e19c72e56d3b 100644
--- a/Makefile
+++ b/Makefile
@@ -654,6 +654,16 @@ RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc
export RETPOLINE_CFLAGS
export RETPOLINE_VDSO_CFLAGS
+# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure
+# ar/cc/ld-* macros return correct values.
+ifdef CONFIG_LTO_CLANG
+# LTO produces LLVM IR instead of object files. Use llvm-ar and llvm-nm, so we
+# can process these.
+AR := llvm-ar
+LLVM_NM := llvm-nm
+export LLVM_NM
+endif
+
include arch/$(SRCARCH)/Makefile
ifdef need-config
@@ -856,6 +866,22 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS)
export CC_FLAGS_SCS
endif
+ifdef CONFIG_LTO_CLANG
+ifdef CONFIG_THINLTO
+CC_FLAGS_LTO_CLANG := -flto=thin $(call cc-option, -fsplit-lto-unit)
+KBUILD_LDFLAGS += --thinlto-cache-dir=.thinlto-cache
+else
+CC_FLAGS_LTO_CLANG := -flto
+endif
+CC_FLAGS_LTO_CLANG += -fvisibility=default
+endif
+
+ifdef CONFIG_LTO
+CC_FLAGS_LTO := $(CC_FLAGS_LTO_CLANG)
+KBUILD_CFLAGS += $(CC_FLAGS_LTO)
+export CC_FLAGS_LTO
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
@@ -1682,7 +1708,8 @@ clean: $(clean-dirs)
-o -name modules.builtin -o -name '.tmp_*.o.*' \
-o -name '*.c.[012]*.*' \
-o -name '*.ll' \
- -o -name '*.gcno' \) -type f -print | xargs rm -f
+ -o -name '*.gcno' \
+ -o -name '*.*.symversions' \) -type f -print | xargs rm -f
# Generate tags for editors
# ---------------------------------------------------------------------------
diff --git a/arch/Kconfig b/arch/Kconfig
index 90a8c5ef4487..1766abf2f5c2 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -554,6 +554,53 @@ config SHADOW_CALL_STACK_VMAP
provides better stack exhaustion protection, but increases per-thread
memory consumption as a full page is allocated for each shadow stack.
+config LTO
+ bool
+
+config ARCH_SUPPORTS_LTO_CLANG
+ bool
+ help
+ An architecture should select this option if it supports:
+ - compiling with Clang,
+ - compiling inline assembly with Clang's integrated assembler,
+ - and linking with LLD.
+
+config ARCH_SUPPORTS_THINLTO
+ bool
+ help
+ An architecture should select this if it supports Clang ThinLTO.
+
+config THINLTO
+ bool "Use Clang's ThinLTO (EXPERIMENTAL)"
+ depends on LTO_CLANG && ARCH_SUPPORTS_THINLTO
+ default y
+ help
+ Use ThinLTO to speed up Link Time Optimization.
+
+choice
+ prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)"
+ default LTO_NONE
+ help
+ This option turns on Link-Time Optimization (LTO).
+
+config LTO_NONE
+ bool "None"
+
+config LTO_CLANG
+ bool "Use Clang's Link Time Optimization (LTO) (EXPERIMENTAL)"
+ depends on ARCH_SUPPORTS_LTO_CLANG
+ depends on !KASAN
+ depends on !FTRACE_MCOUNT_RECORD
+ depends on CC_IS_CLANG && CLANG_VERSION >= 100000 && LD_IS_LLD
+ select LTO
+ help
+ This option enables Clang's Link Time Optimization (LTO), which allows
+ the compiler to optimize the kernel globally at link time. If you
+ enable this option, the compiler generates LLVM IR instead of object
+ files, and the actual compilation from IR occurs at the LTO link step,
+ which may take several minutes.
+
+endchoice
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index dae64600ccbf..ec4827773569 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -63,10 +63,13 @@
* .data. We don't want to pull in .data..other sections, which Linux
* has defined. Same for text and bss.
*
+ * With LTO_CLANG, the linker also splits sections by default, so we need
+ * these macros to combine the sections during the final link.
+ *
* RODATA_MAIN is not used because existing code already defines .rodata.x
* sections to be brought in with rodata.
*/
-#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
#define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX*
#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index a9e47953ca53..7aefd2a1347e 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -93,7 +93,7 @@ endif
# ---------------------------------------------------------------------------
quiet_cmd_cc_s_c = CC $(quiet_modtag) $@
- cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) $(DISABLE_LTO) -fverbose-asm -S -o $@ $<
+ cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) $(DISABLE_LTO) -fverbose-asm -S -o $@ $<
$(obj)/%.s: $(src)/%.c FORCE
$(call if_changed_dep,cc_s_c)
@@ -148,6 +148,15 @@ ifdef CONFIG_MODVERSIONS
# the actual value of the checksum generated by genksyms
# o remove .tmp_<file>.o to <file>.o
+ifdef CONFIG_LTO_CLANG
+# Generate .o.symversions files for each .o with exported symbols, and link these
+# to the kernel and/or modules at the end.
+cmd_modversions_c = \
+ if $(LLVM_NM) $@ | grep -q __ksymtab; then \
+ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
+ > $@.symversions; \
+ fi;
+else
cmd_modversions_c = \
if $(OBJDUMP) -h $@ | grep -q __ksymtab; then \
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
@@ -159,6 +168,7 @@ cmd_modversions_c = \
rm -f $(@D)/.tmp_$(@F:.o=.ver); \
fi
endif
+endif
ifdef CONFIG_FTRACE_MCOUNT_RECORD
ifndef CC_USING_RECORD_MCOUNT
@@ -383,6 +393,21 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler
# To build objects in subdirs, we need to descend into the directories
$(sort $(subdir-obj-y)): $(subdir-ym) ;
+# combine symversions for later processing
+quiet_cmd_update_lto_symversions = SYMVER $@
+ifeq ($(CONFIG_LTO_CLANG) $(CONFIG_MODVERSIONS),y y)
+ cmd_update_lto_symversions = \
+ rm -f $@.symversions; \
+ for i in $(filter-out FORCE,$^); do \
+ if [ -f $$i.symversions ]; then \
+ cat $$i.symversions \
+ >> $@.symversions; \
+ fi; \
+ done
+else
+ cmd_update_lto_symversions = echo >/dev/null
+endif
+
#
# Rule to compile a set of .o files into one .a file (without symbol table)
#
@@ -391,8 +416,11 @@ ifdef builtin-target
quiet_cmd_ar_builtin = AR $@
cmd_ar_builtin = rm -f $@; $(AR) cDPrST $@ $(real-prereqs)
+quiet_cmd_ar_and_symver = AR $@
+ cmd_ar_and_symver = $(cmd_update_lto_symversions); $(cmd_ar_builtin)
+
$(builtin-target): $(real-obj-y) FORCE
- $(call if_changed,ar_builtin)
+ $(call if_changed,ar_and_symver)
targets += $(builtin-target)
endif # builtin-target
@@ -412,16 +440,26 @@ $(modorder-target): $(subdir-ym) FORCE
#
ifdef lib-target
+quiet_cmd_ar_lib = AR $@
+ cmd_ar_lib = $(cmd_update_lto_symversions); $(cmd_ar)
+
$(lib-target): $(lib-y) FORCE
- $(call if_changed,ar)
+ $(call if_changed,ar_lib)
targets += $(lib-target)
dummy-object = $(obj)/.lib_exports.o
ksyms-lds = $(dot-target).lds
+ifdef CONFIG_LTO_CLANG
+# Objdump doesn't understand LLVM IR. Use llvm-nm to dump symbols.
+dump_export_list = $(LLVM_NM)
+else
+dump_export_list = $(OBJDUMP) -h
+endif
+
quiet_cmd_export_list = EXPORTS $@
-cmd_export_list = $(OBJDUMP) -h $< | \
+cmd_export_list = $(dump_export_list) $< | \
sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p' >$(ksyms-lds);\
rm -f $(dummy-object);\
echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\
@@ -439,8 +477,16 @@ endif
# Do not replace $(filter %.o,^) with $(real-prereqs). When a single object
# module is turned into a multi object module, $^ will contain header file
# dependencies recorded in the .*.cmd file.
+ifdef CONFIG_LTO_CLANG
+quiet_cmd_link_multi-m = AR [M] $@
+cmd_link_multi-m = \
+ $(cmd_update_lto_symversions); \
+ rm -f $@; \
+ $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(filter %.o,$^)
+else
quiet_cmd_link_multi-m = LD [M] $@
cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^)
+endif
$(multi-used-m): FORCE
$(call if_changed,link_multi-m)
diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index 411c1e600e7d..2164a84a45a2 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -6,6 +6,7 @@
PHONY := __modfinal
__modfinal:
+include $(objtree)/include/config/auto.conf
include $(srctree)/scripts/Kbuild.include
# for c_flags
@@ -30,12 +31,24 @@ quiet_cmd_cc_o_c = CC [M] $@
ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
quiet_cmd_ld_ko_o = LD [M] $@
+
+ifdef CONFIG_LTO_CLANG
+ cmd_ld_ko_o = \
+ $(LD) -r $(LDFLAGS) \
+ $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
+ $(addprefix -T , $(KBUILD_LDS_MODULE)) \
+ $(shell [ -s $(@:.ko=.o.symversions) ] && \
+ echo -T $(@:.ko=.o.symversions)) \
+ -o $@ --whole-archive $(filter %.o, $^); \
+ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+else
cmd_ld_ko_o = \
$(LD) -r $(KBUILD_LDFLAGS) \
$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
$(addprefix -T , $(KBUILD_LDS_MODULE)) \
-o $@ $(filter %.o, $^); \
$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+endif
$(modules): %.ko: %.o %.mod.o $(KBUILD_LDS_MODULE) FORCE
+$(call if_changed,ld_ko_o)
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 952fff485546..b8b447b62283 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -84,12 +84,32 @@ MODPOST += $(subst -i,-n,$(filter -i,$(MAKEFLAGS))) -s -T - $(wildcard vmlinux)
# find all modules listed in modules.order
modules := $(sort $(shell cat $(MODORDER)))
+# With CONFIG_LTO_CLANG, .o files might be LLVM IR, so we need to link them
+# into actual objects before passing them to modpost
+modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,)
+
+ifdef CONFIG_LTO_CLANG
+quiet_cmd_cc_lto_link_modules = LTO [M] $@
+cmd_cc_lto_link_modules = \
+ $(LD) $(ld_flags) -r -o $(@) \
+ $(shell [ -s $(@:$(modpost-ext).o=.o.symversions) ] && \
+ echo -T $(@:$(modpost-ext).o=.o.symversions)) \
+ --whole-archive $(filter-out FORCE,$^)
+
+$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE
+ $(call if_changed,cc_lto_link_modules)
+
+PHONY += FORCE
+FORCE:
+
+endif
+
# Read out modules.order instead of expanding $(modules) to pass in modpost.
# Otherwise, allmodconfig would fail with "Argument list too long".
quiet_cmd_modpost = MODPOST $(words $(modules)) modules
- cmd_modpost = sed 's/ko$$/o/' $(MODORDER) | $(MODPOST)
+ cmd_modpost = sed 's/\.ko$$/$(modpost-ext)\.o/' $(MODORDER) | $(MODPOST)
-__modpost:
+__modpost: $(modules:.ko=$(modpost-ext).o)
@$(kecho) ' Building modules, stage 2.'
$(call cmd,modpost)
ifneq ($(KBUILD_MODPOST_NOFINAL),1)
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 06495379fcd8..e589ce9b0acd 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -39,6 +39,30 @@ info()
fi
}
+# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into
+# .tmp_symversions
+modversions()
+{
+ if [ -z "${CONFIG_LTO_CLANG}" ]; then
+ return
+ fi
+ if [ -z "${CONFIG_MODVERSIONS}" ]; then
+ return
+ fi
+
+ rm -f .tmp_symversions
+
+ for a in ${KBUILD_VMLINUX_OBJS} ${KBUILD_VMLINUX_LIBS}; do
+ for o in $(${AR} t $a 2>/dev/null); do
+ if [ -f ${o}.symversions ]; then
+ cat ${o}.symversions >> .tmp_symversions
+ fi
+ done
+ done
+
+ echo "-T .tmp_symversions"
+}
+
# Link of vmlinux.o used for section mismatch analysis
# ${1} output file
modpost_link()
@@ -52,7 +76,15 @@ modpost_link()
${KBUILD_VMLINUX_LIBS} \
--end-group"
- ${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects}
+ if [ -n "${CONFIG_LTO_CLANG}" ]; then
+ # This might take a while, so indicate that we're doing
+ # an LTO link
+ info LTO ${1}
+ else
+ info LD ${1}
+ fi
+
+ ${LD} ${KBUILD_LDFLAGS} -r -o ${1} $(modversions) ${objects}
}
# Link of vmlinux
@@ -70,13 +102,22 @@ vmlinux_link()
shift
if [ "${SRCARCH}" != "um" ]; then
- objects="--whole-archive \
- ${KBUILD_VMLINUX_OBJS} \
- --no-whole-archive \
- --start-group \
- ${KBUILD_VMLINUX_LIBS} \
- --end-group \
- ${@}"
+ if [ -n "${CONFIG_LTO_CLANG}" ]; then
+ # Use vmlinux.o instead of performing the slow LTO
+ # link again.
+ objects="--whole-archive \
+ vmlinux.o \
+ --no-whole-archive \
+ ${@}"
+ else
+ objects="--whole-archive \
+ ${KBUILD_VMLINUX_OBJS} \
+ --no-whole-archive \
+ --start-group \
+ ${KBUILD_VMLINUX_LIBS} \
+ --end-group \
+ ${@}"
+ fi
${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \
-o ${output} \
@@ -189,6 +230,7 @@ cleanup()
rm -f .btf.*
rm -f .tmp_System.map
rm -f .tmp_kallsyms*
+ rm -f .tmp_symversions
rm -f .tmp_vmlinux*
rm -f System.map
rm -f vmlinux
@@ -240,7 +282,6 @@ fi;
${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init
#link vmlinux.o
-info LD vmlinux.o
modpost_link vmlinux.o
# modpost vmlinux.o to check for section mismatches
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index d2a30a7b3f07..6ef098430b86 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -146,6 +146,9 @@ static struct module *new_module(const char *modname)
p[strlen(p) - 2] = '\0';
mod->is_dot_o = 1;
}
+ /* strip trailing .lto */
+ if (strends(p, ".lto"))
+ p[strlen(p) - 4] = '\0';
/* add to list */
mod->name = p;
@@ -2000,6 +2003,10 @@ static char *remove_dot(char *s)
size_t m = strspn(s + n + 1, "0123456789");
if (m && (s[n + m] == '.' || s[n + m] == 0))
s[n] = 0;
+
+ /* strip trailing .lto */
+ if (strends(s, ".lto"))
+ s[strlen(s) - 4] = '\0';
}
return s;
}