Merge 4.9.168 into android-4.9-o

Changes in 4.9.168
	arm64: debug: Don't propagate UNKNOWN FAR into si_code for debug signals
	arm64: debug: Ensure debug handlers check triggering exception level
	ext4: cleanup bh release code in ext4_ind_remove_space()
	lib/int_sqrt: optimize initial value compute
	tty/serial: atmel: Add is_half_duplex helper
	tty/serial: atmel: RS485 HD w/DMA: enable RX after TX is stopped
	mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified
	i2c: core-smbus: prevent stack corruption on read I2C_BLOCK_DATA
	CIFS: fix POSIX lock leak and invalid ptr deref
	h8300: use cc-cross-prefix instead of hardcoding h8300-unknown-linux-
	tracing: kdb: Fix ftdump to not sleep
	gpio: gpio-omap: fix level interrupt idling
	include/linux/relay.h: fix percpu annotation in struct rchan
	sysctl: handle overflow for file-max
	enic: fix build warning without CONFIG_CPUMASK_OFFSTACK
	scsi: hisi_sas: Set PHY linkrate when disconnected
	mm/cma.c: cma_declare_contiguous: correct err handling
	mm/page_ext.c: fix an imbalance with kmemleak
	mm/vmalloc.c: fix kernel BUG at mm/vmalloc.c:512!
	mm/slab.c: kmemleak no scan alien caches
	ocfs2: fix a panic problem caused by o2cb_ctl
	f2fs: do not use mutex lock in atomic context
	fs/file.c: initialize init_files.resize_wait
	cifs: use correct format characters
	dm thin: add sanity checks to thin-pool and external snapshot creation
	cifs: Fix NULL pointer dereference of devname
	jbd2: fix invalid descriptor block checksum
	fs: fix guard_bio_eod to check for real EOD errors
	tools lib traceevent: Fix buffer overflow in arg_eval
	wil6210: check null pointer in _wil_cfg80211_merge_extra_ies
	crypto: crypto4xx - add missing of_node_put after of_device_is_available
	usb: chipidea: Grab the (legacy) USB PHY by phandle first
	scsi: core: replace GFP_ATOMIC with GFP_KERNEL in scsi_scan.c
	coresight: etm4x: Add support to enable ETMv4.2
	ARM: 8840/1: use a raw_spinlock_t in unwind
	iommu/io-pgtable-arm-v7s: Only kmemleak_ignore L2 tables
	mmc: omap: fix the maximum timeout setting
	e1000e: Fix -Wformat-truncation warnings
	mlxsw: spectrum: Avoid -Wformat-truncation warnings
	IB/mlx4: Increase the timeout for CM cache
	scsi: megaraid_sas: return error when create DMA pool failed
	perf test: Fix failure of 'evsel-tp-sched' test on s390
	SoC: imx-sgtl5000: add missing put_device()
	media: sh_veu: Correct return type for mem2mem buffer helpers
	media: s5p-jpeg: Correct return type for mem2mem buffer helpers
	media: s5p-g2d: Correct return type for mem2mem buffer helpers
	media: mx2_emmaprp: Correct return type for mem2mem buffer helpers
	vfs: fix preadv64v2 and pwritev64v2 compat syscalls with offset == -1
	HID: intel-ish-hid: avoid binding wrong ishtp_cl_device
	leds: lp55xx: fix null deref on firmware load failure
	iwlwifi: pcie: fix emergency path
	ACPI / video: Refactor and fix dmi_is_desktop()
	kprobes: Prohibit probing on bsearch()
	ARM: 8833/1: Ensure that NEON code always compiles with Clang
	ALSA: PCM: check if ops are defined before suspending PCM
	usb: f_fs: Avoid crash due to out-of-scope stack ptr access
	bcache: fix input overflow to cache set sysfs file io_error_halflife
	bcache: fix input overflow to sequential_cutoff
	bcache: improve sysfs_strtoul_clamp()
	genirq: Avoid summation loops for /proc/stat
	iw_cxgb4: fix srqidx leak during connection abort
	fbdev: fbmem: fix memory access if logo is bigger than the screen
	cdrom: Fix race condition in cdrom_sysctl_register
	e1000e: fix cyclic resets at link up with active tx
	ASoC: fsl-asoc-card: fix object reference leaks in fsl_asoc_card_probe
	efi/memattr: Don't bail on zero VA if it equals the region's PA
	ARM: dts: lpc32xx: Remove leading 0x and 0s from bindings notation
	soc: qcom: gsbi: Fix error handling in gsbi_probe()
	mt7601u: bump supported EEPROM version
	ARM: avoid Cortex-A9 livelock on tight dmb loops
	tty: increase the default flip buffer limit to 2*640K
	powerpc/pseries: Perform full re-add of CPU for topology update post-migration
	media: mt9m111: set initial frame size other than 0x0
	hwrng: virtio - Avoid repeated init of completion
	soc/tegra: fuse: Fix illegal free of IO base address
	HID: intel-ish: ipc: handle PIMR before ish_wakeup also clear PISR busy_clear bit
	hpet: Fix missing '=' character in the __setup() code of hpet_mmap_enable
	dmaengine: imx-dma: fix warning comparison of distinct pointer types
	dmaengine: qcom_hidma: assign channel cookie correctly
	netfilter: physdev: relax br_netfilter dependency
	media: s5p-jpeg: Check for fmt_ver_flag when doing fmt enumeration
	regulator: act8865: Fix act8600_sudcdc_voltage_ranges setting
	drm/nouveau: Stop using drm_crtc_force_disable
	x86/build: Specify elf_i386 linker emulation explicitly for i386 objects
	selinux: do not override context on context mounts
	wlcore: Fix memory leak in case wl12xx_fetch_firmware failure
	x86/build: Mark per-CPU symbols as absolute explicitly for LLD
	dmaengine: tegra: avoid overflow of byte tracking
	drm/dp/mst: Configure no_stop_bit correctly for remote i2c xfers
	ACPI / video: Extend chassis-type detection with a "Lunch Box" check
	Linux 4.9.168

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
diff --git a/Documentation/ABI/testing/sysfs-class-dual-role-usb b/Documentation/ABI/testing/sysfs-class-dual-role-usb
new file mode 100644
index 0000000..a900fd7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-dual-role-usb
@@ -0,0 +1,71 @@
+What:		/sys/class/dual_role_usb/.../
+Date:		June 2015
+Contact:	Badhri Jagan Sridharan<badhri@google.com>
+Description:
+		Provide a generic interface to monitor and change
+		the state of dual role usb ports. The name here
+		refers to the name mentioned in the
+		dual_role_phy_desc that is passed while registering
+		the dual_role_phy_intstance through
+		devm_dual_role_instance_register.
+
+What:           /sys/class/dual_role_usb/.../supported_modes
+Date:           June 2015
+Contact:        Badhri Jagan Sridharan<badhri@google.com>
+Description:
+		This is a static node, once initialized this
+		is not expected to change during runtime. "dfp"
+		refers to "downstream facing port" i.e. port can
+		only act as host. "ufp" refers to "upstream
+		facing port" i.e. port can only act as device.
+		"dfp ufp" refers to "dual role port" i.e. the port
+		can either be a host port or a device port.
+
+What:		/sys/class/dual_role_usb/.../mode
+Date:		June 2015
+Contact:	Badhri Jagan Sridharan<badhri@google.com>
+Description:
+		The mode node refers to the current mode in which the
+		port is operating. "dfp" for host ports. "ufp" for device
+		ports and "none" when cable is not connected.
+
+		On devices where the USB mode is software-controllable,
+		userspace can change the mode by writing "dfp" or "ufp".
+		On devices where the USB mode is fixed in hardware,
+		this attribute is read-only.
+
+What:		/sys/class/dual_role_usb/.../power_role
+Date:		June 2015
+Contact:	Badhri Jagan Sridharan<badhri@google.com>
+Description:
+		The power_role node mentions whether the port
+		is "sink"ing or "source"ing power. "none" if
+		they are not connected.
+
+		On devices implementing USB Power Delivery,
+		userspace can control the power role by writing "sink" or
+		"source". On devices without USB-PD, this attribute is
+		read-only.
+
+What:		/sys/class/dual_role_usb/.../data_role
+Date:		June 2015
+Contact:	Badhri Jagan Sridharan<badhri@google.com>
+Description:
+		The data_role node mentions whether the port
+		is acting as "host" or "device" for USB data connection.
+		"none" if there is no active data link.
+
+		On devices implementing USB Power Delivery, userspace
+		can control the data role by writing "host" or "device".
+		On devices without USB-PD, this attribute is read-only
+
+What:		/sys/class/dual_role_usb/.../powers_vconn
+Date:		June 2015
+Contact:	Badhri Jagan Sridharan<badhri@google.com>
+Description:
+		The powers_vconn node mentions whether the port
+		is supplying power for VCONN pin.
+
+		On devices with software control of VCONN,
+		userspace can disable the power supply to VCONN by writing "n",
+		or enable the power supply by writing "y".
diff --git a/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons b/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons
new file mode 100644
index 0000000..acb19b9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons
@@ -0,0 +1,16 @@
+What:		/sys/kernel/wakeup_reasons/last_resume_reason
+Date:		February 2014
+Contact:	Ruchi Kandoi <kandoiruchi@google.com>
+Description:
+		The /sys/kernel/wakeup_reasons/last_resume_reason is
+		used to report wakeup reasons after system exited suspend.
+
+What:		/sys/kernel/wakeup_reasons/last_suspend_time
+Date:		March 2015
+Contact:	jinqian <jinqian@google.com>
+Description:
+		The /sys/kernel/wakeup_reasons/last_suspend_time is
+		used to report time spent in last suspend cycle. It contains
+		two numbers (in seconds) separated by space. First number is
+		the time spent in suspend and resume processes. Second number
+		is the time spent in sleep state.
\ No newline at end of file
diff --git a/Documentation/android.txt b/Documentation/android.txt
new file mode 100644
index 0000000..0f40a78
--- /dev/null
+++ b/Documentation/android.txt
@@ -0,0 +1,121 @@
+				=============
+				A N D R O I D
+				=============
+
+Copyright (C) 2009 Google, Inc.
+Written by Mike Chan <mike@android.com>
+
+CONTENTS:
+---------
+
+1. Android
+  1.1 Required enabled config options
+  1.2 Required disabled config options
+  1.3 Recommended enabled config options
+2. Contact
+
+
+1. Android
+==========
+
+Android (www.android.com) is an open source operating system for mobile devices.
+This document describes configurations needed to run the Android framework on
+top of the Linux kernel.
+
+To see a working defconfig look at msm_defconfig or goldfish_defconfig
+which can be found at http://android.git.kernel.org in kernel/common.git
+and kernel/msm.git
+
+
+1.1 Required enabled config options
+-----------------------------------
+After building a standard defconfig, ensure that these options are enabled in
+your .config or defconfig if they are not already. Based off the msm_defconfig.
+You should keep the rest of the default options enabled in the defconfig
+unless you know what you are doing.
+
+ANDROID_PARANOID_NETWORK
+ASHMEM
+CONFIG_FB_MODE_HELPERS
+CONFIG_FONT_8x16
+CONFIG_FONT_8x8
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM
+DAB
+EARLYSUSPEND
+FB
+FB_CFB_COPYAREA
+FB_CFB_FILLRECT
+FB_CFB_IMAGEBLIT
+FB_DEFERRED_IO
+FB_TILEBLITTING
+HIGH_RES_TIMERS
+INOTIFY
+INOTIFY_USER
+INPUT_EVDEV
+INPUT_GPIO
+INPUT_MISC
+LEDS_CLASS
+LEDS_GPIO
+LOCK_KERNEL
+LkOGGER
+LOW_MEMORY_KILLER
+MISC_DEVICES
+NEW_LEDS
+NO_HZ
+POWER_SUPPLY
+PREEMPT
+RAMFS
+RTC_CLASS
+RTC_LIB
+SWITCH
+SWITCH_GPIO
+TMPFS
+UID_STAT
+UID16
+USB_FUNCTION
+USB_FUNCTION_ADB
+USER_WAKELOCK
+VIDEO_OUTPUT_CONTROL
+WAKELOCK
+YAFFS_AUTO_YAFFS2
+YAFFS_FS
+YAFFS_YAFFS1
+YAFFS_YAFFS2
+
+
+1.2 Required disabled config options
+------------------------------------
+CONFIG_YAFFS_DISABLE_LAZY_LOAD
+DNOTIFY
+
+
+1.3 Recommended enabled config options
+------------------------------
+ANDROID_PMEM
+PSTORE_CONSOLE
+PSTORE_RAM
+SCHEDSTATS
+DEBUG_PREEMPT
+DEBUG_MUTEXES
+DEBUG_SPINLOCK_SLEEP
+DEBUG_INFO
+FRAME_POINTER
+CPU_FREQ
+CPU_FREQ_TABLE
+CPU_FREQ_DEFAULT_GOV_ONDEMAND
+CPU_FREQ_GOV_ONDEMAND
+CRC_CCITT
+EMBEDDED
+INPUT_TOUCHSCREEN
+I2C
+I2C_BOARDINFO
+LOG_BUF_SHIFT=17
+SERIAL_CORE
+SERIAL_CORE_CONSOLE
+
+
+2. Contact
+==========
+website: http://android.git.kernel.org
+
+mailing-lists: android-kernel@googlegroups.com
diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX
index e55103a..a542b9f 100644
--- a/Documentation/block/00-INDEX
+++ b/Documentation/block/00-INDEX
@@ -30,3 +30,9 @@
 	- Switching I/O schedulers at runtime
 writeback_cache_control.txt
 	- Control of volatile write back caches
+mmc-max-speed.txt
+	- eMMC layer speed simulation, related to /sys/block/mmcblk*/
+          attributes:
+            max_read_speed
+            max_write_speed
+            cache_size
diff --git a/Documentation/block/mmc-max-speed.txt b/Documentation/block/mmc-max-speed.txt
new file mode 100644
index 0000000..3f052b9
--- /dev/null
+++ b/Documentation/block/mmc-max-speed.txt
@@ -0,0 +1,38 @@
+eMMC Block layer simulation speed controls in /sys/block/mmcblk*/
+===============================================
+
+Turned on with CONFIG_MMC_SIMULATE_MAX_SPEED which enables MMC device speed
+limiting. Used to test and simulate the behavior of the system when
+confronted with a slow MMC.
+
+Enables max_read_speed, max_write_speed and cache_size attributes and module
+default parameters to control the write or read maximum KB/second speed
+behaviors.
+
+NB: There is room for improving the algorithm for aspects tied directly to
+eMMC specific behavior. For instance, wear leveling and stalls from an
+exhausted erase pool. We would expect that if there was a need to provide
+similar speed simulation controls to other types of block devices, aspects of
+their behavior are modelled separately (e.g. head seek times, heat assist,
+shingling and rotational latency).
+
+/sys/block/mmcblk0/max_read_speed:
+
+Number of KB/second reads allowed to the block device. Used to test and
+simulate the behavior of the system when confronted with a slow reading MMC.
+Set to 0 or "off" to place no speed limit.
+
+/sys/block/mmcblk0/max_write_speed:
+
+Number of KB/second writes allowed to the block device. Used to test and
+simulate the behavior of the system when confronted with a slow writing MMC.
+Set to 0 or "off" to place no speed limit.
+
+/sys/block/mmcblk0/cache_size:
+
+Number of MB of high speed memory or high speed SLC cache expected on the
+eMMC device being simulated. Used to help simulate the write-back behavior
+more accurately. The assumption is the cache has no delay, but draws down
+in the background to the MLC/TLC primary store at the max_write_speed rate.
+Any write speed delays will show up when the cache is full, or when an I/O
+request to flush is issued.
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index c15aa75..0cf9a6b 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -28,6 +28,7 @@
 2.3  Userspace
 2.4  Ondemand
 2.5  Conservative
+2.6  Interactive
 
 3.   The Governor Interface in the CPUfreq Core
 
@@ -218,6 +219,91 @@
 speed. Load for frequency increase is still evaluated every
 sampling rate.
 
+2.6 Interactive
+---------------
+
+The CPUfreq governor "interactive" is designed for latency-sensitive,
+interactive workloads. This governor sets the CPU speed depending on
+usage, similar to "ondemand" and "conservative" governors, but with a
+different set of configurable behaviors.
+
+The tunable values for this governor are:
+
+above_hispeed_delay: When speed is at or above hispeed_freq, wait for
+this long before raising speed in response to continued high load.
+The format is a single delay value, optionally followed by pairs of
+CPU speeds and the delay to use at or above those speeds.  Colons can
+be used between the speeds and associated delays for readability.  For
+example:
+
+   80000 1300000:200000 1500000:40000
+
+uses delay 80000 uS until CPU speed 1.3 GHz, at which speed delay
+200000 uS is used until speed 1.5 GHz, at which speed (and above)
+delay 40000 uS is used.  If speeds are specified these must appear in
+ascending order.  Default is 20000 uS.
+
+boost: If non-zero, immediately boost speed of all CPUs to at least
+hispeed_freq until zero is written to this attribute.  If zero, allow
+CPU speeds to drop below hispeed_freq according to load as usual.
+Default is zero.
+
+boostpulse: On each write, immediately boost speed of all CPUs to
+hispeed_freq for at least the period of time specified by
+boostpulse_duration, after which speeds are allowed to drop below
+hispeed_freq according to load as usual. Its a write-only file.
+
+boostpulse_duration: Length of time to hold CPU speed at hispeed_freq
+on a write to boostpulse, before allowing speed to drop according to
+load as usual.  Default is 80000 uS.
+
+go_hispeed_load: The CPU load at which to ramp to hispeed_freq.
+Default is 99%.
+
+hispeed_freq: An intermediate "high speed" at which to initially ramp
+when CPU load hits the value specified in go_hispeed_load.  If load
+stays high for the amount of time specified in above_hispeed_delay,
+then speed may be bumped higher.  Default is the maximum speed allowed
+by the policy at governor initialization time.
+
+io_is_busy: If set, the governor accounts IO time as CPU busy time.
+
+min_sample_time: The minimum amount of time to spend at the current
+frequency before ramping down. Default is 80000 uS.
+
+target_loads: CPU load values used to adjust speed to influence the
+current CPU load toward that value.  In general, the lower the target
+load, the more often the governor will raise CPU speeds to bring load
+below the target.  The format is a single target load, optionally
+followed by pairs of CPU speeds and CPU loads to target at or above
+those speeds.  Colons can be used between the speeds and associated
+target loads for readability.  For example:
+
+   85 1000000:90 1700000:99
+
+targets CPU load 85% below speed 1GHz, 90% at or above 1GHz, until
+1.7GHz and above, at which load 99% is targeted.  If speeds are
+specified these must appear in ascending order.  Higher target load
+values are typically specified for higher speeds, that is, target load
+values also usually appear in an ascending order. The default is
+target load 90% for all speeds.
+
+timer_rate: Sample rate for reevaluating CPU load when the CPU is not
+idle.  A deferrable timer is used, such that the CPU will not be woken
+from idle to service this timer until something else needs to run.
+(The maximum time to allow deferring this timer when not running at
+minimum speed is configurable via timer_slack.)  Default is 20000 uS.
+
+timer_slack: Maximum additional time to defer handling the governor
+sampling timer beyond timer_rate when running at speeds above the
+minimum.  For platforms that consume additional power at idle when
+CPUs are running at speeds greater than minimum, this places an upper
+bound on how long the timer will be deferred prior to re-evaluating
+load and dropping speed.  For example, if timer_rate is 20000uS and
+timer_slack is 10000uS then timers will be deferred for up to 30msec
+when not at lowest speed.  A value of -1 means defer timers
+indefinitely at all speeds.  Default is 80000 uS.
+
 3. The Governor Interface in the CPUfreq Core
 =============================================
 
diff --git a/Documentation/device-mapper/boot.txt b/Documentation/device-mapper/boot.txt
new file mode 100644
index 0000000..adcaad5
--- /dev/null
+++ b/Documentation/device-mapper/boot.txt
@@ -0,0 +1,42 @@
+Boot time creation of mapped devices
+===================================
+
+It is possible to configure a device mapper device to act as the root
+device for your system in two ways.
+
+The first is to build an initial ramdisk which boots to a minimal
+userspace which configures the device, then pivot_root(8) in to it.
+
+For simple device mapper configurations, it is possible to boot directly
+using the following kernel command line:
+
+dm="<name> <uuid> <ro>,table line 1,...,table line n"
+
+name = the name to associate with the device
+	after boot, udev, if used, will use that name to label
+	the device node.
+uuid = may be 'none' or the UUID desired for the device.
+ro = may be "ro" or "rw".  If "ro", the device and device table will be
+	marked read-only.
+
+Each table line may be as normal when using the dmsetup tool except for
+two variations:
+1. Any use of commas will be interpreted as a newline
+2. Quotation marks cannot be escaped and cannot be used without
+   terminating the dm= argument.
+
+Unless renamed by udev, the device node created will be dm-0 as the
+first minor number for the device-mapper is used during early creation.
+
+Example
+=======
+
+- Booting to a linear array made up of user-mode linux block devices:
+
+  dm="lroot none 0, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" \
+  root=/dev/dm-0
+
+Will boot to a rw dm-linear target of 8192 sectors split across two
+block devices identified by their major:minor numbers.  After boot, udev
+will rename this target to /dev/mapper/lroot (depending on the rules).
+No uuid was assigned.
diff --git a/Documentation/devicetree/bindings/misc/memory-state-time.txt b/Documentation/devicetree/bindings/misc/memory-state-time.txt
new file mode 100644
index 0000000..c99a506
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/memory-state-time.txt
@@ -0,0 +1,8 @@
+Memory bandwidth and frequency state tracking
+
+Required properties:
+- compatible : should be:
+       "memory-state-time"
+- freq-tbl: Should contain entries with each frequency in Hz.
+- bw-buckets: Should contain upper-bound limits for each bandwidth bucket in Mbps.
+       Must match the framework power_profile.xml for the device.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 077c37e..843a4c8 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -392,6 +392,8 @@
  [stack]                  = the stack of the main process
  [vdso]                   = the "virtual dynamic shared object",
                             the kernel system call handler
+ [anon:<name>]            = an anonymous mapping that has been
+                            named by userspace
 
  or if empty, the mapping is anonymous.
 
@@ -419,6 +421,7 @@
 MMUPageSize:           4 kB
 Locked:                0 kB
 VmFlags: rd ex mr mw me dw
+Name:           name from userspace
 
 the first of these lines shows the same information as is displayed for the
 mapping in /proc/PID/maps.  The remaining lines show the size of the mapping
@@ -488,6 +491,9 @@
 might change in future as well. So each consumer of these flags has to
 follow each specific kernel version for the exact semantic.
 
+The "Name" field will only be present on a mapping that has been named by
+userspace, and will show the name passed in by userspace.
+
 This file is only present if the CONFIG_MMU kernel configuration option is
 enabled.
 
diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst
index 53b872c..db86cda 100644
--- a/Documentation/gpu/drm-kms.rst
+++ b/Documentation/gpu/drm-kms.rst
@@ -308,6 +308,12 @@
 .. kernel-doc:: drivers/gpu/drm/drm_color_mgmt.c
    :export:
 
+Explicit Fencing Properties
+---------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_atomic.c
+   :doc: explicit fencing properties
+
 Existing KMS Properties
 -----------------------
 
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c708a50..0286914 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -87,6 +87,7 @@
 	BLACKFIN Blackfin architecture is enabled.
 	CLK	Common clock infrastructure is enabled.
 	CMA	Contiguous Memory Area support is enabled.
+	DM	Device mapper support is enabled.
 	DRM	Direct Rendering Management support is enabled.
 	DYNAMIC_DEBUG Build in debug messages and enable them at runtime
 	EDD	BIOS Enhanced Disk Drive Services (EDD) is enabled
@@ -1034,6 +1035,11 @@
 
 	dis_ucode_ldr	[X86] Disable the microcode loader.
 
+	dm=		[DM] Allows early creation of a device-mapper device.
+			See Documentation/device-mapper/boot.txt.
+
+	dmasound=	[HW,OSS] Sound subsystem buff
+
 	dma_debug=off	If the kernel is compiled with DMA_API_DEBUG support,
 			this option disables the debugging code at boot.
 
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index dbdc413..8b93b3e 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -600,6 +600,16 @@
 	Note that that additional client or server features are only
 	effective if the basic support (0x1 and 0x2) are enabled respectively.
 
+tcp_fwmark_accept - BOOLEAN
+	If set, incoming connections to listening sockets that do not have a
+	socket mark will set the mark of the accepting socket to the fwmark of
+	the incoming SYN packet. This will cause all packets on that connection
+	(starting from the first SYNACK) to be sent with that fwmark. The
+	listening socket's mark is unchanged. Listening sockets that already
+	have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are
+	unaffected.
+	Default: 0
+
 tcp_syn_retries - INTEGER
 	Number of times initial SYNs for an active TCP connection attempt
 	will be retransmitted. Should not be higher than 127. Default value
@@ -1439,11 +1449,20 @@
 	Functional default: enabled if accept_ra is enabled.
 			    disabled if accept_ra is disabled.
 
+accept_ra_rt_info_min_plen - INTEGER
+	Minimum prefix length of Route Information in RA.
+
+	Route Information w/ prefix smaller than this variable shall
+	be ignored.
+
+	Functional default: 0 if accept_ra_rtr_pref is enabled.
+			    -1 if accept_ra_rtr_pref is disabled.
+
 accept_ra_rt_info_max_plen - INTEGER
 	Maximum prefix length of Route Information in RA.
 
-	Route Information w/ prefix larger than or equal to this
-	variable shall be ignored.
+	Route Information w/ prefix larger than this variable shall
+	be ignored.
 
 	Functional default: 0 if accept_ra_rtr_pref is enabled.
 			    -1 if accept_ra_rtr_pref is disabled.
diff --git a/Documentation/scheduler/sched-energy.txt b/Documentation/scheduler/sched-energy.txt
new file mode 100644
index 0000000..dab2f90
--- /dev/null
+++ b/Documentation/scheduler/sched-energy.txt
@@ -0,0 +1,362 @@
+Energy cost model for energy-aware scheduling (EXPERIMENTAL)
+
+Introduction
+=============
+
+The basic energy model uses platform energy data stored in sched_group_energy
+data structures attached to the sched_groups in the sched_domain hierarchy. The
+energy cost model offers two functions that can be used to guide scheduling
+decisions:
+
+1.	static unsigned int sched_group_energy(struct energy_env *eenv)
+2.	static int energy_diff(struct energy_env *eenv)
+
+sched_group_energy() estimates the energy consumed by all cpus in a specific
+sched_group including any shared resources owned exclusively by this group of
+cpus. Resources shared with other cpus are excluded (e.g. later level caches).
+
+energy_diff() estimates the total energy impact of a utilization change. That
+is, adding, removing, or migrating utilization (tasks).
+
+Both functions use a struct energy_env to specify the scenario to be evaluated:
+
+	struct energy_env {
+		struct sched_group      *sg_top;
+		struct sched_group      *sg_cap;
+		int                     cap_idx;
+		int                     util_delta;
+		int                     src_cpu;
+		int                     dst_cpu;
+		int                     energy;
+	};
+
+sg_top: sched_group to be evaluated. Not used by energy_diff().
+
+sg_cap: sched_group covering the cpus in the same frequency domain. Set by
+sched_group_energy().
+
+cap_idx: Capacity state to be used for energy calculations. Set by
+find_new_capacity().
+
+util_delta: Amount of utilization to be added, removed, or migrated.
+
+src_cpu: Source cpu from where 'util_delta' utilization is removed. Should be
+-1 if no source (e.g. task wake-up).
+
+dst_cpu: Destination cpu where 'util_delta' utilization is added. Should be -1
+if utilization is removed (e.g. terminating tasks).
+
+energy: Result of sched_group_energy().
+
+The metric used to represent utilization is the actual per-entity running time
+averaged over time using a geometric series. Very similar to the existing
+per-entity load-tracking, but _not_ scaled by task priority and capped by the
+capacity of the cpu. The latter property does mean that utilization may
+underestimate the compute requirements for task on fully/over utilized cpus.
+The greatest potential for energy savings without affecting performance too much
+is scenarios where the system isn't fully utilized. If the system is deemed
+fully utilized load-balancing should be done with task load (includes task
+priority) instead in the interest of fairness and performance.
+
+
+Background and Terminology
+===========================
+
+To make it clear from the start:
+
+energy = [joule] (resource like a battery on powered devices)
+power = energy/time = [joule/second] = [watt]
+
+The goal of energy-aware scheduling is to minimize energy, while still getting
+the job done. That is, we want to maximize:
+
+	performance [inst/s]
+	--------------------
+	    power [W]
+
+which is equivalent to minimizing:
+
+	energy [J]
+	-----------
+	instruction
+
+while still getting 'good' performance. It is essentially an alternative
+optimization objective to the current performance-only objective for the
+scheduler. This alternative considers two objectives: energy-efficiency and
+performance. Hence, there needs to be a user controllable knob to switch the
+objective. Since it is early days, this is currently a sched_feature
+(ENERGY_AWARE).
+
+The idea behind introducing an energy cost model is to allow the scheduler to
+evaluate the implications of its decisions rather than applying energy-saving
+techniques blindly that may only have positive effects on some platforms. At
+the same time, the energy cost model must be as simple as possible to minimize
+the scheduler latency impact.
+
+Platform topology
+------------------
+
+The system topology (cpus, caches, and NUMA information, not peripherals) is
+represented in the scheduler by the sched_domain hierarchy which has
+sched_groups attached at each level that covers one or more cpus (see
+sched-domains.txt for more details). To add energy awareness to the scheduler
+we need to consider power and frequency domains.
+
+Power domain:
+
+A power domain is a part of the system that can be powered on/off
+independently. Power domains are typically organized in a hierarchy where you
+may be able to power down just a cpu or a group of cpus along with any
+associated resources (e.g.  shared caches). Powering up a cpu means that all
+power domains it is a part of in the hierarchy must be powered up. Hence, it is
+more expensive to power up the first cpu that belongs to a higher level power
+domain than powering up additional cpus in the same high level domain. Two
+level power domain hierarchy example:
+
+		Power source
+		         +-------------------------------+----...
+per group PD		 G                               G
+		         |           +----------+        |
+		    +--------+-------| Shared   |  (other groups)
+per-cpu PD	    G        G       | resource |
+		    |        |       +----------+
+		+-------+ +-------+
+		| CPU 0 | | CPU 1 |
+		+-------+ +-------+
+
+Frequency domain:
+
+Frequency domains (P-states) typically cover the same group of cpus as one of
+the power domain levels. That is, there might be several smaller power domains
+sharing the same frequency (P-state) or there might be a power domain spanning
+multiple frequency domains.
+
+From a scheduling point of view there is no need to know the actual frequencies
+[Hz]. All the scheduler cares about is the compute capacity available at the
+current state (P-state) the cpu is in and any other available states. For that
+reason, and to also factor in any cpu micro-architecture differences, compute
+capacity scaling states are called 'capacity states' in this document. For SMP
+systems this is equivalent to P-states. For mixed micro-architecture systems
+(like ARM big.LITTLE) it is P-states scaled according to the micro-architecture
+performance relative to the other cpus in the system.
+
+Energy modelling:
+------------------
+
+Due to the hierarchical nature of the power domains, the most obvious way to
+model energy costs is therefore to associate power and energy costs with
+domains (groups of cpus). Energy costs of shared resources are associated with
+the group of cpus that share the resources, only the cost of powering the
+cpu itself and any private resources (e.g. private L1 caches) is associated
+with the per-cpu groups (lowest level).
+
+For example, for an SMP system with per-cpu power domains and a cluster level
+(group of cpus) power domain we get the overall energy costs to be:
+
+	energy = energy_cluster + n * energy_cpu
+
+where 'n' is the number of cpus powered up and energy_cluster is the cost paid
+as soon as any cpu in the cluster is powered up.
+
+The power and frequency domains can naturally be mapped onto the existing
+sched_domain hierarchy and sched_groups by adding the necessary data to the
+existing data structures.
+
+The energy model considers energy consumption from two contributors (shown in
+the illustration below):
+
+1. Busy energy: Energy consumed while a cpu and the higher level groups that it
+belongs to are busy running tasks. Busy energy is associated with the state of
+the cpu, not an event. The time the cpu spends in this state varies. Thus, the
+most obvious platform parameter for this contribution is busy power
+(energy/time).
+
+2. Idle energy: Energy consumed while a cpu and higher level groups that it
+belongs to are idle (in a C-state). Like busy energy, idle energy is associated
+with the state of the cpu. Thus, the platform parameter for this contribution
+is idle power (energy/time).
+
+Energy consumed during transitions from an idle-state (C-state) to a busy state
+(P-state) or going the other way is ignored by the model to simplify the energy
+model calculations.
+
+
+	Power
+	^
+	|            busy->idle             idle->busy
+	|            transition             transition
+	|
+	|                _                      __
+	|               / \                    /  \__________________
+	|______________/   \                  /
+	|                   \                /
+	|  Busy              \    Idle      /        Busy
+	|  low P-state        \____________/         high P-state
+	|
+	+------------------------------------------------------------> time
+
+Busy    |--------------|                          |-----------------|
+
+Wakeup                 |------|            |------|
+
+Idle                          |------------|
+
+
+The basic algorithm
+====================
+
+The basic idea is to determine the total energy impact when utilization is
+added or removed by estimating the impact at each level in the sched_domain
+hierarchy starting from the bottom (sched_group contains just a single cpu).
+The energy cost comes from busy time (sched_group is awake because one or more
+cpus are busy) and idle time (in an idle-state). Energy model numbers account
+for energy costs associated with all cpus in the sched_group as a group.
+
+	for_each_domain(cpu, sd) {
+		sg = sched_group_of(cpu)
+		energy_before = curr_util(sg) * busy_power(sg)
+				+ (1-curr_util(sg)) * idle_power(sg)
+		energy_after = new_util(sg) * busy_power(sg)
+				+ (1-new_util(sg)) * idle_power(sg)
+		energy_diff += energy_before - energy_after
+
+	}
+
+	return energy_diff
+
+{curr, new}_util: The cpu utilization at the lowest level and the overall
+non-idle time for the entire group for higher levels. Utilization is in the
+range 0.0 to 1.0 in the pseudo-code.
+
+busy_power: The power consumption of the sched_group.
+
+idle_power: The power consumption of the sched_group when idle.
+
+Note: It is a fundamental assumption that the utilization is (roughly) scale
+invariant. Task utilization tracking factors in any frequency scaling and
+performance scaling differences due to difference cpu microarchitectures such
+that task utilization can be used across the entire system.
+
+
+Platform energy data
+=====================
+
+struct sched_group_energy can be attached to sched_groups in the sched_domain
+hierarchy and has the following members:
+
+cap_states:
+	List of struct capacity_state representing the supported capacity states
+	(P-states). struct capacity_state has two members: cap and power, which
+	represents the compute capacity and the busy_power of the state. The
+	list must be ordered by capacity low->high.
+
+nr_cap_states:
+	Number of capacity states in cap_states list.
+
+idle_states:
+	List of struct idle_state containing idle_state power cost for each
+	idle-state supported by the system orderd by shallowest state first.
+	All states must be included at all level in the hierarchy, i.e. a
+	sched_group spanning just a single cpu must also include coupled
+	idle-states (cluster states). In addition to the cpuidle idle-states,
+	the list must also contain an entry for the idling using the arch
+	default idle (arch_idle_cpu()). Despite this state may not be a true
+	hardware idle-state it is considered the shallowest idle-state in the
+	energy model and must be the first entry. cpus may enter this state
+	(possibly 'active idling') if cpuidle decides not enter a cpuidle
+	idle-state. Default idle may not be used when cpuidle is enabled.
+	In this case, it should just be a copy of the first cpuidle idle-state.
+
+nr_idle_states:
+	Number of idle states in idle_states list.
+
+There are no unit requirements for the energy cost data. Data can be normalized
+with any reference, however, the normalization must be consistent across all
+energy cost data. That is, one bogo-joule/watt must be the same quantity for
+data, but we don't care what it is.
+
+A recipe for platform characterization
+=======================================
+
+Obtaining the actual model data for a particular platform requires some way of
+measuring power/energy. There isn't a tool to help with this (yet). This
+section provides a recipe for use as reference. It covers the steps used to
+characterize the ARM TC2 development platform. This sort of measurements is
+expected to be done anyway when tuning cpuidle and cpufreq for a given
+platform.
+
+The energy model needs two types of data (struct sched_group_energy holds
+these) for each sched_group where energy costs should be taken into account:
+
+1. Capacity state information
+
+A list containing the compute capacity and power consumption when fully
+utilized attributed to the group as a whole for each available capacity state.
+At the lowest level (group contains just a single cpu) this is the power of the
+cpu alone without including power consumed by resources shared with other cpus.
+It basically needs to fit the basic modelling approach described in "Background
+and Terminology" section:
+
+	energy_system = energy_shared + n * energy_cpu
+
+for a system containing 'n' busy cpus. Only 'energy_cpu' should be included at
+the lowest level. 'energy_shared' is included at the next level which
+represents the group of cpus among which the resources are shared.
+
+This model is, of course, a simplification of reality. Thus, power/energy
+attributions might not always exactly represent how the hardware is designed.
+Also, busy power is likely to depend on the workload. It is therefore
+recommended to use a representative mix of workloads when characterizing the
+capacity states.
+
+If the group has no capacity scaling support, the list will contain a single
+state where power is the busy power attributed to the group. The capacity
+should be set to a default value (1024).
+
+When frequency domains include multiple power domains, the group representing
+the frequency domain and all child groups share capacity states. This must be
+indicated by setting the SD_SHARE_CAP_STATES sched_domain flag. All groups at
+all levels that share the capacity state must have the list of capacity states
+with the power set to the contribution of the individual group.
+
+2. Idle power information
+
+Stored in the idle_states list. The power number is the group idle power
+consumption in each idle state as well when the group is idle but has not
+entered an idle-state ('active idle' as mentioned earlier). Due to the way the
+energy model is defined, the idle power of the deepest group idle state can
+alternatively be accounted for in the parent group busy power. In that case the
+group idle state power values are offset such that the idle power of the
+deepest state is zero. It is less intuitive, but it is easier to measure as
+idle power consumed by the group and the busy/idle power of the parent group
+cannot be distinguished without per group measurement points.
+
+Measuring capacity states and idle power:
+
+The capacity states' capacity and power can be estimated by running a benchmark
+workload at each available capacity state. By restricting the benchmark to run
+on subsets of cpus it is possible to extrapolate the power consumption of
+shared resources.
+
+ARM TC2 has two clusters of two and three cpus respectively. Each cluster has a
+shared L2 cache. TC2 has on-chip energy counters per cluster. Running a
+benchmark workload on just one cpu in a cluster means that power is consumed in
+the cluster (higher level group) and a single cpu (lowest level group). Adding
+another benchmark task to another cpu increases the power consumption by the
+amount consumed by the additional cpu. Hence, it is possible to extrapolate the
+cluster busy power.
+
+For platforms that don't have energy counters or equivalent instrumentation
+built-in, it may be possible to use an external DAQ to acquire similar data.
+
+If the benchmark includes some performance score (for example sysbench cpu
+benchmark), this can be used to record the compute capacity.
+
+Measuring idle power requires insight into the idle state implementation on the
+particular platform. Specifically, if the platform has coupled idle-states (or
+package states). To measure non-coupled per-cpu idle-states it is necessary to
+keep one cpu busy to keep any shared resources alive to isolate the idle power
+of the cpu from idle/busy power of the shared resources. The cpu can be tricked
+into different per-cpu idle states by disabling the other states. Based on
+various combinations of measurements with specific cpus busy and disabling
+idle-states it is possible to extrapolate the idle-state power.
diff --git a/Documentation/scheduler/sched-tune.txt b/Documentation/scheduler/sched-tune.txt
new file mode 100644
index 0000000..9bd2231
--- /dev/null
+++ b/Documentation/scheduler/sched-tune.txt
@@ -0,0 +1,366 @@
+             Central, scheduler-driven, power-performance control
+                               (EXPERIMENTAL)
+
+Abstract
+========
+
+The topic of a single simple power-performance tunable, that is wholly
+scheduler centric, and has well defined and predictable properties has come up
+on several occasions in the past [1,2]. With techniques such as a scheduler
+driven DVFS [3], we now have a good framework for implementing such a tunable.
+This document describes the overall ideas behind its design and implementation.
+
+
+Table of Contents
+=================
+
+1. Motivation
+2. Introduction
+3. Signal Boosting Strategy
+4. OPP selection using boosted CPU utilization
+5. Per task group boosting
+6. Question and Answers
+   - What about "auto" mode?
+   - What about boosting on a congested system?
+   - How CPUs are boosted when we have tasks with multiple boost values?
+7. References
+
+
+1. Motivation
+=============
+
+Sched-DVFS [3] is a new event-driven cpufreq governor which allows the
+scheduler to select the optimal DVFS operating point (OPP) for running a task
+allocated to a CPU. The introduction of sched-DVFS enables running workloads at
+the most energy efficient OPPs.
+
+However, sometimes it may be desired to intentionally boost the performance of
+a workload even if that could imply a reasonable increase in energy
+consumption. For example, in order to reduce the response time of a task, we
+may want to run the task at a higher OPP than the one that is actually required
+by it's CPU bandwidth demand.
+
+This last requirement is especially important if we consider that one of the
+main goals of the sched-DVFS component is to replace all currently available
+CPUFreq policies. Since sched-DVFS is event based, as opposed to the sampling
+driven governors we currently have, it is already more responsive at selecting
+the optimal OPP to run tasks allocated to a CPU. However, just tracking the
+actual task load demand may not be enough from a performance standpoint.  For
+example, it is not possible to get behaviors similar to those provided by the
+"performance" and "interactive" CPUFreq governors.
+
+This document describes an implementation of a tunable, stacked on top of the
+sched-DVFS which extends its functionality to support task performance
+boosting.
+
+By "performance boosting" we mean the reduction of the time required to
+complete a task activation, i.e. the time elapsed from a task wakeup to its
+next deactivation (e.g. because it goes back to sleep or it terminates).  For
+example, if we consider a simple periodic task which executes the same workload
+for 5[s] every 20[s] while running at a certain OPP, a boosted execution of
+that task must complete each of its activations in less than 5[s].
+
+A previous attempt [5] to introduce such a boosting feature has not been
+successful mainly because of the complexity of the proposed solution.  The
+approach described in this document exposes a single simple interface to
+user-space.  This single tunable knob allows the tuning of system wide
+scheduler behaviours ranging from energy efficiency at one end through to
+incremental performance boosting at the other end.  This first tunable affects
+all tasks. However, a more advanced extension of the concept is also provided
+which uses CGroups to boost the performance of only selected tasks while using
+the energy efficient default for all others.
+
+The rest of this document introduces in more details the proposed solution
+which has been named SchedTune.
+
+
+2. Introduction
+===============
+
+SchedTune exposes a simple user-space interface with a single power-performance
+tunable:
+
+  /proc/sys/kernel/sched_cfs_boost
+
+This permits expressing a boost value as an integer in the range [0..100].
+
+A value of 0 (default) configures the CFS scheduler for maximum energy
+efficiency. This means that sched-DVFS runs the tasks at the minimum OPP
+required to satisfy their workload demand.
+A value of 100 configures scheduler for maximum performance, which translates
+to the selection of the maximum OPP on that CPU.
+
+The range between 0 and 100 can be set to satisfy other scenarios suitably. For
+example to satisfy interactive response or depending on other system events
+(battery level etc).
+
+A CGroup based extension is also provided, which permits further user-space
+defined task classification to tune the scheduler for different goals depending
+on the specific nature of the task, e.g. background vs interactive vs
+low-priority.
+
+The overall design of the SchedTune module is built on top of "Per-Entity Load
+Tracking" (PELT) signals and sched-DVFS by introducing a bias on the Operating
+Performance Point (OPP) selection.
+Each time a task is allocated on a CPU, sched-DVFS has the opportunity to tune
+the operating frequency of that CPU to better match the workload demand. The
+selection of the actual OPP being activated is influenced by the global boost
+value, or the boost value for the task CGroup when in use.
+
+This simple biasing approach leverages existing frameworks, which means minimal
+modifications to the scheduler, and yet it allows to achieve a range of
+different behaviours all from a single simple tunable knob.
+The only new concept introduced is that of signal boosting.
+
+
+3. Signal Boosting Strategy
+===========================
+
+The whole PELT machinery works based on the value of a few load tracking signals
+which basically track the CPU bandwidth requirements for tasks and the capacity
+of CPUs. The basic idea behind the SchedTune knob is to artificially inflate
+some of these load tracking signals to make a task or RQ appears more demanding
+that it actually is.
+
+Which signals have to be inflated depends on the specific "consumer".  However,
+independently from the specific (signal, consumer) pair, it is important to
+define a simple and possibly consistent strategy for the concept of boosting a
+signal.
+
+A boosting strategy defines how the "abstract" user-space defined
+sched_cfs_boost value is translated into an internal "margin" value to be added
+to a signal to get its inflated value:
+
+  margin         := boosting_strategy(sched_cfs_boost, signal)
+  boosted_signal := signal + margin
+
+Different boosting strategies were identified and analyzed before selecting the
+one found to be most effective.
+
+Signal Proportional Compensation (SPC)
+--------------------------------------
+
+In this boosting strategy the sched_cfs_boost value is used to compute a
+margin which is proportional to the complement of the original signal.
+When a signal has a maximum possible value, its complement is defined as
+the delta from the actual value and its possible maximum.
+
+Since the tunable implementation uses signals which have SCHED_LOAD_SCALE as
+the maximum possible value, the margin becomes:
+
+	margin := sched_cfs_boost * (SCHED_LOAD_SCALE - signal)
+
+Using this boosting strategy:
+- a 100% sched_cfs_boost means that the signal is scaled to the maximum value
+- each value in the range of sched_cfs_boost effectively inflates the signal in
+  question by a quantity which is proportional to the maximum value.
+
+For example, by applying the SPC boosting strategy to the selection of the OPP
+to run a task it is possible to achieve these behaviors:
+
+-   0% boosting: run the task at the minimum OPP required by its workload
+- 100% boosting: run the task at the maximum OPP available for the CPU
+-  50% boosting: run at the half-way OPP between minimum and maximum
+
+Which means that, at 50% boosting, a task will be scheduled to run at half of
+the maximum theoretically achievable performance on the specific target
+platform.
+
+A graphical representation of an SPC boosted signal is represented in the
+following figure where:
+ a) "-" represents the original signal
+ b) "b" represents a  50% boosted signal
+ c) "p" represents a 100% boosted signal
+
+
+   ^
+   |  SCHED_LOAD_SCALE
+   +-----------------------------------------------------------------+
+   |pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp
+   |
+   |                                             boosted_signal
+   |                                          bbbbbbbbbbbbbbbbbbbbbbbb
+   |
+   |                                            original signal
+   |                  bbbbbbbbbbbbbbbbbbbbbbbb+----------------------+
+   |                                          |
+   |bbbbbbbbbbbbbbbbbb                        |
+   |                                          |
+   |                                          |
+   |                                          |
+   |                  +-----------------------+
+   |                  |
+   |                  |
+   |                  |
+   |------------------+
+   |
+   |
+   +----------------------------------------------------------------------->
+
+The plot above shows a ramped load signal (titled 'original_signal') and it's
+boosted equivalent. For each step of the original signal the boosted signal
+corresponding to a 50% boost is midway from the original signal and the upper
+bound. Boosting by 100% generates a boosted signal which is always saturated to
+the upper bound.
+
+
+4. OPP selection using boosted CPU utilization
+==============================================
+
+It is worth calling out that the implementation does not introduce any new load
+signals. Instead, it provides an API to tune existing signals. This tuning is
+done on demand and only in scheduler code paths where it is sensible to do so.
+The new API calls are defined to return either the default signal or a boosted
+one, depending on the value of sched_cfs_boost. This is a clean an non invasive
+modification of the existing existing code paths.
+
+The signal representing a CPU's utilization is boosted according to the
+previously described SPC boosting strategy. To sched-DVFS, this allows a CPU
+(ie CFS run-queue) to appear more used then it actually is.
+
+Thus, with the sched_cfs_boost enabled we have the following main functions to
+get the current utilization of a CPU:
+
+  cpu_util()
+  boosted_cpu_util()
+
+The new boosted_cpu_util() is similar to the first but returns a boosted
+utilization signal which is a function of the sched_cfs_boost value.
+
+This function is used in the CFS scheduler code paths where sched-DVFS needs to
+decide the OPP to run a CPU at.
+For example, this allows selecting the highest OPP for a CPU which has
+the boost value set to 100%.
+
+
+5. Per task group boosting
+==========================
+
+The availability of a single knob which is used to boost all tasks in the
+system is certainly a simple solution but it quite likely doesn't fit many
+utilization scenarios, especially in the mobile device space.
+
+For example, on battery powered devices there usually are many background
+services which are long running and need energy efficient scheduling. On the
+other hand, some applications are more performance sensitive and require an
+interactive response and/or maximum performance, regardless of the energy cost.
+To better service such scenarios, the SchedTune implementation has an extension
+that provides a more fine grained boosting interface.
+
+A new CGroup controller, namely "schedtune", could be enabled which allows to
+defined and configure task groups with different boosting values.
+Tasks that require special performance can be put into separate CGroups.
+The value of the boost associated with the tasks in this group can be specified
+using a single knob exposed by the CGroup controller:
+
+   schedtune.boost
+
+This knob allows the definition of a boost value that is to be used for
+SPC boosting of all tasks attached to this group.
+
+The current schedtune controller implementation is really simple and has these
+main characteristics:
+
+  1) It is only possible to create 1 level depth hierarchies
+
+     The root control groups define the system-wide boost value to be applied
+     by default to all tasks. Its direct subgroups are named "boost groups" and
+     they define the boost value for specific set of tasks.
+     Further nested subgroups are not allowed since they do not have a sensible
+     meaning from a user-space standpoint.
+
+  2) It is possible to define only a limited number of "boost groups"
+
+     This number is defined at compile time and by default configured to 16.
+     This is a design decision motivated by two main reasons:
+     a) In a real system we do not expect utilization scenarios with more then few
+	boost groups. For example, a reasonable collection of groups could be
+        just "background", "interactive" and "performance".
+     b) It simplifies the implementation considerably, especially for the code
+	which has to compute the per CPU boosting once there are multiple
+        RUNNABLE tasks with different boost values.
+
+Such a simple design should allow servicing the main utilization scenarios identified
+so far. It provides a simple interface which can be used to manage the
+power-performance of all tasks or only selected tasks.
+Moreover, this interface can be easily integrated by user-space run-times (e.g.
+Android, ChromeOS) to implement a QoS solution for task boosting based on tasks
+classification, which has been a long standing requirement.
+
+Setup and usage
+---------------
+
+0. Use a kernel with CGROUP_SCHEDTUNE support enabled
+
+1. Check that the "schedtune" CGroup controller is available:
+
+   root@linaro-nano:~# cat /proc/cgroups
+   #subsys_name	hierarchy	num_cgroups	enabled
+   cpuset  	0		1		1
+   cpu     	0		1		1
+   schedtune	0		1		1
+
+2. Mount a tmpfs to create the CGroups mount point (Optional)
+
+   root@linaro-nano:~# sudo mount -t tmpfs cgroups /sys/fs/cgroup
+
+3. Mount the "schedtune" controller
+
+   root@linaro-nano:~# mkdir /sys/fs/cgroup/stune
+   root@linaro-nano:~# sudo mount -t cgroup -o schedtune stune /sys/fs/cgroup/stune
+
+4. Setup the system-wide boost value (Optional)
+
+   If not configured the root control group has a 0% boost value, which
+   basically disables boosting for all tasks in the system thus running in
+   an energy-efficient mode.
+
+   root@linaro-nano:~# echo $SYSBOOST > /sys/fs/cgroup/stune/schedtune.boost
+
+5. Create task groups and configure their specific boost value (Optional)
+
+   For example here we create a "performance" boost group configure to boost
+   all its tasks to 100%
+
+   root@linaro-nano:~# mkdir /sys/fs/cgroup/stune/performance
+   root@linaro-nano:~# echo 100 > /sys/fs/cgroup/stune/performance/schedtune.boost
+
+6. Move tasks into the boost group
+
+   For example, the following moves the tasks with PID $TASKPID (and all its
+   threads) into the "performance" boost group.
+
+   root@linaro-nano:~# echo "TASKPID > /sys/fs/cgroup/stune/performance/cgroup.procs
+
+This simple configuration allows only the threads of the $TASKPID task to run,
+when needed, at the highest OPP in the most capable CPU of the system.
+
+
+6. Question and Answers
+=======================
+
+What about "auto" mode?
+-----------------------
+
+The 'auto' mode as described in [5] can be implemented by interfacing SchedTune
+with some suitable user-space element. This element could use the exposed
+system-wide or cgroup based interface.
+
+How are multiple groups of tasks with different boost values managed?
+---------------------------------------------------------------------
+
+The current SchedTune implementation keeps track of the boosted RUNNABLE tasks
+on a CPU. Once sched-DVFS selects the OPP to run a CPU at, the CPU utilization
+is boosted with a value which is the maximum of the boost values of the
+currently RUNNABLE tasks in its RQ.
+
+This allows sched-DVFS to boost a CPU only while there are boosted tasks ready
+to run and switch back to the energy efficient mode as soon as the last boosted
+task is dequeued.
+
+
+7. References
+=============
+[1] http://lwn.net/Articles/552889
+[2] http://lkml.org/lkml/2012/5/18/91
+[3] http://lkml.org/lkml/2015/6/26/620
diff --git a/Documentation/sync.txt b/Documentation/sync.txt
new file mode 100644
index 0000000..a2d05e7
--- /dev/null
+++ b/Documentation/sync.txt
@@ -0,0 +1,75 @@
+Motivation:
+
+In complicated DMA pipelines such as graphics (multimedia, camera, gpu, display)
+a consumer of a buffer needs to know when the producer has finished producing
+it.  Likewise the producer needs to know when the consumer is finished with the
+buffer so it can reuse it.  A particular buffer may be consumed by multiple
+consumers which will retain the buffer for different amounts of time.  In
+addition, a consumer may consume multiple buffers atomically.
+The sync framework adds an API which allows synchronization between the
+producers and consumers in a generic way while also allowing platforms which
+have shared hardware synchronization primitives to exploit them.
+
+Goals:
+	* provide a generic API for expressing synchronization dependencies
+	* allow drivers to exploit hardware synchronization between hardware
+	  blocks
+	* provide a userspace API that allows a compositor to manage
+	  dependencies.
+	* provide rich telemetry data to allow debugging slowdowns and stalls of
+	   the graphics pipeline.
+
+Objects:
+	* sync_timeline
+	* sync_pt
+	* sync_fence
+
+sync_timeline:
+
+A sync_timeline is an abstract monotonically increasing counter. In general,
+each driver/hardware block context will have one of these.  They can be backed
+by the appropriate hardware or rely on the generic sw_sync implementation.
+Timelines are only ever created through their specific implementations
+(i.e. sw_sync.)
+
+sync_pt:
+
+A sync_pt is an abstract value which marks a point on a sync_timeline. Sync_pts
+have a single timeline parent.  They have 3 states: active, signaled, and error.
+They start in active state and transition, once, to either signaled (when the
+timeline counter advances beyond the sync_pt’s value) or error state.
+
+sync_fence:
+
+Sync_fences are the primary primitives used by drivers to coordinate
+synchronization of their buffers.  They are a collection of sync_pts which may
+or may not have the same timeline parent.  A sync_pt can only exist in one fence
+and the fence's list of sync_pts is immutable once created.  Fences can be
+waited on synchronously or asynchronously.  Two fences can also be merged to
+create a third fence containing a copy of the two fences’ sync_pts.  Fences are
+backed by file descriptors to allow userspace to coordinate the display pipeline
+dependencies.
+
+Use:
+
+A driver implementing sync support should have a work submission function which:
+     * takes a fence argument specifying when to begin work
+     * asynchronously queues that work to kick off when the fence is signaled
+     * returns a fence to indicate when its work will be done.
+     * signals the returned fence once the work is completed.
+
+Consider an imaginary display driver that has the following API:
+/*
+ * assumes buf is ready to be displayed.
+ * blocks until the buffer is on screen.
+ */
+    void display_buffer(struct dma_buf *buf);
+
+The new API will become:
+/*
+ * will display buf when fence is signaled.
+ * returns immediately with a fence that will signal when buf
+ * is no longer displayed.
+ */
+struct sync_fence* display_buffer(struct dma_buf *buf,
+                                 struct sync_fence *fence);
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index ffab8b5..52daff6 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -659,12 +659,14 @@
 perf_event_paranoid:
 
 Controls use of the performance events system by unprivileged
-users (without CAP_SYS_ADMIN).  The default value is 2.
+users (without CAP_SYS_ADMIN).  The default value is 3 if
+CONFIG_SECURITY_PERF_EVENTS_RESTRICT is set, or 2 otherwise.
 
  -1: Allow use of (almost) all events by all users
 >=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
 >=1: Disallow CPU event access by users without CAP_SYS_ADMIN
 >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
+>=3: Disallow all event access by users without CAP_SYS_ADMIN
 
 ==============================================================
 
diff --git a/Documentation/trace/events-power.txt b/Documentation/trace/events-power.txt
index 21d514ce..4d817d5 100644
--- a/Documentation/trace/events-power.txt
+++ b/Documentation/trace/events-power.txt
@@ -25,6 +25,7 @@
 
 cpu_idle		"state=%lu cpu_id=%lu"
 cpu_frequency		"state=%lu cpu_id=%lu"
+cpu_frequency_limits	"min=%lu max=%lu cpu_id=%lu"
 
 A suspend event is used to indicate the system going in and out of the
 suspend mode:
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 185c39f..91723ed 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -362,6 +362,26 @@
 		  to correlate events across hypervisor/guest if
 		  tb_offset is known.
 
+	  mono: This uses the fast monotonic clock (CLOCK_MONOTONIC)
+		which is monotonic and is subject to NTP rate adjustments.
+
+	  mono_raw:
+		This is the raw monotonic clock (CLOCK_MONOTONIC_RAW)
+		which is montonic but is not subject to any rate adjustments
+		and ticks at the same rate as the hardware clocksource.
+
+	  boot: This is the boot clock (CLOCK_BOOTTIME) and is based on the
+		fast monotonic clock, but also accounts for time spent in
+		suspend. Since the clock access is designed for use in
+		tracing in the suspend path, some side effects are possible
+		if clock is accessed after the suspend time is accounted before
+		the fast mono clock is updated. In this case, the clock update
+		appears to happen slightly sooner than it normally would have.
+		Also on 32-bit systems, it's possible that the 64-bit boot offset
+		sees a partial update. These effects are rare and post
+		processing should be able to handle them. See comments in the
+		ktime_get_boot_fast_ns() function for more information.
+
 	To set a clock, simply echo the clock name into this file.
 
 	  echo global > trace_clock
@@ -2102,6 +2122,35 @@
  1)   1.449 us    |             }
 
 
+You can disable the hierarchical function call formatting and instead print a
+flat list of function entry and return events.  This uses the format described
+in the Output Formatting section and respects all the trace options that
+control that formatting.  Hierarchical formatting is the default.
+
+	hierachical: echo nofuncgraph-flat > trace_options
+	flat: echo funcgraph-flat > trace_options
+
+  ie:
+
+  # tracer: function_graph
+  #
+  # entries-in-buffer/entries-written: 68355/68355   #P:2
+  #
+  #                              _-----=> irqs-off
+  #                             / _----=> need-resched
+  #                            | / _---=> hardirq/softirq
+  #                            || / _--=> preempt-depth
+  #                            ||| /     delay
+  #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+  #              | |       |   ||||       |         |
+                sh-1806  [001] d...   198.843443: graph_ent: func=_raw_spin_lock
+                sh-1806  [001] d...   198.843445: graph_ent: func=__raw_spin_lock
+                sh-1806  [001] d..1   198.843447: graph_ret: func=__raw_spin_lock
+                sh-1806  [001] d..1   198.843449: graph_ret: func=_raw_spin_lock
+                sh-1806  [001] d..1   198.843451: graph_ent: func=_raw_spin_unlock_irqrestore
+                sh-1806  [001] d...   198.843453: graph_ret: func=_raw_spin_unlock_irqrestore
+
+
 You might find other useful features for this tracer in the
 following "dynamic ftrace" section such as tracing only specific
 functions or tasks.
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 9e46d6e..fa47df6a 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -97,4 +97,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_COOKIE		57
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 74a70f9..2bddb69 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1837,6 +1837,15 @@
 	help
 	  Say Y if you want to run Linux in a Virtual Machine on Xen on ARM.
 
+config ARM_FLUSH_CONSOLE_ON_RESTART
+	bool "Force flush the console on restart"
+	help
+	  If the console is locked while the system is rebooted, the messages
+	  in the temporary logbuffer would not have propogated to all the
+	  console drivers. This option forces the console lock to be
+	  released if it failed to be acquired, which will cause all the
+	  pending messages to be flushed.
+
 endmenu
 
 menu "Boot options"
@@ -1865,6 +1874,21 @@
 	  This was deprecated in 2001 and announced to live on for 5 years.
 	  Some old boot loaders still use this way.
 
+config BUILD_ARM_APPENDED_DTB_IMAGE
+	bool "Build a concatenated zImage/dtb by default"
+	depends on OF
+	help
+	  Enabling this option will cause a concatenated zImage and list of
+	  DTBs to be built by default (instead of a standalone zImage.)
+	  The image will built in arch/arm/boot/zImage-dtb
+
+config BUILD_ARM_APPENDED_DTB_IMAGE_NAMES
+	string "Default dtb names"
+	depends on BUILD_ARM_APPENDED_DTB_IMAGE
+	help
+	  Space separated list of names of dtbs to append when
+	  building a concatenated zImage-dtb.
+
 # Compressed boot loader in ROM.  Yes, we really want to ask about
 # TEXT and BSS so we preserve their values in the config files.
 config ZBOOT_ROM_TEXT
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index d83f7c3..17dcd94 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -1723,6 +1723,14 @@
 	  kernel low-level debugging functions. Add earlyprintk to your
 	  kernel parameters to enable this console.
 
+config EARLY_PRINTK_DIRECT
+	bool "Early printk direct"
+	depends on DEBUG_LL
+	help
+	  Say Y here if you want to have an early console using the
+	  kernel low-level debugging functions and EARLY_PRINTK is
+	  not early enough.
+
 config ARM_KPROBES_TEST
 	tristate "Kprobes test module"
 	depends on KPROBES && MODULES
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index e14ddca..9ced939 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -298,6 +298,8 @@
 # Default target when executing plain make
 ifeq ($(CONFIG_XIP_KERNEL),y)
 KBUILD_IMAGE := xipImage
+else ifeq ($(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE),y)
+KBUILD_IMAGE := zImage-dtb
 else
 KBUILD_IMAGE := zImage
 endif
@@ -349,6 +351,9 @@
 	$(Q)$(MAKE) $(build)=arch/arm/vdso $@
 endif
 
+zImage-dtb: vmlinux scripts dtbs
+	$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@
+
 # We use MRPROPER_FILES and CLEAN_FILES now
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arm/boot/.gitignore b/arch/arm/boot/.gitignore
index 3c79f85..ad7a025 100644
--- a/arch/arm/boot/.gitignore
+++ b/arch/arm/boot/.gitignore
@@ -4,3 +4,4 @@
 bootpImage
 uImage
 *.dtb
+zImage-dtb
\ No newline at end of file
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index 50f8d1be..da75630 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -16,6 +16,7 @@
 ifneq ($(MACHINE),)
 include $(MACHINE)/Makefile.boot
 endif
+include $(srctree)/arch/arm/boot/dts/Makefile
 
 # Note: the following conditions must always be true:
 #   ZRELADDR == virt_to_phys(PAGE_OFFSET + TEXT_OFFSET)
@@ -29,6 +30,14 @@
 
 targets := Image zImage xipImage bootpImage uImage
 
+DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAMES))
+ifneq ($(DTB_NAMES),)
+DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES))
+else
+DTB_LIST := $(dtb-y)
+endif
+DTB_OBJS := $(addprefix $(obj)/dts/,$(DTB_LIST))
+
 ifeq ($(CONFIG_XIP_KERNEL),y)
 
 $(obj)/xipImage: vmlinux FORCE
@@ -55,6 +64,10 @@
 $(obj)/zImage:	$(obj)/compressed/vmlinux FORCE
 	$(call if_changed,objcopy)
 
+$(obj)/zImage-dtb:	$(obj)/zImage $(DTB_OBJS) FORCE
+	$(call if_changed,cat)
+	@echo '  Kernel: $@ is ready'
+
 endif
 
 ifneq ($(LOADADDR),)
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 2d7f2bb..b114faa 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -784,6 +784,8 @@
 		bic     r6, r6, #1 << 31        @ 32-bit translation system
 		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
 		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
+		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
+		mcr	p15, 0, r0, c7, c5, 4	@ ISB
 		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
 		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
 #endif
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 7037201..54f95d3 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -960,5 +960,15 @@
 dtstree		:= $(srctree)/$(src)
 dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
 
-always		:= $(dtb-y)
+DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAMES))
+ifneq ($(DTB_NAMES),)
+DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES))
+else
+DTB_LIST := $(dtb-y)
+endif
+
+targets += dtbs dtbs_install
+targets += $(DTB_LIST)
+
+always		:= $(DTB_LIST)
 clean-files	:= *.dtb
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index 9353184..ce01364 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -17,3 +17,7 @@
 
 config SHARP_SCOOP
 	bool
+
+config FIQ_GLUE
+	bool
+	select FIQ
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 27f23b1..04aca89 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -4,6 +4,7 @@
 
 obj-y				+= firmware.o
 
+obj-$(CONFIG_FIQ_GLUE)		+= fiq_glue.o fiq_glue_setup.o
 obj-$(CONFIG_ICST)		+= icst.o
 obj-$(CONFIG_SA1111)		+= sa1111.o
 obj-$(CONFIG_DMABOUNCE)		+= dmabounce.o
diff --git a/arch/arm/common/fiq_glue.S b/arch/arm/common/fiq_glue.S
new file mode 100644
index 0000000..24b42ce
--- /dev/null
+++ b/arch/arm/common/fiq_glue.S
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+		.text
+
+		.global fiq_glue_end
+
+		/* fiq stack: r0-r15,cpsr,spsr of interrupted mode */
+
+ENTRY(fiq_glue)
+		/* store pc, cpsr from previous mode, reserve space for spsr */
+		mrs	r12, spsr
+		sub	lr, lr, #4
+		subs	r10, #1
+		bne	nested_fiq
+
+		str	r12, [sp, #-8]!
+		str	lr, [sp, #-4]!
+
+		/* store r8-r14 from previous mode */
+		sub	sp, sp, #(7 * 4)
+		stmia	sp, {r8-r14}^
+		nop
+
+		/* store r0-r7 from previous mode */
+		stmfd	sp!, {r0-r7}
+
+		/* setup func(data,regs) arguments */
+		mov	r0, r9
+		mov	r1, sp
+		mov	r3, r8
+
+		mov	r7, sp
+
+		/* Get sp and lr from non-user modes */
+		and	r4, r12, #MODE_MASK
+		cmp	r4, #USR_MODE
+		beq	fiq_from_usr_mode
+
+		mov	r7, sp
+		orr	r4, r4, #(PSR_I_BIT | PSR_F_BIT)
+		msr	cpsr_c, r4
+		str	sp, [r7, #(4 * 13)]
+		str	lr, [r7, #(4 * 14)]
+		mrs	r5, spsr
+		str	r5, [r7, #(4 * 17)]
+
+		cmp	r4, #(SVC_MODE | PSR_I_BIT | PSR_F_BIT)
+		/* use fiq stack if we reenter this mode */
+		subne	sp, r7, #(4 * 3)
+
+fiq_from_usr_mode:
+		msr	cpsr_c, #(SVC_MODE | PSR_I_BIT | PSR_F_BIT)
+		mov	r2, sp
+		sub	sp, r7, #12
+		stmfd	sp!, {r2, ip, lr}
+		/* call func(data,regs) */
+		blx	r3
+		ldmfd	sp, {r2, ip, lr}
+		mov	sp, r2
+
+		/* restore/discard saved state */
+		cmp	r4, #USR_MODE
+		beq	fiq_from_usr_mode_exit
+
+		msr	cpsr_c, r4
+		ldr	sp, [r7, #(4 * 13)]
+		ldr	lr, [r7, #(4 * 14)]
+		msr	spsr_cxsf, r5
+
+fiq_from_usr_mode_exit:
+		msr	cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT)
+
+		ldmfd	sp!, {r0-r7}
+		ldr	lr, [sp, #(4 * 7)]
+		ldr	r12, [sp, #(4 * 8)]
+		add	sp, sp, #(10 * 4)
+exit_fiq:
+		msr	spsr_cxsf, r12
+		add	r10, #1
+		cmp	r11, #0
+		moveqs	pc, lr
+		bx	r11 /* jump to custom fiq return function */
+
+nested_fiq:
+		orr	r12, r12, #(PSR_F_BIT)
+		b	exit_fiq
+
+fiq_glue_end:
+
+ENTRY(fiq_glue_setup) /* func, data, sp, smc call number */
+		stmfd		sp!, {r4}
+		mrs		r4, cpsr
+		msr		cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT)
+		movs		r8, r0
+		mov		r9, r1
+		mov		sp, r2
+		mov		r11, r3
+		moveq		r10, #0
+		movne		r10, #1
+		msr		cpsr_c, r4
+		ldmfd		sp!, {r4}
+		bx		lr
+
diff --git a/arch/arm/common/fiq_glue_setup.c b/arch/arm/common/fiq_glue_setup.c
new file mode 100644
index 0000000..8cb1b61
--- /dev/null
+++ b/arch/arm/common/fiq_glue_setup.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <asm/fiq.h>
+#include <asm/fiq_glue.h>
+
+extern unsigned char fiq_glue, fiq_glue_end;
+extern void fiq_glue_setup(void *func, void *data, void *sp,
+			   fiq_return_handler_t fiq_return_handler);
+
+static struct fiq_handler fiq_debbuger_fiq_handler = {
+	.name = "fiq_glue",
+};
+DEFINE_PER_CPU(void *, fiq_stack);
+static struct fiq_glue_handler *current_handler;
+static fiq_return_handler_t fiq_return_handler;
+static DEFINE_MUTEX(fiq_glue_lock);
+
+static void fiq_glue_setup_helper(void *info)
+{
+	struct fiq_glue_handler *handler = info;
+	fiq_glue_setup(handler->fiq, handler,
+		__get_cpu_var(fiq_stack) + THREAD_START_SP,
+		fiq_return_handler);
+}
+
+int fiq_glue_register_handler(struct fiq_glue_handler *handler)
+{
+	int ret;
+	int cpu;
+
+	if (!handler || !handler->fiq)
+		return -EINVAL;
+
+	mutex_lock(&fiq_glue_lock);
+	if (fiq_stack) {
+		ret = -EBUSY;
+		goto err_busy;
+	}
+
+	for_each_possible_cpu(cpu) {
+		void *stack;
+		stack = (void *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
+		if (WARN_ON(!stack)) {
+			ret = -ENOMEM;
+			goto err_alloc_fiq_stack;
+		}
+		per_cpu(fiq_stack, cpu) = stack;
+	}
+
+	ret = claim_fiq(&fiq_debbuger_fiq_handler);
+	if (WARN_ON(ret))
+		goto err_claim_fiq;
+
+	current_handler = handler;
+	on_each_cpu(fiq_glue_setup_helper, handler, true);
+	set_fiq_handler(&fiq_glue, &fiq_glue_end - &fiq_glue);
+
+	mutex_unlock(&fiq_glue_lock);
+	return 0;
+
+err_claim_fiq:
+err_alloc_fiq_stack:
+	for_each_possible_cpu(cpu) {
+		__free_pages(per_cpu(fiq_stack, cpu), THREAD_SIZE_ORDER);
+		per_cpu(fiq_stack, cpu) = NULL;
+	}
+err_busy:
+	mutex_unlock(&fiq_glue_lock);
+	return ret;
+}
+
+static void fiq_glue_update_return_handler(void (*fiq_return)(void))
+{
+	fiq_return_handler = fiq_return;
+	if (current_handler)
+		on_each_cpu(fiq_glue_setup_helper, current_handler, true);
+}
+
+int fiq_glue_set_return_handler(void (*fiq_return)(void))
+{
+	int ret;
+
+	mutex_lock(&fiq_glue_lock);
+	if (fiq_return_handler) {
+		ret = -EBUSY;
+		goto err_busy;
+	}
+	fiq_glue_update_return_handler(fiq_return);
+	ret = 0;
+err_busy:
+	mutex_unlock(&fiq_glue_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(fiq_glue_set_return_handler);
+
+int fiq_glue_clear_return_handler(void (*fiq_return)(void))
+{
+	int ret;
+
+	mutex_lock(&fiq_glue_lock);
+	if (WARN_ON(fiq_return_handler != fiq_return)) {
+		ret = -EINVAL;
+		goto err_inval;
+	}
+	fiq_glue_update_return_handler(NULL);
+	ret = 0;
+err_inval:
+	mutex_unlock(&fiq_glue_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(fiq_glue_clear_return_handler);
+
+/**
+ * fiq_glue_resume - Restore fiqs after suspend or low power idle states
+ *
+ * This must be called before calling local_fiq_enable after returning from a
+ * power state where the fiq mode registers were lost. If a driver provided
+ * a resume hook when it registered the handler it will be called.
+ */
+
+void fiq_glue_resume(void)
+{
+	if (!current_handler)
+		return;
+	fiq_glue_setup(current_handler->fiq, current_handler,
+		__get_cpu_var(fiq_stack) + THREAD_START_SP,
+		fiq_return_handler);
+	if (current_handler->resume)
+		current_handler->resume(current_handler);
+}
+
diff --git a/arch/arm/configs/ranchu_defconfig b/arch/arm/configs/ranchu_defconfig
new file mode 100644
index 0000000..49e7bbd
--- /dev/null
+++ b/arch/arm/configs/ranchu_defconfig
@@ -0,0 +1,316 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_ARCH_MMAP_RND_BITS=16
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARCH_VIRT=y
+CONFIG_ARM_KERNMEM_PERMS=y
+CONFIG_SMP=y
+CONFIG_PREEMPT=y
+CONFIG_AEABI=y
+CONFIG_HIGHMEM=y
+CONFIG_KSM=y
+CONFIG_SECCOMP=y
+CONFIG_CMDLINE="console=ttyAMA0"
+CONFIG_VFP=y
+CONFIG_NEON=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_PM_AUTOSLEEP=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_DEBUG=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_INET_DIAG_DESTROY=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_LRO is not set
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_BRIDGE=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_VIRTIO_BLK=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_NETDEVICES=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+CONFIG_SMSC911X=y
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
+CONFIG_USB_USBNET=y
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_KEYRESET=y
+CONFIG_KEYBOARD_GOLDFISH_EVENTS=y
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_GTCO=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_KEYCHORD=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_GPIO=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_AMBAKMI=y
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVMEM is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_FB=y
+CONFIG_FB_GOLDFISH=y
+CONFIG_FB_SIMPLE=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OTG_WAKELOCK=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_PL031=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
+CONFIG_SW_SYNC_USER=y
+CONFIG_ION=y
+CONFIG_GOLDFISH_AUDIO=y
+CONFIG_GOLDFISH=y
+CONFIG_GOLDFISH_PIPE=y
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_QUOTA=y
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_DEBUG_INFO=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_TIMEOUT=5
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_ENABLE_DEFAULT_TRACERS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_VIRTUALIZATION=y
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index f13ae15..d2315ff 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -112,8 +112,12 @@
 #define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE	4096
 
-/* This is the base location for PIE (ET_DYN with INTERP) loads. */
-#define ELF_ET_DYN_BASE		0x400000UL
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk.  */
+
+#define ELF_ET_DYN_BASE	(TASK_SIZE / 3 * 2)
 
 /* When the program starts, a1 contains a pointer to a function to be 
    registered with atexit, as per the SVR4 ABI.  A value of 0 means we 
diff --git a/arch/arm/include/asm/fiq_glue.h b/arch/arm/include/asm/fiq_glue.h
new file mode 100644
index 0000000..a9e244f9
--- /dev/null
+++ b/arch/arm/include/asm/fiq_glue.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __ASM_FIQ_GLUE_H
+#define __ASM_FIQ_GLUE_H
+
+struct fiq_glue_handler {
+	void (*fiq)(struct fiq_glue_handler *h, void *regs, void *svc_sp);
+	void (*resume)(struct fiq_glue_handler *h);
+};
+typedef void (*fiq_return_handler_t)(void);
+
+int fiq_glue_register_handler(struct fiq_glue_handler *handler);
+int fiq_glue_set_return_handler(fiq_return_handler_t fiq_return);
+int fiq_glue_clear_return_handler(fiq_return_handler_t fiq_return);
+
+#ifdef CONFIG_FIQ_GLUE
+void fiq_glue_resume(void);
+#else
+static inline void fiq_glue_resume(void) {}
+#endif
+
+#endif
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 370f7a7..d060641 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -3,6 +3,7 @@
 
 #ifdef CONFIG_ARM_CPU_TOPOLOGY
 
+#include <linux/cpufreq.h>
 #include <linux/cpumask.h>
 
 struct cputopo_arm {
@@ -24,6 +25,12 @@
 void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
 
+#ifdef CONFIG_CPU_FREQ
+#define arch_scale_freq_capacity cpufreq_scale_freq_capacity
+#endif
+#define arch_scale_cpu_capacity scale_cpu_capacity
+extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu);
+
 #else
 
 static inline void init_cpu_topology(void) { }
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index 9232cae..f3c6622 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -140,6 +140,8 @@
 
 static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr)
 {
+	if (user_mode(regs))
+		return -1;
 	kgdb_handle_exception(1, SIGTRAP, 0, regs);
 
 	return 0;
@@ -147,6 +149,8 @@
 
 static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr)
 {
+	if (user_mode(regs))
+		return -1;
 	compiled_break = 1;
 	kgdb_handle_exception(1, SIGTRAP, 0, regs);
 
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 91d2d5b0..c6324b5 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -80,6 +80,7 @@
 
 void arch_cpu_idle_enter(void)
 {
+	idle_notifier_call_chain(IDLE_START);
 	ledtrig_cpu(CPU_LED_IDLE_START);
 #ifdef CONFIG_PL310_ERRATA_769419
 	wmb();
@@ -89,6 +90,78 @@
 void arch_cpu_idle_exit(void)
 {
 	ledtrig_cpu(CPU_LED_IDLE_END);
+	idle_notifier_call_chain(IDLE_END);
+}
+
+/*
+ * dump a block of kernel memory from around the given address
+ */
+static void show_data(unsigned long addr, int nbytes, const char *name)
+{
+	int	i, j;
+	int	nlines;
+	u32	*p;
+
+	/*
+	 * don't attempt to dump non-kernel addresses or
+	 * values that are probably just small negative numbers
+	 */
+	if (addr < PAGE_OFFSET || addr > -256UL)
+		return;
+
+	printk("\n%s: %#lx:\n", name, addr);
+
+	/*
+	 * round address down to a 32 bit boundary
+	 * and always dump a multiple of 32 bytes
+	 */
+	p = (u32 *)(addr & ~(sizeof(u32) - 1));
+	nbytes += (addr & (sizeof(u32) - 1));
+	nlines = (nbytes + 31) / 32;
+
+
+	for (i = 0; i < nlines; i++) {
+		/*
+		 * just display low 16 bits of address to keep
+		 * each line of the dump < 80 characters
+		 */
+		printk("%04lx ", (unsigned long)p & 0xffff);
+		for (j = 0; j < 8; j++) {
+			u32	data;
+			if (probe_kernel_address(p, data)) {
+				printk(" ********");
+			} else {
+				printk(" %08x", data);
+			}
+			++p;
+		}
+		printk("\n");
+	}
+}
+
+static void show_extra_register_data(struct pt_regs *regs, int nbytes)
+{
+	mm_segment_t fs;
+
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	show_data(regs->ARM_pc - nbytes, nbytes * 2, "PC");
+	show_data(regs->ARM_lr - nbytes, nbytes * 2, "LR");
+	show_data(regs->ARM_sp - nbytes, nbytes * 2, "SP");
+	show_data(regs->ARM_ip - nbytes, nbytes * 2, "IP");
+	show_data(regs->ARM_fp - nbytes, nbytes * 2, "FP");
+	show_data(regs->ARM_r0 - nbytes, nbytes * 2, "R0");
+	show_data(regs->ARM_r1 - nbytes, nbytes * 2, "R1");
+	show_data(regs->ARM_r2 - nbytes, nbytes * 2, "R2");
+	show_data(regs->ARM_r3 - nbytes, nbytes * 2, "R3");
+	show_data(regs->ARM_r4 - nbytes, nbytes * 2, "R4");
+	show_data(regs->ARM_r5 - nbytes, nbytes * 2, "R5");
+	show_data(regs->ARM_r6 - nbytes, nbytes * 2, "R6");
+	show_data(regs->ARM_r7 - nbytes, nbytes * 2, "R7");
+	show_data(regs->ARM_r8 - nbytes, nbytes * 2, "R8");
+	show_data(regs->ARM_r9 - nbytes, nbytes * 2, "R9");
+	show_data(regs->ARM_r10 - nbytes, nbytes * 2, "R10");
+	set_fs(fs);
 }
 
 void __show_regs(struct pt_regs *regs)
@@ -182,6 +255,8 @@
 		printk("Control: %08x%s\n", ctrl, buf);
 	}
 #endif
+
+	show_extra_register_data(regs, 128);
 }
 
 void show_regs(struct pt_regs * regs)
diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c
index 3fa867a..d704df8 100644
--- a/arch/arm/kernel/reboot.c
+++ b/arch/arm/kernel/reboot.c
@@ -6,6 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/reboot.h>
@@ -122,6 +123,31 @@
 		pm_power_off();
 }
 
+#ifdef CONFIG_ARM_FLUSH_CONSOLE_ON_RESTART
+void arm_machine_flush_console(void)
+{
+	printk("\n");
+	pr_emerg("Restarting %s\n", linux_banner);
+	if (console_trylock()) {
+		console_unlock();
+		return;
+	}
+
+	mdelay(50);
+
+	local_irq_disable();
+	if (!console_trylock())
+		pr_emerg("arm_restart: Console was locked! Busting\n");
+	else
+		pr_emerg("arm_restart: Console was locked!\n");
+	console_unlock();
+}
+#else
+void arm_machine_flush_console(void)
+{
+}
+#endif
+
 /*
  * Restart requires that the secondary CPUs stop performing any activity
  * while the primary CPU resets the system. Systems with a single CPU can
@@ -138,6 +164,10 @@
 	local_irq_disable();
 	smp_send_stop();
 
+	/* Flush the console to make sure all the relevant messages make it
+	 * out to the console drivers */
+	arm_machine_flush_console();
+
 	if (arm_pm_restart)
 		arm_pm_restart(reboot_mode, cmd);
 	else
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index ec279d1..df30200 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -42,9 +42,15 @@
  */
 static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
 
-unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
+unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu)
 {
+#ifdef CONFIG_CPU_FREQ
+	unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu);
+
+	return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT;
+#else
 	return per_cpu(cpu_scale, cpu);
+#endif
 }
 
 static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
@@ -153,6 +159,8 @@
 
 }
 
+static const struct sched_group_energy * const cpu_core_energy(int cpu);
+
 /*
  * Look for a customed capacity of a CPU in the cpu_capacity table during the
  * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
@@ -160,10 +168,14 @@
  */
 static void update_cpu_capacity(unsigned int cpu)
 {
-	if (!cpu_capacity(cpu))
-		return;
+	unsigned long capacity = SCHED_CAPACITY_SCALE;
 
-	set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+	if (cpu_core_energy(cpu)) {
+		int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1;
+		capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap;
+	}
+
+	set_capacity_scale(cpu, capacity);
 
 	pr_info("CPU%u: update cpu_capacity %lu\n",
 		cpu, arch_scale_cpu_capacity(NULL, cpu));
@@ -275,17 +287,138 @@
 		cpu_topology[cpuid].socket_id, mpidr);
 }
 
+/*
+ * ARM TC2 specific energy cost model data. There are no unit requirements for
+ * the data. Data can be normalized to any reference point, but the
+ * normalization must be consistent. That is, one bogo-joule/watt must be the
+ * same quantity for all data, but we don't care what it is.
+ */
+static struct idle_state idle_states_cluster_a7[] = {
+	 { .power = 25 }, /* arch_cpu_idle() (active idle) = WFI */
+	 { .power = 25 }, /* WFI */
+	 { .power = 10 }, /* cluster-sleep-l */
+	};
+
+static struct idle_state idle_states_cluster_a15[] = {
+	 { .power = 70 }, /* arch_cpu_idle() (active idle) = WFI */
+	 { .power = 70 }, /* WFI */
+	 { .power = 25 }, /* cluster-sleep-b */
+	};
+
+static struct capacity_state cap_states_cluster_a7[] = {
+	/* Cluster only power */
+	 { .cap =  150, .power = 2967, }, /*  350 MHz */
+	 { .cap =  172, .power = 2792, }, /*  400 MHz */
+	 { .cap =  215, .power = 2810, }, /*  500 MHz */
+	 { .cap =  258, .power = 2815, }, /*  600 MHz */
+	 { .cap =  301, .power = 2919, }, /*  700 MHz */
+	 { .cap =  344, .power = 2847, }, /*  800 MHz */
+	 { .cap =  387, .power = 3917, }, /*  900 MHz */
+	 { .cap =  430, .power = 4905, }, /* 1000 MHz */
+	};
+
+static struct capacity_state cap_states_cluster_a15[] = {
+	/* Cluster only power */
+	 { .cap =  426, .power =  7920, }, /*  500 MHz */
+	 { .cap =  512, .power =  8165, }, /*  600 MHz */
+	 { .cap =  597, .power =  8172, }, /*  700 MHz */
+	 { .cap =  682, .power =  8195, }, /*  800 MHz */
+	 { .cap =  768, .power =  8265, }, /*  900 MHz */
+	 { .cap =  853, .power =  8446, }, /* 1000 MHz */
+	 { .cap =  938, .power = 11426, }, /* 1100 MHz */
+	 { .cap = 1024, .power = 15200, }, /* 1200 MHz */
+	};
+
+static struct sched_group_energy energy_cluster_a7 = {
+	  .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a7),
+	  .idle_states    = idle_states_cluster_a7,
+	  .nr_cap_states  = ARRAY_SIZE(cap_states_cluster_a7),
+	  .cap_states     = cap_states_cluster_a7,
+};
+
+static struct sched_group_energy energy_cluster_a15 = {
+	  .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a15),
+	  .idle_states    = idle_states_cluster_a15,
+	  .nr_cap_states  = ARRAY_SIZE(cap_states_cluster_a15),
+	  .cap_states     = cap_states_cluster_a15,
+};
+
+static struct idle_state idle_states_core_a7[] = {
+	 { .power = 0 }, /* arch_cpu_idle (active idle) = WFI */
+	 { .power = 0 }, /* WFI */
+	 { .power = 0 }, /* cluster-sleep-l */
+	};
+
+static struct idle_state idle_states_core_a15[] = {
+	 { .power = 0 }, /* arch_cpu_idle (active idle) = WFI */
+	 { .power = 0 }, /* WFI */
+	 { .power = 0 }, /* cluster-sleep-b */
+	};
+
+static struct capacity_state cap_states_core_a7[] = {
+	/* Power per cpu */
+	 { .cap =  150, .power =  187, }, /*  350 MHz */
+	 { .cap =  172, .power =  275, }, /*  400 MHz */
+	 { .cap =  215, .power =  334, }, /*  500 MHz */
+	 { .cap =  258, .power =  407, }, /*  600 MHz */
+	 { .cap =  301, .power =  447, }, /*  700 MHz */
+	 { .cap =  344, .power =  549, }, /*  800 MHz */
+	 { .cap =  387, .power =  761, }, /*  900 MHz */
+	 { .cap =  430, .power = 1024, }, /* 1000 MHz */
+	};
+
+static struct capacity_state cap_states_core_a15[] = {
+	/* Power per cpu */
+	 { .cap =  426, .power = 2021, }, /*  500 MHz */
+	 { .cap =  512, .power = 2312, }, /*  600 MHz */
+	 { .cap =  597, .power = 2756, }, /*  700 MHz */
+	 { .cap =  682, .power = 3125, }, /*  800 MHz */
+	 { .cap =  768, .power = 3524, }, /*  900 MHz */
+	 { .cap =  853, .power = 3846, }, /* 1000 MHz */
+	 { .cap =  938, .power = 5177, }, /* 1100 MHz */
+	 { .cap = 1024, .power = 6997, }, /* 1200 MHz */
+	};
+
+static struct sched_group_energy energy_core_a7 = {
+	  .nr_idle_states = ARRAY_SIZE(idle_states_core_a7),
+	  .idle_states    = idle_states_core_a7,
+	  .nr_cap_states  = ARRAY_SIZE(cap_states_core_a7),
+	  .cap_states     = cap_states_core_a7,
+};
+
+static struct sched_group_energy energy_core_a15 = {
+	  .nr_idle_states = ARRAY_SIZE(idle_states_core_a15),
+	  .idle_states    = idle_states_core_a15,
+	  .nr_cap_states  = ARRAY_SIZE(cap_states_core_a15),
+	  .cap_states     = cap_states_core_a15,
+};
+
+/* sd energy functions */
+static inline
+const struct sched_group_energy * const cpu_cluster_energy(int cpu)
+{
+	return cpu_topology[cpu].socket_id ? &energy_cluster_a7 :
+			&energy_cluster_a15;
+}
+
+static inline
+const struct sched_group_energy * const cpu_core_energy(int cpu)
+{
+	return cpu_topology[cpu].socket_id ? &energy_core_a7 :
+			&energy_core_a15;
+}
+
 static inline int cpu_corepower_flags(void)
 {
-	return SD_SHARE_PKG_RESOURCES  | SD_SHARE_POWERDOMAIN;
+	return SD_SHARE_PKG_RESOURCES  | SD_SHARE_POWERDOMAIN | \
+	       SD_SHARE_CAP_STATES;
 }
 
 static struct sched_domain_topology_level arm_topology[] = {
 #ifdef CONFIG_SCHED_MC
-	{ cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) },
-	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+	{ cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) },
 #endif
-	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ cpu_cpu_mask, NULL, cpu_cluster_energy, SD_INIT_NAME(DIE) },
 	{ NULL, },
 };
 
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 2465995..11da0f5 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -270,6 +270,11 @@
  *	- end     - virtual end address of region
  */
 ENTRY(v6_dma_flush_range)
+#ifdef CONFIG_CACHE_FLUSH_RANGE_LIMIT
+	sub	r2, r1, r0
+	cmp	r2, #CONFIG_CACHE_FLUSH_RANGE_LIMIT
+	bhi	v6_dma_flush_dcache_all
+#endif
 #ifdef CONFIG_DMA_CACHE_RWFO
 	ldrb	r2, [r0]		@ read for ownership
 	strb	r2, [r0]		@ write for ownership
@@ -292,6 +297,18 @@
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
 	ret	lr
 
+#ifdef CONFIG_CACHE_FLUSH_RANGE_LIMIT
+v6_dma_flush_dcache_all:
+	mov	r0, #0
+#ifdef HARVARD_CACHE
+	mcr	p15, 0, r0, c7, c14, 0		@ D cache clean+invalidate
+#else
+	mcr	p15, 0, r0, c7, c15, 0		@ Cache clean+invalidate
+#endif
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mov	pc, lr
+#endif
+
 /*
  *	dma_map_area(start, size, dir)
  *	- start	- kernel virtual start address
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 5ca207a..d6c9dee 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -276,10 +276,10 @@
 		local_irq_enable();
 
 	/*
-	 * If we're in an interrupt or have no user
+	 * If we're in an interrupt, or have no irqs, or have no user
 	 * context, we must not take the fault..
 	 */
-	if (faulthandler_disabled() || !mm)
+	if (faulthandler_disabled() || irqs_disabled() || !mm)
 		goto no_context;
 
 	if (user_mode(regs))
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 2eb8ae1..75646d5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -852,6 +852,14 @@
 	  If unsure, say Y
 endif
 
+config ARM64_SW_TTBR0_PAN
+	bool "Emulate Privileged Access Never using TTBR0_EL1 switching"
+	help
+	  Enabling this option prevents the kernel from accessing
+	  user-space memory directly by pointing TTBR0_EL1 to a reserved
+	  zeroed area and reserved ASID. The user access routines
+	  restore the valid TTBR0_EL1 temporarily.
+
 menu "ARMv8.1 architectural features"
 
 config ARM64_HW_AFDBM
@@ -1011,6 +1019,23 @@
 	  entering them here. As a minimum, you should specify the the
 	  root device (e.g. root=/dev/nfs).
 
+choice
+	prompt "Kernel command line type" if CMDLINE != ""
+	default CMDLINE_FROM_BOOTLOADER
+
+config CMDLINE_FROM_BOOTLOADER
+	bool "Use bootloader kernel arguments if available"
+	help
+	  Uses the command-line options passed by the boot loader. If
+	  the boot loader doesn't provide any, the default kernel command
+	  string provided in CMDLINE will be used.
+
+config CMDLINE_EXTEND
+	bool "Extend bootloader kernel arguments"
+	help
+	  The command-line arguments provided by the boot loader will be
+	  appended to the default kernel command string.
+
 config CMDLINE_FORCE
 	bool "Always use the default kernel command string"
 	help
@@ -1018,6 +1043,7 @@
 	  loader passes other arguments to the kernel.
 	  This is useful if you cannot or don't want to change the
 	  command-line options your boot loader passes to the kernel.
+endchoice
 
 config EFI_STUB
 	bool
@@ -1050,6 +1076,41 @@
 	  However, even with this option, the resultant kernel should
 	  continue to boot on existing non-UEFI platforms.
 
+config BUILD_ARM64_APPENDED_DTB_IMAGE
+	bool "Build a concatenated Image.gz/dtb by default"
+	depends on OF
+	help
+	  Enabling this option will cause a concatenated Image.gz and list of
+	  DTBs to be built by default (instead of a standalone Image.gz.)
+	  The image will built in arch/arm64/boot/Image.gz-dtb
+
+choice
+	prompt "Appended DTB Kernel Image name"
+	depends on BUILD_ARM64_APPENDED_DTB_IMAGE
+	help
+	  Enabling this option will cause a specific kernel image Image or
+	  Image.gz to be used for final image creation.
+	  The image will built in arch/arm64/boot/IMAGE-NAME-dtb
+
+	config IMG_GZ_DTB
+		bool "Image.gz-dtb"
+	config IMG_DTB
+		bool "Image-dtb"
+endchoice
+
+config BUILD_ARM64_APPENDED_KERNEL_IMAGE_NAME
+	string
+	depends on BUILD_ARM64_APPENDED_DTB_IMAGE
+	default "Image.gz-dtb" if IMG_GZ_DTB
+	default "Image-dtb" if IMG_DTB
+
+config BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES
+	string "Default dtb names"
+	depends on BUILD_ARM64_APPENDED_DTB_IMAGE
+	help
+	  Space separated list of names of dtbs to append when
+	  building a concatenated Image.gz-dtb.
+
 endmenu
 
 menu "Userspace binary formats"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index ee94597..180e9be 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -44,6 +44,7 @@
 KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr)
 KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
 KBUILD_CFLAGS	+= $(call cc-option, -mpc-relative-literal-loads)
+KBUILD_CFLAGS	+= -fno-pic
 KBUILD_AFLAGS	+= $(lseinstr)
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
@@ -98,7 +99,12 @@
 core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
 
 # Default target when executing plain make
+ifeq ($(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE),y)
+KBUILD_IMAGE	:= $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_KERNEL_IMAGE_NAME))
+else
 KBUILD_IMAGE	:= Image.gz
+endif
+
 KBUILD_DTBS	:= dtbs
 
 all:	$(KBUILD_IMAGE) $(KBUILD_DTBS)
@@ -125,6 +131,9 @@
 dtbs_install:
 	$(Q)$(MAKE) $(dtbinst)=$(boot)/dts
 
+Image-dtb Image.gz-dtb: vmlinux scripts dtbs
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
 PHONY += vdso_install
 vdso_install:
 	$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@
diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore
index 8dab0bb..34e3520 100644
--- a/arch/arm64/boot/.gitignore
+++ b/arch/arm64/boot/.gitignore
@@ -1,2 +1,4 @@
 Image
+Image-dtb
 Image.gz
+Image.gz-dtb
diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile
index 1f012c5..2c8cb86 100644
--- a/arch/arm64/boot/Makefile
+++ b/arch/arm64/boot/Makefile
@@ -14,16 +14,29 @@
 # Based on the ia64 boot/Makefile.
 #
 
+include $(srctree)/arch/arm64/boot/dts/Makefile
+
 OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
 
 targets := Image Image.gz
 
+DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES))
+ifneq ($(DTB_NAMES),)
+DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES))
+else
+DTB_LIST := $(dtb-y)
+endif
+DTB_OBJS := $(addprefix $(obj)/dts/,$(DTB_LIST))
+
 $(obj)/Image: vmlinux FORCE
 	$(call if_changed,objcopy)
 
 $(obj)/Image.bz2: $(obj)/Image FORCE
 	$(call if_changed,bzip2)
 
+$(obj)/Image-dtb: $(obj)/Image $(DTB_OBJS) FORCE
+	$(call if_changed,cat)
+
 $(obj)/Image.gz: $(obj)/Image FORCE
 	$(call if_changed,gzip)
 
@@ -36,6 +49,9 @@
 $(obj)/Image.lzo: $(obj)/Image FORCE
 	$(call if_changed,lzo)
 
+$(obj)/Image.gz-dtb: $(obj)/Image.gz $(DTB_OBJS) FORCE
+	$(call if_changed,cat)
+
 install:
 	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
 	$(obj)/Image System.map "$(INSTALL_PATH)"
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index 6684f97..7ad2cf0 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -28,3 +28,17 @@
 dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(foreach d,$(dts-dirs), $(wildcard $(dtstree)/$(d)/*.dts)))
 
 always		:= $(dtb-y)
+
+targets += dtbs
+
+DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES))
+ifneq ($(DTB_NAMES),)
+DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES))
+else
+DTB_LIST := $(dtb-y)
+endif
+targets += $(DTB_LIST)
+
+dtbs: $(addprefix $(obj)/, $(DTB_LIST))
+
+clean-files := dts/*.dtb *.dtb
diff --git a/arch/arm64/configs/ranchu64_defconfig b/arch/arm64/configs/ranchu64_defconfig
new file mode 100644
index 0000000..fc55008
--- /dev/null
+++ b/arch/arm64/configs/ranchu64_defconfig
@@ -0,0 +1,312 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_ARCH_MMAP_RND_BITS=24
+CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+CONFIG_ARCH_VEXPRESS=y
+CONFIG_NR_CPUS=4
+CONFIG_PREEMPT=y
+CONFIG_KSM=y
+CONFIG_SECCOMP=y
+CONFIG_ARMV8_DEPRECATED=y
+CONFIG_SWP_EMULATION=y
+CONFIG_CP15_BARRIER_EMULATION=y
+CONFIG_SETEND_EMULATION=y
+CONFIG_CMDLINE="console=ttyAMA0"
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_COMPAT=y
+CONFIG_PM_AUTOSLEEP=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_DEBUG=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_INET_DIAG_DESTROY=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_LRO is not set
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_RPFILTER=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_TARGET_ECN=y
+CONFIG_IP_NF_TARGET_TTL=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_MATCH_AH=y
+CONFIG_IP6_NF_MATCH_EUI64=y
+CONFIG_IP6_NF_MATCH_FRAG=y
+CONFIG_IP6_NF_MATCH_OPTS=y
+CONFIG_IP6_NF_MATCH_HL=y
+CONFIG_IP6_NF_MATCH_IPV6HEADER=y
+CONFIG_IP6_NF_MATCH_MH=y
+CONFIG_IP6_NF_MATCH_RT=y
+CONFIG_IP6_NF_TARGET_HL=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_BRIDGE=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_VIRTIO_BLK=y
+CONFIG_SCSI=y
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_NETDEVICES=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+CONFIG_SMC91X=y
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_KEYRESET=y
+CONFIG_KEYBOARD_GOLDFISH_EVENTS=y
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_INPUT_TABLET=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_KEYCHORD=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_GPIO=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVMEM is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_BATTERY_GOLDFISH=y
+# CONFIG_HWMON is not set
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_FB=y
+CONFIG_FB_GOLDFISH=y
+CONFIG_FB_SIMPLE=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_RTC_CLASS=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_TIMED_GPIO=y
+CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
+CONFIG_SW_SYNC_USER=y
+CONFIG_ION=y
+CONFIG_GOLDFISH_AUDIO=y
+CONFIG_GOLDFISH=y
+CONFIG_GOLDFISH_PIPE=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_QUOTA=y
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_PANIC_TIMEOUT=5
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+# CONFIG_FTRACE is not set
+CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_DEBUG_RODATA=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 3f85bbc..148849c 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -42,6 +42,15 @@
 	msr	daifclr, #2
 	.endm
 
+	.macro	save_and_disable_irq, flags
+	mrs	\flags, daif
+	msr	daifset, #2
+	.endm
+
+	.macro	restore_irq, flags
+	msr	daif, \flags
+	.endm
+
 /*
  * Enable and disable debug exceptions.
  */
@@ -451,6 +460,13 @@
 	movk	\reg, :abs_g0_nc:\val
 	.endm
 
+/*
+ * Return the current thread_info.
+ */
+	.macro	get_thread_info, rd
+	mrs	\rd, sp_el0
+	.endm
+
 	.macro	pte_to_phys, phys, pte
 	and	\phys, \pte, #(((1 << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
 	.endm
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 15868ec..1dc16f5 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -221,6 +221,12 @@
 	return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
 }
 
+static inline bool system_uses_ttbr0_pan(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
+		!cpus_have_cap(ARM64_HAS_PAN);
+}
+
 #define ARM64_SSBD_UNKNOWN		-1
 #define ARM64_SSBD_FORCE_DISABLE	0
 #define ARM64_SSBD_KERNEL		1
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 6561582..6c4f9e8 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_EFI_H
 #define _ASM_EFI_H
 
+#include <asm/cpufeature.h>
 #include <asm/io.h>
 #include <asm/mmu_context.h>
 #include <asm/neon.h>
@@ -78,7 +79,32 @@
 
 static inline void efi_set_pgd(struct mm_struct *mm)
 {
-	switch_mm(NULL, mm, NULL);
+	__switch_mm(mm);
+
+	if (system_uses_ttbr0_pan()) {
+		if (mm != current->active_mm) {
+			/*
+			 * Update the current thread's saved ttbr0 since it is
+			 * restored as part of a return from exception. Enable
+			 * access to the valid TTBR0_EL1 and invoke the errata
+			 * workaround directly since there is no return from
+			 * exception when invoking the EFI run-time services.
+			 */
+			update_saved_ttbr0(current, mm);
+			uaccess_ttbr0_enable();
+			post_ttbr_update_workaround();
+		} else {
+			/*
+			 * Defer the switch to the current thread's TTBR0_EL1
+			 * until uaccess_enable(). Restore the current
+			 * thread's saved ttbr0 corresponding to its active_mm
+			 * (if different from init_mm).
+			 */
+			uaccess_ttbr0_disable();
+			if (current->active_mm != &init_mm)
+				update_saved_ttbr0(current, current->active_mm);
+		}
+	}
 }
 
 void efi_virtmap_load(void);
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 1fb02307..40a8a94 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -169,7 +169,7 @@
 #ifdef CONFIG_COMPAT
 
 /* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */
-#define COMPAT_ELF_ET_DYN_BASE		0x000400000UL
+#define COMPAT_ELF_ET_DYN_BASE		(2 * TASK_SIZE_32 / 3)
 
 /* AArch32 registers. */
 #define COMPAT_ELF_NGREG		18
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index d14c478..85997c0 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -175,6 +175,12 @@
 #define ESR_ELx_SYS64_ISS_SYS_CTR_READ	(ESR_ELx_SYS64_ISS_SYS_CTR | \
 					 ESR_ELx_SYS64_ISS_DIR_READ)
 
+#define ESR_ELx_SYS64_ISS_SYS_CNTVCT	(ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \
+					 ESR_ELx_SYS64_ISS_DIR_READ)
+
+#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ	(ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \
+					 ESR_ELx_SYS64_ISS_DIR_READ)
+
 #ifndef __ASSEMBLY__
 #include <asm/types.h>
 
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 2a5090f..a891bb6 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -21,15 +21,12 @@
 #include <linux/futex.h>
 #include <linux/uaccess.h>
 
-#include <asm/alternative.h>
-#include <asm/cpufeature.h>
 #include <asm/errno.h>
-#include <asm/sysreg.h>
 
 #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg)		\
+do {									\
+	uaccess_enable();						\
 	asm volatile(							\
-	ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,		\
-		    CONFIG_ARM64_PAN)					\
 "	prfm	pstl1strm, %2\n"					\
 "1:	ldxr	%w1, %2\n"						\
 	insn "\n"							\
@@ -44,11 +41,11 @@
 "	.popsection\n"							\
 	_ASM_EXTABLE(1b, 4b)						\
 	_ASM_EXTABLE(2b, 4b)						\
-	ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,		\
-		    CONFIG_ARM64_PAN)					\
 	: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp)	\
 	: "r" (oparg), "Ir" (-EFAULT)					\
-	: "memory")
+	: "memory");							\
+	uaccess_disable();						\
+} while (0)
 
 static inline int
 arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
@@ -102,8 +99,8 @@
 		return -EFAULT;
 
 	uaddr = __uaccess_mask_ptr(_uaddr);
+	uaccess_enable();
 	asm volatile("// futex_atomic_cmpxchg_inatomic\n"
-ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 "	prfm	pstl1strm, %2\n"
 "1:	ldxr	%w1, %2\n"
 "	sub	%w3, %w1, %w4\n"
@@ -118,10 +115,10 @@
 "	.popsection\n"
 	_ASM_EXTABLE(1b, 4b)
 	_ASM_EXTABLE(2b, 4b)
-ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 	: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
 	: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
 	: "memory");
+	uaccess_disable();
 
 	*uval = val;
 	return ret;
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 9510ace..b6b167a 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -77,7 +77,11 @@
 /* Lengths */
 #define ARM_BREAKPOINT_LEN_1	0x1
 #define ARM_BREAKPOINT_LEN_2	0x3
+#define ARM_BREAKPOINT_LEN_3	0x7
 #define ARM_BREAKPOINT_LEN_4	0xf
+#define ARM_BREAKPOINT_LEN_5	0x1f
+#define ARM_BREAKPOINT_LEN_6	0x3f
+#define ARM_BREAKPOINT_LEN_7	0x7f
 #define ARM_BREAKPOINT_LEN_8	0xff
 
 /* Kernel stepping */
@@ -119,7 +123,7 @@
 struct pmu;
 
 extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
-				  int *gen_len, int *gen_type);
+				  int *gen_len, int *gen_type, int *offset);
 extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
 extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 7e51d1b..77a27af 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -19,6 +19,7 @@
 #ifndef __ASM_KERNEL_PGTABLE_H
 #define __ASM_KERNEL_PGTABLE_H
 
+#include <asm/pgtable.h>
 #include <asm/sparsemem.h>
 
 /*
@@ -54,6 +55,12 @@
 #define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
 #define IDMAP_DIR_SIZE		(IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
 
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+#define RESERVED_TTBR0_SIZE	(PAGE_SIZE)
+#else
+#define RESERVED_TTBR0_SIZE	(0)
+#endif
+
 /* Initial memory map size */
 #if ARM64_SWAPPER_USES_SECTION_MAPS
 #define SWAPPER_BLOCK_SHIFT	SECTION_SHIFT
@@ -71,8 +78,16 @@
 /*
  * Initial memory map attributes.
  */
-#define SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define _SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define _SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define SWAPPER_PTE_FLAGS	(_SWAPPER_PTE_FLAGS | PTE_NG)
+#define SWAPPER_PMD_FLAGS	(_SWAPPER_PMD_FLAGS | PMD_SECT_NG)
+#else
+#define SWAPPER_PTE_FLAGS	_SWAPPER_PTE_FLAGS
+#define SWAPPER_PMD_FLAGS	_SWAPPER_PMD_FLAGS
+#endif
 
 #if ARM64_SWAPPER_USES_SECTION_MAPS
 #define SWAPPER_MM_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 6ac34c7..18b9f15 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -16,7 +16,9 @@
 #ifndef __ASM_MMU_H
 #define __ASM_MMU_H
 
+
 #define USER_ASID_FLAG	(UL(1) << 48)
+#define TTBR_ASID_MASK	(UL(0xffff) << 48)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index b96c4799..1ce6f3c 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -23,6 +23,7 @@
 #include <linux/sched.h>
 
 #include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
 #include <asm/proc-fns.h>
 #include <asm-generic/mm_hooks.h>
 #include <asm/cputype.h>
@@ -110,7 +111,7 @@
 	local_flush_tlb_all();
 	cpu_set_default_tcr_t0sz();
 
-	if (mm != &init_mm)
+	if (mm != &init_mm && !system_uses_ttbr0_pan())
 		cpu_switch_mm(mm->pgd, mm);
 }
 
@@ -170,21 +171,28 @@
 {
 }
 
-/*
- * This is the actual mm switch as far as the scheduler
- * is concerned.  No registers are touched.  We avoid
- * calling the CPU specific function when the mm hasn't
- * actually changed.
- */
-static inline void
-switch_mm(struct mm_struct *prev, struct mm_struct *next,
-	  struct task_struct *tsk)
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+static inline void update_saved_ttbr0(struct task_struct *tsk,
+				      struct mm_struct *mm)
+{
+	if (system_uses_ttbr0_pan()) {
+		u64 ttbr;
+		BUG_ON(mm->pgd == swapper_pg_dir);
+		ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
+		WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);
+	}
+}
+#else
+static inline void update_saved_ttbr0(struct task_struct *tsk,
+				      struct mm_struct *mm)
+{
+}
+#endif
+
+static inline void __switch_mm(struct mm_struct *next)
 {
 	unsigned int cpu = smp_processor_id();
 
-	if (prev == next)
-		return;
-
 	/*
 	 * init_mm.pgd does not contain any user mappings and it is always
 	 * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
@@ -197,9 +205,28 @@
 	check_and_switch_context(next, cpu);
 }
 
+static inline void
+switch_mm(struct mm_struct *prev, struct mm_struct *next,
+	  struct task_struct *tsk)
+{
+	if (prev != next)
+		__switch_mm(next);
+
+	/*
+	 * Update the saved TTBR0_EL1 of the scheduled-in task as the previous
+	 * value may have not been initialised yet (activate_mm caller) or the
+	 * ASID has changed since the last run (following the context switch
+	 * of another thread of the same process). Avoid setting the reserved
+	 * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit).
+	 */
+	if (next != &init_mm)
+		update_saved_ttbr0(tsk, next);
+}
+
 #define deactivate_mm(tsk,mm)	do { } while (0)
-#define activate_mm(prev,next)	switch_mm(prev, next, NULL)
+#define activate_mm(prev,next)	switch_mm(prev, next, current)
 
 void verify_cpu_asid_bits(void);
+void post_ttbr_update_workaround(void);
 
 #endif
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 0dd1bc1..7ca27e5 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -48,6 +48,9 @@
 	unsigned long		flags;		/* low level flags */
 	mm_segment_t		addr_limit;	/* address limit */
 	struct task_struct	*task;		/* main task structure */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+	u64			ttbr0;		/* saved TTBR0_EL1 */
+#endif
 	int			preempt_count;	/* 0 => preemptable, <0 => bug */
 	int			cpu;		/* cpu */
 };
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 8b57339..7ec84d0 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -31,6 +31,14 @@
 				 cpumask_of_node(pcibus_to_node(bus)))
 
 #endif /* CONFIG_NUMA */
+struct sched_domain;
+#ifdef CONFIG_CPU_FREQ
+#define arch_scale_freq_capacity cpufreq_scale_freq_capacity
+extern unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu);
+extern unsigned long cpufreq_scale_max_freq_capacity(int cpu);
+#endif
+#define arch_scale_cpu_capacity scale_cpu_capacity
+extern unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu);
 
 #include <asm-generic/topology.h>
 
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index f5cd96c6..ed846af 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -18,6 +18,13 @@
 #ifndef __ASM_UACCESS_H
 #define __ASM_UACCESS_H
 
+#include <asm/alternative.h>
+#include <asm/kernel-pgtable.h>
+#include <asm/mmu.h>
+#include <asm/sysreg.h>
+
+#ifndef __ASSEMBLY__
+
 /*
  * User space memory access functions
  */
@@ -26,11 +33,9 @@
 #include <linux/string.h>
 #include <linux/thread_info.h>
 
-#include <asm/alternative.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
-#include <asm/sysreg.h>
 #include <asm/errno.h>
 #include <asm/memory.h>
 #include <asm/compiler.h>
@@ -136,6 +141,115 @@
 	"	.popsection\n"
 
 /*
+ * User access enabling/disabling.
+ */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+static inline void __uaccess_ttbr0_disable(void)
+{
+	unsigned long flags, ttbr;
+
+	local_irq_save(flags);
+	ttbr = read_sysreg(ttbr1_el1);
+	ttbr &= ~TTBR_ASID_MASK;
+	/* reserved_ttbr0 placed at the end of swapper_pg_dir */
+	write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1);
+	isb();
+	/* Set reserved ASID */
+	write_sysreg(ttbr, ttbr1_el1);
+	isb();
+	local_irq_restore(flags);
+}
+
+static inline void __uaccess_ttbr0_enable(void)
+{
+	unsigned long flags, ttbr0, ttbr1;
+
+	/*
+	 * Disable interrupts to avoid preemption between reading the 'ttbr0'
+	 * variable and the MSR. A context switch could trigger an ASID
+	 * roll-over and an update of 'ttbr0'.
+	 */
+	local_irq_save(flags);
+	ttbr0 = READ_ONCE(current_thread_info()->ttbr0);
+
+	/* Restore active ASID */
+	ttbr1 = read_sysreg(ttbr1_el1);
+	ttbr1 &= ~TTBR_ASID_MASK;		/* safety measure */
+	ttbr1 |= ttbr0 & TTBR_ASID_MASK;
+	write_sysreg(ttbr1, ttbr1_el1);
+	isb();
+
+	/* Restore user page table */
+	write_sysreg(ttbr0, ttbr0_el1);
+	isb();
+	local_irq_restore(flags);
+}
+
+static inline bool uaccess_ttbr0_disable(void)
+{
+	if (!system_uses_ttbr0_pan())
+		return false;
+	__uaccess_ttbr0_disable();
+	return true;
+}
+
+static inline bool uaccess_ttbr0_enable(void)
+{
+	if (!system_uses_ttbr0_pan())
+		return false;
+	__uaccess_ttbr0_enable();
+	return true;
+}
+#else
+static inline bool uaccess_ttbr0_disable(void)
+{
+	return false;
+}
+
+static inline bool uaccess_ttbr0_enable(void)
+{
+	return false;
+}
+#endif
+
+#define __uaccess_disable(alt)						\
+do {									\
+	if (!uaccess_ttbr0_disable())					\
+		asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt,		\
+				CONFIG_ARM64_PAN));			\
+} while (0)
+
+#define __uaccess_enable(alt)						\
+do {									\
+	if (!uaccess_ttbr0_enable())					\
+		asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt,		\
+				CONFIG_ARM64_PAN));			\
+} while (0)
+
+static inline void uaccess_disable(void)
+{
+	__uaccess_disable(ARM64_HAS_PAN);
+}
+
+static inline void uaccess_enable(void)
+{
+	__uaccess_enable(ARM64_HAS_PAN);
+}
+
+/*
+ * These functions are no-ops when UAO is present.
+ */
+static inline void uaccess_disable_not_uao(void)
+{
+	__uaccess_disable(ARM64_ALT_PAN_NOT_UAO);
+}
+
+static inline void uaccess_enable_not_uao(void)
+{
+	__uaccess_enable(ARM64_ALT_PAN_NOT_UAO);
+}
+
+/*
  * Sanitise a uaccess pointer such that it becomes NULL if above the
  * current addr_limit.
  */
@@ -182,8 +296,7 @@
 do {									\
 	unsigned long __gu_val;						\
 	__chk_user_ptr(ptr);						\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
-			CONFIG_ARM64_PAN));				\
+	uaccess_enable_not_uao();					\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
 		__get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr),  \
@@ -204,9 +317,8 @@
 	default:							\
 		BUILD_BUG();						\
 	}								\
+	uaccess_disable_not_uao();					\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
-			CONFIG_ARM64_PAN));				\
 } while (0)
 
 #define __get_user_check(x, ptr, err)					\
@@ -256,8 +368,7 @@
 do {									\
 	__typeof__(*(ptr)) __pu_val = (x);				\
 	__chk_user_ptr(ptr);						\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
-			CONFIG_ARM64_PAN));				\
+	uaccess_enable_not_uao();					\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
 		__put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr),	\
@@ -278,8 +389,7 @@
 	default:							\
 		BUILD_BUG();						\
 	}								\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
-			CONFIG_ARM64_PAN));				\
+	uaccess_disable_not_uao();					\
 } while (0)
 
 #define __put_user_check(x, ptr, err)					\
@@ -379,4 +489,77 @@
 extern __must_check long strlen_user(const char __user *str);
 extern __must_check long strnlen_user(const char __user *str, long n);
 
+#else	/* __ASSEMBLY__ */
+
+#include <asm/assembler.h>
+
+/*
+ * User access enabling/disabling macros.
+ */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+	.macro	__uaccess_ttbr0_disable, tmp1
+	mrs	\tmp1, ttbr1_el1		// swapper_pg_dir
+	bic     \tmp1, \tmp1, #TTBR_ASID_MASK
+	add	\tmp1, \tmp1, #SWAPPER_DIR_SIZE	// reserved_ttbr0 at the end of swapper_pg_dir
+	msr	ttbr0_el1, \tmp1		// set reserved TTBR0_EL1
+	isb
+	sub     \tmp1, \tmp1, #SWAPPER_DIR_SIZE
+	msr     ttbr1_el1, \tmp1                // set reserved ASID
+	isb
+	.endm
+
+	.macro	__uaccess_ttbr0_enable, tmp1, tmp2
+	get_thread_info \tmp1
+	ldr	\tmp1, [\tmp1, #TSK_TI_TTBR0]	// load saved TTBR0_EL1
+	mrs     \tmp2, ttbr1_el1
+	extr    \tmp2, \tmp2, \tmp1, #48
+	ror     \tmp2, \tmp2, #16
+	msr     ttbr1_el1, \tmp2                // set the active ASID
+	isb
+	msr	ttbr0_el1, \tmp1		// set the non-PAN TTBR0_EL1
+	isb
+	.endm
+
+	.macro	uaccess_ttbr0_disable, tmp1, tmp2
+alternative_if_not ARM64_HAS_PAN
+	save_and_disable_irq \tmp2		// avoid preemption
+	__uaccess_ttbr0_disable \tmp1
+	restore_irq \tmp2
+alternative_else_nop_endif
+	.endm
+
+	.macro	uaccess_ttbr0_enable, tmp1, tmp2, tmp3
+alternative_if_not ARM64_HAS_PAN
+	save_and_disable_irq \tmp3		// avoid preemption
+	__uaccess_ttbr0_enable \tmp1, \tmp2
+	restore_irq \tmp3
+alternative_else_nop_endif
+	.endm
+#else
+	.macro	uaccess_ttbr0_disable, tmp1, tmp2
+	.endm
+
+	.macro	uaccess_ttbr0_enable, tmp1, tmp2, tmp3
+	.endm
+#endif
+
+/*
+ * These macros are no-ops when UAO is present.
+ */
+	.macro	uaccess_disable_not_uao, tmp1, tmp2
+	uaccess_ttbr0_disable \tmp1, \tmp2
+alternative_if ARM64_ALT_PAN_NOT_UAO
+	SET_PSTATE_PAN(1)
+alternative_else_nop_endif
+	.endm
+
+	.macro	uaccess_enable_not_uao, tmp1, tmp2, tmp3
+	uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3
+alternative_if ARM64_ALT_PAN_NOT_UAO
+	SET_PSTATE_PAN(0)
+alternative_else_nop_endif
+	.endm
+
+#endif	/* __ASSEMBLY__ */
+
 #endif /* __ASM_UACCESS_H */
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index c0ede23..29d2ad8 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -14,7 +14,6 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 
-#include <asm/alternative.h>
 #include <asm/cpufeature.h>
 #include <asm/insn.h>
 #include <asm/opcodes.h>
@@ -285,10 +284,10 @@
 #define __SWP_LL_SC_LOOPS	4
 
 #define __user_swpX_asm(data, addr, res, temp, temp2, B)	\
+do {								\
+	uaccess_enable();					\
 	__asm__ __volatile__(					\
 	"	mov		%w3, %w7\n"			\
-	ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,	\
-		    CONFIG_ARM64_PAN)				\
 	"0:	ldxr"B"		%w2, [%4]\n"			\
 	"1:	stxr"B"		%w0, %w1, [%4]\n"		\
 	"	cbz		%w0, 2f\n"			\
@@ -306,13 +305,13 @@
 	"	.popsection"					\
 	_ASM_EXTABLE(0b, 4b)					\
 	_ASM_EXTABLE(1b, 4b)					\
-	ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,	\
-		CONFIG_ARM64_PAN)				\
 	: "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2)	\
 	: "r" ((unsigned long)addr), "i" (-EAGAIN),		\
 	  "i" (-EFAULT),					\
 	  "i" (__SWP_LL_SC_LOOPS)				\
-	: "memory")
+	: "memory");						\
+	uaccess_disable();					\
+} while (0)
 
 #define __user_swp_asm(data, addr, res, temp, temp2) \
 	__user_swpX_asm(data, addr, res, temp, temp2, "")
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index bd239b1..ee768f4 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -42,6 +42,9 @@
   DEFINE(TI_ADDR_LIMIT,		offsetof(struct thread_info, addr_limit));
   DEFINE(TI_TASK,		offsetof(struct thread_info, task));
   DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+  DEFINE(TSK_TI_TTBR0,		offsetof(struct thread_info, ttbr0));
+#endif
   BLANK();
   DEFINE(THREAD_CPU_CONTEXT,	offsetof(struct task_struct, thread.cpu_context));
   BLANK();
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ca978d7..35d6747 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -32,7 +32,9 @@
 #include <asm/memory.h>
 #include <asm/mmu.h>
 #include <asm/processor.h>
+#include <asm/ptrace.h>
 #include <asm/thread_info.h>
+#include <asm/uaccess.h>
 #include <asm/asm-uaccess.h>
 #include <asm/unistd.h>
 #include <asm/kernel-pgtable.h>
@@ -164,6 +166,32 @@
 	mrs	x22, elr_el1
 	mrs	x23, spsr_el1
 	stp	lr, x21, [sp, #S_LR]
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+	/*
+	 * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
+	 * EL0, there is no need to check the state of TTBR0_EL1 since
+	 * accesses are always enabled.
+	 * Note that the meaning of this bit differs from the ARMv8.1 PAN
+	 * feature as all TTBR0_EL1 accesses are disabled, not just those to
+	 * user mappings.
+	 */
+alternative_if ARM64_HAS_PAN
+	b	1f				// skip TTBR0 PAN
+alternative_else_nop_endif
+
+	.if	\el != 0
+	mrs	x21, ttbr0_el1
+	tst	x21, #TTBR_ASID_MASK		// Check for the reserved ASID
+	orr	x23, x23, #PSR_PAN_BIT		// Set the emulated PAN in the saved SPSR
+	b.eq	1f				// TTBR0 access already disabled
+	and	x23, x23, #~PSR_PAN_BIT		// Clear the emulated PAN in the saved SPSR
+	.endif
+
+	__uaccess_ttbr0_disable x21
+1:
+#endif
+
 	stp	x22, x23, [sp, #S_PC]
 
 	/*
@@ -202,6 +230,40 @@
 	ldp	x21, x22, [sp, #S_PC]		// load ELR, SPSR
 	.if	\el == 0
 	ct_user_enter
+	.endif
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+	/*
+	 * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
+	 * PAN bit checking.
+	 */
+alternative_if ARM64_HAS_PAN
+	b	2f				// skip TTBR0 PAN
+alternative_else_nop_endif
+
+	.if	\el != 0
+	tbnz	x22, #22, 1f			// Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
+	.endif
+
+	__uaccess_ttbr0_enable x0, x1
+
+	.if	\el == 0
+	/*
+	 * Enable errata workarounds only if returning to user. The only
+	 * workaround currently required for TTBR0_EL1 changes are for the
+	 * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
+	 * corruption).
+	 */
+	bl	post_ttbr_update_workaround
+	.endif
+1:
+	.if	\el != 0
+	and	x22, x22, #~PSR_PAN_BIT		// ARMv8.0 CPUs do not understand this bit
+	.endif
+2:
+#endif
+
+	.if	\el == 0
 	ldr	x23, [sp, #S_SP]		// load return stack pointer
 	msr	sp_el0, x23
 	tst	x22, #PSR_MODE32_BIT		// native task?
@@ -221,6 +283,7 @@
 	apply_ssbd 0, 5f, x0, x1
 5:
 	.endif
+
 	msr	elr_el1, x21			// set up the return data
 	msr	spsr_el1, x22
 	ldp	x0, x1, [sp, #16 * 0]
@@ -257,10 +320,6 @@
 	.endif
 	.endm
 
-	.macro	get_thread_info, rd
-	mrs	\rd, sp_el0
-	.endm
-
 	.macro	irq_stack_entry
 	mov	x19, sp			// preserve the original sp
 
@@ -891,14 +950,24 @@
 
 	.macro tramp_map_kernel, tmp
 	mrs	\tmp, ttbr1_el1
-	sub	\tmp, \tmp, #SWAPPER_DIR_SIZE
+	sub	\tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
 	bic	\tmp, \tmp, #USER_ASID_FLAG
 	msr	ttbr1_el1, \tmp
+#ifdef CONFIG_ARCH_MSM8996
+	/* ASID already in \tmp[63:48] */
+	movk	\tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12)
+	movk	\tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12)
+	/* 2MB boundary containing the vectors, so we nobble the walk cache */
+	movk	\tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12)
+	isb
+	tlbi	vae1, \tmp
+	dsb	nsh
+#endif /* CONFIG_ARCH_MSM8996 */
 	.endm
 
 	.macro tramp_unmap_kernel, tmp
 	mrs	\tmp, ttbr1_el1
-	add	\tmp, \tmp, #SWAPPER_DIR_SIZE
+	add	\tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
 	orr	\tmp, \tmp, #USER_ASID_FLAG
 	msr	ttbr1_el1, \tmp
 	/*
@@ -925,7 +994,9 @@
 	tramp_map_kernel	x30
 #ifdef CONFIG_RANDOMIZE_BASE
 	adr	x30, tramp_vectors + PAGE_SIZE
+#ifndef CONFIG_ARCH_MSM8996
 	isb
+#endif
 	ldr	x30, [x30]
 #else
 	ldr	x30, =vectors
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index db6ff19..514954b 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -326,14 +326,14 @@
 	 * dirty cache lines being evicted.
 	 */
 	adrp	x0, idmap_pg_dir
-	adrp	x1, swapper_pg_dir + SWAPPER_DIR_SIZE
+	adrp	x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
 	bl	__inval_cache_range
 
 	/*
 	 * Clear the idmap and swapper page tables.
 	 */
 	adrp	x0, idmap_pg_dir
-	adrp	x6, swapper_pg_dir + SWAPPER_DIR_SIZE
+	adrp	x6, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
 1:	stp	xzr, xzr, [x0], #16
 	stp	xzr, xzr, [x0], #16
 	stp	xzr, xzr, [x0], #16
@@ -412,7 +412,7 @@
 	 * tables again to remove any speculatively loaded cache lines.
 	 */
 	adrp	x0, idmap_pg_dir
-	adrp	x1, swapper_pg_dir + SWAPPER_DIR_SIZE
+	adrp	x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
 	dmb	sy
 	bl	__inval_cache_range
 
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 0b9e5f6..fb0082a 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -318,9 +318,21 @@
 	case ARM_BREAKPOINT_LEN_2:
 		len_in_bytes = 2;
 		break;
+	case ARM_BREAKPOINT_LEN_3:
+		len_in_bytes = 3;
+		break;
 	case ARM_BREAKPOINT_LEN_4:
 		len_in_bytes = 4;
 		break;
+	case ARM_BREAKPOINT_LEN_5:
+		len_in_bytes = 5;
+		break;
+	case ARM_BREAKPOINT_LEN_6:
+		len_in_bytes = 6;
+		break;
+	case ARM_BREAKPOINT_LEN_7:
+		len_in_bytes = 7;
+		break;
 	case ARM_BREAKPOINT_LEN_8:
 		len_in_bytes = 8;
 		break;
@@ -350,7 +362,7 @@
  * to generic breakpoint descriptions.
  */
 int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
-			   int *gen_len, int *gen_type)
+			   int *gen_len, int *gen_type, int *offset)
 {
 	/* Type */
 	switch (ctrl.type) {
@@ -370,17 +382,33 @@
 		return -EINVAL;
 	}
 
+	if (!ctrl.len)
+		return -EINVAL;
+	*offset = __ffs(ctrl.len);
+
 	/* Len */
-	switch (ctrl.len) {
+	switch (ctrl.len >> *offset) {
 	case ARM_BREAKPOINT_LEN_1:
 		*gen_len = HW_BREAKPOINT_LEN_1;
 		break;
 	case ARM_BREAKPOINT_LEN_2:
 		*gen_len = HW_BREAKPOINT_LEN_2;
 		break;
+	case ARM_BREAKPOINT_LEN_3:
+		*gen_len = HW_BREAKPOINT_LEN_3;
+		break;
 	case ARM_BREAKPOINT_LEN_4:
 		*gen_len = HW_BREAKPOINT_LEN_4;
 		break;
+	case ARM_BREAKPOINT_LEN_5:
+		*gen_len = HW_BREAKPOINT_LEN_5;
+		break;
+	case ARM_BREAKPOINT_LEN_6:
+		*gen_len = HW_BREAKPOINT_LEN_6;
+		break;
+	case ARM_BREAKPOINT_LEN_7:
+		*gen_len = HW_BREAKPOINT_LEN_7;
+		break;
 	case ARM_BREAKPOINT_LEN_8:
 		*gen_len = HW_BREAKPOINT_LEN_8;
 		break;
@@ -424,9 +452,21 @@
 	case HW_BREAKPOINT_LEN_2:
 		info->ctrl.len = ARM_BREAKPOINT_LEN_2;
 		break;
+	case HW_BREAKPOINT_LEN_3:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_3;
+		break;
 	case HW_BREAKPOINT_LEN_4:
 		info->ctrl.len = ARM_BREAKPOINT_LEN_4;
 		break;
+	case HW_BREAKPOINT_LEN_5:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_5;
+		break;
+	case HW_BREAKPOINT_LEN_6:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_6;
+		break;
+	case HW_BREAKPOINT_LEN_7:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_7;
+		break;
 	case HW_BREAKPOINT_LEN_8:
 		info->ctrl.len = ARM_BREAKPOINT_LEN_8;
 		break;
@@ -518,18 +558,17 @@
 		default:
 			return -EINVAL;
 		}
-
-		info->address &= ~alignment_mask;
-		info->ctrl.len <<= offset;
 	} else {
 		if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE)
 			alignment_mask = 0x3;
 		else
 			alignment_mask = 0x7;
-		if (info->address & alignment_mask)
-			return -EINVAL;
+		offset = info->address & alignment_mask;
 	}
 
+	info->address &= ~alignment_mask;
+	info->ctrl.len <<= offset;
+
 	/*
 	 * Disallow per-task kernel breakpoints since these would
 	 * complicate the stepping code.
@@ -662,12 +701,47 @@
 }
 NOKPROBE_SYMBOL(breakpoint_handler);
 
+/*
+ * Arm64 hardware does not always report a watchpoint hit address that matches
+ * one of the watchpoints set. It can also report an address "near" the
+ * watchpoint if a single instruction access both watched and unwatched
+ * addresses. There is no straight-forward way, short of disassembling the
+ * offending instruction, to map that address back to the watchpoint. This
+ * function computes the distance of the memory access from the watchpoint as a
+ * heuristic for the likelyhood that a given access triggered the watchpoint.
+ *
+ * See Section D2.10.5 "Determining the memory location that caused a Watchpoint
+ * exception" of ARMv8 Architecture Reference Manual for details.
+ *
+ * The function returns the distance of the address from the bytes watched by
+ * the watchpoint. In case of an exact match, it returns 0.
+ */
+static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
+					struct arch_hw_breakpoint_ctrl *ctrl)
+{
+	u64 wp_low, wp_high;
+	u32 lens, lene;
+
+	lens = __ffs(ctrl->len);
+	lene = __fls(ctrl->len);
+
+	wp_low = val + lens;
+	wp_high = val + lene;
+	if (addr < wp_low)
+		return wp_low - addr;
+	else if (addr > wp_high)
+		return addr - wp_high;
+	else
+		return 0;
+}
+
 static int watchpoint_handler(unsigned long addr, unsigned int esr,
 			      struct pt_regs *regs)
 {
-	int i, step = 0, *kernel_step, access;
+	int i, step = 0, *kernel_step, access, closest_match = 0;
+	u64 min_dist = -1, dist;
 	u32 ctrl_reg;
-	u64 val, alignment_mask;
+	u64 val;
 	struct perf_event *wp, **slots;
 	struct debug_info *debug_info;
 	struct arch_hw_breakpoint *info;
@@ -676,35 +750,15 @@
 	slots = this_cpu_ptr(wp_on_reg);
 	debug_info = &current->thread.debug;
 
+	/*
+	 * Find all watchpoints that match the reported address. If no exact
+	 * match is found. Attribute the hit to the closest watchpoint.
+	 */
+	rcu_read_lock();
 	for (i = 0; i < core_num_wrps; ++i) {
-		rcu_read_lock();
-
 		wp = slots[i];
-
 		if (wp == NULL)
-			goto unlock;
-
-		info = counter_arch_bp(wp);
-		/* AArch32 watchpoints are either 4 or 8 bytes aligned. */
-		if (is_compat_task()) {
-			if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
-				alignment_mask = 0x7;
-			else
-				alignment_mask = 0x3;
-		} else {
-			alignment_mask = 0x7;
-		}
-
-		/* Check if the watchpoint value matches. */
-		val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
-		if (val != (untagged_addr(addr) & ~alignment_mask))
-			goto unlock;
-
-		/* Possible match, check the byte address select to confirm. */
-		ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
-		decode_ctrl_reg(ctrl_reg, &ctrl);
-		if (!((1 << (addr & alignment_mask)) & ctrl.len))
-			goto unlock;
+			continue;
 
 		/*
 		 * Check that the access type matches.
@@ -713,18 +767,41 @@
 		access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W :
 			 HW_BREAKPOINT_R;
 		if (!(access & hw_breakpoint_type(wp)))
-			goto unlock;
+			continue;
 
+		/* Check if the watchpoint value and byte select match. */
+		val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
+		ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
+		decode_ctrl_reg(ctrl_reg, &ctrl);
+		dist = get_distance_from_watchpoint(addr, val, &ctrl);
+		if (dist < min_dist) {
+			min_dist = dist;
+			closest_match = i;
+		}
+		/* Is this an exact match? */
+		if (dist != 0)
+			continue;
+
+		info = counter_arch_bp(wp);
 		info->trigger = addr;
 		perf_bp_event(wp, regs);
 
 		/* Do we need to handle the stepping? */
 		if (is_default_overflow_handler(wp))
 			step = 1;
-
-unlock:
-		rcu_read_unlock();
 	}
+	if (min_dist > 0 && min_dist != -1) {
+		/* No exact match found. */
+		wp = slots[closest_match];
+		info = counter_arch_bp(wp);
+		info->trigger = addr;
+		perf_bp_event(wp, regs);
+
+		/* Do we need to handle the stepping? */
+		if (is_default_overflow_handler(wp))
+			step = 1;
+	}
+	rcu_read_unlock();
 
 	if (!step)
 		return 0;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index e917d11..d3e6f1b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -166,6 +166,70 @@
 	while (1);
 }
 
+/*
+ * dump a block of kernel memory from around the given address
+ */
+static void show_data(unsigned long addr, int nbytes, const char *name)
+{
+	int	i, j;
+	int	nlines;
+	u32	*p;
+
+	/*
+	 * don't attempt to dump non-kernel addresses or
+	 * values that are probably just small negative numbers
+	 */
+	if (addr < PAGE_OFFSET || addr > -256UL)
+		return;
+
+	printk("\n%s: %#lx:\n", name, addr);
+
+	/*
+	 * round address down to a 32 bit boundary
+	 * and always dump a multiple of 32 bytes
+	 */
+	p = (u32 *)(addr & ~(sizeof(u32) - 1));
+	nbytes += (addr & (sizeof(u32) - 1));
+	nlines = (nbytes + 31) / 32;
+
+
+	for (i = 0; i < nlines; i++) {
+		/*
+		 * just display low 16 bits of address to keep
+		 * each line of the dump < 80 characters
+		 */
+		printk("%04lx ", (unsigned long)p & 0xffff);
+		for (j = 0; j < 8; j++) {
+			u32	data;
+			if (probe_kernel_address(p, data)) {
+				printk(" ********");
+			} else {
+				printk(" %08x", data);
+			}
+			++p;
+		}
+		printk("\n");
+	}
+}
+
+static void show_extra_register_data(struct pt_regs *regs, int nbytes)
+{
+	mm_segment_t fs;
+	unsigned int i;
+
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	show_data(regs->pc - nbytes, nbytes * 2, "PC");
+	show_data(regs->regs[30] - nbytes, nbytes * 2, "LR");
+	show_data(regs->sp - nbytes, nbytes * 2, "SP");
+	for (i = 0; i < 30; i++) {
+		char name[4];
+		snprintf(name, sizeof(name), "X%u", i);
+		show_data(regs->regs[i] - nbytes, nbytes * 2, name);
+	}
+	set_fs(fs);
+}
+
 void __show_regs(struct pt_regs *regs)
 {
 	int i, top_reg;
@@ -201,6 +265,8 @@
 
 		pr_cont("\n");
 	}
+	if (!user_mode(regs))
+		show_extra_register_data(regs, 128);
 	printk("\n");
 }
 
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 8eedeef..a22161c 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -327,13 +327,13 @@
 				     struct arch_hw_breakpoint_ctrl ctrl,
 				     struct perf_event_attr *attr)
 {
-	int err, len, type, disabled = !ctrl.enabled;
+	int err, len, type, offset, disabled = !ctrl.enabled;
 
 	attr->disabled = disabled;
 	if (disabled)
 		return 0;
 
-	err = arch_bp_generic_fields(ctrl, &len, &type);
+	err = arch_bp_generic_fields(ctrl, &len, &type, &offset);
 	if (err)
 		return err;
 
@@ -352,6 +352,7 @@
 
 	attr->bp_len	= len;
 	attr->bp_type	= type;
+	attr->bp_addr	+= offset;
 
 	return 0;
 }
@@ -404,7 +405,7 @@
 	if (IS_ERR(bp))
 		return PTR_ERR(bp);
 
-	*addr = bp ? bp->attr.bp_addr : 0;
+	*addr = bp ? counter_arch_bp(bp)->address : 0;
 	return 0;
 }
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f534f492..f7545a7 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -291,6 +291,15 @@
 	smp_init_cpus();
 	smp_build_mpidr_hash();
 
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+	/*
+	 * Make sure init_thread_info.ttbr0 always generates translation
+	 * faults in case uaccess_enable() is inadvertently called by the init
+	 * thread.
+	 */
+	init_thread_info.ttbr0 = virt_to_phys(empty_zero_page);
+#endif
+
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
 	conswitchp = &vga_con;
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 694f6de..7758f7f 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -19,10 +19,30 @@
 #include <linux/nodemask.h>
 #include <linux/of.h>
 #include <linux/sched.h>
+#include <linux/sched.h>
+#include <linux/sched_energy.h>
 
 #include <asm/cputype.h>
 #include <asm/topology.h>
 
+static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+
+unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+#ifdef CONFIG_CPU_FREQ
+	unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu);
+
+	return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT;
+#else
+	return per_cpu(cpu_scale, cpu);
+#endif
+}
+
+static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
+{
+	per_cpu(cpu_scale, cpu) = capacity;
+}
+
 static int __init get_cpu_for_node(struct device_node *node)
 {
 	struct device_node *cpu_node;
@@ -206,11 +226,72 @@
 struct cpu_topology cpu_topology[NR_CPUS];
 EXPORT_SYMBOL_GPL(cpu_topology);
 
+/* sd energy functions */
+static inline
+const struct sched_group_energy * const cpu_cluster_energy(int cpu)
+{
+	struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL1];
+
+	if (!sge) {
+		pr_warn("Invalid sched_group_energy for Cluster%d\n", cpu);
+		return NULL;
+	}
+
+	return sge;
+}
+
+static inline
+const struct sched_group_energy * const cpu_core_energy(int cpu)
+{
+	struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL0];
+
+	if (!sge) {
+		pr_warn("Invalid sched_group_energy for CPU%d\n", cpu);
+		return NULL;
+	}
+
+	return sge;
+}
+
 const struct cpumask *cpu_coregroup_mask(int cpu)
 {
 	return &cpu_topology[cpu].core_sibling;
 }
 
+static int cpu_cpu_flags(void)
+{
+	return SD_ASYM_CPUCAPACITY;
+}
+
+static inline int cpu_corepower_flags(void)
+{
+	return SD_SHARE_PKG_RESOURCES  | SD_SHARE_POWERDOMAIN | \
+	       SD_SHARE_CAP_STATES;
+}
+
+static struct sched_domain_topology_level arm64_topology[] = {
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) },
+#endif
+	{ cpu_cpu_mask, cpu_cpu_flags, cpu_cluster_energy, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
+static void update_cpu_capacity(unsigned int cpu)
+{
+	unsigned long capacity = SCHED_CAPACITY_SCALE;
+
+	if (cpu_core_energy(cpu)) {
+		int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1;
+		capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap;
+	}
+
+	set_capacity_scale(cpu, capacity);
+
+	pr_info("CPU%d: update cpu_capacity %lu\n",
+		cpu, arch_scale_cpu_capacity(NULL, cpu));
+}
+
 static void update_siblings_masks(unsigned int cpuid)
 {
 	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
@@ -272,6 +353,7 @@
 
 topology_populated:
 	update_siblings_masks(cpuid);
+	update_cpu_capacity(cpuid);
 }
 
 static void __init reset_cpu_topology(void)
@@ -302,4 +384,8 @@
 	 */
 	if (of_have_populated_dt() && parse_dt_topology())
 		reset_cpu_topology();
+	else
+		set_sched_topology(arm64_topology);
+
+	init_sched_energy_costs();
 }
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 28bef94..0dab4af 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -33,6 +33,7 @@
 #include <linux/syscalls.h>
 
 #include <asm/atomic.h>
+#include <asm/barrier.h>
 #include <asm/bug.h>
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
@@ -439,9 +440,10 @@
 }
 
 #define __user_cache_maint(insn, address, res)			\
-	if (address >= user_addr_max())				\
+	if (address >= user_addr_max()) {			\
 		res = -EFAULT;					\
-	else							\
+	} else {						\
+		uaccess_ttbr0_enable();				\
 		asm volatile (					\
 			"1:	" insn ", %1\n"			\
 			"	mov	%w0, #0\n"		\
@@ -453,7 +455,9 @@
 			"	.popsection\n"			\
 			_ASM_EXTABLE(1b, 3b)			\
 			: "=r" (res)				\
-			: "r" (address), "i" (-EFAULT) )
+			: "r" (address), "i" (-EFAULT));	\
+		uaccess_ttbr0_disable();			\
+	}
 
 static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 {
@@ -496,6 +500,25 @@
 	regs->pc += 4;
 }
 
+static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
+{
+	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+
+	isb();
+	if (rt != 31)
+		regs->regs[rt] = arch_counter_get_cntvct();
+	regs->pc += 4;
+}
+
+static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
+{
+	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+
+	if (rt != 31)
+		regs->regs[rt] = read_sysreg(cntfrq_el0);
+	regs->pc += 4;
+}
+
 struct sys64_hook {
 	unsigned int esr_mask;
 	unsigned int esr_val;
@@ -514,6 +537,18 @@
 		.esr_val = ESR_ELx_SYS64_ISS_SYS_CTR_READ,
 		.handler = ctr_read_handler,
 	},
+	{
+		/* Trap read access to CNTVCT_EL0 */
+		.esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
+		.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCT,
+		.handler = cntvct_read_handler,
+	},
+	{
+		/* Trap read access to CNTFRQ_EL0 */
+		.esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
+		.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ,
+		.handler = cntfrq_read_handler,
+	},
 	{},
 };
 
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 6a58455..34d3ed6 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -228,6 +228,11 @@
 	swapper_pg_dir = .;
 	. += SWAPPER_DIR_SIZE;
 
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+	reserved_ttbr0 = .;
+	. += RESERVED_TTBR0_SIZE;
+#endif
+
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 	tramp_pg_dir = .;
 	. += PAGE_SIZE;
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index efbf610e..b581e16 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -17,10 +17,7 @@
  */
 #include <linux/linkage.h>
 
-#include <asm/alternative.h>
-#include <asm/assembler.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
+#include <asm/uaccess.h>
 
 	.text
 
@@ -33,8 +30,7 @@
  * Alignment fixed up by hardware.
  */
 ENTRY(__arch_clear_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
-	    CONFIG_ARM64_PAN)
+	uaccess_enable_not_uao x2, x3, x4
 	mov	x2, x1			// save the size for fixup return
 	subs	x1, x1, #8
 	b.mi	2f
@@ -54,8 +50,7 @@
 	b.mi	5f
 uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
 5:	mov	x0, #0
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
-	    CONFIG_ARM64_PAN)
+	uaccess_disable_not_uao x2, x3
 	ret
 ENDPROC(__arch_clear_user)
 
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 4fd67ea..c7a7d96 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -16,11 +16,8 @@
 
 #include <linux/linkage.h>
 
-#include <asm/alternative.h>
-#include <asm/assembler.h>
 #include <asm/cache.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
+#include <asm/uaccess.h>
 
 /*
  * Copy from user space to a kernel buffer (alignment handled by the hardware)
@@ -67,12 +64,10 @@
 
 end	.req	x5
 ENTRY(__arch_copy_from_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
-	    CONFIG_ARM64_PAN)
+	uaccess_enable_not_uao x3, x4, x5
 	add	end, x0, x2
 #include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
-	    CONFIG_ARM64_PAN)
+	uaccess_disable_not_uao x3, x4
 	mov	x0, #0				// Nothing to copy
 	ret
 ENDPROC(__arch_copy_from_user)
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 841bf8f..800779e 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -18,11 +18,8 @@
 
 #include <linux/linkage.h>
 
-#include <asm/alternative.h>
-#include <asm/assembler.h>
 #include <asm/cache.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
+#include <asm/uaccess.h>
 
 /*
  * Copy from user space to user space (alignment handled by the hardware)
@@ -68,12 +65,10 @@
 
 end	.req	x5
 ENTRY(__arch_copy_in_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
-	    CONFIG_ARM64_PAN)
+	uaccess_enable_not_uao x3, x4, x5
 	add	end, x0, x2
 #include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
-	    CONFIG_ARM64_PAN)
+	uaccess_disable_not_uao x3, x4
 	mov	x0, #0
 	ret
 ENDPROC(__arch_copy_in_user)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 7a7efe2..f6cfcc0 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -16,11 +16,8 @@
 
 #include <linux/linkage.h>
 
-#include <asm/alternative.h>
-#include <asm/assembler.h>
 #include <asm/cache.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
+#include <asm/uaccess.h>
 
 /*
  * Copy to user space from a kernel buffer (alignment handled by the hardware)
@@ -66,12 +63,10 @@
 
 end	.req	x5
 ENTRY(__arch_copy_to_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
-	    CONFIG_ARM64_PAN)
+	uaccess_enable_not_uao x3, x4, x5
 	add	end, x0, x2
 #include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
-	    CONFIG_ARM64_PAN)
+	uaccess_disable_not_uao x3, x4
 	mov	x0, #0
 	ret
 ENDPROC(__arch_copy_to_user)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 58b5a90..82ecb6c 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -23,6 +23,7 @@
 #include <asm/assembler.h>
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
+#include <asm/uaccess.h>
 
 /*
  *	flush_icache_range(start,end)
@@ -48,6 +49,7 @@
  *	- end     - virtual end address of region
  */
 ENTRY(__flush_cache_user_range)
+	uaccess_ttbr0_enable x2, x3, x4
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	bic	x4, x0, x3
@@ -69,10 +71,12 @@
 	dsb	ish
 	isb
 	mov	x0, #0
+1:
+	uaccess_ttbr0_disable x1, x2
 	ret
 9:
 	mov	x0, #-EFAULT
-	ret
+	b	1b
 ENDPROC(flush_icache_range)
 ENDPROC(__flush_cache_user_range)
 
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 62d976e..c841cce 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -233,7 +233,12 @@
 
 	arm64_apply_bp_hardening();
 
-	cpu_switch_mm(mm->pgd, mm);
+	/*
+	 * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when
+	 * emulating PAN.
+	 */
+	if (!system_uses_ttbr0_pan())
+		cpu_switch_mm(mm->pgd, mm);
 }
 
 /* Errata workaround post TTBRx_EL1 update. */
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index cab3574..6dda5ff 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -174,7 +174,7 @@
 	/* create a coherent mapping */
 	page = virt_to_page(ptr);
 	coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
-						   prot, NULL);
+						   prot, __builtin_return_address(0));
 	if (!coherent_ptr)
 		goto no_map;
 
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 051b320..76eefbc 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -286,13 +286,19 @@
 	return fault;
 }
 
-static inline bool is_permission_fault(unsigned int esr)
+static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs)
 {
 	unsigned int ec       = ESR_ELx_EC(esr);
 	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
 
-	return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM) ||
-	       (ec == ESR_ELx_EC_IABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+	if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
+		return false;
+
+	if (system_uses_ttbr0_pan())
+		return fsc_type == ESR_ELx_FSC_FAULT &&
+			(regs->pstate & PSR_PAN_BIT);
+	else
+		return fsc_type == ESR_ELx_FSC_PERM;
 }
 
 static bool is_el0_instruction_abort(unsigned int esr)
@@ -332,7 +338,7 @@
 		mm_flags |= FAULT_FLAG_WRITE;
 	}
 
-	if (is_permission_fault(esr) && (addr < TASK_SIZE)) {
+	if (addr < TASK_SIZE && is_permission_fault(esr, regs)) {
 		/* regs->orig_addr_limit may be 0 if we entered from EL0 */
 		if (regs->orig_addr_limit == KERNEL_DS)
 			die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 18d96d3..d8fd0d0 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -134,6 +134,9 @@
 ENTRY(cpu_do_switch_mm)
 	mrs	x2, ttbr1_el1
 	mmid	x1, x1				// get mm->context.id
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+	bfi	x0, x1, #48, #16		// set the ASID field in TTBR0
+#endif
 	bfi	x2, x1, #48, #16		// set the ASID
 	msr	ttbr1_el1, x2			// in TTBR1 (since TCR.A1 is set)
 	isb
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index 329c802..69711f2 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -49,6 +49,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/uaccess.h>
 #include <xen/interface/xen.h>
 
 
@@ -91,6 +92,20 @@
 	mov x2, x3
 	mov x3, x4
 	mov x4, x5
+	/*
+	 * Privcmd calls are issued by the userspace. The kernel needs to
+	 * enable access to TTBR0_EL1 as the hypervisor would issue stage 1
+	 * translations to user memory via AT instructions. Since AT
+	 * instructions are not affected by the PAN bit (ARMv8.1), we only
+	 * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
+	 * is enabled (it implies that hardware UAO and PAN disabled).
+	 */
+	uaccess_ttbr0_enable x6, x7, x8
 	hvc XEN_IMM
+
+	/*
+	 * Disable userspace access from kernel once the hyp call completed.
+	 */
+	uaccess_ttbr0_disable x6, x7
 	ret
 ENDPROC(privcmd_call);
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 1fd147f..5f10f9b 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -90,4 +90,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_COOKIE		57
+
 #endif /* _UAPI__ASM_AVR32_SOCKET_H */
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index afbc98f0..ed960d3 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -90,5 +90,7 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_COOKIE		57
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 0018fad..9790d13 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -99,4 +99,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_COOKIE		57
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 5fe42fc..ad25676 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -90,4 +90,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_COOKIE		57
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 2027240a..2f106d0 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -108,4 +108,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_COOKIE		57
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index 5129f23..69f9618 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -90,4 +90,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_COOKIE		57
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 9c935d717d..b96a193 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -89,4 +89,6 @@
 
 #define SO_CNX_ADVICE		0x402E
 
+#define SO_COOKIE		0x4032
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index 1672e33..e78550f 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -97,4 +97,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_COOKIE		57
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 41b51c2..04fe908 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -96,4 +96,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_COOKIE		57
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 31aede3..de15f0a 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -86,6 +86,8 @@
 
 #define SO_CNX_ADVICE		0x0037
 
+#define SO_COOKIE		0x003b
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b5226a0..93b170e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -110,6 +110,8 @@
         KBUILD_CFLAGS += $(call cc-option,-mno-80387)
         KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387)
 
+        KBUILD_CFLAGS += -fno-pic
+
         # By default gcc and clang use a stack alignment of 16 bytes for x86.
         # However the standard kernel entry on x86-64 leaves the stack on an
         # 8-byte boundary. If the compiler isn't informed about the actual
diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig
new file mode 100644
index 0000000..a1c83c4
--- /dev/null
+++ b/arch/x86/configs/i386_ranchu_defconfig
@@ -0,0 +1,424 @@
+# CONFIG_64BIT is not set
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_ARCH_MMAP_RND_BITS=16
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_SMP=y
+CONFIG_X86_BIGSMP=y
+CONFIG_MCORE2=y
+CONFIG_X86_GENERIC=y
+CONFIG_HPET_TIMER=y
+CONFIG_NR_CPUS=512
+CONFIG_PREEMPT=y
+# CONFIG_X86_MCE is not set
+CONFIG_X86_REBOOTFIXUPS=y
+CONFIG_X86_MSR=y
+CONFIG_X86_CPUID=y
+CONFIG_KSM=y
+CONFIG_CMA=y
+# CONFIG_MTRR_SANITIZER is not set
+CONFIG_EFI=y
+CONFIG_EFI_STUB=y
+CONFIG_HZ_100=y
+CONFIG_PHYSICAL_START=0x100000
+CONFIG_PM_AUTOSLEEP=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_DEBUG=y
+CONFIG_CPU_FREQ=y
+# CONFIG_CPU_FREQ_STAT is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_PCIEPORTBUS=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCCARD=y
+CONFIG_YENTA=y
+CONFIG_HOTPLUG_PCI=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG_DESTROY=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_CFG80211=y
+CONFIG_MAC80211=y
+CONFIG_MAC80211_LEDS=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=16
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_ISCSI_ATTRS=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_ATA_PIIX=y
+CONFIG_PATA_AMD=y
+CONFIG_PATA_OLDPIIX=y
+CONFIG_PATA_SCH=y
+CONFIG_PATA_MPIIX=y
+CONFIG_ATA_GENERIC=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_DEBUG=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_MIRROR=y
+CONFIG_DM_ZERO=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_NETDEVICES=y
+CONFIG_NETCONSOLE=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+CONFIG_BNX2=y
+CONFIG_TIGON3=y
+CONFIG_NET_TULIP=y
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_E1000E=y
+CONFIG_SKY2=y
+CONFIG_NE2K_PCI=y
+CONFIG_FORCEDETH=y
+CONFIG_8139TOO=y
+# CONFIG_8139TOO_PIO is not set
+CONFIG_R8169=y
+CONFIG_FDDI=y
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
+CONFIG_USB_USBNET=y
+CONFIG_INPUT_POLLDEV=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_KEYRESET=y
+# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GOLDFISH_EVENTS=y
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_GTCO=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_KEYCHORD=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_GPIO=y
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_DEVMEM is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_NVRAM=y
+CONFIG_I2C_I801=y
+CONFIG_BATTERY_GOLDFISH=y
+CONFIG_WATCHDOG=y
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_AGP=y
+CONFIG_AGP_AMD64=y
+CONFIG_AGP_INTEL=y
+CONFIG_DRM=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_EFI=y
+CONFIG_FB_GOLDFISH=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_PRINTER=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_OTG_WAKELOCK=y
+CONFIG_EDAC=y
+CONFIG_RTC_CLASS=y
+# CONFIG_RTC_HCTOSYS is not set
+CONFIG_DMADEVICES=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
+CONFIG_SYNC_FILE=y
+CONFIG_ION=y
+CONFIG_GOLDFISH_AUDIO=y
+CONFIG_SND_HDA_INTEL=y
+CONFIG_GOLDFISH=y
+CONFIG_GOLDFISH_PIPE=y
+CONFIG_GOLDFISH_SYNC=y
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_ISCSI_IBFT_FIND=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_FUSE_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=2048
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_PANIC_TIMEOUT=5
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
+CONFIG_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_CRYPTO_AES_586=y
+CONFIG_CRYPTO_TWOFISH=y
+CONFIG_ASYMMETRIC_KEY_TYPE=y
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
+CONFIG_X509_CERTIFICATE_PARSER=y
+CONFIG_PKCS7_MESSAGE_PARSER=y
+CONFIG_PKCS7_TEST_KEY=y
+# CONFIG_VIRTUALIZATION is not set
+CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig
new file mode 100644
index 0000000..5b06edd
--- /dev/null
+++ b/arch/x86/configs/x86_64_cuttlefish_defconfig
@@ -0,0 +1,454 @@
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_FHANDLE is not set
+# CONFIG_USELIB is not set
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NAMESPACES=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_LZ4 is not set
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_PCSPKR_PLATFORM is not set
+CONFIG_BPF_SYSCALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_CC_STACKPROTECTOR_STRONG=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_SMP=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_PARAVIRT_SPINLOCKS=y
+CONFIG_MCORE2=y
+CONFIG_PROCESSOR_SELECT=y
+# CONFIG_CPU_SUP_CENTAUR is not set
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT=y
+# CONFIG_MICROCODE is not set
+CONFIG_X86_MSR=y
+CONFIG_X86_CPUID=y
+CONFIG_KSM=y
+CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
+CONFIG_TRANSPARENT_HUGEPAGE=y
+# CONFIG_MTRR is not set
+CONFIG_HZ_100=y
+CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
+CONFIG_PHYSICAL_START=0x200000
+CONFIG_RANDOMIZE_BASE=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 reboot=p nopti"
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_DEBUG=y
+CONFIG_ACPI_PROCFS_POWER=y
+# CONFIG_ACPI_FAN is not set
+# CONFIG_ACPI_THERMAL is not set
+# CONFIG_X86_PM_TIMER is not set
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_X86_ACPI_CPUFREQ=y
+# CONFIG_X86_ACPI_CPUFREQ_CPB is not set
+CONFIG_PCI_MMCONFIG=y
+CONFIG_PCI_MSI=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=y
+CONFIG_IA32_EMULATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+CONFIG_INET_DIAG_DESTROY=y
+CONFIG_TCP_CONG_ADVANCED=y
+# CONFIG_TCP_CONG_BIC is not set
+# CONFIG_TCP_CONG_WESTWOOD is not set
+# CONFIG_TCP_CONG_HTCP is not set
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_NETMAP=y
+CONFIG_IP_NF_TARGET_REDIRECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_MATCH_IPV6HEADER=y
+CONFIG_IP6_NF_MATCH_RPFILTER=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_CFG80211=y
+CONFIG_MAC80211=y
+CONFIG_RFKILL=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEBUG_DEVRES=y
+CONFIG_OF=y
+CONFIG_OF_UNITTEST=y
+# CONFIG_PNP_DEBUG_MESSAGES is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_VIRTIO_BLK=y
+CONFIG_UID_SYS_STATS=y
+CONFIG_MEMORY_STATE_TIME=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_MIRROR=y
+CONFIG_DM_ZERO=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_NETDEVICES=y
+CONFIG_NETCONSOLE=y
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+# CONFIG_ETHERNET is not set
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
+CONFIG_USB_USBNET=y
+# CONFIG_USB_NET_AX8817X is not set
+# CONFIG_USB_NET_AX88179_178A is not set
+# CONFIG_USB_NET_CDCETHER is not set
+# CONFIG_USB_NET_CDC_NCM is not set
+# CONFIG_USB_NET_NET1080 is not set
+# CONFIG_USB_NET_CDC_SUBSET is not set
+# CONFIG_USB_NET_ZAURUS is not set
+# CONFIG_WLAN_VENDOR_ADMTEK is not set
+# CONFIG_WLAN_VENDOR_ATH is not set
+# CONFIG_WLAN_VENDOR_ATMEL is not set
+# CONFIG_WLAN_VENDOR_BROADCOM is not set
+# CONFIG_WLAN_VENDOR_CISCO is not set
+# CONFIG_WLAN_VENDOR_INTEL is not set
+# CONFIG_WLAN_VENDOR_INTERSIL is not set
+# CONFIG_WLAN_VENDOR_MARVELL is not set
+# CONFIG_WLAN_VENDOR_MEDIATEK is not set
+# CONFIG_WLAN_VENDOR_RALINK is not set
+# CONFIG_WLAN_VENDOR_REALTEK is not set
+# CONFIG_WLAN_VENDOR_RSI is not set
+# CONFIG_WLAN_VENDOR_ST is not set
+# CONFIG_WLAN_VENDOR_TI is not set
+# CONFIG_WLAN_VENDOR_ZYDAS is not set
+CONFIG_MAC80211_HWSIM=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_KEYRESET=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_GTCO=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_KEYCHORD=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_GPIO=y
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVMEM is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=48
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_INTEL is not set
+# CONFIG_HW_RANDOM_AMD is not set
+# CONFIG_HW_RANDOM_VIA is not set
+CONFIG_HW_RANDOM_VIRTIO=y
+CONFIG_HPET=y
+# CONFIG_HPET_MMAP_DEFAULT is not set
+# CONFIG_DEVPORT is not set
+# CONFIG_ACPI_I2C_OPREGION is not set
+# CONFIG_I2C_COMPAT is not set
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_PTP_1588_CLOCK=y
+# CONFIG_HWMON is not set
+# CONFIG_X86_PKG_TEMP_THERMAL is not set
+CONFIG_WATCHDOG=y
+CONFIG_SOFT_WATCHDOG=y
+CONFIG_MEDIA_SUPPORT=y
+# CONFIG_DVB_TUNER_DIB0070 is not set
+# CONFIG_DVB_TUNER_DIB0090 is not set
+# CONFIG_VGA_ARB is not set
+CONFIG_DRM=y
+# CONFIG_DRM_FBDEV_EMULATION is not set
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+# CONFIG_HID_GENERIC is not set
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_DUMMY_HCD=y
+CONFIG_USB_CONFIGFS=y
+CONFIG_USB_CONFIGFS_F_FS=y
+CONFIG_USB_CONFIGFS_F_ACC=y
+CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y
+CONFIG_USB_CONFIGFS_UEVENT=y
+CONFIG_USB_CONFIGFS_F_MIDI=y
+CONFIG_RTC_CLASS=y
+# CONFIG_RTC_HCTOSYS is not set
+CONFIG_SW_SYNC=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_VSOC=y
+CONFIG_ION=y
+# CONFIG_X86_PLATFORM_DEVICES is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+# CONFIG_FIRMWARE_MEMMAP is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_ENCRYPTION=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_QFMT_V2=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_FUSE_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_SDCARD_FS=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_PANIC_TIMEOUT=5
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_ENABLE_DEFAULT_TRACERS=y
+CONFIG_IO_DELAY_NONE=y
+CONFIG_DEBUG_BOOT_PARAMS=y
+CONFIG_OPTIMIZE_INLINING=y
+CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_PATH=y
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig
new file mode 100644
index 0000000..d50434f
--- /dev/null
+++ b/arch/x86/configs/x86_64_ranchu_defconfig
@@ -0,0 +1,419 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_ARCH_MMAP_RND_BITS=32
+CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_SMP=y
+CONFIG_MCORE2=y
+CONFIG_MAXSMP=y
+CONFIG_PREEMPT=y
+# CONFIG_X86_MCE is not set
+CONFIG_X86_MSR=y
+CONFIG_X86_CPUID=y
+CONFIG_KSM=y
+CONFIG_CMA=y
+# CONFIG_MTRR_SANITIZER is not set
+CONFIG_EFI=y
+CONFIG_EFI_STUB=y
+CONFIG_HZ_100=y
+CONFIG_PHYSICAL_START=0x100000
+CONFIG_PM_AUTOSLEEP=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_DEBUG=y
+CONFIG_CPU_FREQ=y
+# CONFIG_CPU_FREQ_STAT is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_PCI_MMCONFIG=y
+CONFIG_PCIEPORTBUS=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCCARD=y
+CONFIG_YENTA=y
+CONFIG_HOTPLUG_PCI=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=y
+CONFIG_IA32_EMULATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG_DESTROY=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_CFG80211=y
+CONFIG_MAC80211=y
+CONFIG_MAC80211_LEDS=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DMA_CMA=y
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_ISCSI_ATTRS=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_ATA_PIIX=y
+CONFIG_PATA_AMD=y
+CONFIG_PATA_OLDPIIX=y
+CONFIG_PATA_SCH=y
+CONFIG_PATA_MPIIX=y
+CONFIG_ATA_GENERIC=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_DEBUG=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_MIRROR=y
+CONFIG_DM_ZERO=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_NETDEVICES=y
+CONFIG_NETCONSOLE=y
+CONFIG_TUN=y
+CONFIG_VIRTIO_NET=y
+CONFIG_BNX2=y
+CONFIG_TIGON3=y
+CONFIG_NET_TULIP=y
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_E1000E=y
+CONFIG_SKY2=y
+CONFIG_NE2K_PCI=y
+CONFIG_FORCEDETH=y
+CONFIG_8139TOO=y
+# CONFIG_8139TOO_PIO is not set
+CONFIG_R8169=y
+CONFIG_FDDI=y
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
+CONFIG_USB_USBNET=y
+CONFIG_INPUT_POLLDEV=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_KEYRESET=y
+# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GOLDFISH_EVENTS=y
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_GTCO=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_KEYCHORD=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_GPIO=y
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_DEVMEM is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_NVRAM=y
+CONFIG_I2C_I801=y
+CONFIG_BATTERY_GOLDFISH=y
+CONFIG_WATCHDOG=y
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_AGP=y
+CONFIG_AGP_AMD64=y
+CONFIG_AGP_INTEL=y
+CONFIG_DRM=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_EFI=y
+CONFIG_FB_GOLDFISH=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_MON=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_PRINTER=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_OTG_WAKELOCK=y
+CONFIG_EDAC=y
+CONFIG_RTC_CLASS=y
+# CONFIG_RTC_HCTOSYS is not set
+CONFIG_DMADEVICES=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_STAGING=y
+CONFIG_ASHMEM=y
+CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
+CONFIG_SYNC_FILE=y
+CONFIG_ION=y
+CONFIG_GOLDFISH_AUDIO=y
+CONFIG_SND_HDA_INTEL=y
+CONFIG_GOLDFISH=y
+CONFIG_GOLDFISH_PIPE=y
+CONFIG_GOLDFISH_SYNC=y
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_ISCSI_IBFT_FIND=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_FUSE_FS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_PANIC_TIMEOUT=5
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
+CONFIG_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_CRYPTO_TWOFISH=y
+CONFIG_ASYMMETRIC_KEY_TYPE=y
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
+CONFIG_X509_CERTIFICATE_PARSER=y
+CONFIG_PKCS7_MESSAGE_PARSER=y
+CONFIG_PKCS7_TEST_KEY=y
+# CONFIG_VIRTUALIZATION is not set
+CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h
index c5d1785..02bab09 100644
--- a/arch/x86/include/asm/idle.h
+++ b/arch/x86/include/asm/idle.h
@@ -1,13 +1,6 @@
 #ifndef _ASM_X86_IDLE_H
 #define _ASM_X86_IDLE_H
 
-#define IDLE_START 1
-#define IDLE_END 2
-
-struct notifier_block;
-void idle_notifier_register(struct notifier_block *n);
-void idle_notifier_unregister(struct notifier_block *n);
-
 #ifdef CONFIG_X86_64
 void enter_idle(void);
 void exit_idle(void);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 00a9047..e9195a1 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -68,19 +68,6 @@
 
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU(unsigned char, is_idle);
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
-	atomic_notifier_chain_register(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
-	atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_unregister);
 #endif
 
 /*
@@ -397,14 +384,14 @@
 void enter_idle(void)
 {
 	this_cpu_write(is_idle, 1);
-	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+	idle_notifier_call_chain(IDLE_START);
 }
 
 static void __exit_idle(void)
 {
 	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
 		return;
-	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+	idle_notifier_call_chain(IDLE_END);
 }
 
 /* Called from interrupts to signify idle end */
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 81435d9..fc7ca28 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -101,4 +101,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_COOKIE		57
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/block/blk-core.c b/block/blk-core.c
index 77b99bf..4c50e57 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -40,6 +40,8 @@
 #include "blk.h"
 #include "blk-mq.h"
 
+#include <linux/math64.h>
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
@@ -3569,3 +3571,85 @@
 
 	return 0;
 }
+
+/*
+ * Blk IO latency support. We want this to be as cheap as possible, so doing
+ * this lockless (and avoiding atomics), a few off by a few errors in this
+ * code is not harmful, and we don't want to do anything that is
+ * perf-impactful.
+ * TODO : If necessary, we can make the histograms per-cpu and aggregate
+ * them when printing them out.
+ */
+void
+blk_zero_latency_hist(struct io_latency_state *s)
+{
+	memset(s->latency_y_axis_read, 0,
+	       sizeof(s->latency_y_axis_read));
+	memset(s->latency_y_axis_write, 0,
+	       sizeof(s->latency_y_axis_write));
+	s->latency_reads_elems = 0;
+	s->latency_writes_elems = 0;
+}
+EXPORT_SYMBOL(blk_zero_latency_hist);
+
+ssize_t
+blk_latency_hist_show(struct io_latency_state *s, char *buf)
+{
+	int i;
+	int bytes_written = 0;
+	u_int64_t num_elem, elem;
+	int pct;
+
+	num_elem = s->latency_reads_elems;
+	if (num_elem > 0) {
+		bytes_written += scnprintf(buf + bytes_written,
+			   PAGE_SIZE - bytes_written,
+			   "IO svc_time Read Latency Histogram (n = %llu):\n",
+			   num_elem);
+		for (i = 0;
+		     i < ARRAY_SIZE(latency_x_axis_us);
+		     i++) {
+			elem = s->latency_y_axis_read[i];
+			pct = div64_u64(elem * 100, num_elem);
+			bytes_written += scnprintf(buf + bytes_written,
+						   PAGE_SIZE - bytes_written,
+						   "\t< %5lluus%15llu%15d%%\n",
+						   latency_x_axis_us[i],
+						   elem, pct);
+		}
+		/* Last element in y-axis table is overflow */
+		elem = s->latency_y_axis_read[i];
+		pct = div64_u64(elem * 100, num_elem);
+		bytes_written += scnprintf(buf + bytes_written,
+					   PAGE_SIZE - bytes_written,
+					   "\t> %5dms%15llu%15d%%\n", 10,
+					   elem, pct);
+	}
+	num_elem = s->latency_writes_elems;
+	if (num_elem > 0) {
+		bytes_written += scnprintf(buf + bytes_written,
+			   PAGE_SIZE - bytes_written,
+			   "IO svc_time Write Latency Histogram (n = %llu):\n",
+			   num_elem);
+		for (i = 0;
+		     i < ARRAY_SIZE(latency_x_axis_us);
+		     i++) {
+			elem = s->latency_y_axis_write[i];
+			pct = div64_u64(elem * 100, num_elem);
+			bytes_written += scnprintf(buf + bytes_written,
+						   PAGE_SIZE - bytes_written,
+						   "\t< %5lluus%15llu%15d%%\n",
+						   latency_x_axis_us[i],
+						   elem, pct);
+		}
+		/* Last element in y-axis table is overflow */
+		elem = s->latency_y_axis_write[i];
+		pct = div64_u64(elem * 100, num_elem);
+		bytes_written += scnprintf(buf + bytes_written,
+					   PAGE_SIZE - bytes_written,
+					   "\t> %5dms%15llu%15d%%\n", 10,
+					   elem, pct);
+	}
+	return bytes_written;
+}
+EXPORT_SYMBOL(blk_latency_hist_show);
diff --git a/build.config.cuttlefish.x86_64 b/build.config.cuttlefish.x86_64
new file mode 100644
index 0000000..5a96563
--- /dev/null
+++ b/build.config.cuttlefish.x86_64
@@ -0,0 +1,15 @@
+ARCH=x86_64
+BRANCH=android-4.9
+CLANG_TRIPLE=x86_64-linux-gnu-
+CROSS_COMPILE=x86_64-linux-androidkernel-
+DEFCONFIG=x86_64_cuttlefish_defconfig
+EXTRA_CMDS=''
+KERNEL_DIR=common
+POST_DEFCONFIG_CMDS="check_defconfig"
+CLANG_PREBUILT_BIN=prebuilts/clang/host/linux-x86/clang-4630689/bin
+LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin
+FILES="
+arch/x86/boot/bzImage
+vmlinux
+System.map
+"
diff --git a/build.config.goldfish.arm b/build.config.goldfish.arm
new file mode 100644
index 0000000..866da93
--- /dev/null
+++ b/build.config.goldfish.arm
@@ -0,0 +1,12 @@
+ARCH=arm
+BRANCH=android-4.4
+CROSS_COMPILE=arm-linux-androidkernel-
+DEFCONFIG=ranchu_defconfig
+EXTRA_CMDS=''
+KERNEL_DIR=common
+LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/arm/arm-linux-androideabi-4.9/bin
+FILES="
+arch/arm/boot/zImage
+vmlinux
+System.map
+"
diff --git a/build.config.goldfish.arm64 b/build.config.goldfish.arm64
new file mode 100644
index 0000000..9c963cf
--- /dev/null
+++ b/build.config.goldfish.arm64
@@ -0,0 +1,12 @@
+ARCH=arm64
+BRANCH=android-4.4
+CROSS_COMPILE=aarch64-linux-android-
+DEFCONFIG=ranchu64_defconfig
+EXTRA_CMDS=''
+KERNEL_DIR=common
+LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin
+FILES="
+arch/arm64/boot/Image
+vmlinux
+System.map
+"
diff --git a/build.config.goldfish.mips b/build.config.goldfish.mips
new file mode 100644
index 0000000..8af53d2
--- /dev/null
+++ b/build.config.goldfish.mips
@@ -0,0 +1,11 @@
+ARCH=mips
+BRANCH=android-4.4
+CROSS_COMPILE=mips64el-linux-android-
+DEFCONFIG=ranchu_defconfig
+EXTRA_CMDS=''
+KERNEL_DIR=common
+LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin
+FILES="
+vmlinux
+System.map
+"
diff --git a/build.config.goldfish.mips64 b/build.config.goldfish.mips64
new file mode 100644
index 0000000..2a33d36
--- /dev/null
+++ b/build.config.goldfish.mips64
@@ -0,0 +1,11 @@
+ARCH=mips
+BRANCH=android-4.4
+CROSS_COMPILE=mips64el-linux-android-
+DEFCONFIG=ranchu64_defconfig
+EXTRA_CMDS=''
+KERNEL_DIR=common
+LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin
+FILES="
+vmlinux
+System.map
+"
diff --git a/build.config.goldfish.x86 b/build.config.goldfish.x86
new file mode 100644
index 0000000..f86253f
--- /dev/null
+++ b/build.config.goldfish.x86
@@ -0,0 +1,12 @@
+ARCH=x86
+BRANCH=android-4.4
+CROSS_COMPILE=x86_64-linux-android-
+DEFCONFIG=i386_ranchu_defconfig
+EXTRA_CMDS=''
+KERNEL_DIR=common
+LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin
+FILES="
+arch/x86/boot/bzImage
+vmlinux
+System.map
+"
diff --git a/build.config.goldfish.x86_64 b/build.config.goldfish.x86_64
new file mode 100644
index 0000000..e173886
--- /dev/null
+++ b/build.config.goldfish.x86_64
@@ -0,0 +1,12 @@
+ARCH=x86_64
+BRANCH=android-4.4
+CROSS_COMPILE=x86_64-linux-android-
+DEFCONFIG=x86_64_ranchu_defconfig
+EXTRA_CMDS=''
+KERNEL_DIR=common
+LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin
+FILES="
+arch/x86/boot/bzImage
+vmlinux
+System.map
+"
diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
index bdfc6c6..01de42c 100644
--- a/drivers/android/Kconfig
+++ b/drivers/android/Kconfig
@@ -19,6 +19,18 @@
 	  Android process, using Binder to identify, invoke and pass arguments
 	  between said processes.
 
+config ANDROID_BINDER_DEVICES
+	string "Android Binder devices"
+	depends on ANDROID_BINDER_IPC
+	default "binder,hwbinder,vndbinder"
+	---help---
+	  Default value for the binder.devices parameter.
+
+	  The binder.devices parameter is a comma-separated list of strings
+	  that specifies the names of the binder device nodes that will be
+	  created. Each binder device has its own context manager, and is
+	  therefore logically separated from the other devices.
+
 config ANDROID_BINDER_IPC_32BIT
 	bool
 	depends on !64BIT && ANDROID_BINDER_IPC
@@ -32,6 +44,16 @@
 
 	  Note that enabling this will break newer Android user-space.
 
+config ANDROID_BINDER_IPC_SELFTEST
+	bool "Android Binder IPC Driver Selftest"
+	depends on ANDROID_BINDER_IPC
+	---help---
+	  This feature allows binder selftest to run.
+
+	  Binder selftest checks the allocation and free of binder buffers
+	  exhaustively with combinations of various buffer sizes and
+	  alignments.
+
 endif # if ANDROID
 
 endmenu
diff --git a/drivers/android/Makefile b/drivers/android/Makefile
index 3b7e4b0..a01254c 100644
--- a/drivers/android/Makefile
+++ b/drivers/android/Makefile
@@ -1,3 +1,4 @@
 ccflags-y += -I$(src)			# needed for trace events
 
-obj-$(CONFIG_ANDROID_BINDER_IPC)	+= binder.o
+obj-$(CONFIG_ANDROID_BINDER_IPC)	+= binder.o binder_alloc.o
+obj-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 80499f42..e7f4cd1 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -15,6 +15,40 @@
  *
  */
 
+/*
+ * Locking overview
+ *
+ * There are 3 main spinlocks which must be acquired in the
+ * order shown:
+ *
+ * 1) proc->outer_lock : protects binder_ref
+ *    binder_proc_lock() and binder_proc_unlock() are
+ *    used to acq/rel.
+ * 2) node->lock : protects most fields of binder_node.
+ *    binder_node_lock() and binder_node_unlock() are
+ *    used to acq/rel
+ * 3) proc->inner_lock : protects the thread and node lists
+ *    (proc->threads, proc->waiting_threads, proc->nodes)
+ *    and all todo lists associated with the binder_proc
+ *    (proc->todo, thread->todo, proc->delivered_death and
+ *    node->async_todo), as well as thread->transaction_stack
+ *    binder_inner_proc_lock() and binder_inner_proc_unlock()
+ *    are used to acq/rel
+ *
+ * Any lock under procA must never be nested under any lock at the same
+ * level or below on procB.
+ *
+ * Functions that require a lock held on entry indicate which lock
+ * in the suffix of the function name:
+ *
+ * foo_olocked() : requires node->outer_lock
+ * foo_nlocked() : requires node->lock
+ * foo_ilocked() : requires proc->inner_lock
+ * foo_oilocked(): requires proc->outer_lock and proc->inner_lock
+ * foo_nilocked(): requires node->lock and proc->inner_lock
+ * ...
+ */
+
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <asm/cacheflush.h>
@@ -24,7 +58,6 @@
 #include <linux/fs.h>
 #include <linux/list.h>
 #include <linux/miscdevice.h>
-#include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/nsproxy.h>
@@ -34,31 +67,31 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
 #include <linux/pid_namespace.h>
 #include <linux/security.h>
+#include <linux/spinlock.h>
 
 #ifdef CONFIG_ANDROID_BINDER_IPC_32BIT
 #define BINDER_IPC_32BIT 1
 #endif
 
 #include <uapi/linux/android/binder.h>
+#include "binder_alloc.h"
 #include "binder_trace.h"
 
-static DEFINE_MUTEX(binder_main_lock);
-static DEFINE_MUTEX(binder_deferred_lock);
-static DEFINE_MUTEX(binder_mmap_lock);
-
-static HLIST_HEAD(binder_procs);
 static HLIST_HEAD(binder_deferred_list);
+static DEFINE_MUTEX(binder_deferred_lock);
+
+static HLIST_HEAD(binder_devices);
+static HLIST_HEAD(binder_procs);
+static DEFINE_MUTEX(binder_procs_lock);
+
 static HLIST_HEAD(binder_dead_nodes);
+static DEFINE_SPINLOCK(binder_dead_nodes_lock);
 
 static struct dentry *binder_debugfs_dir_entry_root;
 static struct dentry *binder_debugfs_dir_entry_proc;
-static struct binder_node *binder_context_mgr_node;
-static kuid_t binder_context_mgr_uid = INVALID_UID;
-static int binder_last_id;
+static atomic_t binder_last_id;
 
 #define BINDER_DEBUG_ENTRY(name) \
 static int binder_##name##_open(struct inode *inode, struct file *file) \
@@ -104,16 +137,15 @@
 	BINDER_DEBUG_TRANSACTION_COMPLETE   = 1U << 10,
 	BINDER_DEBUG_FREE_BUFFER            = 1U << 11,
 	BINDER_DEBUG_INTERNAL_REFS          = 1U << 12,
-	BINDER_DEBUG_BUFFER_ALLOC           = 1U << 13,
-	BINDER_DEBUG_PRIORITY_CAP           = 1U << 14,
-	BINDER_DEBUG_BUFFER_ALLOC_ASYNC     = 1U << 15,
+	BINDER_DEBUG_PRIORITY_CAP           = 1U << 13,
+	BINDER_DEBUG_SPINLOCKS              = 1U << 14,
 };
 static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR |
 	BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION;
 module_param_named(debug_mask, binder_debug_mask, uint, S_IWUSR | S_IRUGO);
 
-static bool binder_debug_no_lock;
-module_param_named(proc_no_lock, binder_debug_no_lock, bool, S_IWUSR | S_IRUGO);
+static char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES;
+module_param_named(devices, binder_devices_param, charp, S_IRUGO);
 
 static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait);
 static int binder_stop_on_user_error;
@@ -145,6 +177,17 @@
 			binder_stop_on_user_error = 2; \
 	} while (0)
 
+#define to_flat_binder_object(hdr) \
+	container_of(hdr, struct flat_binder_object, hdr)
+
+#define to_binder_fd_object(hdr) container_of(hdr, struct binder_fd_object, hdr)
+
+#define to_binder_buffer_object(hdr) \
+	container_of(hdr, struct binder_buffer_object, hdr)
+
+#define to_binder_fd_array_object(hdr) \
+	container_of(hdr, struct binder_fd_array_object, hdr)
+
 enum binder_stat_types {
 	BINDER_STAT_PROC,
 	BINDER_STAT_THREAD,
@@ -157,26 +200,27 @@
 };
 
 struct binder_stats {
-	int br[_IOC_NR(BR_FAILED_REPLY) + 1];
-	int bc[_IOC_NR(BC_DEAD_BINDER_DONE) + 1];
-	int obj_created[BINDER_STAT_COUNT];
-	int obj_deleted[BINDER_STAT_COUNT];
+	atomic_t br[_IOC_NR(BR_FAILED_REPLY) + 1];
+	atomic_t bc[_IOC_NR(BC_REPLY_SG) + 1];
+	atomic_t obj_created[BINDER_STAT_COUNT];
+	atomic_t obj_deleted[BINDER_STAT_COUNT];
 };
 
 static struct binder_stats binder_stats;
 
 static inline void binder_stats_deleted(enum binder_stat_types type)
 {
-	binder_stats.obj_deleted[type]++;
+	atomic_inc(&binder_stats.obj_deleted[type]);
 }
 
 static inline void binder_stats_created(enum binder_stat_types type)
 {
-	binder_stats.obj_created[type]++;
+	atomic_inc(&binder_stats.obj_created[type]);
 }
 
 struct binder_transaction_log_entry {
 	int debug_id;
+	int debug_id_done;
 	int call_type;
 	int from_proc;
 	int from_thread;
@@ -186,10 +230,14 @@
 	int to_node;
 	int data_size;
 	int offsets_size;
+	int return_error_line;
+	uint32_t return_error;
+	uint32_t return_error_param;
+	const char *context_name;
 };
 struct binder_transaction_log {
-	int next;
-	int full;
+	atomic_t cur;
+	bool full;
 	struct binder_transaction_log_entry entry[32];
 };
 static struct binder_transaction_log binder_transaction_log;
@@ -199,22 +247,50 @@
 	struct binder_transaction_log *log)
 {
 	struct binder_transaction_log_entry *e;
+	unsigned int cur = atomic_inc_return(&log->cur);
 
-	e = &log->entry[log->next];
-	memset(e, 0, sizeof(*e));
-	log->next++;
-	if (log->next == ARRAY_SIZE(log->entry)) {
-		log->next = 0;
+	if (cur >= ARRAY_SIZE(log->entry))
 		log->full = 1;
-	}
+	e = &log->entry[cur % ARRAY_SIZE(log->entry)];
+	WRITE_ONCE(e->debug_id_done, 0);
+	/*
+	 * write-barrier to synchronize access to e->debug_id_done.
+	 * We make sure the initialized 0 value is seen before
+	 * memset() other fields are zeroed by memset.
+	 */
+	smp_wmb();
+	memset(e, 0, sizeof(*e));
 	return e;
 }
 
+struct binder_context {
+	struct binder_node *binder_context_mgr_node;
+	struct mutex context_mgr_node_lock;
+
+	kuid_t binder_context_mgr_uid;
+	const char *name;
+};
+
+struct binder_device {
+	struct hlist_node hlist;
+	struct miscdevice miscdev;
+	struct binder_context context;
+};
+
+/**
+ * struct binder_work - work enqueued on a worklist
+ * @entry:             node enqueued on list
+ * @type:              type of work to be performed
+ *
+ * There are separate work lists for proc, thread, and node (async).
+ */
 struct binder_work {
 	struct list_head entry;
+
 	enum {
 		BINDER_WORK_TRANSACTION = 1,
 		BINDER_WORK_TRANSACTION_COMPLETE,
+		BINDER_WORK_RETURN_ERROR,
 		BINDER_WORK_NODE,
 		BINDER_WORK_DEAD_BINDER,
 		BINDER_WORK_DEAD_BINDER_AND_CLEAR,
@@ -222,8 +298,77 @@
 	} type;
 };
 
+struct binder_error {
+	struct binder_work work;
+	uint32_t cmd;
+};
+
+/**
+ * struct binder_node - binder node bookkeeping
+ * @debug_id:             unique ID for debugging
+ *                        (invariant after initialized)
+ * @lock:                 lock for node fields
+ * @work:                 worklist element for node work
+ *                        (protected by @proc->inner_lock)
+ * @rb_node:              element for proc->nodes tree
+ *                        (protected by @proc->inner_lock)
+ * @dead_node:            element for binder_dead_nodes list
+ *                        (protected by binder_dead_nodes_lock)
+ * @proc:                 binder_proc that owns this node
+ *                        (invariant after initialized)
+ * @refs:                 list of references on this node
+ *                        (protected by @lock)
+ * @internal_strong_refs: used to take strong references when
+ *                        initiating a transaction
+ *                        (protected by @proc->inner_lock if @proc
+ *                        and by @lock)
+ * @local_weak_refs:      weak user refs from local process
+ *                        (protected by @proc->inner_lock if @proc
+ *                        and by @lock)
+ * @local_strong_refs:    strong user refs from local process
+ *                        (protected by @proc->inner_lock if @proc
+ *                        and by @lock)
+ * @tmp_refs:             temporary kernel refs
+ *                        (protected by @proc->inner_lock while @proc
+ *                        is valid, and by binder_dead_nodes_lock
+ *                        if @proc is NULL. During inc/dec and node release
+ *                        it is also protected by @lock to provide safety
+ *                        as the node dies and @proc becomes NULL)
+ * @ptr:                  userspace pointer for node
+ *                        (invariant, no lock needed)
+ * @cookie:               userspace cookie for node
+ *                        (invariant, no lock needed)
+ * @has_strong_ref:       userspace notified of strong ref
+ *                        (protected by @proc->inner_lock if @proc
+ *                        and by @lock)
+ * @pending_strong_ref:   userspace has acked notification of strong ref
+ *                        (protected by @proc->inner_lock if @proc
+ *                        and by @lock)
+ * @has_weak_ref:         userspace notified of weak ref
+ *                        (protected by @proc->inner_lock if @proc
+ *                        and by @lock)
+ * @pending_weak_ref:     userspace has acked notification of weak ref
+ *                        (protected by @proc->inner_lock if @proc
+ *                        and by @lock)
+ * @has_async_transaction: async transaction to node in progress
+ *                        (protected by @lock)
+ * @sched_policy:         minimum scheduling policy for node
+ *                        (invariant after initialized)
+ * @accept_fds:           file descriptor operations supported for node
+ *                        (invariant after initialized)
+ * @min_priority:         minimum scheduling priority
+ *                        (invariant after initialized)
+ * @inherit_rt:           inherit RT scheduling policy from caller
+ * @txn_security_ctx:     require sender's security context
+ *                        (invariant after initialized)
+ * @async_todo:           list of async work items
+ *                        (protected by @proc->inner_lock)
+ *
+ * Bookkeeping structure for binder nodes.
+ */
 struct binder_node {
 	int debug_id;
+	spinlock_t lock;
 	struct binder_work work;
 	union {
 		struct rb_node rb_node;
@@ -234,88 +379,187 @@
 	int internal_strong_refs;
 	int local_weak_refs;
 	int local_strong_refs;
+	int tmp_refs;
 	binder_uintptr_t ptr;
 	binder_uintptr_t cookie;
-	unsigned has_strong_ref:1;
-	unsigned pending_strong_ref:1;
-	unsigned has_weak_ref:1;
-	unsigned pending_weak_ref:1;
-	unsigned has_async_transaction:1;
-	unsigned accept_fds:1;
-	unsigned min_priority:8;
+	struct {
+		/*
+		 * bitfield elements protected by
+		 * proc inner_lock
+		 */
+		u8 has_strong_ref:1;
+		u8 pending_strong_ref:1;
+		u8 has_weak_ref:1;
+		u8 pending_weak_ref:1;
+	};
+	struct {
+		/*
+		 * invariant after initialization
+		 */
+		u8 sched_policy:2;
+		u8 inherit_rt:1;
+		u8 accept_fds:1;
+		u8 txn_security_ctx:1;
+		u8 min_priority;
+	};
+	bool has_async_transaction;
 	struct list_head async_todo;
 };
 
 struct binder_ref_death {
+	/**
+	 * @work: worklist element for death notifications
+	 *        (protected by inner_lock of the proc that
+	 *        this ref belongs to)
+	 */
 	struct binder_work work;
 	binder_uintptr_t cookie;
 };
 
+/**
+ * struct binder_ref_data - binder_ref counts and id
+ * @debug_id:        unique ID for the ref
+ * @desc:            unique userspace handle for ref
+ * @strong:          strong ref count (debugging only if not locked)
+ * @weak:            weak ref count (debugging only if not locked)
+ *
+ * Structure to hold ref count and ref id information. Since
+ * the actual ref can only be accessed with a lock, this structure
+ * is used to return information about the ref to callers of
+ * ref inc/dec functions.
+ */
+struct binder_ref_data {
+	int debug_id;
+	uint32_t desc;
+	int strong;
+	int weak;
+};
+
+/**
+ * struct binder_ref - struct to track references on nodes
+ * @data:        binder_ref_data containing id, handle, and current refcounts
+ * @rb_node_desc: node for lookup by @data.desc in proc's rb_tree
+ * @rb_node_node: node for lookup by @node in proc's rb_tree
+ * @node_entry:  list entry for node->refs list in target node
+ *               (protected by @node->lock)
+ * @proc:        binder_proc containing ref
+ * @node:        binder_node of target node. When cleaning up a
+ *               ref for deletion in binder_cleanup_ref, a non-NULL
+ *               @node indicates the node must be freed
+ * @death:       pointer to death notification (ref_death) if requested
+ *               (protected by @node->lock)
+ *
+ * Structure to track references from procA to target node (on procB). This
+ * structure is unsafe to access without holding @proc->outer_lock.
+ */
 struct binder_ref {
 	/* Lookups needed: */
 	/*   node + proc => ref (transaction) */
 	/*   desc + proc => ref (transaction, inc/dec ref) */
 	/*   node => refs + procs (proc exit) */
-	int debug_id;
+	struct binder_ref_data data;
 	struct rb_node rb_node_desc;
 	struct rb_node rb_node_node;
 	struct hlist_node node_entry;
 	struct binder_proc *proc;
 	struct binder_node *node;
-	uint32_t desc;
-	int strong;
-	int weak;
 	struct binder_ref_death *death;
 };
 
-struct binder_buffer {
-	struct list_head entry; /* free and allocated entries by address */
-	struct rb_node rb_node; /* free entry by size or allocated entry */
-				/* by address */
-	unsigned free:1;
-	unsigned allow_user_free:1;
-	unsigned async_transaction:1;
-	unsigned debug_id:29;
-
-	struct binder_transaction *transaction;
-
-	struct binder_node *target_node;
-	size_t data_size;
-	size_t offsets_size;
-	uint8_t data[0];
-};
-
 enum binder_deferred_state {
 	BINDER_DEFERRED_PUT_FILES    = 0x01,
 	BINDER_DEFERRED_FLUSH        = 0x02,
 	BINDER_DEFERRED_RELEASE      = 0x04,
 };
 
+/**
+ * struct binder_priority - scheduler policy and priority
+ * @sched_policy            scheduler policy
+ * @prio                    [100..139] for SCHED_NORMAL, [0..99] for FIFO/RT
+ *
+ * The binder driver supports inheriting the following scheduler policies:
+ * SCHED_NORMAL
+ * SCHED_BATCH
+ * SCHED_FIFO
+ * SCHED_RR
+ */
+struct binder_priority {
+	unsigned int sched_policy;
+	int prio;
+};
+
+/**
+ * struct binder_proc - binder process bookkeeping
+ * @proc_node:            element for binder_procs list
+ * @threads:              rbtree of binder_threads in this proc
+ *                        (protected by @inner_lock)
+ * @nodes:                rbtree of binder nodes associated with
+ *                        this proc ordered by node->ptr
+ *                        (protected by @inner_lock)
+ * @refs_by_desc:         rbtree of refs ordered by ref->desc
+ *                        (protected by @outer_lock)
+ * @refs_by_node:         rbtree of refs ordered by ref->node
+ *                        (protected by @outer_lock)
+ * @waiting_threads:      threads currently waiting for proc work
+ *                        (protected by @inner_lock)
+ * @pid                   PID of group_leader of process
+ *                        (invariant after initialized)
+ * @tsk                   task_struct for group_leader of process
+ *                        (invariant after initialized)
+ * @files                 files_struct for process
+ *                        (invariant after initialized)
+ * @deferred_work_node:   element for binder_deferred_list
+ *                        (protected by binder_deferred_lock)
+ * @deferred_work:        bitmap of deferred work to perform
+ *                        (protected by binder_deferred_lock)
+ * @is_dead:              process is dead and awaiting free
+ *                        when outstanding transactions are cleaned up
+ *                        (protected by @inner_lock)
+ * @todo:                 list of work for this process
+ *                        (protected by @inner_lock)
+ * @wait:                 wait queue head to wait for proc work
+ *                        (invariant after initialized)
+ * @stats:                per-process binder statistics
+ *                        (atomics, no lock needed)
+ * @delivered_death:      list of delivered death notification
+ *                        (protected by @inner_lock)
+ * @max_threads:          cap on number of binder threads
+ *                        (protected by @inner_lock)
+ * @requested_threads:    number of binder threads requested but not
+ *                        yet started. In current implementation, can
+ *                        only be 0 or 1.
+ *                        (protected by @inner_lock)
+ * @requested_threads_started: number binder threads started
+ *                        (protected by @inner_lock)
+ * @tmp_ref:              temporary reference to indicate proc is in use
+ *                        (protected by @inner_lock)
+ * @default_priority:     default scheduler priority
+ *                        (invariant after initialized)
+ * @debugfs_entry:        debugfs node
+ * @alloc:                binder allocator bookkeeping
+ * @context:              binder_context for this proc
+ *                        (invariant after initialized)
+ * @inner_lock:           can nest under outer_lock and/or node lock
+ * @outer_lock:           no nesting under innor or node lock
+ *                        Lock order: 1) outer, 2) node, 3) inner
+ *
+ * Bookkeeping structure for binder processes
+ */
 struct binder_proc {
 	struct hlist_node proc_node;
 	struct rb_root threads;
 	struct rb_root nodes;
 	struct rb_root refs_by_desc;
 	struct rb_root refs_by_node;
+	struct list_head waiting_threads;
 	int pid;
-	struct vm_area_struct *vma;
-	struct mm_struct *vma_vm_mm;
 	struct task_struct *tsk;
 	struct files_struct *files;
 	struct mutex files_lock;
 	struct hlist_node deferred_work_node;
 	int deferred_work;
-	void *buffer;
-	ptrdiff_t user_buffer_offset;
+	bool is_dead;
 
-	struct list_head buffers;
-	struct rb_root free_buffers;
-	struct rb_root allocated_buffers;
-	size_t free_async_space;
-
-	struct page **pages;
-	size_t buffer_size;
-	uint32_t buffer_free;
 	struct list_head todo;
 	wait_queue_head_t wait;
 	struct binder_stats stats;
@@ -323,9 +567,13 @@
 	int max_threads;
 	int requested_threads;
 	int requested_threads_started;
-	int ready_threads;
-	long default_priority;
+	int tmp_ref;
+	struct binder_priority default_priority;
 	struct dentry *debugfs_entry;
+	struct binder_alloc alloc;
+	struct binder_context *context;
+	spinlock_t inner_lock;
+	spinlock_t outer_lock;
 };
 
 enum {
@@ -334,22 +582,60 @@
 	BINDER_LOOPER_STATE_EXITED      = 0x04,
 	BINDER_LOOPER_STATE_INVALID     = 0x08,
 	BINDER_LOOPER_STATE_WAITING     = 0x10,
-	BINDER_LOOPER_STATE_NEED_RETURN = 0x20
+	BINDER_LOOPER_STATE_POLL        = 0x20,
 };
 
+/**
+ * struct binder_thread - binder thread bookkeeping
+ * @proc:                 binder process for this thread
+ *                        (invariant after initialization)
+ * @rb_node:              element for proc->threads rbtree
+ *                        (protected by @proc->inner_lock)
+ * @waiting_thread_node:  element for @proc->waiting_threads list
+ *                        (protected by @proc->inner_lock)
+ * @pid:                  PID for this thread
+ *                        (invariant after initialization)
+ * @looper:               bitmap of looping state
+ *                        (only accessed by this thread)
+ * @looper_needs_return:  looping thread needs to exit driver
+ *                        (no lock needed)
+ * @transaction_stack:    stack of in-progress transactions for this thread
+ *                        (protected by @proc->inner_lock)
+ * @todo:                 list of work to do for this thread
+ *                        (protected by @proc->inner_lock)
+ * @return_error:         transaction errors reported by this thread
+ *                        (only accessed by this thread)
+ * @reply_error:          transaction errors reported by target thread
+ *                        (protected by @proc->inner_lock)
+ * @wait:                 wait queue for thread work
+ * @stats:                per-thread statistics
+ *                        (atomics, no lock needed)
+ * @tmp_ref:              temporary reference to indicate thread is in use
+ *                        (atomic since @proc->inner_lock cannot
+ *                        always be acquired)
+ * @is_dead:              thread is dead and awaiting free
+ *                        when outstanding transactions are cleaned up
+ *                        (protected by @proc->inner_lock)
+ * @task:                 struct task_struct for this thread
+ *
+ * Bookkeeping structure for binder threads.
+ */
 struct binder_thread {
 	struct binder_proc *proc;
 	struct rb_node rb_node;
+	struct list_head waiting_thread_node;
 	int pid;
-	int looper;
+	int looper;              /* only modified by this thread */
+	bool looper_need_return; /* can be written by other thread */
 	struct binder_transaction *transaction_stack;
 	struct list_head todo;
-	uint32_t return_error; /* Write failed, return error code in read buf */
-	uint32_t return_error2; /* Write failed, return error code in read */
-		/* buffer. Used when sending a reply to a dead process that */
-		/* we are also waiting on */
+	struct binder_error return_error;
+	struct binder_error reply_error;
 	wait_queue_head_t wait;
 	struct binder_stats stats;
+	atomic_t tmp_ref;
+	bool is_dead;
+	struct task_struct *task;
 };
 
 struct binder_transaction {
@@ -366,13 +652,258 @@
 	struct binder_buffer *buffer;
 	unsigned int	code;
 	unsigned int	flags;
-	long	priority;
-	long	saved_priority;
+	struct binder_priority	priority;
+	struct binder_priority	saved_priority;
+	bool    set_priority_called;
 	kuid_t	sender_euid;
+	binder_uintptr_t security_ctx;
+	/**
+	 * @lock:  protects @from, @to_proc, and @to_thread
+	 *
+	 * @from, @to_proc, and @to_thread can be set to NULL
+	 * during thread teardown
+	 */
+	spinlock_t lock;
 };
 
+/**
+ * binder_proc_lock() - Acquire outer lock for given binder_proc
+ * @proc:         struct binder_proc to acquire
+ *
+ * Acquires proc->outer_lock. Used to protect binder_ref
+ * structures associated with the given proc.
+ */
+#define binder_proc_lock(proc) _binder_proc_lock(proc, __LINE__)
+static void
+_binder_proc_lock(struct binder_proc *proc, int line)
+{
+	binder_debug(BINDER_DEBUG_SPINLOCKS,
+		     "%s: line=%d\n", __func__, line);
+	spin_lock(&proc->outer_lock);
+}
+
+/**
+ * binder_proc_unlock() - Release spinlock for given binder_proc
+ * @proc:         struct binder_proc to acquire
+ *
+ * Release lock acquired via binder_proc_lock()
+ */
+#define binder_proc_unlock(_proc) _binder_proc_unlock(_proc, __LINE__)
+static void
+_binder_proc_unlock(struct binder_proc *proc, int line)
+{
+	binder_debug(BINDER_DEBUG_SPINLOCKS,
+		     "%s: line=%d\n", __func__, line);
+	spin_unlock(&proc->outer_lock);
+}
+
+/**
+ * binder_inner_proc_lock() - Acquire inner lock for given binder_proc
+ * @proc:         struct binder_proc to acquire
+ *
+ * Acquires proc->inner_lock. Used to protect todo lists
+ */
+#define binder_inner_proc_lock(proc) _binder_inner_proc_lock(proc, __LINE__)
+static void
+_binder_inner_proc_lock(struct binder_proc *proc, int line)
+{
+	binder_debug(BINDER_DEBUG_SPINLOCKS,
+		     "%s: line=%d\n", __func__, line);
+	spin_lock(&proc->inner_lock);
+}
+
+/**
+ * binder_inner_proc_unlock() - Release inner lock for given binder_proc
+ * @proc:         struct binder_proc to acquire
+ *
+ * Release lock acquired via binder_inner_proc_lock()
+ */
+#define binder_inner_proc_unlock(proc) _binder_inner_proc_unlock(proc, __LINE__)
+static void
+_binder_inner_proc_unlock(struct binder_proc *proc, int line)
+{
+	binder_debug(BINDER_DEBUG_SPINLOCKS,
+		     "%s: line=%d\n", __func__, line);
+	spin_unlock(&proc->inner_lock);
+}
+
+/**
+ * binder_node_lock() - Acquire spinlock for given binder_node
+ * @node:         struct binder_node to acquire
+ *
+ * Acquires node->lock. Used to protect binder_node fields
+ */
+#define binder_node_lock(node) _binder_node_lock(node, __LINE__)
+static void
+_binder_node_lock(struct binder_node *node, int line)
+{
+	binder_debug(BINDER_DEBUG_SPINLOCKS,
+		     "%s: line=%d\n", __func__, line);
+	spin_lock(&node->lock);
+}
+
+/**
+ * binder_node_unlock() - Release spinlock for given binder_proc
+ * @node:         struct binder_node to acquire
+ *
+ * Release lock acquired via binder_node_lock()
+ */
+#define binder_node_unlock(node) _binder_node_unlock(node, __LINE__)
+static void
+_binder_node_unlock(struct binder_node *node, int line)
+{
+	binder_debug(BINDER_DEBUG_SPINLOCKS,
+		     "%s: line=%d\n", __func__, line);
+	spin_unlock(&node->lock);
+}
+
+/**
+ * binder_node_inner_lock() - Acquire node and inner locks
+ * @node:         struct binder_node to acquire
+ *
+ * Acquires node->lock. If node->proc also acquires
+ * proc->inner_lock. Used to protect binder_node fields
+ */
+#define binder_node_inner_lock(node) _binder_node_inner_lock(node, __LINE__)
+static void
+_binder_node_inner_lock(struct binder_node *node, int line)
+{
+	binder_debug(BINDER_DEBUG_SPINLOCKS,
+		     "%s: line=%d\n", __func__, line);
+	spin_lock(&node->lock);
+	if (node->proc)
+		binder_inner_proc_lock(node->proc);
+}
+
+/**
+ * binder_node_unlock() - Release node and inner locks
+ * @node:         struct binder_node to acquire
+ *
+ * Release lock acquired via binder_node_lock()
+ */
+#define binder_node_inner_unlock(node) _binder_node_inner_unlock(node, __LINE__)
+static void
+_binder_node_inner_unlock(struct binder_node *node, int line)
+{
+	struct binder_proc *proc = node->proc;
+
+	binder_debug(BINDER_DEBUG_SPINLOCKS,
+		     "%s: line=%d\n", __func__, line);
+	if (proc)
+		binder_inner_proc_unlock(proc);
+	spin_unlock(&node->lock);
+}
+
+static bool binder_worklist_empty_ilocked(struct list_head *list)
+{
+	return list_empty(list);
+}
+
+/**
+ * binder_worklist_empty() - Check if no items on the work list
+ * @proc:       binder_proc associated with list
+ * @list:	list to check
+ *
+ * Return: true if there are no items on list, else false
+ */
+static bool binder_worklist_empty(struct binder_proc *proc,
+				  struct list_head *list)
+{
+	bool ret;
+
+	binder_inner_proc_lock(proc);
+	ret = binder_worklist_empty_ilocked(list);
+	binder_inner_proc_unlock(proc);
+	return ret;
+}
+
+static void
+binder_enqueue_work_ilocked(struct binder_work *work,
+			   struct list_head *target_list)
+{
+	BUG_ON(target_list == NULL);
+	BUG_ON(work->entry.next && !list_empty(&work->entry));
+	list_add_tail(&work->entry, target_list);
+}
+
+/**
+ * binder_enqueue_work() - Add an item to the work list
+ * @proc:         binder_proc associated with list
+ * @work:         struct binder_work to add to list
+ * @target_list:  list to add work to
+ *
+ * Adds the work to the specified list. Asserts that work
+ * is not already on a list.
+ */
+static void
+binder_enqueue_work(struct binder_proc *proc,
+		    struct binder_work *work,
+		    struct list_head *target_list)
+{
+	binder_inner_proc_lock(proc);
+	binder_enqueue_work_ilocked(work, target_list);
+	binder_inner_proc_unlock(proc);
+}
+
+static void
+binder_dequeue_work_ilocked(struct binder_work *work)
+{
+	list_del_init(&work->entry);
+}
+
+/**
+ * binder_dequeue_work() - Removes an item from the work list
+ * @proc:         binder_proc associated with list
+ * @work:         struct binder_work to remove from list
+ *
+ * Removes the specified work item from whatever list it is on.
+ * Can safely be called if work is not on any list.
+ */
+static void
+binder_dequeue_work(struct binder_proc *proc, struct binder_work *work)
+{
+	binder_inner_proc_lock(proc);
+	binder_dequeue_work_ilocked(work);
+	binder_inner_proc_unlock(proc);
+}
+
+static struct binder_work *binder_dequeue_work_head_ilocked(
+					struct list_head *list)
+{
+	struct binder_work *w;
+
+	w = list_first_entry_or_null(list, struct binder_work, entry);
+	if (w)
+		list_del_init(&w->entry);
+	return w;
+}
+
+/**
+ * binder_dequeue_work_head() - Dequeues the item at head of list
+ * @proc:         binder_proc associated with list
+ * @list:         list to dequeue head
+ *
+ * Removes the head of the list if there are items on the list
+ *
+ * Return: pointer dequeued binder_work, NULL if list was empty
+ */
+static struct binder_work *binder_dequeue_work_head(
+					struct binder_proc *proc,
+					struct list_head *list)
+{
+	struct binder_work *w;
+
+	binder_inner_proc_lock(proc);
+	w = binder_dequeue_work_head_ilocked(list);
+	binder_inner_proc_unlock(proc);
+	return w;
+}
+
 static void
 binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer);
+static void binder_free_thread(struct binder_thread *thread);
+static void binder_free_proc(struct binder_proc *proc);
+static void binder_inc_node_tmpref_ilocked(struct binder_node *node);
 
 static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
 {
@@ -434,453 +965,281 @@
 	return retval;
 }
 
-static inline void binder_lock(const char *tag)
+static bool binder_has_work_ilocked(struct binder_thread *thread,
+				    bool do_proc_work)
 {
-	trace_binder_lock(tag);
-	mutex_lock(&binder_main_lock);
-	trace_binder_locked(tag);
+	return !binder_worklist_empty_ilocked(&thread->todo) ||
+		thread->looper_need_return ||
+		(do_proc_work &&
+		 !binder_worklist_empty_ilocked(&thread->proc->todo));
 }
 
-static inline void binder_unlock(const char *tag)
+static bool binder_has_work(struct binder_thread *thread, bool do_proc_work)
 {
-	trace_binder_unlock(tag);
-	mutex_unlock(&binder_main_lock);
+	bool has_work;
+
+	binder_inner_proc_lock(thread->proc);
+	has_work = binder_has_work_ilocked(thread, do_proc_work);
+	binder_inner_proc_unlock(thread->proc);
+
+	return has_work;
 }
 
-static void binder_set_nice(long nice)
+static bool binder_available_for_proc_work_ilocked(struct binder_thread *thread)
 {
-	long min_nice;
+	return !thread->transaction_stack &&
+		binder_worklist_empty_ilocked(&thread->todo) &&
+		(thread->looper & (BINDER_LOOPER_STATE_ENTERED |
+				   BINDER_LOOPER_STATE_REGISTERED));
+}
 
-	if (can_nice(current, nice)) {
-		set_user_nice(current, nice);
+static void binder_wakeup_poll_threads_ilocked(struct binder_proc *proc,
+					       bool sync)
+{
+	struct rb_node *n;
+	struct binder_thread *thread;
+
+	for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) {
+		thread = rb_entry(n, struct binder_thread, rb_node);
+		if (thread->looper & BINDER_LOOPER_STATE_POLL &&
+		    binder_available_for_proc_work_ilocked(thread)) {
+			if (sync)
+				wake_up_interruptible_sync(&thread->wait);
+			else
+				wake_up_interruptible(&thread->wait);
+		}
+	}
+}
+
+/**
+ * binder_select_thread_ilocked() - selects a thread for doing proc work.
+ * @proc:	process to select a thread from
+ *
+ * Note that calling this function moves the thread off the waiting_threads
+ * list, so it can only be woken up by the caller of this function, or a
+ * signal. Therefore, callers *should* always wake up the thread this function
+ * returns.
+ *
+ * Return:	If there's a thread currently waiting for process work,
+ *		returns that thread. Otherwise returns NULL.
+ */
+static struct binder_thread *
+binder_select_thread_ilocked(struct binder_proc *proc)
+{
+	struct binder_thread *thread;
+
+	assert_spin_locked(&proc->inner_lock);
+	thread = list_first_entry_or_null(&proc->waiting_threads,
+					  struct binder_thread,
+					  waiting_thread_node);
+
+	if (thread)
+		list_del_init(&thread->waiting_thread_node);
+
+	return thread;
+}
+
+/**
+ * binder_wakeup_thread_ilocked() - wakes up a thread for doing proc work.
+ * @proc:	process to wake up a thread in
+ * @thread:	specific thread to wake-up (may be NULL)
+ * @sync:	whether to do a synchronous wake-up
+ *
+ * This function wakes up a thread in the @proc process.
+ * The caller may provide a specific thread to wake-up in
+ * the @thread parameter. If @thread is NULL, this function
+ * will wake up threads that have called poll().
+ *
+ * Note that for this function to work as expected, callers
+ * should first call binder_select_thread() to find a thread
+ * to handle the work (if they don't have a thread already),
+ * and pass the result into the @thread parameter.
+ */
+static void binder_wakeup_thread_ilocked(struct binder_proc *proc,
+					 struct binder_thread *thread,
+					 bool sync)
+{
+	assert_spin_locked(&proc->inner_lock);
+
+	if (thread) {
+		if (sync)
+			wake_up_interruptible_sync(&thread->wait);
+		else
+			wake_up_interruptible(&thread->wait);
 		return;
 	}
-	min_nice = rlimit_to_nice(current->signal->rlim[RLIMIT_NICE].rlim_cur);
-	binder_debug(BINDER_DEBUG_PRIORITY_CAP,
-		     "%d: nice value %ld not allowed use %ld instead\n",
-		      current->pid, nice, min_nice);
-	set_user_nice(current, min_nice);
-	if (min_nice <= MAX_NICE)
-		return;
-	binder_user_error("%d RLIMIT_NICE not set\n", current->pid);
+
+	/* Didn't find a thread waiting for proc work; this can happen
+	 * in two scenarios:
+	 * 1. All threads are busy handling transactions
+	 *    In that case, one of those threads should call back into
+	 *    the kernel driver soon and pick up this work.
+	 * 2. Threads are using the (e)poll interface, in which case
+	 *    they may be blocked on the waitqueue without having been
+	 *    added to waiting_threads. For this case, we just iterate
+	 *    over all threads not handling transaction work, and
+	 *    wake them all up. We wake all because we don't know whether
+	 *    a thread that called into (e)poll is handling non-binder
+	 *    work currently.
+	 */
+	binder_wakeup_poll_threads_ilocked(proc, sync);
 }
 
-static size_t binder_buffer_size(struct binder_proc *proc,
-				 struct binder_buffer *buffer)
+static void binder_wakeup_proc_ilocked(struct binder_proc *proc)
 {
-	if (list_is_last(&buffer->entry, &proc->buffers))
-		return proc->buffer + proc->buffer_size - (void *)buffer->data;
-	return (size_t)list_entry(buffer->entry.next,
-			  struct binder_buffer, entry) - (size_t)buffer->data;
+	struct binder_thread *thread = binder_select_thread_ilocked(proc);
+
+	binder_wakeup_thread_ilocked(proc, thread, /* sync = */false);
 }
 
-static void binder_insert_free_buffer(struct binder_proc *proc,
-				      struct binder_buffer *new_buffer)
+static bool is_rt_policy(int policy)
 {
-	struct rb_node **p = &proc->free_buffers.rb_node;
-	struct rb_node *parent = NULL;
-	struct binder_buffer *buffer;
-	size_t buffer_size;
-	size_t new_buffer_size;
-
-	BUG_ON(!new_buffer->free);
-
-	new_buffer_size = binder_buffer_size(proc, new_buffer);
-
-	binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-		     "%d: add free buffer, size %zd, at %p\n",
-		      proc->pid, new_buffer_size, new_buffer);
-
-	while (*p) {
-		parent = *p;
-		buffer = rb_entry(parent, struct binder_buffer, rb_node);
-		BUG_ON(!buffer->free);
-
-		buffer_size = binder_buffer_size(proc, buffer);
-
-		if (new_buffer_size < buffer_size)
-			p = &parent->rb_left;
-		else
-			p = &parent->rb_right;
-	}
-	rb_link_node(&new_buffer->rb_node, parent, p);
-	rb_insert_color(&new_buffer->rb_node, &proc->free_buffers);
+	return policy == SCHED_FIFO || policy == SCHED_RR;
 }
 
-static void binder_insert_allocated_buffer(struct binder_proc *proc,
-					   struct binder_buffer *new_buffer)
+static bool is_fair_policy(int policy)
 {
-	struct rb_node **p = &proc->allocated_buffers.rb_node;
-	struct rb_node *parent = NULL;
-	struct binder_buffer *buffer;
-
-	BUG_ON(new_buffer->free);
-
-	while (*p) {
-		parent = *p;
-		buffer = rb_entry(parent, struct binder_buffer, rb_node);
-		BUG_ON(buffer->free);
-
-		if (new_buffer < buffer)
-			p = &parent->rb_left;
-		else if (new_buffer > buffer)
-			p = &parent->rb_right;
-		else
-			BUG();
-	}
-	rb_link_node(&new_buffer->rb_node, parent, p);
-	rb_insert_color(&new_buffer->rb_node, &proc->allocated_buffers);
+	return policy == SCHED_NORMAL || policy == SCHED_BATCH;
 }
 
-static struct binder_buffer *binder_buffer_lookup(struct binder_proc *proc,
-						  uintptr_t user_ptr)
+static bool binder_supported_policy(int policy)
 {
-	struct rb_node *n = proc->allocated_buffers.rb_node;
-	struct binder_buffer *buffer;
-	struct binder_buffer *kern_ptr;
-
-	kern_ptr = (struct binder_buffer *)(user_ptr - proc->user_buffer_offset
-		- offsetof(struct binder_buffer, data));
-
-	while (n) {
-		buffer = rb_entry(n, struct binder_buffer, rb_node);
-		BUG_ON(buffer->free);
-
-		if (kern_ptr < buffer)
-			n = n->rb_left;
-		else if (kern_ptr > buffer)
-			n = n->rb_right;
-		else
-			return buffer;
-	}
-	return NULL;
+	return is_fair_policy(policy) || is_rt_policy(policy);
 }
 
-static int binder_update_page_range(struct binder_proc *proc, int allocate,
-				    void *start, void *end,
-				    struct vm_area_struct *vma)
+static int to_userspace_prio(int policy, int kernel_priority)
 {
-	void *page_addr;
-	unsigned long user_page_addr;
-	struct page **page;
-	struct mm_struct *mm;
-
-	binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-		     "%d: %s pages %p-%p\n", proc->pid,
-		     allocate ? "allocate" : "free", start, end);
-
-	if (end <= start)
-		return 0;
-
-	trace_binder_update_page_range(proc, allocate, start, end);
-
-	if (vma)
-		mm = NULL;
+	if (is_fair_policy(policy))
+		return PRIO_TO_NICE(kernel_priority);
 	else
-		mm = get_task_mm(proc->tsk);
-
-	if (mm) {
-		down_write(&mm->mmap_sem);
-		vma = proc->vma;
-		if (vma && mm != proc->vma_vm_mm) {
-			pr_err("%d: vma mm and task mm mismatch\n",
-				proc->pid);
-			vma = NULL;
-		}
-	}
-
-	if (allocate == 0)
-		goto free_range;
-
-	if (vma == NULL) {
-		pr_err("%d: binder_alloc_buf failed to map pages in userspace, no vma\n",
-			proc->pid);
-		goto err_no_vma;
-	}
-
-	for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
-		int ret;
-
-		page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
-
-		BUG_ON(*page);
-		*page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
-		if (*page == NULL) {
-			pr_err("%d: binder_alloc_buf failed for page at %p\n",
-				proc->pid, page_addr);
-			goto err_alloc_page_failed;
-		}
-		ret = map_kernel_range_noflush((unsigned long)page_addr,
-					PAGE_SIZE, PAGE_KERNEL, page);
-		flush_cache_vmap((unsigned long)page_addr,
-				(unsigned long)page_addr + PAGE_SIZE);
-		if (ret != 1) {
-			pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n",
-			       proc->pid, page_addr);
-			goto err_map_kernel_failed;
-		}
-		user_page_addr =
-			(uintptr_t)page_addr + proc->user_buffer_offset;
-		ret = vm_insert_page(vma, user_page_addr, page[0]);
-		if (ret) {
-			pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n",
-			       proc->pid, user_page_addr);
-			goto err_vm_insert_page_failed;
-		}
-		/* vm_insert_page does not seem to increment the refcount */
-	}
-	if (mm) {
-		up_write(&mm->mmap_sem);
-		mmput(mm);
-	}
-	return 0;
-
-free_range:
-	for (page_addr = end - PAGE_SIZE; page_addr >= start;
-	     page_addr -= PAGE_SIZE) {
-		page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
-		if (vma)
-			zap_page_range(vma, (uintptr_t)page_addr +
-				proc->user_buffer_offset, PAGE_SIZE, NULL);
-err_vm_insert_page_failed:
-		unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
-err_map_kernel_failed:
-		__free_page(*page);
-		*page = NULL;
-err_alloc_page_failed:
-		;
-	}
-err_no_vma:
-	if (mm) {
-		up_write(&mm->mmap_sem);
-		mmput(mm);
-	}
-	return -ENOMEM;
+		return MAX_USER_RT_PRIO - 1 - kernel_priority;
 }
 
-static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc,
-					      size_t data_size,
-					      size_t offsets_size, int is_async)
+static int to_kernel_prio(int policy, int user_priority)
 {
-	struct rb_node *n = proc->free_buffers.rb_node;
-	struct binder_buffer *buffer;
-	size_t buffer_size;
-	struct rb_node *best_fit = NULL;
-	void *has_page_addr;
-	void *end_page_addr;
-	size_t size;
+	if (is_fair_policy(policy))
+		return NICE_TO_PRIO(user_priority);
+	else
+		return MAX_USER_RT_PRIO - 1 - user_priority;
+}
 
-	if (proc->vma == NULL) {
-		pr_err("%d: binder_alloc_buf, no vma\n",
-		       proc->pid);
-		return NULL;
-	}
+static void binder_do_set_priority(struct task_struct *task,
+				   struct binder_priority desired,
+				   bool verify)
+{
+	int priority; /* user-space prio value */
+	bool has_cap_nice;
+	unsigned int policy = desired.sched_policy;
 
-	size = ALIGN(data_size, sizeof(void *)) +
-		ALIGN(offsets_size, sizeof(void *));
+	if (task->policy == policy && task->normal_prio == desired.prio)
+		return;
 
-	if (size < data_size || size < offsets_size) {
-		binder_user_error("%d: got transaction with invalid size %zd-%zd\n",
-				proc->pid, data_size, offsets_size);
-		return NULL;
-	}
+	has_cap_nice = has_capability_noaudit(task, CAP_SYS_NICE);
 
-	if (is_async &&
-	    proc->free_async_space < size + sizeof(struct binder_buffer)) {
-		binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-			     "%d: binder_alloc_buf size %zd failed, no async space left\n",
-			      proc->pid, size);
-		return NULL;
-	}
+	priority = to_userspace_prio(policy, desired.prio);
 
-	while (n) {
-		buffer = rb_entry(n, struct binder_buffer, rb_node);
-		BUG_ON(!buffer->free);
-		buffer_size = binder_buffer_size(proc, buffer);
+	if (verify && is_rt_policy(policy) && !has_cap_nice) {
+		long max_rtprio = task_rlimit(task, RLIMIT_RTPRIO);
 
-		if (size < buffer_size) {
-			best_fit = n;
-			n = n->rb_left;
-		} else if (size > buffer_size)
-			n = n->rb_right;
-		else {
-			best_fit = n;
-			break;
+		if (max_rtprio == 0) {
+			policy = SCHED_NORMAL;
+			priority = MIN_NICE;
+		} else if (priority > max_rtprio) {
+			priority = max_rtprio;
 		}
 	}
-	if (best_fit == NULL) {
-		pr_err("%d: binder_alloc_buf size %zd failed, no address space\n",
-			proc->pid, size);
-		return NULL;
-	}
-	if (n == NULL) {
-		buffer = rb_entry(best_fit, struct binder_buffer, rb_node);
-		buffer_size = binder_buffer_size(proc, buffer);
-	}
 
-	binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-		     "%d: binder_alloc_buf size %zd got buffer %p size %zd\n",
-		      proc->pid, size, buffer, buffer_size);
+	if (verify && is_fair_policy(policy) && !has_cap_nice) {
+		long min_nice = rlimit_to_nice(task_rlimit(task, RLIMIT_NICE));
 
-	has_page_addr =
-		(void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK);
-	if (n == NULL) {
-		if (size + sizeof(struct binder_buffer) + 4 >= buffer_size)
-			buffer_size = size; /* no room for other buffers */
-		else
-			buffer_size = size + sizeof(struct binder_buffer);
-	}
-	end_page_addr =
-		(void *)PAGE_ALIGN((uintptr_t)buffer->data + buffer_size);
-	if (end_page_addr > has_page_addr)
-		end_page_addr = has_page_addr;
-	if (binder_update_page_range(proc, 1,
-	    (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr, NULL))
-		return NULL;
-
-	rb_erase(best_fit, &proc->free_buffers);
-	buffer->free = 0;
-	binder_insert_allocated_buffer(proc, buffer);
-	if (buffer_size != size) {
-		struct binder_buffer *new_buffer = (void *)buffer->data + size;
-
-		list_add(&new_buffer->entry, &buffer->entry);
-		new_buffer->free = 1;
-		binder_insert_free_buffer(proc, new_buffer);
-	}
-	binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-		     "%d: binder_alloc_buf size %zd got %p\n",
-		      proc->pid, size, buffer);
-	buffer->data_size = data_size;
-	buffer->offsets_size = offsets_size;
-	buffer->async_transaction = is_async;
-	if (is_async) {
-		proc->free_async_space -= size + sizeof(struct binder_buffer);
-		binder_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
-			     "%d: binder_alloc_buf size %zd async free %zd\n",
-			      proc->pid, size, proc->free_async_space);
-	}
-
-	return buffer;
-}
-
-static void *buffer_start_page(struct binder_buffer *buffer)
-{
-	return (void *)((uintptr_t)buffer & PAGE_MASK);
-}
-
-static void *buffer_end_page(struct binder_buffer *buffer)
-{
-	return (void *)(((uintptr_t)(buffer + 1) - 1) & PAGE_MASK);
-}
-
-static void binder_delete_free_buffer(struct binder_proc *proc,
-				      struct binder_buffer *buffer)
-{
-	struct binder_buffer *prev, *next = NULL;
-	int free_page_end = 1;
-	int free_page_start = 1;
-
-	BUG_ON(proc->buffers.next == &buffer->entry);
-	prev = list_entry(buffer->entry.prev, struct binder_buffer, entry);
-	BUG_ON(!prev->free);
-	if (buffer_end_page(prev) == buffer_start_page(buffer)) {
-		free_page_start = 0;
-		if (buffer_end_page(prev) == buffer_end_page(buffer))
-			free_page_end = 0;
-		binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-			     "%d: merge free, buffer %p share page with %p\n",
-			      proc->pid, buffer, prev);
-	}
-
-	if (!list_is_last(&buffer->entry, &proc->buffers)) {
-		next = list_entry(buffer->entry.next,
-				  struct binder_buffer, entry);
-		if (buffer_start_page(next) == buffer_end_page(buffer)) {
-			free_page_end = 0;
-			if (buffer_start_page(next) ==
-			    buffer_start_page(buffer))
-				free_page_start = 0;
-			binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-				     "%d: merge free, buffer %p share page with %p\n",
-				      proc->pid, buffer, prev);
+		if (min_nice > MAX_NICE) {
+			binder_user_error("%d RLIMIT_NICE not set\n",
+					  task->pid);
+			return;
+		} else if (priority < min_nice) {
+			priority = min_nice;
 		}
 	}
-	list_del(&buffer->entry);
-	if (free_page_start || free_page_end) {
-		binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-			     "%d: merge free, buffer %p do not share page%s%s with %p or %p\n",
-			     proc->pid, buffer, free_page_start ? "" : " end",
-			     free_page_end ? "" : " start", prev, next);
-		binder_update_page_range(proc, 0, free_page_start ?
-			buffer_start_page(buffer) : buffer_end_page(buffer),
-			(free_page_end ? buffer_end_page(buffer) :
-			buffer_start_page(buffer)) + PAGE_SIZE, NULL);
+
+	if (policy != desired.sched_policy ||
+	    to_kernel_prio(policy, priority) != desired.prio)
+		binder_debug(BINDER_DEBUG_PRIORITY_CAP,
+			     "%d: priority %d not allowed, using %d instead\n",
+			      task->pid, desired.prio,
+			      to_kernel_prio(policy, priority));
+
+	/* Set the actual priority */
+	if (task->policy != policy || is_rt_policy(policy)) {
+		struct sched_param params;
+
+		params.sched_priority = is_rt_policy(policy) ? priority : 0;
+
+		sched_setscheduler_nocheck(task,
+					   policy | SCHED_RESET_ON_FORK,
+					   &params);
 	}
+	if (is_fair_policy(policy))
+		set_user_nice(task, priority);
 }
 
-static void binder_free_buf(struct binder_proc *proc,
-			    struct binder_buffer *buffer)
+static void binder_set_priority(struct task_struct *task,
+				struct binder_priority desired)
 {
-	size_t size, buffer_size;
-
-	buffer_size = binder_buffer_size(proc, buffer);
-
-	size = ALIGN(buffer->data_size, sizeof(void *)) +
-		ALIGN(buffer->offsets_size, sizeof(void *));
-
-	binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-		     "%d: binder_free_buf %p size %zd buffer_size %zd\n",
-		      proc->pid, buffer, size, buffer_size);
-
-	BUG_ON(buffer->free);
-	BUG_ON(size > buffer_size);
-	BUG_ON(buffer->transaction != NULL);
-	BUG_ON((void *)buffer < proc->buffer);
-	BUG_ON((void *)buffer > proc->buffer + proc->buffer_size);
-
-	if (buffer->async_transaction) {
-		proc->free_async_space += size + sizeof(struct binder_buffer);
-
-		binder_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
-			     "%d: binder_free_buf size %zd async free %zd\n",
-			      proc->pid, size, proc->free_async_space);
-	}
-
-	binder_update_page_range(proc, 0,
-		(void *)PAGE_ALIGN((uintptr_t)buffer->data),
-		(void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK),
-		NULL);
-	rb_erase(&buffer->rb_node, &proc->allocated_buffers);
-	buffer->free = 1;
-	if (!list_is_last(&buffer->entry, &proc->buffers)) {
-		struct binder_buffer *next = list_entry(buffer->entry.next,
-						struct binder_buffer, entry);
-
-		if (next->free) {
-			rb_erase(&next->rb_node, &proc->free_buffers);
-			binder_delete_free_buffer(proc, next);
-		}
-	}
-	if (proc->buffers.next != &buffer->entry) {
-		struct binder_buffer *prev = list_entry(buffer->entry.prev,
-						struct binder_buffer, entry);
-
-		if (prev->free) {
-			binder_delete_free_buffer(proc, buffer);
-			rb_erase(&prev->rb_node, &proc->free_buffers);
-			buffer = prev;
-		}
-	}
-	binder_insert_free_buffer(proc, buffer);
+	binder_do_set_priority(task, desired, /* verify = */ true);
 }
 
-static struct binder_node *binder_get_node(struct binder_proc *proc,
-					   binder_uintptr_t ptr)
+static void binder_restore_priority(struct task_struct *task,
+				    struct binder_priority desired)
+{
+	binder_do_set_priority(task, desired, /* verify = */ false);
+}
+
+static void binder_transaction_priority(struct task_struct *task,
+					struct binder_transaction *t,
+					struct binder_priority node_prio,
+					bool inherit_rt)
+{
+	struct binder_priority desired_prio;
+
+	if (t->set_priority_called)
+		return;
+
+	t->set_priority_called = true;
+	t->saved_priority.sched_policy = task->policy;
+	t->saved_priority.prio = task->normal_prio;
+
+	if (!inherit_rt && is_rt_policy(desired_prio.sched_policy)) {
+		desired_prio.prio = NICE_TO_PRIO(0);
+		desired_prio.sched_policy = SCHED_NORMAL;
+	} else {
+		desired_prio.prio = t->priority.prio;
+		desired_prio.sched_policy = t->priority.sched_policy;
+	}
+
+	if (node_prio.prio < t->priority.prio ||
+	    (node_prio.prio == t->priority.prio &&
+	     node_prio.sched_policy == SCHED_FIFO)) {
+		/*
+		 * In case the minimum priority on the node is
+		 * higher (lower value), use that priority. If
+		 * the priority is the same, but the node uses
+		 * SCHED_FIFO, prefer SCHED_FIFO, since it can
+		 * run unbounded, unlike SCHED_RR.
+		 */
+		desired_prio = node_prio;
+	}
+
+	binder_set_priority(task, desired_prio);
+}
+
+static struct binder_node *binder_get_node_ilocked(struct binder_proc *proc,
+						   binder_uintptr_t ptr)
 {
 	struct rb_node *n = proc->nodes.rb_node;
 	struct binder_node *node;
 
+	assert_spin_locked(&proc->inner_lock);
+
 	while (n) {
 		node = rb_entry(n, struct binder_node, rb_node);
 
@@ -888,21 +1247,47 @@
 			n = n->rb_left;
 		else if (ptr > node->ptr)
 			n = n->rb_right;
-		else
+		else {
+			/*
+			 * take an implicit weak reference
+			 * to ensure node stays alive until
+			 * call to binder_put_node()
+			 */
+			binder_inc_node_tmpref_ilocked(node);
 			return node;
+		}
 	}
 	return NULL;
 }
 
-static struct binder_node *binder_new_node(struct binder_proc *proc,
-					   binder_uintptr_t ptr,
-					   binder_uintptr_t cookie)
+static struct binder_node *binder_get_node(struct binder_proc *proc,
+					   binder_uintptr_t ptr)
+{
+	struct binder_node *node;
+
+	binder_inner_proc_lock(proc);
+	node = binder_get_node_ilocked(proc, ptr);
+	binder_inner_proc_unlock(proc);
+	return node;
+}
+
+static struct binder_node *binder_init_node_ilocked(
+						struct binder_proc *proc,
+						struct binder_node *new_node,
+						struct flat_binder_object *fp)
 {
 	struct rb_node **p = &proc->nodes.rb_node;
 	struct rb_node *parent = NULL;
 	struct binder_node *node;
+	binder_uintptr_t ptr = fp ? fp->binder : 0;
+	binder_uintptr_t cookie = fp ? fp->cookie : 0;
+	__u32 flags = fp ? fp->flags : 0;
+	s8 priority;
+
+	assert_spin_locked(&proc->inner_lock);
 
 	while (*p) {
+
 		parent = *p;
 		node = rb_entry(parent, struct binder_node, rb_node);
 
@@ -910,39 +1295,87 @@
 			p = &(*p)->rb_left;
 		else if (ptr > node->ptr)
 			p = &(*p)->rb_right;
-		else
-			return NULL;
+		else {
+			/*
+			 * A matching node is already in
+			 * the rb tree. Abandon the init
+			 * and return it.
+			 */
+			binder_inc_node_tmpref_ilocked(node);
+			return node;
+		}
 	}
-
-	node = kzalloc(sizeof(*node), GFP_KERNEL);
-	if (node == NULL)
-		return NULL;
+	node = new_node;
 	binder_stats_created(BINDER_STAT_NODE);
+	node->tmp_refs++;
 	rb_link_node(&node->rb_node, parent, p);
 	rb_insert_color(&node->rb_node, &proc->nodes);
-	node->debug_id = ++binder_last_id;
+	node->debug_id = atomic_inc_return(&binder_last_id);
 	node->proc = proc;
 	node->ptr = ptr;
 	node->cookie = cookie;
 	node->work.type = BINDER_WORK_NODE;
+	priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK;
+	node->sched_policy = (flags & FLAT_BINDER_FLAG_PRIORITY_MASK) >>
+		FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT;
+	node->min_priority = to_kernel_prio(node->sched_policy, priority);
+	node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS);
+	node->inherit_rt = !!(flags & FLAT_BINDER_FLAG_INHERIT_RT);
+	node->txn_security_ctx = !!(flags & FLAT_BINDER_FLAG_TXN_SECURITY_CTX);
+	spin_lock_init(&node->lock);
 	INIT_LIST_HEAD(&node->work.entry);
 	INIT_LIST_HEAD(&node->async_todo);
 	binder_debug(BINDER_DEBUG_INTERNAL_REFS,
 		     "%d:%d node %d u%016llx c%016llx created\n",
 		     proc->pid, current->pid, node->debug_id,
 		     (u64)node->ptr, (u64)node->cookie);
+
 	return node;
 }
 
-static int binder_inc_node(struct binder_node *node, int strong, int internal,
-			   struct list_head *target_list)
+static struct binder_node *binder_new_node(struct binder_proc *proc,
+					   struct flat_binder_object *fp)
 {
+	struct binder_node *node;
+	struct binder_node *new_node = kzalloc(sizeof(*node), GFP_KERNEL);
+
+	if (!new_node)
+		return NULL;
+	binder_inner_proc_lock(proc);
+	node = binder_init_node_ilocked(proc, new_node, fp);
+	binder_inner_proc_unlock(proc);
+	if (node != new_node)
+		/*
+		 * The node was already added by another thread
+		 */
+		kfree(new_node);
+
+	return node;
+}
+
+static void binder_free_node(struct binder_node *node)
+{
+	kfree(node);
+	binder_stats_deleted(BINDER_STAT_NODE);
+}
+
+static int binder_inc_node_nilocked(struct binder_node *node, int strong,
+				    int internal,
+				    struct list_head *target_list)
+{
+	struct binder_proc *proc = node->proc;
+
+	assert_spin_locked(&node->lock);
+	if (proc)
+		assert_spin_locked(&proc->inner_lock);
 	if (strong) {
 		if (internal) {
 			if (target_list == NULL &&
 			    node->internal_strong_refs == 0 &&
-			    !(node == binder_context_mgr_node &&
-			    node->has_strong_ref)) {
+			    !(node->proc &&
+			      node == node->proc->context->
+				      binder_context_mgr_node &&
+			      node->has_strong_ref)) {
 				pr_err("invalid inc strong node for %d\n",
 					node->debug_id);
 				return -EINVAL;
@@ -951,8 +1384,8 @@
 		} else
 			node->local_strong_refs++;
 		if (!node->has_strong_ref && target_list) {
-			list_del_init(&node->work.entry);
-			list_add_tail(&node->work.entry, target_list);
+			binder_dequeue_work_ilocked(&node->work);
+			binder_enqueue_work_ilocked(&node->work, target_list);
 		}
 	} else {
 		if (!internal)
@@ -963,58 +1396,169 @@
 					node->debug_id);
 				return -EINVAL;
 			}
-			list_add_tail(&node->work.entry, target_list);
+			binder_enqueue_work_ilocked(&node->work, target_list);
 		}
 	}
 	return 0;
 }
 
-static int binder_dec_node(struct binder_node *node, int strong, int internal)
+static int binder_inc_node(struct binder_node *node, int strong, int internal,
+			   struct list_head *target_list)
 {
+	int ret;
+
+	binder_node_inner_lock(node);
+	ret = binder_inc_node_nilocked(node, strong, internal, target_list);
+	binder_node_inner_unlock(node);
+
+	return ret;
+}
+
+static bool binder_dec_node_nilocked(struct binder_node *node,
+				     int strong, int internal)
+{
+	struct binder_proc *proc = node->proc;
+
+	assert_spin_locked(&node->lock);
+	if (proc)
+		assert_spin_locked(&proc->inner_lock);
 	if (strong) {
 		if (internal)
 			node->internal_strong_refs--;
 		else
 			node->local_strong_refs--;
 		if (node->local_strong_refs || node->internal_strong_refs)
-			return 0;
+			return false;
 	} else {
 		if (!internal)
 			node->local_weak_refs--;
-		if (node->local_weak_refs || !hlist_empty(&node->refs))
-			return 0;
+		if (node->local_weak_refs || node->tmp_refs ||
+				!hlist_empty(&node->refs))
+			return false;
 	}
-	if (node->proc && (node->has_strong_ref || node->has_weak_ref)) {
+
+	if (proc && (node->has_strong_ref || node->has_weak_ref)) {
 		if (list_empty(&node->work.entry)) {
-			list_add_tail(&node->work.entry, &node->proc->todo);
-			wake_up_interruptible(&node->proc->wait);
+			binder_enqueue_work_ilocked(&node->work, &proc->todo);
+			binder_wakeup_proc_ilocked(proc);
 		}
 	} else {
 		if (hlist_empty(&node->refs) && !node->local_strong_refs &&
-		    !node->local_weak_refs) {
-			list_del_init(&node->work.entry);
-			if (node->proc) {
-				rb_erase(&node->rb_node, &node->proc->nodes);
+		    !node->local_weak_refs && !node->tmp_refs) {
+			if (proc) {
+				binder_dequeue_work_ilocked(&node->work);
+				rb_erase(&node->rb_node, &proc->nodes);
 				binder_debug(BINDER_DEBUG_INTERNAL_REFS,
 					     "refless node %d deleted\n",
 					     node->debug_id);
 			} else {
+				BUG_ON(!list_empty(&node->work.entry));
+				spin_lock(&binder_dead_nodes_lock);
+				/*
+				 * tmp_refs could have changed so
+				 * check it again
+				 */
+				if (node->tmp_refs) {
+					spin_unlock(&binder_dead_nodes_lock);
+					return false;
+				}
 				hlist_del(&node->dead_node);
+				spin_unlock(&binder_dead_nodes_lock);
 				binder_debug(BINDER_DEBUG_INTERNAL_REFS,
 					     "dead node %d deleted\n",
 					     node->debug_id);
 			}
-			kfree(node);
-			binder_stats_deleted(BINDER_STAT_NODE);
+			return true;
 		}
 	}
-
-	return 0;
+	return false;
 }
 
+static void binder_dec_node(struct binder_node *node, int strong, int internal)
+{
+	bool free_node;
 
-static struct binder_ref *binder_get_ref(struct binder_proc *proc,
-					 u32 desc, bool need_strong_ref)
+	binder_node_inner_lock(node);
+	free_node = binder_dec_node_nilocked(node, strong, internal);
+	binder_node_inner_unlock(node);
+	if (free_node)
+		binder_free_node(node);
+}
+
+static void binder_inc_node_tmpref_ilocked(struct binder_node *node)
+{
+	/*
+	 * No call to binder_inc_node() is needed since we
+	 * don't need to inform userspace of any changes to
+	 * tmp_refs
+	 */
+	node->tmp_refs++;
+}
+
+/**
+ * binder_inc_node_tmpref() - take a temporary reference on node
+ * @node:	node to reference
+ *
+ * Take reference on node to prevent the node from being freed
+ * while referenced only by a local variable. The inner lock is
+ * needed to serialize with the node work on the queue (which
+ * isn't needed after the node is dead). If the node is dead
+ * (node->proc is NULL), use binder_dead_nodes_lock to protect
+ * node->tmp_refs against dead-node-only cases where the node
+ * lock cannot be acquired (eg traversing the dead node list to
+ * print nodes)
+ */
+static void binder_inc_node_tmpref(struct binder_node *node)
+{
+	binder_node_lock(node);
+	if (node->proc)
+		binder_inner_proc_lock(node->proc);
+	else
+		spin_lock(&binder_dead_nodes_lock);
+	binder_inc_node_tmpref_ilocked(node);
+	if (node->proc)
+		binder_inner_proc_unlock(node->proc);
+	else
+		spin_unlock(&binder_dead_nodes_lock);
+	binder_node_unlock(node);
+}
+
+/**
+ * binder_dec_node_tmpref() - remove a temporary reference on node
+ * @node:	node to reference
+ *
+ * Release temporary reference on node taken via binder_inc_node_tmpref()
+ */
+static void binder_dec_node_tmpref(struct binder_node *node)
+{
+	bool free_node;
+
+	binder_node_inner_lock(node);
+	if (!node->proc)
+		spin_lock(&binder_dead_nodes_lock);
+	node->tmp_refs--;
+	BUG_ON(node->tmp_refs < 0);
+	if (!node->proc)
+		spin_unlock(&binder_dead_nodes_lock);
+	/*
+	 * Call binder_dec_node() to check if all refcounts are 0
+	 * and cleanup is needed. Calling with strong=0 and internal=1
+	 * causes no actual reference to be released in binder_dec_node().
+	 * If that changes, a change is needed here too.
+	 */
+	free_node = binder_dec_node_nilocked(node, 0, 1);
+	binder_node_inner_unlock(node);
+	if (free_node)
+		binder_free_node(node);
+}
+
+static void binder_put_node(struct binder_node *node)
+{
+	binder_dec_node_tmpref(node);
+}
+
+static struct binder_ref *binder_get_ref_olocked(struct binder_proc *proc,
+						 u32 desc, bool need_strong_ref)
 {
 	struct rb_node *n = proc->refs_by_desc.rb_node;
 	struct binder_ref *ref;
@@ -1022,11 +1566,11 @@
 	while (n) {
 		ref = rb_entry(n, struct binder_ref, rb_node_desc);
 
-		if (desc < ref->desc) {
+		if (desc < ref->data.desc) {
 			n = n->rb_left;
-		} else if (desc > ref->desc) {
+		} else if (desc > ref->data.desc) {
 			n = n->rb_right;
-		} else if (need_strong_ref && !ref->strong) {
+		} else if (need_strong_ref && !ref->data.strong) {
 			binder_user_error("tried to use weak ref as strong ref\n");
 			return NULL;
 		} else {
@@ -1036,13 +1580,34 @@
 	return NULL;
 }
 
-static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc,
-						  struct binder_node *node)
+/**
+ * binder_get_ref_for_node_olocked() - get the ref associated with given node
+ * @proc:	binder_proc that owns the ref
+ * @node:	binder_node of target
+ * @new_ref:	newly allocated binder_ref to be initialized or %NULL
+ *
+ * Look up the ref for the given node and return it if it exists
+ *
+ * If it doesn't exist and the caller provides a newly allocated
+ * ref, initialize the fields of the newly allocated ref and insert
+ * into the given proc rb_trees and node refs list.
+ *
+ * Return:	the ref for node. It is possible that another thread
+ *		allocated/initialized the ref first in which case the
+ *		returned ref would be different than the passed-in
+ *		new_ref. new_ref must be kfree'd by the caller in
+ *		this case.
+ */
+static struct binder_ref *binder_get_ref_for_node_olocked(
+					struct binder_proc *proc,
+					struct binder_node *node,
+					struct binder_ref *new_ref)
 {
-	struct rb_node *n;
+	struct binder_context *context = proc->context;
 	struct rb_node **p = &proc->refs_by_node.rb_node;
 	struct rb_node *parent = NULL;
-	struct binder_ref *ref, *new_ref;
+	struct binder_ref *ref;
+	struct rb_node *n;
 
 	while (*p) {
 		parent = *p;
@@ -1055,22 +1620,22 @@
 		else
 			return ref;
 	}
-	new_ref = kzalloc(sizeof(*ref), GFP_KERNEL);
-	if (new_ref == NULL)
+	if (!new_ref)
 		return NULL;
+
 	binder_stats_created(BINDER_STAT_REF);
-	new_ref->debug_id = ++binder_last_id;
+	new_ref->data.debug_id = atomic_inc_return(&binder_last_id);
 	new_ref->proc = proc;
 	new_ref->node = node;
 	rb_link_node(&new_ref->rb_node_node, parent, p);
 	rb_insert_color(&new_ref->rb_node_node, &proc->refs_by_node);
 
-	new_ref->desc = (node == binder_context_mgr_node) ? 0 : 1;
+	new_ref->data.desc = (node == context->binder_context_mgr_node) ? 0 : 1;
 	for (n = rb_first(&proc->refs_by_desc); n != NULL; n = rb_next(n)) {
 		ref = rb_entry(n, struct binder_ref, rb_node_desc);
-		if (ref->desc > new_ref->desc)
+		if (ref->data.desc > new_ref->data.desc)
 			break;
-		new_ref->desc = ref->desc + 1;
+		new_ref->data.desc = ref->data.desc + 1;
 	}
 
 	p = &proc->refs_by_desc.rb_node;
@@ -1078,121 +1643,423 @@
 		parent = *p;
 		ref = rb_entry(parent, struct binder_ref, rb_node_desc);
 
-		if (new_ref->desc < ref->desc)
+		if (new_ref->data.desc < ref->data.desc)
 			p = &(*p)->rb_left;
-		else if (new_ref->desc > ref->desc)
+		else if (new_ref->data.desc > ref->data.desc)
 			p = &(*p)->rb_right;
 		else
 			BUG();
 	}
 	rb_link_node(&new_ref->rb_node_desc, parent, p);
 	rb_insert_color(&new_ref->rb_node_desc, &proc->refs_by_desc);
-	if (node) {
-		hlist_add_head(&new_ref->node_entry, &node->refs);
 
-		binder_debug(BINDER_DEBUG_INTERNAL_REFS,
-			     "%d new ref %d desc %d for node %d\n",
-			      proc->pid, new_ref->debug_id, new_ref->desc,
-			      node->debug_id);
-	} else {
-		binder_debug(BINDER_DEBUG_INTERNAL_REFS,
-			     "%d new ref %d desc %d for dead node\n",
-			      proc->pid, new_ref->debug_id, new_ref->desc);
-	}
+	binder_node_lock(node);
+	hlist_add_head(&new_ref->node_entry, &node->refs);
+
+	binder_debug(BINDER_DEBUG_INTERNAL_REFS,
+		     "%d new ref %d desc %d for node %d\n",
+		      proc->pid, new_ref->data.debug_id, new_ref->data.desc,
+		      node->debug_id);
+	binder_node_unlock(node);
 	return new_ref;
 }
 
-static void binder_delete_ref(struct binder_ref *ref)
+static void binder_cleanup_ref_olocked(struct binder_ref *ref)
 {
+	bool delete_node = false;
+
 	binder_debug(BINDER_DEBUG_INTERNAL_REFS,
 		     "%d delete ref %d desc %d for node %d\n",
-		      ref->proc->pid, ref->debug_id, ref->desc,
+		      ref->proc->pid, ref->data.debug_id, ref->data.desc,
 		      ref->node->debug_id);
 
 	rb_erase(&ref->rb_node_desc, &ref->proc->refs_by_desc);
 	rb_erase(&ref->rb_node_node, &ref->proc->refs_by_node);
-	if (ref->strong)
-		binder_dec_node(ref->node, 1, 1);
+
+	binder_node_inner_lock(ref->node);
+	if (ref->data.strong)
+		binder_dec_node_nilocked(ref->node, 1, 1);
+
 	hlist_del(&ref->node_entry);
-	binder_dec_node(ref->node, 0, 1);
+	delete_node = binder_dec_node_nilocked(ref->node, 0, 1);
+	binder_node_inner_unlock(ref->node);
+	/*
+	 * Clear ref->node unless we want the caller to free the node
+	 */
+	if (!delete_node) {
+		/*
+		 * The caller uses ref->node to determine
+		 * whether the node needs to be freed. Clear
+		 * it since the node is still alive.
+		 */
+		ref->node = NULL;
+	}
+
 	if (ref->death) {
 		binder_debug(BINDER_DEBUG_DEAD_BINDER,
 			     "%d delete ref %d desc %d has death notification\n",
-			      ref->proc->pid, ref->debug_id, ref->desc);
-		list_del(&ref->death->work.entry);
-		kfree(ref->death);
+			      ref->proc->pid, ref->data.debug_id,
+			      ref->data.desc);
+		binder_dequeue_work(ref->proc, &ref->death->work);
 		binder_stats_deleted(BINDER_STAT_DEATH);
 	}
-	kfree(ref);
 	binder_stats_deleted(BINDER_STAT_REF);
 }
 
-static int binder_inc_ref(struct binder_ref *ref, int strong,
-			  struct list_head *target_list)
+/**
+ * binder_inc_ref_olocked() - increment the ref for given handle
+ * @ref:         ref to be incremented
+ * @strong:      if true, strong increment, else weak
+ * @target_list: list to queue node work on
+ *
+ * Increment the ref. @ref->proc->outer_lock must be held on entry
+ *
+ * Return: 0, if successful, else errno
+ */
+static int binder_inc_ref_olocked(struct binder_ref *ref, int strong,
+				  struct list_head *target_list)
 {
 	int ret;
 
 	if (strong) {
-		if (ref->strong == 0) {
+		if (ref->data.strong == 0) {
 			ret = binder_inc_node(ref->node, 1, 1, target_list);
 			if (ret)
 				return ret;
 		}
-		ref->strong++;
+		ref->data.strong++;
 	} else {
-		if (ref->weak == 0) {
+		if (ref->data.weak == 0) {
 			ret = binder_inc_node(ref->node, 0, 1, target_list);
 			if (ret)
 				return ret;
 		}
-		ref->weak++;
+		ref->data.weak++;
 	}
 	return 0;
 }
 
-
-static int binder_dec_ref(struct binder_ref *ref, int strong)
+/**
+ * binder_dec_ref() - dec the ref for given handle
+ * @ref:	ref to be decremented
+ * @strong:	if true, strong decrement, else weak
+ *
+ * Decrement the ref.
+ *
+ * Return: true if ref is cleaned up and ready to be freed
+ */
+static bool binder_dec_ref_olocked(struct binder_ref *ref, int strong)
 {
 	if (strong) {
-		if (ref->strong == 0) {
+		if (ref->data.strong == 0) {
 			binder_user_error("%d invalid dec strong, ref %d desc %d s %d w %d\n",
-					  ref->proc->pid, ref->debug_id,
-					  ref->desc, ref->strong, ref->weak);
-			return -EINVAL;
+					  ref->proc->pid, ref->data.debug_id,
+					  ref->data.desc, ref->data.strong,
+					  ref->data.weak);
+			return false;
 		}
-		ref->strong--;
-		if (ref->strong == 0) {
-			int ret;
-
-			ret = binder_dec_node(ref->node, strong, 1);
-			if (ret)
-				return ret;
-		}
+		ref->data.strong--;
+		if (ref->data.strong == 0)
+			binder_dec_node(ref->node, strong, 1);
 	} else {
-		if (ref->weak == 0) {
+		if (ref->data.weak == 0) {
 			binder_user_error("%d invalid dec weak, ref %d desc %d s %d w %d\n",
-					  ref->proc->pid, ref->debug_id,
-					  ref->desc, ref->strong, ref->weak);
-			return -EINVAL;
+					  ref->proc->pid, ref->data.debug_id,
+					  ref->data.desc, ref->data.strong,
+					  ref->data.weak);
+			return false;
 		}
-		ref->weak--;
+		ref->data.weak--;
 	}
-	if (ref->strong == 0 && ref->weak == 0)
-		binder_delete_ref(ref);
-	return 0;
+	if (ref->data.strong == 0 && ref->data.weak == 0) {
+		binder_cleanup_ref_olocked(ref);
+		return true;
+	}
+	return false;
 }
 
-static void binder_pop_transaction(struct binder_thread *target_thread,
-				   struct binder_transaction *t)
+/**
+ * binder_get_node_from_ref() - get the node from the given proc/desc
+ * @proc:	proc containing the ref
+ * @desc:	the handle associated with the ref
+ * @need_strong_ref: if true, only return node if ref is strong
+ * @rdata:	the id/refcount data for the ref
+ *
+ * Given a proc and ref handle, return the associated binder_node
+ *
+ * Return: a binder_node or NULL if not found or not strong when strong required
+ */
+static struct binder_node *binder_get_node_from_ref(
+		struct binder_proc *proc,
+		u32 desc, bool need_strong_ref,
+		struct binder_ref_data *rdata)
 {
-	if (target_thread) {
-		BUG_ON(target_thread->transaction_stack != t);
-		BUG_ON(target_thread->transaction_stack->from != target_thread);
-		target_thread->transaction_stack =
-			target_thread->transaction_stack->from_parent;
-		t->from = NULL;
+	struct binder_node *node;
+	struct binder_ref *ref;
+
+	binder_proc_lock(proc);
+	ref = binder_get_ref_olocked(proc, desc, need_strong_ref);
+	if (!ref)
+		goto err_no_ref;
+	node = ref->node;
+	/*
+	 * Take an implicit reference on the node to ensure
+	 * it stays alive until the call to binder_put_node()
+	 */
+	binder_inc_node_tmpref(node);
+	if (rdata)
+		*rdata = ref->data;
+	binder_proc_unlock(proc);
+
+	return node;
+
+err_no_ref:
+	binder_proc_unlock(proc);
+	return NULL;
+}
+
+/**
+ * binder_free_ref() - free the binder_ref
+ * @ref:	ref to free
+ *
+ * Free the binder_ref. Free the binder_node indicated by ref->node
+ * (if non-NULL) and the binder_ref_death indicated by ref->death.
+ */
+static void binder_free_ref(struct binder_ref *ref)
+{
+	if (ref->node)
+		binder_free_node(ref->node);
+	kfree(ref->death);
+	kfree(ref);
+}
+
+/**
+ * binder_update_ref_for_handle() - inc/dec the ref for given handle
+ * @proc:	proc containing the ref
+ * @desc:	the handle associated with the ref
+ * @increment:	true=inc reference, false=dec reference
+ * @strong:	true=strong reference, false=weak reference
+ * @rdata:	the id/refcount data for the ref
+ *
+ * Given a proc and ref handle, increment or decrement the ref
+ * according to "increment" arg.
+ *
+ * Return: 0 if successful, else errno
+ */
+static int binder_update_ref_for_handle(struct binder_proc *proc,
+		uint32_t desc, bool increment, bool strong,
+		struct binder_ref_data *rdata)
+{
+	int ret = 0;
+	struct binder_ref *ref;
+	bool delete_ref = false;
+
+	binder_proc_lock(proc);
+	ref = binder_get_ref_olocked(proc, desc, strong);
+	if (!ref) {
+		ret = -EINVAL;
+		goto err_no_ref;
 	}
-	t->need_reply = 0;
+	if (increment)
+		ret = binder_inc_ref_olocked(ref, strong, NULL);
+	else
+		delete_ref = binder_dec_ref_olocked(ref, strong);
+
+	if (rdata)
+		*rdata = ref->data;
+	binder_proc_unlock(proc);
+
+	if (delete_ref)
+		binder_free_ref(ref);
+	return ret;
+
+err_no_ref:
+	binder_proc_unlock(proc);
+	return ret;
+}
+
+/**
+ * binder_dec_ref_for_handle() - dec the ref for given handle
+ * @proc:	proc containing the ref
+ * @desc:	the handle associated with the ref
+ * @strong:	true=strong reference, false=weak reference
+ * @rdata:	the id/refcount data for the ref
+ *
+ * Just calls binder_update_ref_for_handle() to decrement the ref.
+ *
+ * Return: 0 if successful, else errno
+ */
+static int binder_dec_ref_for_handle(struct binder_proc *proc,
+		uint32_t desc, bool strong, struct binder_ref_data *rdata)
+{
+	return binder_update_ref_for_handle(proc, desc, false, strong, rdata);
+}
+
+
+/**
+ * binder_inc_ref_for_node() - increment the ref for given proc/node
+ * @proc:	 proc containing the ref
+ * @node:	 target node
+ * @strong:	 true=strong reference, false=weak reference
+ * @target_list: worklist to use if node is incremented
+ * @rdata:	 the id/refcount data for the ref
+ *
+ * Given a proc and node, increment the ref. Create the ref if it
+ * doesn't already exist
+ *
+ * Return: 0 if successful, else errno
+ */
+static int binder_inc_ref_for_node(struct binder_proc *proc,
+			struct binder_node *node,
+			bool strong,
+			struct list_head *target_list,
+			struct binder_ref_data *rdata)
+{
+	struct binder_ref *ref;
+	struct binder_ref *new_ref = NULL;
+	int ret = 0;
+
+	binder_proc_lock(proc);
+	ref = binder_get_ref_for_node_olocked(proc, node, NULL);
+	if (!ref) {
+		binder_proc_unlock(proc);
+		new_ref = kzalloc(sizeof(*ref), GFP_KERNEL);
+		if (!new_ref)
+			return -ENOMEM;
+		binder_proc_lock(proc);
+		ref = binder_get_ref_for_node_olocked(proc, node, new_ref);
+	}
+	ret = binder_inc_ref_olocked(ref, strong, target_list);
+	*rdata = ref->data;
+	binder_proc_unlock(proc);
+	if (new_ref && ref != new_ref)
+		/*
+		 * Another thread created the ref first so
+		 * free the one we allocated
+		 */
+		kfree(new_ref);
+	return ret;
+}
+
+static void binder_pop_transaction_ilocked(struct binder_thread *target_thread,
+					   struct binder_transaction *t)
+{
+	BUG_ON(!target_thread);
+	assert_spin_locked(&target_thread->proc->inner_lock);
+	BUG_ON(target_thread->transaction_stack != t);
+	BUG_ON(target_thread->transaction_stack->from != target_thread);
+	target_thread->transaction_stack =
+		target_thread->transaction_stack->from_parent;
+	t->from = NULL;
+}
+
+/**
+ * binder_thread_dec_tmpref() - decrement thread->tmp_ref
+ * @thread:	thread to decrement
+ *
+ * A thread needs to be kept alive while being used to create or
+ * handle a transaction. binder_get_txn_from() is used to safely
+ * extract t->from from a binder_transaction and keep the thread
+ * indicated by t->from from being freed. When done with that
+ * binder_thread, this function is called to decrement the
+ * tmp_ref and free if appropriate (thread has been released
+ * and no transaction being processed by the driver)
+ */
+static void binder_thread_dec_tmpref(struct binder_thread *thread)
+{
+	/*
+	 * atomic is used to protect the counter value while
+	 * it cannot reach zero or thread->is_dead is false
+	 */
+	binder_inner_proc_lock(thread->proc);
+	atomic_dec(&thread->tmp_ref);
+	if (thread->is_dead && !atomic_read(&thread->tmp_ref)) {
+		binder_inner_proc_unlock(thread->proc);
+		binder_free_thread(thread);
+		return;
+	}
+	binder_inner_proc_unlock(thread->proc);
+}
+
+/**
+ * binder_proc_dec_tmpref() - decrement proc->tmp_ref
+ * @proc:	proc to decrement
+ *
+ * A binder_proc needs to be kept alive while being used to create or
+ * handle a transaction. proc->tmp_ref is incremented when
+ * creating a new transaction or the binder_proc is currently in-use
+ * by threads that are being released. When done with the binder_proc,
+ * this function is called to decrement the counter and free the
+ * proc if appropriate (proc has been released, all threads have
+ * been released and not currenly in-use to process a transaction).
+ */
+static void binder_proc_dec_tmpref(struct binder_proc *proc)
+{
+	binder_inner_proc_lock(proc);
+	proc->tmp_ref--;
+	if (proc->is_dead && RB_EMPTY_ROOT(&proc->threads) &&
+			!proc->tmp_ref) {
+		binder_inner_proc_unlock(proc);
+		binder_free_proc(proc);
+		return;
+	}
+	binder_inner_proc_unlock(proc);
+}
+
+/**
+ * binder_get_txn_from() - safely extract the "from" thread in transaction
+ * @t:	binder transaction for t->from
+ *
+ * Atomically return the "from" thread and increment the tmp_ref
+ * count for the thread to ensure it stays alive until
+ * binder_thread_dec_tmpref() is called.
+ *
+ * Return: the value of t->from
+ */
+static struct binder_thread *binder_get_txn_from(
+		struct binder_transaction *t)
+{
+	struct binder_thread *from;
+
+	spin_lock(&t->lock);
+	from = t->from;
+	if (from)
+		atomic_inc(&from->tmp_ref);
+	spin_unlock(&t->lock);
+	return from;
+}
+
+/**
+ * binder_get_txn_from_and_acq_inner() - get t->from and acquire inner lock
+ * @t:	binder transaction for t->from
+ *
+ * Same as binder_get_txn_from() except it also acquires the proc->inner_lock
+ * to guarantee that the thread cannot be released while operating on it.
+ * The caller must call binder_inner_proc_unlock() to release the inner lock
+ * as well as call binder_dec_thread_txn() to release the reference.
+ *
+ * Return: the value of t->from
+ */
+static struct binder_thread *binder_get_txn_from_and_acq_inner(
+		struct binder_transaction *t)
+{
+	struct binder_thread *from;
+
+	from = binder_get_txn_from(t);
+	if (!from)
+		return NULL;
+	binder_inner_proc_lock(from->proc);
+	if (t->from) {
+		BUG_ON(from != t->from);
+		return from;
+	}
+	binder_inner_proc_unlock(from->proc);
+	binder_thread_dec_tmpref(from);
+	return NULL;
+}
+
+static void binder_free_transaction(struct binder_transaction *t)
+{
 	if (t->buffer)
 		t->buffer->transaction = NULL;
 	kfree(t);
@@ -1207,30 +2074,28 @@
 
 	BUG_ON(t->flags & TF_ONE_WAY);
 	while (1) {
-		target_thread = t->from;
+		target_thread = binder_get_txn_from_and_acq_inner(t);
 		if (target_thread) {
-			if (target_thread->return_error != BR_OK &&
-			   target_thread->return_error2 == BR_OK) {
-				target_thread->return_error2 =
-					target_thread->return_error;
-				target_thread->return_error = BR_OK;
-			}
-			if (target_thread->return_error == BR_OK) {
-				binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
-					     "send failed reply for transaction %d to %d:%d\n",
-					      t->debug_id,
-					      target_thread->proc->pid,
-					      target_thread->pid);
+			binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
+				     "send failed reply for transaction %d to %d:%d\n",
+				      t->debug_id,
+				      target_thread->proc->pid,
+				      target_thread->pid);
 
-				binder_pop_transaction(target_thread, t);
-				target_thread->return_error = error_code;
+			binder_pop_transaction_ilocked(target_thread, t);
+			if (target_thread->reply_error.cmd == BR_OK) {
+				target_thread->reply_error.cmd = error_code;
+				binder_enqueue_work_ilocked(
+					&target_thread->reply_error.work,
+					&target_thread->todo);
 				wake_up_interruptible(&target_thread->wait);
 			} else {
-				pr_err("reply failed, target thread, %d:%d, has error code %d already\n",
-					target_thread->proc->pid,
-					target_thread->pid,
-					target_thread->return_error);
+				WARN(1, "Unexpected reply error: %u\n",
+						target_thread->reply_error.cmd);
 			}
+			binder_inner_proc_unlock(target_thread->proc);
+			binder_thread_dec_tmpref(target_thread);
+			binder_free_transaction(t);
 			return;
 		}
 		next = t->from_parent;
@@ -1239,7 +2104,7 @@
 			     "send failed reply for transaction %d, target dead\n",
 			     t->debug_id);
 
-		binder_pop_transaction(target_thread, t);
+		binder_free_transaction(t);
 		if (next == NULL) {
 			binder_debug(BINDER_DEBUG_DEAD_BINDER,
 				     "reply failed, no target thread at root\n");
@@ -1252,11 +2117,158 @@
 	}
 }
 
+/**
+ * binder_validate_object() - checks for a valid metadata object in a buffer.
+ * @buffer:	binder_buffer that we're parsing.
+ * @offset:	offset in the buffer at which to validate an object.
+ *
+ * Return:	If there's a valid metadata object at @offset in @buffer, the
+ *		size of that object. Otherwise, it returns zero.
+ */
+static size_t binder_validate_object(struct binder_buffer *buffer, u64 offset)
+{
+	/* Check if we can read a header first */
+	struct binder_object_header *hdr;
+	size_t object_size = 0;
+
+	if (offset > buffer->data_size - sizeof(*hdr) ||
+	    buffer->data_size < sizeof(*hdr) ||
+	    !IS_ALIGNED(offset, sizeof(u32)))
+		return 0;
+
+	/* Ok, now see if we can read a complete object. */
+	hdr = (struct binder_object_header *)(buffer->data + offset);
+	switch (hdr->type) {
+	case BINDER_TYPE_BINDER:
+	case BINDER_TYPE_WEAK_BINDER:
+	case BINDER_TYPE_HANDLE:
+	case BINDER_TYPE_WEAK_HANDLE:
+		object_size = sizeof(struct flat_binder_object);
+		break;
+	case BINDER_TYPE_FD:
+		object_size = sizeof(struct binder_fd_object);
+		break;
+	case BINDER_TYPE_PTR:
+		object_size = sizeof(struct binder_buffer_object);
+		break;
+	case BINDER_TYPE_FDA:
+		object_size = sizeof(struct binder_fd_array_object);
+		break;
+	default:
+		return 0;
+	}
+	if (offset <= buffer->data_size - object_size &&
+	    buffer->data_size >= object_size)
+		return object_size;
+	else
+		return 0;
+}
+
+/**
+ * binder_validate_ptr() - validates binder_buffer_object in a binder_buffer.
+ * @b:		binder_buffer containing the object
+ * @index:	index in offset array at which the binder_buffer_object is
+ *		located
+ * @start:	points to the start of the offset array
+ * @num_valid:	the number of valid offsets in the offset array
+ *
+ * Return:	If @index is within the valid range of the offset array
+ *		described by @start and @num_valid, and if there's a valid
+ *		binder_buffer_object at the offset found in index @index
+ *		of the offset array, that object is returned. Otherwise,
+ *		%NULL is returned.
+ *		Note that the offset found in index @index itself is not
+ *		verified; this function assumes that @num_valid elements
+ *		from @start were previously verified to have valid offsets.
+ */
+static struct binder_buffer_object *binder_validate_ptr(struct binder_buffer *b,
+							binder_size_t index,
+							binder_size_t *start,
+							binder_size_t num_valid)
+{
+	struct binder_buffer_object *buffer_obj;
+	binder_size_t *offp;
+
+	if (index >= num_valid)
+		return NULL;
+
+	offp = start + index;
+	buffer_obj = (struct binder_buffer_object *)(b->data + *offp);
+	if (buffer_obj->hdr.type != BINDER_TYPE_PTR)
+		return NULL;
+
+	return buffer_obj;
+}
+
+/**
+ * binder_validate_fixup() - validates pointer/fd fixups happen in order.
+ * @b:			transaction buffer
+ * @objects_start	start of objects buffer
+ * @buffer:		binder_buffer_object in which to fix up
+ * @offset:		start offset in @buffer to fix up
+ * @last_obj:		last binder_buffer_object that we fixed up in
+ * @last_min_offset:	minimum fixup offset in @last_obj
+ *
+ * Return:		%true if a fixup in buffer @buffer at offset @offset is
+ *			allowed.
+ *
+ * For safety reasons, we only allow fixups inside a buffer to happen
+ * at increasing offsets; additionally, we only allow fixup on the last
+ * buffer object that was verified, or one of its parents.
+ *
+ * Example of what is allowed:
+ *
+ * A
+ *   B (parent = A, offset = 0)
+ *   C (parent = A, offset = 16)
+ *     D (parent = C, offset = 0)
+ *   E (parent = A, offset = 32) // min_offset is 16 (C.parent_offset)
+ *
+ * Examples of what is not allowed:
+ *
+ * Decreasing offsets within the same parent:
+ * A
+ *   C (parent = A, offset = 16)
+ *   B (parent = A, offset = 0) // decreasing offset within A
+ *
+ * Referring to a parent that wasn't the last object or any of its parents:
+ * A
+ *   B (parent = A, offset = 0)
+ *   C (parent = A, offset = 0)
+ *   C (parent = A, offset = 16)
+ *     D (parent = B, offset = 0) // B is not A or any of A's parents
+ */
+static bool binder_validate_fixup(struct binder_buffer *b,
+				  binder_size_t *objects_start,
+				  struct binder_buffer_object *buffer,
+				  binder_size_t fixup_offset,
+				  struct binder_buffer_object *last_obj,
+				  binder_size_t last_min_offset)
+{
+	if (!last_obj) {
+		/* Nothing to fix up in */
+		return false;
+	}
+
+	while (last_obj != buffer) {
+		/*
+		 * Safe to retrieve the parent of last_obj, since it
+		 * was already previously verified by the driver.
+		 */
+		if ((last_obj->flags & BINDER_BUFFER_FLAG_HAS_PARENT) == 0)
+			return false;
+		last_min_offset = last_obj->parent_offset + sizeof(uintptr_t);
+		last_obj = (struct binder_buffer_object *)
+			(b->data + *(objects_start + last_obj->parent));
+	}
+	return (fixup_offset >= last_min_offset);
+}
+
 static void binder_transaction_buffer_release(struct binder_proc *proc,
 					      struct binder_buffer *buffer,
 					      binder_size_t *failed_at)
 {
-	binder_size_t *offp, *off_end;
+	binder_size_t *offp, *off_start, *off_end;
 	int debug_id = buffer->debug_id;
 
 	binder_debug(BINDER_DEBUG_TRANSACTION,
@@ -1267,28 +2279,30 @@
 	if (buffer->target_node)
 		binder_dec_node(buffer->target_node, 1, 0);
 
-	offp = (binder_size_t *)(buffer->data +
-				 ALIGN(buffer->data_size, sizeof(void *)));
+	off_start = (binder_size_t *)(buffer->data +
+				      ALIGN(buffer->data_size, sizeof(void *)));
 	if (failed_at)
 		off_end = failed_at;
 	else
-		off_end = (void *)offp + buffer->offsets_size;
-	for (; offp < off_end; offp++) {
-		struct flat_binder_object *fp;
+		off_end = (void *)off_start + buffer->offsets_size;
+	for (offp = off_start; offp < off_end; offp++) {
+		struct binder_object_header *hdr;
+		size_t object_size = binder_validate_object(buffer, *offp);
 
-		if (*offp > buffer->data_size - sizeof(*fp) ||
-		    buffer->data_size < sizeof(*fp) ||
-		    !IS_ALIGNED(*offp, sizeof(u32))) {
-			pr_err("transaction release %d bad offset %lld, size %zd\n",
+		if (object_size == 0) {
+			pr_err("transaction release %d bad object at offset %lld, size %zd\n",
 			       debug_id, (u64)*offp, buffer->data_size);
 			continue;
 		}
-		fp = (struct flat_binder_object *)(buffer->data + *offp);
-		switch (fp->type) {
+		hdr = (struct binder_object_header *)(buffer->data + *offp);
+		switch (hdr->type) {
 		case BINDER_TYPE_BINDER:
 		case BINDER_TYPE_WEAK_BINDER: {
-			struct binder_node *node = binder_get_node(proc, fp->binder);
+			struct flat_binder_object *fp;
+			struct binder_node *node;
 
+			fp = to_flat_binder_object(hdr);
+			node = binder_get_node(proc, fp->binder);
 			if (node == NULL) {
 				pr_err("transaction release %d bad node %016llx\n",
 				       debug_id, (u64)fp->binder);
@@ -1297,90 +2311,566 @@
 			binder_debug(BINDER_DEBUG_TRANSACTION,
 				     "        node %d u%016llx\n",
 				     node->debug_id, (u64)node->ptr);
-			binder_dec_node(node, fp->type == BINDER_TYPE_BINDER, 0);
+			binder_dec_node(node, hdr->type == BINDER_TYPE_BINDER,
+					0);
+			binder_put_node(node);
 		} break;
 		case BINDER_TYPE_HANDLE:
 		case BINDER_TYPE_WEAK_HANDLE: {
-			struct binder_ref *ref;
+			struct flat_binder_object *fp;
+			struct binder_ref_data rdata;
+			int ret;
 
-			ref = binder_get_ref(proc, fp->handle,
-					     fp->type == BINDER_TYPE_HANDLE);
+			fp = to_flat_binder_object(hdr);
+			ret = binder_dec_ref_for_handle(proc, fp->handle,
+				hdr->type == BINDER_TYPE_HANDLE, &rdata);
 
-			if (ref == NULL) {
-				pr_err("transaction release %d bad handle %d\n",
-				 debug_id, fp->handle);
+			if (ret) {
+				pr_err("transaction release %d bad handle %d, ret = %d\n",
+				 debug_id, fp->handle, ret);
 				break;
 			}
 			binder_debug(BINDER_DEBUG_TRANSACTION,
-				     "        ref %d desc %d (node %d)\n",
-				     ref->debug_id, ref->desc, ref->node->debug_id);
-			binder_dec_ref(ref, fp->type == BINDER_TYPE_HANDLE);
+				     "        ref %d desc %d\n",
+				     rdata.debug_id, rdata.desc);
 		} break;
 
-		case BINDER_TYPE_FD:
-			binder_debug(BINDER_DEBUG_TRANSACTION,
-				     "        fd %d\n", fp->handle);
-			if (failed_at)
-				task_close_fd(proc, fp->handle);
-			break;
+		case BINDER_TYPE_FD: {
+			struct binder_fd_object *fp = to_binder_fd_object(hdr);
 
+			binder_debug(BINDER_DEBUG_TRANSACTION,
+				     "        fd %d\n", fp->fd);
+			if (failed_at)
+				task_close_fd(proc, fp->fd);
+		} break;
+		case BINDER_TYPE_PTR:
+			/*
+			 * Nothing to do here, this will get cleaned up when the
+			 * transaction buffer gets freed
+			 */
+			break;
+		case BINDER_TYPE_FDA: {
+			struct binder_fd_array_object *fda;
+			struct binder_buffer_object *parent;
+			uintptr_t parent_buffer;
+			u32 *fd_array;
+			size_t fd_index;
+			binder_size_t fd_buf_size;
+
+			fda = to_binder_fd_array_object(hdr);
+			parent = binder_validate_ptr(buffer, fda->parent,
+						     off_start,
+						     offp - off_start);
+			if (!parent) {
+				pr_err("transaction release %d bad parent offset",
+				       debug_id);
+				continue;
+			}
+			/*
+			 * Since the parent was already fixed up, convert it
+			 * back to kernel address space to access it
+			 */
+			parent_buffer = parent->buffer -
+				binder_alloc_get_user_buffer_offset(
+						&proc->alloc);
+
+			fd_buf_size = sizeof(u32) * fda->num_fds;
+			if (fda->num_fds >= SIZE_MAX / sizeof(u32)) {
+				pr_err("transaction release %d invalid number of fds (%lld)\n",
+				       debug_id, (u64)fda->num_fds);
+				continue;
+			}
+			if (fd_buf_size > parent->length ||
+			    fda->parent_offset > parent->length - fd_buf_size) {
+				/* No space for all file descriptors here. */
+				pr_err("transaction release %d not enough space for %lld fds in buffer\n",
+				       debug_id, (u64)fda->num_fds);
+				continue;
+			}
+			fd_array = (u32 *)(parent_buffer + fda->parent_offset);
+			for (fd_index = 0; fd_index < fda->num_fds; fd_index++)
+				task_close_fd(proc, fd_array[fd_index]);
+		} break;
 		default:
 			pr_err("transaction release %d bad object type %x\n",
-				debug_id, fp->type);
+				debug_id, hdr->type);
 			break;
 		}
 	}
 }
 
+static int binder_translate_binder(struct flat_binder_object *fp,
+				   struct binder_transaction *t,
+				   struct binder_thread *thread)
+{
+	struct binder_node *node;
+	struct binder_proc *proc = thread->proc;
+	struct binder_proc *target_proc = t->to_proc;
+	struct binder_ref_data rdata;
+	int ret = 0;
+
+	node = binder_get_node(proc, fp->binder);
+	if (!node) {
+		node = binder_new_node(proc, fp);
+		if (!node)
+			return -ENOMEM;
+	}
+	if (fp->cookie != node->cookie) {
+		binder_user_error("%d:%d sending u%016llx node %d, cookie mismatch %016llx != %016llx\n",
+				  proc->pid, thread->pid, (u64)fp->binder,
+				  node->debug_id, (u64)fp->cookie,
+				  (u64)node->cookie);
+		ret = -EINVAL;
+		goto done;
+	}
+	if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {
+		ret = -EPERM;
+		goto done;
+	}
+
+	ret = binder_inc_ref_for_node(target_proc, node,
+			fp->hdr.type == BINDER_TYPE_BINDER,
+			&thread->todo, &rdata);
+	if (ret)
+		goto done;
+
+	if (fp->hdr.type == BINDER_TYPE_BINDER)
+		fp->hdr.type = BINDER_TYPE_HANDLE;
+	else
+		fp->hdr.type = BINDER_TYPE_WEAK_HANDLE;
+	fp->binder = 0;
+	fp->handle = rdata.desc;
+	fp->cookie = 0;
+
+	trace_binder_transaction_node_to_ref(t, node, &rdata);
+	binder_debug(BINDER_DEBUG_TRANSACTION,
+		     "        node %d u%016llx -> ref %d desc %d\n",
+		     node->debug_id, (u64)node->ptr,
+		     rdata.debug_id, rdata.desc);
+done:
+	binder_put_node(node);
+	return ret;
+}
+
+static int binder_translate_handle(struct flat_binder_object *fp,
+				   struct binder_transaction *t,
+				   struct binder_thread *thread)
+{
+	struct binder_proc *proc = thread->proc;
+	struct binder_proc *target_proc = t->to_proc;
+	struct binder_node *node;
+	struct binder_ref_data src_rdata;
+	int ret = 0;
+
+	node = binder_get_node_from_ref(proc, fp->handle,
+			fp->hdr.type == BINDER_TYPE_HANDLE, &src_rdata);
+	if (!node) {
+		binder_user_error("%d:%d got transaction with invalid handle, %d\n",
+				  proc->pid, thread->pid, fp->handle);
+		return -EINVAL;
+	}
+	if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {
+		ret = -EPERM;
+		goto done;
+	}
+
+	binder_node_lock(node);
+	if (node->proc == target_proc) {
+		if (fp->hdr.type == BINDER_TYPE_HANDLE)
+			fp->hdr.type = BINDER_TYPE_BINDER;
+		else
+			fp->hdr.type = BINDER_TYPE_WEAK_BINDER;
+		fp->binder = node->ptr;
+		fp->cookie = node->cookie;
+		if (node->proc)
+			binder_inner_proc_lock(node->proc);
+		binder_inc_node_nilocked(node,
+					 fp->hdr.type == BINDER_TYPE_BINDER,
+					 0, NULL);
+		if (node->proc)
+			binder_inner_proc_unlock(node->proc);
+		trace_binder_transaction_ref_to_node(t, node, &src_rdata);
+		binder_debug(BINDER_DEBUG_TRANSACTION,
+			     "        ref %d desc %d -> node %d u%016llx\n",
+			     src_rdata.debug_id, src_rdata.desc, node->debug_id,
+			     (u64)node->ptr);
+		binder_node_unlock(node);
+	} else {
+		struct binder_ref_data dest_rdata;
+
+		binder_node_unlock(node);
+		ret = binder_inc_ref_for_node(target_proc, node,
+				fp->hdr.type == BINDER_TYPE_HANDLE,
+				NULL, &dest_rdata);
+		if (ret)
+			goto done;
+
+		fp->binder = 0;
+		fp->handle = dest_rdata.desc;
+		fp->cookie = 0;
+		trace_binder_transaction_ref_to_ref(t, node, &src_rdata,
+						    &dest_rdata);
+		binder_debug(BINDER_DEBUG_TRANSACTION,
+			     "        ref %d desc %d -> ref %d desc %d (node %d)\n",
+			     src_rdata.debug_id, src_rdata.desc,
+			     dest_rdata.debug_id, dest_rdata.desc,
+			     node->debug_id);
+	}
+done:
+	binder_put_node(node);
+	return ret;
+}
+
+static int binder_translate_fd(int fd,
+			       struct binder_transaction *t,
+			       struct binder_thread *thread,
+			       struct binder_transaction *in_reply_to)
+{
+	struct binder_proc *proc = thread->proc;
+	struct binder_proc *target_proc = t->to_proc;
+	int target_fd;
+	struct file *file;
+	int ret;
+	bool target_allows_fd;
+
+	if (in_reply_to)
+		target_allows_fd = !!(in_reply_to->flags & TF_ACCEPT_FDS);
+	else
+		target_allows_fd = t->buffer->target_node->accept_fds;
+	if (!target_allows_fd) {
+		binder_user_error("%d:%d got %s with fd, %d, but target does not allow fds\n",
+				  proc->pid, thread->pid,
+				  in_reply_to ? "reply" : "transaction",
+				  fd);
+		ret = -EPERM;
+		goto err_fd_not_accepted;
+	}
+
+	file = fget(fd);
+	if (!file) {
+		binder_user_error("%d:%d got transaction with invalid fd, %d\n",
+				  proc->pid, thread->pid, fd);
+		ret = -EBADF;
+		goto err_fget;
+	}
+	ret = security_binder_transfer_file(proc->tsk, target_proc->tsk, file);
+	if (ret < 0) {
+		ret = -EPERM;
+		goto err_security;
+	}
+
+	target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC);
+	if (target_fd < 0) {
+		ret = -ENOMEM;
+		goto err_get_unused_fd;
+	}
+	task_fd_install(target_proc, target_fd, file);
+	trace_binder_transaction_fd(t, fd, target_fd);
+	binder_debug(BINDER_DEBUG_TRANSACTION, "        fd %d -> %d\n",
+		     fd, target_fd);
+
+	return target_fd;
+
+err_get_unused_fd:
+err_security:
+	fput(file);
+err_fget:
+err_fd_not_accepted:
+	return ret;
+}
+
+static int binder_translate_fd_array(struct binder_fd_array_object *fda,
+				     struct binder_buffer_object *parent,
+				     struct binder_transaction *t,
+				     struct binder_thread *thread,
+				     struct binder_transaction *in_reply_to)
+{
+	binder_size_t fdi, fd_buf_size, num_installed_fds;
+	int target_fd;
+	uintptr_t parent_buffer;
+	u32 *fd_array;
+	struct binder_proc *proc = thread->proc;
+	struct binder_proc *target_proc = t->to_proc;
+
+	fd_buf_size = sizeof(u32) * fda->num_fds;
+	if (fda->num_fds >= SIZE_MAX / sizeof(u32)) {
+		binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n",
+				  proc->pid, thread->pid, (u64)fda->num_fds);
+		return -EINVAL;
+	}
+	if (fd_buf_size > parent->length ||
+	    fda->parent_offset > parent->length - fd_buf_size) {
+		/* No space for all file descriptors here. */
+		binder_user_error("%d:%d not enough space to store %lld fds in buffer\n",
+				  proc->pid, thread->pid, (u64)fda->num_fds);
+		return -EINVAL;
+	}
+	/*
+	 * Since the parent was already fixed up, convert it
+	 * back to the kernel address space to access it
+	 */
+	parent_buffer = parent->buffer -
+		binder_alloc_get_user_buffer_offset(&target_proc->alloc);
+	fd_array = (u32 *)(parent_buffer + fda->parent_offset);
+	if (!IS_ALIGNED((unsigned long)fd_array, sizeof(u32))) {
+		binder_user_error("%d:%d parent offset not aligned correctly.\n",
+				  proc->pid, thread->pid);
+		return -EINVAL;
+	}
+	for (fdi = 0; fdi < fda->num_fds; fdi++) {
+		target_fd = binder_translate_fd(fd_array[fdi], t, thread,
+						in_reply_to);
+		if (target_fd < 0)
+			goto err_translate_fd_failed;
+		fd_array[fdi] = target_fd;
+	}
+	return 0;
+
+err_translate_fd_failed:
+	/*
+	 * Failed to allocate fd or security error, free fds
+	 * installed so far.
+	 */
+	num_installed_fds = fdi;
+	for (fdi = 0; fdi < num_installed_fds; fdi++)
+		task_close_fd(target_proc, fd_array[fdi]);
+	return target_fd;
+}
+
+static int binder_fixup_parent(struct binder_transaction *t,
+			       struct binder_thread *thread,
+			       struct binder_buffer_object *bp,
+			       binder_size_t *off_start,
+			       binder_size_t num_valid,
+			       struct binder_buffer_object *last_fixup_obj,
+			       binder_size_t last_fixup_min_off)
+{
+	struct binder_buffer_object *parent;
+	u8 *parent_buffer;
+	struct binder_buffer *b = t->buffer;
+	struct binder_proc *proc = thread->proc;
+	struct binder_proc *target_proc = t->to_proc;
+
+	if (!(bp->flags & BINDER_BUFFER_FLAG_HAS_PARENT))
+		return 0;
+
+	parent = binder_validate_ptr(b, bp->parent, off_start, num_valid);
+	if (!parent) {
+		binder_user_error("%d:%d got transaction with invalid parent offset or type\n",
+				  proc->pid, thread->pid);
+		return -EINVAL;
+	}
+
+	if (!binder_validate_fixup(b, off_start,
+				   parent, bp->parent_offset,
+				   last_fixup_obj,
+				   last_fixup_min_off)) {
+		binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n",
+				  proc->pid, thread->pid);
+		return -EINVAL;
+	}
+
+	if (parent->length < sizeof(binder_uintptr_t) ||
+	    bp->parent_offset > parent->length - sizeof(binder_uintptr_t)) {
+		/* No space for a pointer here! */
+		binder_user_error("%d:%d got transaction with invalid parent offset\n",
+				  proc->pid, thread->pid);
+		return -EINVAL;
+	}
+	parent_buffer = (u8 *)(parent->buffer -
+			binder_alloc_get_user_buffer_offset(
+				&target_proc->alloc));
+	*(binder_uintptr_t *)(parent_buffer + bp->parent_offset) = bp->buffer;
+
+	return 0;
+}
+
+/**
+ * binder_proc_transaction() - sends a transaction to a process and wakes it up
+ * @t:		transaction to send
+ * @proc:	process to send the transaction to
+ * @thread:	thread in @proc to send the transaction to (may be NULL)
+ *
+ * This function queues a transaction to the specified process. It will try
+ * to find a thread in the target process to handle the transaction and
+ * wake it up. If no thread is found, the work is queued to the proc
+ * waitqueue.
+ *
+ * If the @thread parameter is not NULL, the transaction is always queued
+ * to the waitlist of that specific thread.
+ *
+ * Return:	true if the transactions was successfully queued
+ *		false if the target process or thread is dead
+ */
+static bool binder_proc_transaction(struct binder_transaction *t,
+				    struct binder_proc *proc,
+				    struct binder_thread *thread)
+{
+	struct list_head *target_list = NULL;
+	struct binder_node *node = t->buffer->target_node;
+	struct binder_priority node_prio;
+	bool oneway = !!(t->flags & TF_ONE_WAY);
+	bool wakeup = true;
+
+	BUG_ON(!node);
+	binder_node_lock(node);
+	node_prio.prio = node->min_priority;
+	node_prio.sched_policy = node->sched_policy;
+
+	if (oneway) {
+		BUG_ON(thread);
+		if (node->has_async_transaction) {
+			target_list = &node->async_todo;
+			wakeup = false;
+		} else {
+			node->has_async_transaction = 1;
+		}
+	}
+
+	binder_inner_proc_lock(proc);
+
+	if (proc->is_dead || (thread && thread->is_dead)) {
+		binder_inner_proc_unlock(proc);
+		binder_node_unlock(node);
+		return false;
+	}
+
+	if (!thread && !target_list)
+		thread = binder_select_thread_ilocked(proc);
+
+	if (thread) {
+		target_list = &thread->todo;
+		binder_transaction_priority(thread->task, t, node_prio,
+					    node->inherit_rt);
+	} else if (!target_list) {
+		target_list = &proc->todo;
+	} else {
+		BUG_ON(target_list != &node->async_todo);
+	}
+
+	binder_enqueue_work_ilocked(&t->work, target_list);
+
+	if (wakeup)
+		binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */);
+
+	binder_inner_proc_unlock(proc);
+	binder_node_unlock(node);
+
+	return true;
+}
+
+/**
+ * binder_get_node_refs_for_txn() - Get required refs on node for txn
+ * @node:         struct binder_node for which to get refs
+ * @proc:         returns @node->proc if valid
+ * @error:        if no @proc then returns BR_DEAD_REPLY
+ *
+ * User-space normally keeps the node alive when creating a transaction
+ * since it has a reference to the target. The local strong ref keeps it
+ * alive if the sending process dies before the target process processes
+ * the transaction. If the source process is malicious or has a reference
+ * counting bug, relying on the local strong ref can fail.
+ *
+ * Since user-space can cause the local strong ref to go away, we also take
+ * a tmpref on the node to ensure it survives while we are constructing
+ * the transaction. We also need a tmpref on the proc while we are
+ * constructing the transaction, so we take that here as well.
+ *
+ * Return: The target_node with refs taken or NULL if no @node->proc is NULL.
+ * Also sets @proc if valid. If the @node->proc is NULL indicating that the
+ * target proc has died, @error is set to BR_DEAD_REPLY
+ */
+static struct binder_node *binder_get_node_refs_for_txn(
+		struct binder_node *node,
+		struct binder_proc **procp,
+		uint32_t *error)
+{
+	struct binder_node *target_node = NULL;
+
+	binder_node_inner_lock(node);
+	if (node->proc) {
+		target_node = node;
+		binder_inc_node_nilocked(node, 1, 0, NULL);
+		binder_inc_node_tmpref_ilocked(node);
+		node->proc->tmp_ref++;
+		*procp = node->proc;
+	} else
+		*error = BR_DEAD_REPLY;
+	binder_node_inner_unlock(node);
+
+	return target_node;
+}
+
 static void binder_transaction(struct binder_proc *proc,
 			       struct binder_thread *thread,
-			       struct binder_transaction_data *tr, int reply)
+			       struct binder_transaction_data *tr, int reply,
+			       binder_size_t extra_buffers_size)
 {
+	int ret;
 	struct binder_transaction *t;
 	struct binder_work *tcomplete;
-	binder_size_t *offp, *off_end;
+	binder_size_t *offp, *off_end, *off_start;
 	binder_size_t off_min;
-	struct binder_proc *target_proc;
+	u8 *sg_bufp, *sg_buf_end;
+	struct binder_proc *target_proc = NULL;
 	struct binder_thread *target_thread = NULL;
 	struct binder_node *target_node = NULL;
-	struct list_head *target_list;
-	wait_queue_head_t *target_wait;
 	struct binder_transaction *in_reply_to = NULL;
 	struct binder_transaction_log_entry *e;
-	uint32_t return_error;
+	uint32_t return_error = 0;
+	uint32_t return_error_param = 0;
+	uint32_t return_error_line = 0;
+	struct binder_buffer_object *last_fixup_obj = NULL;
+	binder_size_t last_fixup_min_off = 0;
+	struct binder_context *context = proc->context;
+	int t_debug_id = atomic_inc_return(&binder_last_id);
+	char *secctx = NULL;
+	u32 secctx_sz = 0;
 
 	e = binder_transaction_log_add(&binder_transaction_log);
+	e->debug_id = t_debug_id;
 	e->call_type = reply ? 2 : !!(tr->flags & TF_ONE_WAY);
 	e->from_proc = proc->pid;
 	e->from_thread = thread->pid;
 	e->target_handle = tr->target.handle;
 	e->data_size = tr->data_size;
 	e->offsets_size = tr->offsets_size;
+	e->context_name = proc->context->name;
 
 	if (reply) {
+		binder_inner_proc_lock(proc);
 		in_reply_to = thread->transaction_stack;
 		if (in_reply_to == NULL) {
+			binder_inner_proc_unlock(proc);
 			binder_user_error("%d:%d got reply transaction with no transaction stack\n",
 					  proc->pid, thread->pid);
 			return_error = BR_FAILED_REPLY;
+			return_error_param = -EPROTO;
+			return_error_line = __LINE__;
 			goto err_empty_call_stack;
 		}
-		binder_set_nice(in_reply_to->saved_priority);
 		if (in_reply_to->to_thread != thread) {
+			spin_lock(&in_reply_to->lock);
 			binder_user_error("%d:%d got reply transaction with bad transaction stack, transaction %d has target %d:%d\n",
 				proc->pid, thread->pid, in_reply_to->debug_id,
 				in_reply_to->to_proc ?
 				in_reply_to->to_proc->pid : 0,
 				in_reply_to->to_thread ?
 				in_reply_to->to_thread->pid : 0);
+			spin_unlock(&in_reply_to->lock);
+			binder_inner_proc_unlock(proc);
 			return_error = BR_FAILED_REPLY;
+			return_error_param = -EPROTO;
+			return_error_line = __LINE__;
 			in_reply_to = NULL;
 			goto err_bad_call_stack;
 		}
 		thread->transaction_stack = in_reply_to->to_parent;
-		target_thread = in_reply_to->from;
+		binder_inner_proc_unlock(proc);
+		target_thread = binder_get_txn_from_and_acq_inner(in_reply_to);
 		if (target_thread == NULL) {
 			return_error = BR_DEAD_REPLY;
+			return_error_line = __LINE__;
 			goto err_dead_binder;
 		}
 		if (target_thread->transaction_stack != in_reply_to) {
@@ -1389,106 +2879,148 @@
 				target_thread->transaction_stack ?
 				target_thread->transaction_stack->debug_id : 0,
 				in_reply_to->debug_id);
+			binder_inner_proc_unlock(target_thread->proc);
 			return_error = BR_FAILED_REPLY;
+			return_error_param = -EPROTO;
+			return_error_line = __LINE__;
 			in_reply_to = NULL;
 			target_thread = NULL;
 			goto err_dead_binder;
 		}
 		target_proc = target_thread->proc;
+		target_proc->tmp_ref++;
+		binder_inner_proc_unlock(target_thread->proc);
 	} else {
 		if (tr->target.handle) {
 			struct binder_ref *ref;
 
-			ref = binder_get_ref(proc, tr->target.handle, true);
-			if (ref == NULL) {
+			/*
+			 * There must already be a strong ref
+			 * on this node. If so, do a strong
+			 * increment on the node to ensure it
+			 * stays alive until the transaction is
+			 * done.
+			 */
+			binder_proc_lock(proc);
+			ref = binder_get_ref_olocked(proc, tr->target.handle,
+						     true);
+			if (ref) {
+				target_node = binder_get_node_refs_for_txn(
+						ref->node, &target_proc,
+						&return_error);
+			} else {
 				binder_user_error("%d:%d got transaction to invalid handle\n",
-					proc->pid, thread->pid);
+						  proc->pid, thread->pid);
 				return_error = BR_FAILED_REPLY;
-				goto err_invalid_target_handle;
 			}
-			target_node = ref->node;
+			binder_proc_unlock(proc);
 		} else {
-			target_node = binder_context_mgr_node;
-			if (target_node == NULL) {
+			mutex_lock(&context->context_mgr_node_lock);
+			target_node = context->binder_context_mgr_node;
+			if (target_node)
+				target_node = binder_get_node_refs_for_txn(
+						target_node, &target_proc,
+						&return_error);
+			else
 				return_error = BR_DEAD_REPLY;
-				goto err_no_context_mgr_node;
-			}
+			mutex_unlock(&context->context_mgr_node_lock);
 		}
-		e->to_node = target_node->debug_id;
-		target_proc = target_node->proc;
-		if (target_proc == NULL) {
-			return_error = BR_DEAD_REPLY;
+		if (!target_node) {
+			/*
+			 * return_error is set above
+			 */
+			return_error_param = -EINVAL;
+			return_error_line = __LINE__;
 			goto err_dead_binder;
 		}
+		e->to_node = target_node->debug_id;
 		if (security_binder_transaction(proc->tsk,
 						target_proc->tsk) < 0) {
 			return_error = BR_FAILED_REPLY;
+			return_error_param = -EPERM;
+			return_error_line = __LINE__;
 			goto err_invalid_target_handle;
 		}
+		binder_inner_proc_lock(proc);
 		if (!(tr->flags & TF_ONE_WAY) && thread->transaction_stack) {
 			struct binder_transaction *tmp;
 
 			tmp = thread->transaction_stack;
 			if (tmp->to_thread != thread) {
+				spin_lock(&tmp->lock);
 				binder_user_error("%d:%d got new transaction with bad transaction stack, transaction %d has target %d:%d\n",
 					proc->pid, thread->pid, tmp->debug_id,
 					tmp->to_proc ? tmp->to_proc->pid : 0,
 					tmp->to_thread ?
 					tmp->to_thread->pid : 0);
+				spin_unlock(&tmp->lock);
+				binder_inner_proc_unlock(proc);
 				return_error = BR_FAILED_REPLY;
+				return_error_param = -EPROTO;
+				return_error_line = __LINE__;
 				goto err_bad_call_stack;
 			}
 			while (tmp) {
-				if (tmp->from && tmp->from->proc == target_proc)
-					target_thread = tmp->from;
+				struct binder_thread *from;
+
+				spin_lock(&tmp->lock);
+				from = tmp->from;
+				if (from && from->proc == target_proc) {
+					atomic_inc(&from->tmp_ref);
+					target_thread = from;
+					spin_unlock(&tmp->lock);
+					break;
+				}
+				spin_unlock(&tmp->lock);
 				tmp = tmp->from_parent;
 			}
 		}
+		binder_inner_proc_unlock(proc);
 	}
-	if (target_thread) {
+	if (target_thread)
 		e->to_thread = target_thread->pid;
-		target_list = &target_thread->todo;
-		target_wait = &target_thread->wait;
-	} else {
-		target_list = &target_proc->todo;
-		target_wait = &target_proc->wait;
-	}
 	e->to_proc = target_proc->pid;
 
 	/* TODO: reuse incoming transaction for reply */
 	t = kzalloc(sizeof(*t), GFP_KERNEL);
 	if (t == NULL) {
 		return_error = BR_FAILED_REPLY;
+		return_error_param = -ENOMEM;
+		return_error_line = __LINE__;
 		goto err_alloc_t_failed;
 	}
 	binder_stats_created(BINDER_STAT_TRANSACTION);
+	spin_lock_init(&t->lock);
 
 	tcomplete = kzalloc(sizeof(*tcomplete), GFP_KERNEL);
 	if (tcomplete == NULL) {
 		return_error = BR_FAILED_REPLY;
+		return_error_param = -ENOMEM;
+		return_error_line = __LINE__;
 		goto err_alloc_tcomplete_failed;
 	}
 	binder_stats_created(BINDER_STAT_TRANSACTION_COMPLETE);
 
-	t->debug_id = ++binder_last_id;
-	e->debug_id = t->debug_id;
+	t->debug_id = t_debug_id;
 
 	if (reply)
 		binder_debug(BINDER_DEBUG_TRANSACTION,
-			     "%d:%d BC_REPLY %d -> %d:%d, data %016llx-%016llx size %lld-%lld\n",
+			     "%d:%d BC_REPLY %d -> %d:%d, data %016llx-%016llx size %lld-%lld-%lld\n",
 			     proc->pid, thread->pid, t->debug_id,
 			     target_proc->pid, target_thread->pid,
 			     (u64)tr->data.ptr.buffer,
 			     (u64)tr->data.ptr.offsets,
-			     (u64)tr->data_size, (u64)tr->offsets_size);
+			     (u64)tr->data_size, (u64)tr->offsets_size,
+			     (u64)extra_buffers_size);
 	else
 		binder_debug(BINDER_DEBUG_TRANSACTION,
-			     "%d:%d BC_TRANSACTION %d -> %d - node %d, data %016llx-%016llx size %lld-%lld\n",
+			     "%d:%d BC_TRANSACTION %d -> %d - node %d, data %016llx-%016llx size %lld-%lld-%lld\n",
 			     proc->pid, thread->pid, t->debug_id,
 			     target_proc->pid, target_node->debug_id,
 			     (u64)tr->data.ptr.buffer,
 			     (u64)tr->data.ptr.offsets,
-			     (u64)tr->data_size, (u64)tr->offsets_size);
+			     (u64)tr->data_size, (u64)tr->offsets_size,
+			     (u64)extra_buffers_size);
 
 	if (!reply && !(tr->flags & TF_ONE_WAY))
 		t->from = thread;
@@ -1499,32 +3031,74 @@
 	t->to_thread = target_thread;
 	t->code = tr->code;
 	t->flags = tr->flags;
-	t->priority = task_nice(current);
+	if (!(t->flags & TF_ONE_WAY) &&
+	    binder_supported_policy(current->policy)) {
+		/* Inherit supported policies for synchronous transactions */
+		t->priority.sched_policy = current->policy;
+		t->priority.prio = current->normal_prio;
+	} else {
+		/* Otherwise, fall back to the default priority */
+		t->priority = target_proc->default_priority;
+	}
+
+	if (target_node && target_node->txn_security_ctx) {
+		u32 secid;
+
+		security_task_getsecid(proc->tsk, &secid);
+		ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
+		if (ret) {
+			return_error = BR_FAILED_REPLY;
+			return_error_param = ret;
+			return_error_line = __LINE__;
+			goto err_get_secctx_failed;
+		}
+		extra_buffers_size += ALIGN(secctx_sz, sizeof(u64));
+	}
 
 	trace_binder_transaction(reply, t, target_node);
 
-	t->buffer = binder_alloc_buf(target_proc, tr->data_size,
-		tr->offsets_size, !reply && (t->flags & TF_ONE_WAY));
-	if (t->buffer == NULL) {
-		return_error = BR_FAILED_REPLY;
+	t->buffer = binder_alloc_new_buf(&target_proc->alloc, tr->data_size,
+		tr->offsets_size, extra_buffers_size,
+		!reply && (t->flags & TF_ONE_WAY));
+	if (IS_ERR(t->buffer)) {
+		/*
+		 * -ESRCH indicates VMA cleared. The target is dying.
+		 */
+		return_error_param = PTR_ERR(t->buffer);
+		return_error = return_error_param == -ESRCH ?
+			BR_DEAD_REPLY : BR_FAILED_REPLY;
+		return_error_line = __LINE__;
+		t->buffer = NULL;
 		goto err_binder_alloc_buf_failed;
 	}
-	t->buffer->allow_user_free = 0;
+	if (secctx) {
+		size_t buf_offset = ALIGN(tr->data_size, sizeof(void *)) +
+				    ALIGN(tr->offsets_size, sizeof(void *)) +
+				    ALIGN(extra_buffers_size, sizeof(void *)) -
+				    ALIGN(secctx_sz, sizeof(u64));
+		char *kptr = t->buffer->data + buf_offset;
+
+		t->security_ctx = (uintptr_t)kptr +
+		    binder_alloc_get_user_buffer_offset(&target_proc->alloc);
+		memcpy(kptr, secctx, secctx_sz);
+		security_release_secctx(secctx, secctx_sz);
+		secctx = NULL;
+	}
 	t->buffer->debug_id = t->debug_id;
 	t->buffer->transaction = t;
 	t->buffer->target_node = target_node;
 	trace_binder_transaction_alloc_buf(t->buffer);
-	if (target_node)
-		binder_inc_node(target_node, 1, 0, NULL);
-
-	offp = (binder_size_t *)(t->buffer->data +
-				 ALIGN(tr->data_size, sizeof(void *)));
+	off_start = (binder_size_t *)(t->buffer->data +
+				      ALIGN(tr->data_size, sizeof(void *)));
+	offp = off_start;
 
 	if (copy_from_user(t->buffer->data, (const void __user *)(uintptr_t)
 			   tr->data.ptr.buffer, tr->data_size)) {
 		binder_user_error("%d:%d got transaction with invalid data ptr\n",
 				proc->pid, thread->pid);
 		return_error = BR_FAILED_REPLY;
+		return_error_param = -EFAULT;
+		return_error_line = __LINE__;
 		goto err_copy_data_failed;
 	}
 	if (copy_from_user(offp, (const void __user *)(uintptr_t)
@@ -1532,232 +3106,248 @@
 		binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
 				proc->pid, thread->pid);
 		return_error = BR_FAILED_REPLY;
+		return_error_param = -EFAULT;
+		return_error_line = __LINE__;
 		goto err_copy_data_failed;
 	}
 	if (!IS_ALIGNED(tr->offsets_size, sizeof(binder_size_t))) {
 		binder_user_error("%d:%d got transaction with invalid offsets size, %lld\n",
 				proc->pid, thread->pid, (u64)tr->offsets_size);
 		return_error = BR_FAILED_REPLY;
+		return_error_param = -EINVAL;
+		return_error_line = __LINE__;
 		goto err_bad_offset;
 	}
-	off_end = (void *)offp + tr->offsets_size;
+	if (!IS_ALIGNED(extra_buffers_size, sizeof(u64))) {
+		binder_user_error("%d:%d got transaction with unaligned buffers size, %lld\n",
+				  proc->pid, thread->pid,
+				  (u64)extra_buffers_size);
+		return_error = BR_FAILED_REPLY;
+		return_error_param = -EINVAL;
+		return_error_line = __LINE__;
+		goto err_bad_offset;
+	}
+	off_end = (void *)off_start + tr->offsets_size;
+	sg_bufp = (u8 *)(PTR_ALIGN(off_end, sizeof(void *)));
+	sg_buf_end = sg_bufp + extra_buffers_size;
 	off_min = 0;
 	for (; offp < off_end; offp++) {
-		struct flat_binder_object *fp;
+		struct binder_object_header *hdr;
+		size_t object_size = binder_validate_object(t->buffer, *offp);
 
-		if (*offp > t->buffer->data_size - sizeof(*fp) ||
-		    *offp < off_min ||
-		    t->buffer->data_size < sizeof(*fp) ||
-		    !IS_ALIGNED(*offp, sizeof(u32))) {
-			binder_user_error("%d:%d got transaction with invalid offset, %lld (min %lld, max %lld)\n",
+		if (object_size == 0 || *offp < off_min) {
+			binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n",
 					  proc->pid, thread->pid, (u64)*offp,
 					  (u64)off_min,
-					  (u64)(t->buffer->data_size -
-					  sizeof(*fp)));
+					  (u64)t->buffer->data_size);
 			return_error = BR_FAILED_REPLY;
+			return_error_param = -EINVAL;
+			return_error_line = __LINE__;
 			goto err_bad_offset;
 		}
-		fp = (struct flat_binder_object *)(t->buffer->data + *offp);
-		off_min = *offp + sizeof(struct flat_binder_object);
-		switch (fp->type) {
+
+		hdr = (struct binder_object_header *)(t->buffer->data + *offp);
+		off_min = *offp + object_size;
+		switch (hdr->type) {
 		case BINDER_TYPE_BINDER:
 		case BINDER_TYPE_WEAK_BINDER: {
-			struct binder_ref *ref;
-			struct binder_node *node = binder_get_node(proc, fp->binder);
+			struct flat_binder_object *fp;
 
-			if (node == NULL) {
-				node = binder_new_node(proc, fp->binder, fp->cookie);
-				if (node == NULL) {
-					return_error = BR_FAILED_REPLY;
-					goto err_binder_new_node_failed;
-				}
-				node->min_priority = fp->flags & FLAT_BINDER_FLAG_PRIORITY_MASK;
-				node->accept_fds = !!(fp->flags & FLAT_BINDER_FLAG_ACCEPTS_FDS);
-			}
-			if (fp->cookie != node->cookie) {
-				binder_user_error("%d:%d sending u%016llx node %d, cookie mismatch %016llx != %016llx\n",
-					proc->pid, thread->pid,
-					(u64)fp->binder, node->debug_id,
-					(u64)fp->cookie, (u64)node->cookie);
+			fp = to_flat_binder_object(hdr);
+			ret = binder_translate_binder(fp, t, thread);
+			if (ret < 0) {
 				return_error = BR_FAILED_REPLY;
-				goto err_binder_get_ref_for_node_failed;
+				return_error_param = ret;
+				return_error_line = __LINE__;
+				goto err_translate_failed;
 			}
-			if (security_binder_transfer_binder(proc->tsk,
-							    target_proc->tsk)) {
-				return_error = BR_FAILED_REPLY;
-				goto err_binder_get_ref_for_node_failed;
-			}
-			ref = binder_get_ref_for_node(target_proc, node);
-			if (ref == NULL) {
-				return_error = BR_FAILED_REPLY;
-				goto err_binder_get_ref_for_node_failed;
-			}
-			if (fp->type == BINDER_TYPE_BINDER)
-				fp->type = BINDER_TYPE_HANDLE;
-			else
-				fp->type = BINDER_TYPE_WEAK_HANDLE;
-			fp->binder = 0;
-			fp->handle = ref->desc;
-			fp->cookie = 0;
-			binder_inc_ref(ref, fp->type == BINDER_TYPE_HANDLE,
-				       &thread->todo);
-
-			trace_binder_transaction_node_to_ref(t, node, ref);
-			binder_debug(BINDER_DEBUG_TRANSACTION,
-				     "        node %d u%016llx -> ref %d desc %d\n",
-				     node->debug_id, (u64)node->ptr,
-				     ref->debug_id, ref->desc);
 		} break;
 		case BINDER_TYPE_HANDLE:
 		case BINDER_TYPE_WEAK_HANDLE: {
-			struct binder_ref *ref;
+			struct flat_binder_object *fp;
 
-			ref = binder_get_ref(proc, fp->handle,
-					     fp->type == BINDER_TYPE_HANDLE);
-
-			if (ref == NULL) {
-				binder_user_error("%d:%d got transaction with invalid handle, %d\n",
-						proc->pid,
-						thread->pid, fp->handle);
+			fp = to_flat_binder_object(hdr);
+			ret = binder_translate_handle(fp, t, thread);
+			if (ret < 0) {
 				return_error = BR_FAILED_REPLY;
-				goto err_binder_get_ref_failed;
-			}
-			if (security_binder_transfer_binder(proc->tsk,
-							    target_proc->tsk)) {
-				return_error = BR_FAILED_REPLY;
-				goto err_binder_get_ref_failed;
-			}
-			if (ref->node->proc == target_proc) {
-				if (fp->type == BINDER_TYPE_HANDLE)
-					fp->type = BINDER_TYPE_BINDER;
-				else
-					fp->type = BINDER_TYPE_WEAK_BINDER;
-				fp->binder = ref->node->ptr;
-				fp->cookie = ref->node->cookie;
-				binder_inc_node(ref->node, fp->type == BINDER_TYPE_BINDER, 0, NULL);
-				trace_binder_transaction_ref_to_node(t, ref);
-				binder_debug(BINDER_DEBUG_TRANSACTION,
-					     "        ref %d desc %d -> node %d u%016llx\n",
-					     ref->debug_id, ref->desc, ref->node->debug_id,
-					     (u64)ref->node->ptr);
-			} else {
-				struct binder_ref *new_ref;
-
-				new_ref = binder_get_ref_for_node(target_proc, ref->node);
-				if (new_ref == NULL) {
-					return_error = BR_FAILED_REPLY;
-					goto err_binder_get_ref_for_node_failed;
-				}
-				fp->binder = 0;
-				fp->handle = new_ref->desc;
-				fp->cookie = 0;
-				binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL);
-				trace_binder_transaction_ref_to_ref(t, ref,
-								    new_ref);
-				binder_debug(BINDER_DEBUG_TRANSACTION,
-					     "        ref %d desc %d -> ref %d desc %d (node %d)\n",
-					     ref->debug_id, ref->desc, new_ref->debug_id,
-					     new_ref->desc, ref->node->debug_id);
+				return_error_param = ret;
+				return_error_line = __LINE__;
+				goto err_translate_failed;
 			}
 		} break;
 
 		case BINDER_TYPE_FD: {
-			int target_fd;
-			struct file *file;
+			struct binder_fd_object *fp = to_binder_fd_object(hdr);
+			int target_fd = binder_translate_fd(fp->fd, t, thread,
+							    in_reply_to);
 
-			if (reply) {
-				if (!(in_reply_to->flags & TF_ACCEPT_FDS)) {
-					binder_user_error("%d:%d got reply with fd, %d, but target does not allow fds\n",
-						proc->pid, thread->pid, fp->handle);
-					return_error = BR_FAILED_REPLY;
-					goto err_fd_not_allowed;
-				}
-			} else if (!target_node->accept_fds) {
-				binder_user_error("%d:%d got transaction with fd, %d, but target does not allow fds\n",
-					proc->pid, thread->pid, fp->handle);
-				return_error = BR_FAILED_REPLY;
-				goto err_fd_not_allowed;
-			}
-
-			file = fget(fp->handle);
-			if (file == NULL) {
-				binder_user_error("%d:%d got transaction with invalid fd, %d\n",
-					proc->pid, thread->pid, fp->handle);
-				return_error = BR_FAILED_REPLY;
-				goto err_fget_failed;
-			}
-			if (security_binder_transfer_file(proc->tsk,
-							  target_proc->tsk,
-							  file) < 0) {
-				fput(file);
-				return_error = BR_FAILED_REPLY;
-				goto err_get_unused_fd_failed;
-			}
-			target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC);
 			if (target_fd < 0) {
-				fput(file);
 				return_error = BR_FAILED_REPLY;
-				goto err_get_unused_fd_failed;
+				return_error_param = target_fd;
+				return_error_line = __LINE__;
+				goto err_translate_failed;
 			}
-			task_fd_install(target_proc, target_fd, file);
-			trace_binder_transaction_fd(t, fp->handle, target_fd);
-			binder_debug(BINDER_DEBUG_TRANSACTION,
-				     "        fd %d -> %d\n", fp->handle, target_fd);
-			/* TODO: fput? */
-			fp->binder = 0;
-			fp->handle = target_fd;
+			fp->pad_binder = 0;
+			fp->fd = target_fd;
 		} break;
+		case BINDER_TYPE_FDA: {
+			struct binder_fd_array_object *fda =
+				to_binder_fd_array_object(hdr);
+			struct binder_buffer_object *parent =
+				binder_validate_ptr(t->buffer, fda->parent,
+						    off_start,
+						    offp - off_start);
+			if (!parent) {
+				binder_user_error("%d:%d got transaction with invalid parent offset or type\n",
+						  proc->pid, thread->pid);
+				return_error = BR_FAILED_REPLY;
+				return_error_param = -EINVAL;
+				return_error_line = __LINE__;
+				goto err_bad_parent;
+			}
+			if (!binder_validate_fixup(t->buffer, off_start,
+						   parent, fda->parent_offset,
+						   last_fixup_obj,
+						   last_fixup_min_off)) {
+				binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n",
+						  proc->pid, thread->pid);
+				return_error = BR_FAILED_REPLY;
+				return_error_param = -EINVAL;
+				return_error_line = __LINE__;
+				goto err_bad_parent;
+			}
+			ret = binder_translate_fd_array(fda, parent, t, thread,
+							in_reply_to);
+			if (ret < 0) {
+				return_error = BR_FAILED_REPLY;
+				return_error_param = ret;
+				return_error_line = __LINE__;
+				goto err_translate_failed;
+			}
+			last_fixup_obj = parent;
+			last_fixup_min_off =
+				fda->parent_offset + sizeof(u32) * fda->num_fds;
+		} break;
+		case BINDER_TYPE_PTR: {
+			struct binder_buffer_object *bp =
+				to_binder_buffer_object(hdr);
+			size_t buf_left = sg_buf_end - sg_bufp;
 
+			if (bp->length > buf_left) {
+				binder_user_error("%d:%d got transaction with too large buffer\n",
+						  proc->pid, thread->pid);
+				return_error = BR_FAILED_REPLY;
+				return_error_param = -EINVAL;
+				return_error_line = __LINE__;
+				goto err_bad_offset;
+			}
+			if (copy_from_user(sg_bufp,
+					   (const void __user *)(uintptr_t)
+					   bp->buffer, bp->length)) {
+				binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
+						  proc->pid, thread->pid);
+				return_error_param = -EFAULT;
+				return_error = BR_FAILED_REPLY;
+				return_error_line = __LINE__;
+				goto err_copy_data_failed;
+			}
+			/* Fixup buffer pointer to target proc address space */
+			bp->buffer = (uintptr_t)sg_bufp +
+				binder_alloc_get_user_buffer_offset(
+						&target_proc->alloc);
+			sg_bufp += ALIGN(bp->length, sizeof(u64));
+
+			ret = binder_fixup_parent(t, thread, bp, off_start,
+						  offp - off_start,
+						  last_fixup_obj,
+						  last_fixup_min_off);
+			if (ret < 0) {
+				return_error = BR_FAILED_REPLY;
+				return_error_param = ret;
+				return_error_line = __LINE__;
+				goto err_translate_failed;
+			}
+			last_fixup_obj = bp;
+			last_fixup_min_off = 0;
+		} break;
 		default:
 			binder_user_error("%d:%d got transaction with invalid object type, %x\n",
-				proc->pid, thread->pid, fp->type);
+				proc->pid, thread->pid, hdr->type);
 			return_error = BR_FAILED_REPLY;
+			return_error_param = -EINVAL;
+			return_error_line = __LINE__;
 			goto err_bad_object_type;
 		}
 	}
+	tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
+	binder_enqueue_work(proc, tcomplete, &thread->todo);
+	t->work.type = BINDER_WORK_TRANSACTION;
+
 	if (reply) {
+		binder_inner_proc_lock(target_proc);
+		if (target_thread->is_dead) {
+			binder_inner_proc_unlock(target_proc);
+			goto err_dead_proc_or_thread;
+		}
 		BUG_ON(t->buffer->async_transaction != 0);
-		binder_pop_transaction(target_thread, in_reply_to);
+		binder_pop_transaction_ilocked(target_thread, in_reply_to);
+		binder_enqueue_work_ilocked(&t->work, &target_thread->todo);
+		binder_inner_proc_unlock(target_proc);
+		wake_up_interruptible_sync(&target_thread->wait);
+		binder_restore_priority(current, in_reply_to->saved_priority);
+		binder_free_transaction(in_reply_to);
 	} else if (!(t->flags & TF_ONE_WAY)) {
 		BUG_ON(t->buffer->async_transaction != 0);
+		binder_inner_proc_lock(proc);
 		t->need_reply = 1;
 		t->from_parent = thread->transaction_stack;
 		thread->transaction_stack = t;
+		binder_inner_proc_unlock(proc);
+		if (!binder_proc_transaction(t, target_proc, target_thread)) {
+			binder_inner_proc_lock(proc);
+			binder_pop_transaction_ilocked(thread, t);
+			binder_inner_proc_unlock(proc);
+			goto err_dead_proc_or_thread;
+		}
 	} else {
 		BUG_ON(target_node == NULL);
 		BUG_ON(t->buffer->async_transaction != 1);
-		if (target_node->has_async_transaction) {
-			target_list = &target_node->async_todo;
-			target_wait = NULL;
-		} else
-			target_node->has_async_transaction = 1;
+		if (!binder_proc_transaction(t, target_proc, NULL))
+			goto err_dead_proc_or_thread;
 	}
-	t->work.type = BINDER_WORK_TRANSACTION;
-	list_add_tail(&t->work.entry, target_list);
-	tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
-	list_add_tail(&tcomplete->entry, &thread->todo);
-	if (target_wait) {
-		if (reply || !(t->flags & TF_ONE_WAY))
-			wake_up_interruptible_sync(target_wait);
-		else
-			wake_up_interruptible(target_wait);
-	}
+	if (target_thread)
+		binder_thread_dec_tmpref(target_thread);
+	binder_proc_dec_tmpref(target_proc);
+	if (target_node)
+		binder_dec_node_tmpref(target_node);
+	/*
+	 * write barrier to synchronize with initialization
+	 * of log entry
+	 */
+	smp_wmb();
+	WRITE_ONCE(e->debug_id_done, t_debug_id);
 	return;
 
-err_get_unused_fd_failed:
-err_fget_failed:
-err_fd_not_allowed:
-err_binder_get_ref_for_node_failed:
-err_binder_get_ref_failed:
-err_binder_new_node_failed:
+err_dead_proc_or_thread:
+	return_error = BR_DEAD_REPLY;
+	return_error_line = __LINE__;
+	binder_dequeue_work(proc, tcomplete);
+err_translate_failed:
 err_bad_object_type:
 err_bad_offset:
+err_bad_parent:
 err_copy_data_failed:
 	trace_binder_transaction_failed_buffer_release(t->buffer);
 	binder_transaction_buffer_release(target_proc, t->buffer, offp);
+	if (target_node)
+		binder_dec_node_tmpref(target_node);
+	target_node = NULL;
 	t->buffer->transaction = NULL;
-	binder_free_buf(target_proc, t->buffer);
+	binder_alloc_free_buf(&target_proc->alloc, t->buffer);
 err_binder_alloc_buf_failed:
+	if (secctx)
+		security_release_secctx(secctx, secctx_sz);
+err_get_secctx_failed:
 	kfree(tcomplete);
 	binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
 err_alloc_tcomplete_failed:
@@ -1768,25 +3358,52 @@
 err_empty_call_stack:
 err_dead_binder:
 err_invalid_target_handle:
-err_no_context_mgr_node:
+	if (target_thread)
+		binder_thread_dec_tmpref(target_thread);
+	if (target_proc)
+		binder_proc_dec_tmpref(target_proc);
+	if (target_node) {
+		binder_dec_node(target_node, 1, 0);
+		binder_dec_node_tmpref(target_node);
+	}
+
 	binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
-		     "%d:%d transaction failed %d, size %lld-%lld\n",
-		     proc->pid, thread->pid, return_error,
-		     (u64)tr->data_size, (u64)tr->offsets_size);
+		     "%d:%d transaction failed %d/%d, size %lld-%lld line %d\n",
+		     proc->pid, thread->pid, return_error, return_error_param,
+		     (u64)tr->data_size, (u64)tr->offsets_size,
+		     return_error_line);
 
 	{
 		struct binder_transaction_log_entry *fe;
 
+		e->return_error = return_error;
+		e->return_error_param = return_error_param;
+		e->return_error_line = return_error_line;
 		fe = binder_transaction_log_add(&binder_transaction_log_failed);
 		*fe = *e;
+		/*
+		 * write barrier to synchronize with initialization
+		 * of log entry
+		 */
+		smp_wmb();
+		WRITE_ONCE(e->debug_id_done, t_debug_id);
+		WRITE_ONCE(fe->debug_id_done, t_debug_id);
 	}
 
-	BUG_ON(thread->return_error != BR_OK);
+	BUG_ON(thread->return_error.cmd != BR_OK);
 	if (in_reply_to) {
-		thread->return_error = BR_TRANSACTION_COMPLETE;
+		binder_restore_priority(current, in_reply_to->saved_priority);
+		thread->return_error.cmd = BR_TRANSACTION_COMPLETE;
+		binder_enqueue_work(thread->proc,
+				    &thread->return_error.work,
+				    &thread->todo);
 		binder_send_failed_reply(in_reply_to, return_error);
-	} else
-		thread->return_error = return_error;
+	} else {
+		thread->return_error.cmd = return_error;
+		binder_enqueue_work(thread->proc,
+				    &thread->return_error.work,
+				    &thread->todo);
+	}
 }
 
 static int binder_thread_write(struct binder_proc *proc,
@@ -1795,19 +3412,22 @@
 			binder_size_t *consumed)
 {
 	uint32_t cmd;
+	struct binder_context *context = proc->context;
 	void __user *buffer = (void __user *)(uintptr_t)binder_buffer;
 	void __user *ptr = buffer + *consumed;
 	void __user *end = buffer + size;
 
-	while (ptr < end && thread->return_error == BR_OK) {
+	while (ptr < end && thread->return_error.cmd == BR_OK) {
+		int ret;
+
 		if (get_user(cmd, (uint32_t __user *)ptr))
 			return -EFAULT;
 		ptr += sizeof(uint32_t);
 		trace_binder_command(cmd);
 		if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.bc)) {
-			binder_stats.bc[_IOC_NR(cmd)]++;
-			proc->stats.bc[_IOC_NR(cmd)]++;
-			thread->stats.bc[_IOC_NR(cmd)]++;
+			atomic_inc(&binder_stats.bc[_IOC_NR(cmd)]);
+			atomic_inc(&proc->stats.bc[_IOC_NR(cmd)]);
+			atomic_inc(&thread->stats.bc[_IOC_NR(cmd)]);
 		}
 		switch (cmd) {
 		case BC_INCREFS:
@@ -1815,53 +3435,61 @@
 		case BC_RELEASE:
 		case BC_DECREFS: {
 			uint32_t target;
-			struct binder_ref *ref;
 			const char *debug_string;
+			bool strong = cmd == BC_ACQUIRE || cmd == BC_RELEASE;
+			bool increment = cmd == BC_INCREFS || cmd == BC_ACQUIRE;
+			struct binder_ref_data rdata;
 
 			if (get_user(target, (uint32_t __user *)ptr))
 				return -EFAULT;
+
 			ptr += sizeof(uint32_t);
-			if (target == 0 && binder_context_mgr_node &&
-			    (cmd == BC_INCREFS || cmd == BC_ACQUIRE)) {
-				ref = binder_get_ref_for_node(proc,
-					       binder_context_mgr_node);
-				if (ref->desc != target) {
-					binder_user_error("%d:%d tried to acquire reference to desc 0, got %d instead\n",
-						proc->pid, thread->pid,
-						ref->desc);
-				}
-			} else
-				ref = binder_get_ref(proc, target,
-						     cmd == BC_ACQUIRE ||
-						     cmd == BC_RELEASE);
-			if (ref == NULL) {
-				binder_user_error("%d:%d refcount change on invalid ref %d\n",
-					proc->pid, thread->pid, target);
-				break;
+			ret = -1;
+			if (increment && !target) {
+				struct binder_node *ctx_mgr_node;
+				mutex_lock(&context->context_mgr_node_lock);
+				ctx_mgr_node = context->binder_context_mgr_node;
+				if (ctx_mgr_node)
+					ret = binder_inc_ref_for_node(
+							proc, ctx_mgr_node,
+							strong, NULL, &rdata);
+				mutex_unlock(&context->context_mgr_node_lock);
+			}
+			if (ret)
+				ret = binder_update_ref_for_handle(
+						proc, target, increment, strong,
+						&rdata);
+			if (!ret && rdata.desc != target) {
+				binder_user_error("%d:%d tried to acquire reference to desc %d, got %d instead\n",
+					proc->pid, thread->pid,
+					target, rdata.desc);
 			}
 			switch (cmd) {
 			case BC_INCREFS:
 				debug_string = "IncRefs";
-				binder_inc_ref(ref, 0, NULL);
 				break;
 			case BC_ACQUIRE:
 				debug_string = "Acquire";
-				binder_inc_ref(ref, 1, NULL);
 				break;
 			case BC_RELEASE:
 				debug_string = "Release";
-				binder_dec_ref(ref, 1);
 				break;
 			case BC_DECREFS:
 			default:
 				debug_string = "DecRefs";
-				binder_dec_ref(ref, 0);
+				break;
+			}
+			if (ret) {
+				binder_user_error("%d:%d %s %d refcount change on invalid ref %d ret %d\n",
+					proc->pid, thread->pid, debug_string,
+					strong, target, ret);
 				break;
 			}
 			binder_debug(BINDER_DEBUG_USER_REFS,
-				     "%d:%d %s ref %d desc %d s %d w %d for node %d\n",
-				     proc->pid, thread->pid, debug_string, ref->debug_id,
-				     ref->desc, ref->strong, ref->weak, ref->node->debug_id);
+				     "%d:%d %s ref %d desc %d s %d w %d\n",
+				     proc->pid, thread->pid, debug_string,
+				     rdata.debug_id, rdata.desc, rdata.strong,
+				     rdata.weak);
 			break;
 		}
 		case BC_INCREFS_DONE:
@@ -1869,6 +3497,7 @@
 			binder_uintptr_t node_ptr;
 			binder_uintptr_t cookie;
 			struct binder_node *node;
+			bool free_node;
 
 			if (get_user(node_ptr, (binder_uintptr_t __user *)ptr))
 				return -EFAULT;
@@ -1893,13 +3522,17 @@
 					"BC_INCREFS_DONE" : "BC_ACQUIRE_DONE",
 					(u64)node_ptr, node->debug_id,
 					(u64)cookie, (u64)node->cookie);
+				binder_put_node(node);
 				break;
 			}
+			binder_node_inner_lock(node);
 			if (cmd == BC_ACQUIRE_DONE) {
 				if (node->pending_strong_ref == 0) {
 					binder_user_error("%d:%d BC_ACQUIRE_DONE node %d has no pending acquire request\n",
 						proc->pid, thread->pid,
 						node->debug_id);
+					binder_node_inner_unlock(node);
+					binder_put_node(node);
 					break;
 				}
 				node->pending_strong_ref = 0;
@@ -1908,16 +3541,23 @@
 					binder_user_error("%d:%d BC_INCREFS_DONE node %d has no pending increfs request\n",
 						proc->pid, thread->pid,
 						node->debug_id);
+					binder_node_inner_unlock(node);
+					binder_put_node(node);
 					break;
 				}
 				node->pending_weak_ref = 0;
 			}
-			binder_dec_node(node, cmd == BC_ACQUIRE_DONE, 0);
+			free_node = binder_dec_node_nilocked(node,
+					cmd == BC_ACQUIRE_DONE, 0);
+			WARN_ON(free_node);
 			binder_debug(BINDER_DEBUG_USER_REFS,
-				     "%d:%d %s node %d ls %d lw %d\n",
+				     "%d:%d %s node %d ls %d lw %d tr %d\n",
 				     proc->pid, thread->pid,
 				     cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE",
-				     node->debug_id, node->local_strong_refs, node->local_weak_refs);
+				     node->debug_id, node->local_strong_refs,
+				     node->local_weak_refs, node->tmp_refs);
+			binder_node_inner_unlock(node);
+			binder_put_node(node);
 			break;
 		}
 		case BC_ATTEMPT_ACQUIRE:
@@ -1935,15 +3575,20 @@
 				return -EFAULT;
 			ptr += sizeof(binder_uintptr_t);
 
-			buffer = binder_buffer_lookup(proc, data_ptr);
-			if (buffer == NULL) {
-				binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n",
-					proc->pid, thread->pid, (u64)data_ptr);
-				break;
-			}
-			if (!buffer->allow_user_free) {
-				binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n",
-					proc->pid, thread->pid, (u64)data_ptr);
+			buffer = binder_alloc_prepare_to_free(&proc->alloc,
+							      data_ptr);
+			if (IS_ERR_OR_NULL(buffer)) {
+				if (PTR_ERR(buffer) == -EPERM) {
+					binder_user_error(
+						"%d:%d BC_FREE_BUFFER u%016llx matched unreturned or currently freeing buffer\n",
+						proc->pid, thread->pid,
+						(u64)data_ptr);
+				} else {
+					binder_user_error(
+						"%d:%d BC_FREE_BUFFER u%016llx no match\n",
+						proc->pid, thread->pid,
+						(u64)data_ptr);
+				}
 				break;
 			}
 			binder_debug(BINDER_DEBUG_FREE_BUFFER,
@@ -1957,18 +3602,41 @@
 				buffer->transaction = NULL;
 			}
 			if (buffer->async_transaction && buffer->target_node) {
-				BUG_ON(!buffer->target_node->has_async_transaction);
-				if (list_empty(&buffer->target_node->async_todo))
-					buffer->target_node->has_async_transaction = 0;
-				else
-					list_move_tail(buffer->target_node->async_todo.next, &thread->todo);
+				struct binder_node *buf_node;
+				struct binder_work *w;
+
+				buf_node = buffer->target_node;
+				binder_node_inner_lock(buf_node);
+				BUG_ON(!buf_node->has_async_transaction);
+				BUG_ON(buf_node->proc != proc);
+				w = binder_dequeue_work_head_ilocked(
+						&buf_node->async_todo);
+				if (!w) {
+					buf_node->has_async_transaction = 0;
+				} else {
+					binder_enqueue_work_ilocked(
+							w, &proc->todo);
+					binder_wakeup_proc_ilocked(proc);
+				}
+				binder_node_inner_unlock(buf_node);
 			}
 			trace_binder_transaction_buffer_release(buffer);
 			binder_transaction_buffer_release(proc, buffer, NULL);
-			binder_free_buf(proc, buffer);
+			binder_alloc_free_buf(&proc->alloc, buffer);
 			break;
 		}
 
+		case BC_TRANSACTION_SG:
+		case BC_REPLY_SG: {
+			struct binder_transaction_data_sg tr;
+
+			if (copy_from_user(&tr, ptr, sizeof(tr)))
+				return -EFAULT;
+			ptr += sizeof(tr);
+			binder_transaction(proc, thread, &tr.transaction_data,
+					   cmd == BC_REPLY_SG, tr.buffers_size);
+			break;
+		}
 		case BC_TRANSACTION:
 		case BC_REPLY: {
 			struct binder_transaction_data tr;
@@ -1976,7 +3644,8 @@
 			if (copy_from_user(&tr, ptr, sizeof(tr)))
 				return -EFAULT;
 			ptr += sizeof(tr);
-			binder_transaction(proc, thread, &tr, cmd == BC_REPLY);
+			binder_transaction(proc, thread, &tr,
+					   cmd == BC_REPLY, 0);
 			break;
 		}
 
@@ -1984,6 +3653,7 @@
 			binder_debug(BINDER_DEBUG_THREADS,
 				     "%d:%d BC_REGISTER_LOOPER\n",
 				     proc->pid, thread->pid);
+			binder_inner_proc_lock(proc);
 			if (thread->looper & BINDER_LOOPER_STATE_ENTERED) {
 				thread->looper |= BINDER_LOOPER_STATE_INVALID;
 				binder_user_error("%d:%d ERROR: BC_REGISTER_LOOPER called after BC_ENTER_LOOPER\n",
@@ -1997,6 +3667,7 @@
 				proc->requested_threads_started++;
 			}
 			thread->looper |= BINDER_LOOPER_STATE_REGISTERED;
+			binder_inner_proc_unlock(proc);
 			break;
 		case BC_ENTER_LOOPER:
 			binder_debug(BINDER_DEBUG_THREADS,
@@ -2021,7 +3692,7 @@
 			uint32_t target;
 			binder_uintptr_t cookie;
 			struct binder_ref *ref;
-			struct binder_ref_death *death;
+			struct binder_ref_death *death = NULL;
 
 			if (get_user(target, (uint32_t __user *)ptr))
 				return -EFAULT;
@@ -2029,7 +3700,29 @@
 			if (get_user(cookie, (binder_uintptr_t __user *)ptr))
 				return -EFAULT;
 			ptr += sizeof(binder_uintptr_t);
-			ref = binder_get_ref(proc, target, false);
+			if (cmd == BC_REQUEST_DEATH_NOTIFICATION) {
+				/*
+				 * Allocate memory for death notification
+				 * before taking lock
+				 */
+				death = kzalloc(sizeof(*death), GFP_KERNEL);
+				if (death == NULL) {
+					WARN_ON(thread->return_error.cmd !=
+						BR_OK);
+					thread->return_error.cmd = BR_ERROR;
+					binder_enqueue_work(
+						thread->proc,
+						&thread->return_error.work,
+						&thread->todo);
+					binder_debug(
+						BINDER_DEBUG_FAILED_TRANSACTION,
+						"%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n",
+						proc->pid, thread->pid);
+					break;
+				}
+			}
+			binder_proc_lock(proc);
+			ref = binder_get_ref_olocked(proc, target, false);
 			if (ref == NULL) {
 				binder_user_error("%d:%d %s invalid ref %d\n",
 					proc->pid, thread->pid,
@@ -2037,6 +3730,8 @@
 					"BC_REQUEST_DEATH_NOTIFICATION" :
 					"BC_CLEAR_DEATH_NOTIFICATION",
 					target);
+				binder_proc_unlock(proc);
+				kfree(death);
 				break;
 			}
 
@@ -2046,21 +3741,18 @@
 				     cmd == BC_REQUEST_DEATH_NOTIFICATION ?
 				     "BC_REQUEST_DEATH_NOTIFICATION" :
 				     "BC_CLEAR_DEATH_NOTIFICATION",
-				     (u64)cookie, ref->debug_id, ref->desc,
-				     ref->strong, ref->weak, ref->node->debug_id);
+				     (u64)cookie, ref->data.debug_id,
+				     ref->data.desc, ref->data.strong,
+				     ref->data.weak, ref->node->debug_id);
 
+			binder_node_lock(ref->node);
 			if (cmd == BC_REQUEST_DEATH_NOTIFICATION) {
 				if (ref->death) {
 					binder_user_error("%d:%d BC_REQUEST_DEATH_NOTIFICATION death notification already set\n",
 						proc->pid, thread->pid);
-					break;
-				}
-				death = kzalloc(sizeof(*death), GFP_KERNEL);
-				if (death == NULL) {
-					thread->return_error = BR_ERROR;
-					binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
-						     "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n",
-						     proc->pid, thread->pid);
+					binder_node_unlock(ref->node);
+					binder_proc_unlock(proc);
+					kfree(death);
 					break;
 				}
 				binder_stats_created(BINDER_STAT_DEATH);
@@ -2069,17 +3761,19 @@
 				ref->death = death;
 				if (ref->node->proc == NULL) {
 					ref->death->work.type = BINDER_WORK_DEAD_BINDER;
-					if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) {
-						list_add_tail(&ref->death->work.entry, &thread->todo);
-					} else {
-						list_add_tail(&ref->death->work.entry, &proc->todo);
-						wake_up_interruptible(&proc->wait);
-					}
+
+					binder_inner_proc_lock(proc);
+					binder_enqueue_work_ilocked(
+						&ref->death->work, &proc->todo);
+					binder_wakeup_proc_ilocked(proc);
+					binder_inner_proc_unlock(proc);
 				}
 			} else {
 				if (ref->death == NULL) {
 					binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification not active\n",
 						proc->pid, thread->pid);
+					binder_node_unlock(ref->node);
+					binder_proc_unlock(proc);
 					break;
 				}
 				death = ref->death;
@@ -2088,22 +3782,35 @@
 						proc->pid, thread->pid,
 						(u64)death->cookie,
 						(u64)cookie);
+					binder_node_unlock(ref->node);
+					binder_proc_unlock(proc);
 					break;
 				}
 				ref->death = NULL;
+				binder_inner_proc_lock(proc);
 				if (list_empty(&death->work.entry)) {
 					death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION;
-					if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) {
-						list_add_tail(&death->work.entry, &thread->todo);
-					} else {
-						list_add_tail(&death->work.entry, &proc->todo);
-						wake_up_interruptible(&proc->wait);
+					if (thread->looper &
+					    (BINDER_LOOPER_STATE_REGISTERED |
+					     BINDER_LOOPER_STATE_ENTERED))
+						binder_enqueue_work_ilocked(
+								&death->work,
+								&thread->todo);
+					else {
+						binder_enqueue_work_ilocked(
+								&death->work,
+								&proc->todo);
+						binder_wakeup_proc_ilocked(
+								proc);
 					}
 				} else {
 					BUG_ON(death->work.type != BINDER_WORK_DEAD_BINDER);
 					death->work.type = BINDER_WORK_DEAD_BINDER_AND_CLEAR;
 				}
+				binder_inner_proc_unlock(proc);
 			}
+			binder_node_unlock(ref->node);
+			binder_proc_unlock(proc);
 		} break;
 		case BC_DEAD_BINDER_DONE: {
 			struct binder_work *w;
@@ -2114,8 +3821,13 @@
 				return -EFAULT;
 
 			ptr += sizeof(cookie);
-			list_for_each_entry(w, &proc->delivered_death, entry) {
-				struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work);
+			binder_inner_proc_lock(proc);
+			list_for_each_entry(w, &proc->delivered_death,
+					    entry) {
+				struct binder_ref_death *tmp_death =
+					container_of(w,
+						     struct binder_ref_death,
+						     work);
 
 				if (tmp_death->cookie == cookie) {
 					death = tmp_death;
@@ -2129,19 +3841,25 @@
 			if (death == NULL) {
 				binder_user_error("%d:%d BC_DEAD_BINDER_DONE %016llx not found\n",
 					proc->pid, thread->pid, (u64)cookie);
+				binder_inner_proc_unlock(proc);
 				break;
 			}
-
-			list_del_init(&death->work.entry);
+			binder_dequeue_work_ilocked(&death->work);
 			if (death->work.type == BINDER_WORK_DEAD_BINDER_AND_CLEAR) {
 				death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION;
-				if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) {
-					list_add_tail(&death->work.entry, &thread->todo);
-				} else {
-					list_add_tail(&death->work.entry, &proc->todo);
-					wake_up_interruptible(&proc->wait);
+				if (thread->looper &
+					(BINDER_LOOPER_STATE_REGISTERED |
+					 BINDER_LOOPER_STATE_ENTERED))
+					binder_enqueue_work_ilocked(
+						&death->work, &thread->todo);
+				else {
+					binder_enqueue_work_ilocked(
+							&death->work,
+							&proc->todo);
+					binder_wakeup_proc_ilocked(proc);
 				}
 			}
+			binder_inner_proc_unlock(proc);
 		} break;
 
 		default:
@@ -2159,23 +3877,73 @@
 {
 	trace_binder_return(cmd);
 	if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.br)) {
-		binder_stats.br[_IOC_NR(cmd)]++;
-		proc->stats.br[_IOC_NR(cmd)]++;
-		thread->stats.br[_IOC_NR(cmd)]++;
+		atomic_inc(&binder_stats.br[_IOC_NR(cmd)]);
+		atomic_inc(&proc->stats.br[_IOC_NR(cmd)]);
+		atomic_inc(&thread->stats.br[_IOC_NR(cmd)]);
 	}
 }
 
-static int binder_has_proc_work(struct binder_proc *proc,
-				struct binder_thread *thread)
+static int binder_put_node_cmd(struct binder_proc *proc,
+			       struct binder_thread *thread,
+			       void __user **ptrp,
+			       binder_uintptr_t node_ptr,
+			       binder_uintptr_t node_cookie,
+			       int node_debug_id,
+			       uint32_t cmd, const char *cmd_name)
 {
-	return !list_empty(&proc->todo) ||
-		(thread->looper & BINDER_LOOPER_STATE_NEED_RETURN);
+	void __user *ptr = *ptrp;
+
+	if (put_user(cmd, (uint32_t __user *)ptr))
+		return -EFAULT;
+	ptr += sizeof(uint32_t);
+
+	if (put_user(node_ptr, (binder_uintptr_t __user *)ptr))
+		return -EFAULT;
+	ptr += sizeof(binder_uintptr_t);
+
+	if (put_user(node_cookie, (binder_uintptr_t __user *)ptr))
+		return -EFAULT;
+	ptr += sizeof(binder_uintptr_t);
+
+	binder_stat_br(proc, thread, cmd);
+	binder_debug(BINDER_DEBUG_USER_REFS, "%d:%d %s %d u%016llx c%016llx\n",
+		     proc->pid, thread->pid, cmd_name, node_debug_id,
+		     (u64)node_ptr, (u64)node_cookie);
+
+	*ptrp = ptr;
+	return 0;
 }
 
-static int binder_has_thread_work(struct binder_thread *thread)
+static int binder_wait_for_work(struct binder_thread *thread,
+				bool do_proc_work)
 {
-	return !list_empty(&thread->todo) || thread->return_error != BR_OK ||
-		(thread->looper & BINDER_LOOPER_STATE_NEED_RETURN);
+	DEFINE_WAIT(wait);
+	struct binder_proc *proc = thread->proc;
+	int ret = 0;
+
+	freezer_do_not_count();
+	binder_inner_proc_lock(proc);
+	for (;;) {
+		prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE);
+		if (binder_has_work_ilocked(thread, do_proc_work))
+			break;
+		if (do_proc_work)
+			list_add(&thread->waiting_thread_node,
+				 &proc->waiting_threads);
+		binder_inner_proc_unlock(proc);
+		schedule();
+		binder_inner_proc_lock(proc);
+		list_del_init(&thread->waiting_thread_node);
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+	}
+	finish_wait(&thread->wait, &wait);
+	binder_inner_proc_unlock(proc);
+	freezer_count();
+
+	return ret;
 }
 
 static int binder_thread_read(struct binder_proc *proc,
@@ -2197,37 +3965,15 @@
 	}
 
 retry:
-	wait_for_proc_work = thread->transaction_stack == NULL &&
-				list_empty(&thread->todo);
-
-	if (thread->return_error != BR_OK && ptr < end) {
-		if (thread->return_error2 != BR_OK) {
-			if (put_user(thread->return_error2, (uint32_t __user *)ptr))
-				return -EFAULT;
-			ptr += sizeof(uint32_t);
-			binder_stat_br(proc, thread, thread->return_error2);
-			if (ptr == end)
-				goto done;
-			thread->return_error2 = BR_OK;
-		}
-		if (put_user(thread->return_error, (uint32_t __user *)ptr))
-			return -EFAULT;
-		ptr += sizeof(uint32_t);
-		binder_stat_br(proc, thread, thread->return_error);
-		thread->return_error = BR_OK;
-		goto done;
-	}
-
+	binder_inner_proc_lock(proc);
+	wait_for_proc_work = binder_available_for_proc_work_ilocked(thread);
+	binder_inner_proc_unlock(proc);
 
 	thread->looper |= BINDER_LOOPER_STATE_WAITING;
-	if (wait_for_proc_work)
-		proc->ready_threads++;
-
-	binder_unlock(__func__);
 
 	trace_binder_wait_for_work(wait_for_proc_work,
 				   !!thread->transaction_stack,
-				   !list_empty(&thread->todo));
+				   !binder_worklist_empty(proc, &thread->todo));
 	if (wait_for_proc_work) {
 		if (!(thread->looper & (BINDER_LOOPER_STATE_REGISTERED |
 					BINDER_LOOPER_STATE_ENTERED))) {
@@ -2236,24 +3982,16 @@
 			wait_event_interruptible(binder_user_error_wait,
 						 binder_stop_on_user_error < 2);
 		}
-		binder_set_nice(proc->default_priority);
-		if (non_block) {
-			if (!binder_has_proc_work(proc, thread))
-				ret = -EAGAIN;
-		} else
-			ret = wait_event_freezable_exclusive(proc->wait, binder_has_proc_work(proc, thread));
-	} else {
-		if (non_block) {
-			if (!binder_has_thread_work(thread))
-				ret = -EAGAIN;
-		} else
-			ret = wait_event_freezable(thread->wait, binder_has_thread_work(thread));
+		binder_restore_priority(current, proc->default_priority);
 	}
 
-	binder_lock(__func__);
+	if (non_block) {
+		if (!binder_has_work(thread, wait_for_proc_work))
+			ret = -EAGAIN;
+	} else {
+		ret = binder_wait_for_work(thread, wait_for_proc_work);
+	}
 
-	if (wait_for_proc_work)
-		proc->ready_threads--;
 	thread->looper &= ~BINDER_LOOPER_STATE_WAITING;
 
 	if (ret)
@@ -2261,32 +3999,55 @@
 
 	while (1) {
 		uint32_t cmd;
-		struct binder_transaction_data tr;
-		struct binder_work *w;
+		struct binder_transaction_data_secctx tr;
+		struct binder_transaction_data *trd = &tr.transaction_data;
+		struct binder_work *w = NULL;
+		struct list_head *list = NULL;
 		struct binder_transaction *t = NULL;
+		struct binder_thread *t_from;
+		size_t trsize = sizeof(*trd);
 
-		if (!list_empty(&thread->todo)) {
-			w = list_first_entry(&thread->todo, struct binder_work,
-					     entry);
-		} else if (!list_empty(&proc->todo) && wait_for_proc_work) {
-			w = list_first_entry(&proc->todo, struct binder_work,
-					     entry);
-		} else {
+		binder_inner_proc_lock(proc);
+		if (!binder_worklist_empty_ilocked(&thread->todo))
+			list = &thread->todo;
+		else if (!binder_worklist_empty_ilocked(&proc->todo) &&
+			   wait_for_proc_work)
+			list = &proc->todo;
+		else {
+			binder_inner_proc_unlock(proc);
+
 			/* no data added */
-			if (ptr - buffer == 4 &&
-			    !(thread->looper & BINDER_LOOPER_STATE_NEED_RETURN))
+			if (ptr - buffer == 4 && !thread->looper_need_return)
 				goto retry;
 			break;
 		}
 
-		if (end - ptr < sizeof(tr) + 4)
+		if (end - ptr < sizeof(tr) + 4) {
+			binder_inner_proc_unlock(proc);
 			break;
+		}
+		w = binder_dequeue_work_head_ilocked(list);
 
 		switch (w->type) {
 		case BINDER_WORK_TRANSACTION: {
+			binder_inner_proc_unlock(proc);
 			t = container_of(w, struct binder_transaction, work);
 		} break;
+		case BINDER_WORK_RETURN_ERROR: {
+			struct binder_error *e = container_of(
+					w, struct binder_error, work);
+
+			WARN_ON(e->cmd == BR_OK);
+			binder_inner_proc_unlock(proc);
+			if (put_user(e->cmd, (uint32_t __user *)ptr))
+				return -EFAULT;
+			e->cmd = BR_OK;
+			ptr += sizeof(uint32_t);
+
+			binder_stat_br(proc, thread, cmd);
+		} break;
 		case BINDER_WORK_TRANSACTION_COMPLETE: {
+			binder_inner_proc_unlock(proc);
 			cmd = BR_TRANSACTION_COMPLETE;
 			if (put_user(cmd, (uint32_t __user *)ptr))
 				return -EFAULT;
@@ -2296,113 +4057,134 @@
 			binder_debug(BINDER_DEBUG_TRANSACTION_COMPLETE,
 				     "%d:%d BR_TRANSACTION_COMPLETE\n",
 				     proc->pid, thread->pid);
-
-			list_del(&w->entry);
 			kfree(w);
 			binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
 		} break;
 		case BINDER_WORK_NODE: {
 			struct binder_node *node = container_of(w, struct binder_node, work);
-			uint32_t cmd = BR_NOOP;
-			const char *cmd_name;
-			int strong = node->internal_strong_refs || node->local_strong_refs;
-			int weak = !hlist_empty(&node->refs) || node->local_weak_refs || strong;
+			int strong, weak;
+			binder_uintptr_t node_ptr = node->ptr;
+			binder_uintptr_t node_cookie = node->cookie;
+			int node_debug_id = node->debug_id;
+			int has_weak_ref;
+			int has_strong_ref;
+			void __user *orig_ptr = ptr;
 
-			if (weak && !node->has_weak_ref) {
-				cmd = BR_INCREFS;
-				cmd_name = "BR_INCREFS";
+			BUG_ON(proc != node->proc);
+			strong = node->internal_strong_refs ||
+					node->local_strong_refs;
+			weak = !hlist_empty(&node->refs) ||
+					node->local_weak_refs ||
+					node->tmp_refs || strong;
+			has_strong_ref = node->has_strong_ref;
+			has_weak_ref = node->has_weak_ref;
+
+			if (weak && !has_weak_ref) {
 				node->has_weak_ref = 1;
 				node->pending_weak_ref = 1;
 				node->local_weak_refs++;
-			} else if (strong && !node->has_strong_ref) {
-				cmd = BR_ACQUIRE;
-				cmd_name = "BR_ACQUIRE";
+			}
+			if (strong && !has_strong_ref) {
 				node->has_strong_ref = 1;
 				node->pending_strong_ref = 1;
 				node->local_strong_refs++;
-			} else if (!strong && node->has_strong_ref) {
-				cmd = BR_RELEASE;
-				cmd_name = "BR_RELEASE";
+			}
+			if (!strong && has_strong_ref)
 				node->has_strong_ref = 0;
-			} else if (!weak && node->has_weak_ref) {
-				cmd = BR_DECREFS;
-				cmd_name = "BR_DECREFS";
+			if (!weak && has_weak_ref)
 				node->has_weak_ref = 0;
-			}
-			if (cmd != BR_NOOP) {
-				if (put_user(cmd, (uint32_t __user *)ptr))
-					return -EFAULT;
-				ptr += sizeof(uint32_t);
-				if (put_user(node->ptr,
-					     (binder_uintptr_t __user *)ptr))
-					return -EFAULT;
-				ptr += sizeof(binder_uintptr_t);
-				if (put_user(node->cookie,
-					     (binder_uintptr_t __user *)ptr))
-					return -EFAULT;
-				ptr += sizeof(binder_uintptr_t);
+			if (!weak && !strong) {
+				binder_debug(BINDER_DEBUG_INTERNAL_REFS,
+					     "%d:%d node %d u%016llx c%016llx deleted\n",
+					     proc->pid, thread->pid,
+					     node_debug_id,
+					     (u64)node_ptr,
+					     (u64)node_cookie);
+				rb_erase(&node->rb_node, &proc->nodes);
+				binder_inner_proc_unlock(proc);
+				binder_node_lock(node);
+				/*
+				 * Acquire the node lock before freeing the
+				 * node to serialize with other threads that
+				 * may have been holding the node lock while
+				 * decrementing this node (avoids race where
+				 * this thread frees while the other thread
+				 * is unlocking the node after the final
+				 * decrement)
+				 */
+				binder_node_unlock(node);
+				binder_free_node(node);
+			} else
+				binder_inner_proc_unlock(proc);
 
-				binder_stat_br(proc, thread, cmd);
-				binder_debug(BINDER_DEBUG_USER_REFS,
-					     "%d:%d %s %d u%016llx c%016llx\n",
-					     proc->pid, thread->pid, cmd_name,
-					     node->debug_id,
-					     (u64)node->ptr, (u64)node->cookie);
-			} else {
-				list_del_init(&w->entry);
-				if (!weak && !strong) {
-					binder_debug(BINDER_DEBUG_INTERNAL_REFS,
-						     "%d:%d node %d u%016llx c%016llx deleted\n",
-						     proc->pid, thread->pid,
-						     node->debug_id,
-						     (u64)node->ptr,
-						     (u64)node->cookie);
-					rb_erase(&node->rb_node, &proc->nodes);
-					kfree(node);
-					binder_stats_deleted(BINDER_STAT_NODE);
-				} else {
-					binder_debug(BINDER_DEBUG_INTERNAL_REFS,
-						     "%d:%d node %d u%016llx c%016llx state unchanged\n",
-						     proc->pid, thread->pid,
-						     node->debug_id,
-						     (u64)node->ptr,
-						     (u64)node->cookie);
-				}
-			}
+			if (weak && !has_weak_ref)
+				ret = binder_put_node_cmd(
+						proc, thread, &ptr, node_ptr,
+						node_cookie, node_debug_id,
+						BR_INCREFS, "BR_INCREFS");
+			if (!ret && strong && !has_strong_ref)
+				ret = binder_put_node_cmd(
+						proc, thread, &ptr, node_ptr,
+						node_cookie, node_debug_id,
+						BR_ACQUIRE, "BR_ACQUIRE");
+			if (!ret && !strong && has_strong_ref)
+				ret = binder_put_node_cmd(
+						proc, thread, &ptr, node_ptr,
+						node_cookie, node_debug_id,
+						BR_RELEASE, "BR_RELEASE");
+			if (!ret && !weak && has_weak_ref)
+				ret = binder_put_node_cmd(
+						proc, thread, &ptr, node_ptr,
+						node_cookie, node_debug_id,
+						BR_DECREFS, "BR_DECREFS");
+			if (orig_ptr == ptr)
+				binder_debug(BINDER_DEBUG_INTERNAL_REFS,
+					     "%d:%d node %d u%016llx c%016llx state unchanged\n",
+					     proc->pid, thread->pid,
+					     node_debug_id,
+					     (u64)node_ptr,
+					     (u64)node_cookie);
+			if (ret)
+				return ret;
 		} break;
 		case BINDER_WORK_DEAD_BINDER:
 		case BINDER_WORK_DEAD_BINDER_AND_CLEAR:
 		case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: {
 			struct binder_ref_death *death;
 			uint32_t cmd;
+			binder_uintptr_t cookie;
 
 			death = container_of(w, struct binder_ref_death, work);
 			if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION)
 				cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE;
 			else
 				cmd = BR_DEAD_BINDER;
-			if (put_user(cmd, (uint32_t __user *)ptr))
-				return -EFAULT;
-			ptr += sizeof(uint32_t);
-			if (put_user(death->cookie,
-				     (binder_uintptr_t __user *)ptr))
-				return -EFAULT;
-			ptr += sizeof(binder_uintptr_t);
-			binder_stat_br(proc, thread, cmd);
+			cookie = death->cookie;
+
 			binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION,
 				     "%d:%d %s %016llx\n",
 				      proc->pid, thread->pid,
 				      cmd == BR_DEAD_BINDER ?
 				      "BR_DEAD_BINDER" :
 				      "BR_CLEAR_DEATH_NOTIFICATION_DONE",
-				      (u64)death->cookie);
-
+				      (u64)cookie);
 			if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) {
-				list_del(&w->entry);
+				binder_inner_proc_unlock(proc);
 				kfree(death);
 				binder_stats_deleted(BINDER_STAT_DEATH);
-			} else
-				list_move(&w->entry, &proc->delivered_death);
+			} else {
+				binder_enqueue_work_ilocked(
+						w, &proc->delivered_death);
+				binder_inner_proc_unlock(proc);
+			}
+			if (put_user(cmd, (uint32_t __user *)ptr))
+				return -EFAULT;
+			ptr += sizeof(uint32_t);
+			if (put_user(cookie,
+				     (binder_uintptr_t __user *)ptr))
+				return -EFAULT;
+			ptr += sizeof(binder_uintptr_t);
+			binder_stat_br(proc, thread, cmd);
 			if (cmd == BR_DEAD_BINDER)
 				goto done; /* DEAD_BINDER notifications can cause transactions */
 		} break;
@@ -2414,50 +4196,61 @@
 		BUG_ON(t->buffer == NULL);
 		if (t->buffer->target_node) {
 			struct binder_node *target_node = t->buffer->target_node;
+			struct binder_priority node_prio;
 
-			tr.target.ptr = target_node->ptr;
-			tr.cookie =  target_node->cookie;
-			t->saved_priority = task_nice(current);
-			if (t->priority < target_node->min_priority &&
-			    !(t->flags & TF_ONE_WAY))
-				binder_set_nice(t->priority);
-			else if (!(t->flags & TF_ONE_WAY) ||
-				 t->saved_priority > target_node->min_priority)
-				binder_set_nice(target_node->min_priority);
+			trd->target.ptr = target_node->ptr;
+			trd->cookie =  target_node->cookie;
+			node_prio.sched_policy = target_node->sched_policy;
+			node_prio.prio = target_node->min_priority;
+			binder_transaction_priority(current, t, node_prio,
+						    target_node->inherit_rt);
 			cmd = BR_TRANSACTION;
 		} else {
-			tr.target.ptr = 0;
-			tr.cookie = 0;
+			trd->target.ptr = 0;
+			trd->cookie = 0;
 			cmd = BR_REPLY;
 		}
-		tr.code = t->code;
-		tr.flags = t->flags;
-		tr.sender_euid = from_kuid(current_user_ns(), t->sender_euid);
+		trd->code = t->code;
+		trd->flags = t->flags;
+		trd->sender_euid = from_kuid(current_user_ns(), t->sender_euid);
 
-		if (t->from) {
-			struct task_struct *sender = t->from->proc->tsk;
+		t_from = binder_get_txn_from(t);
+		if (t_from) {
+			struct task_struct *sender = t_from->proc->tsk;
 
-			tr.sender_pid = task_tgid_nr_ns(sender,
-							task_active_pid_ns(current));
+			trd->sender_pid =
+				task_tgid_nr_ns(sender,
+						task_active_pid_ns(current));
 		} else {
-			tr.sender_pid = 0;
+			trd->sender_pid = 0;
 		}
 
-		tr.data_size = t->buffer->data_size;
-		tr.offsets_size = t->buffer->offsets_size;
-		tr.data.ptr.buffer = (binder_uintptr_t)(
-					(uintptr_t)t->buffer->data +
-					proc->user_buffer_offset);
-		tr.data.ptr.offsets = tr.data.ptr.buffer +
+		trd->data_size = t->buffer->data_size;
+		trd->offsets_size = t->buffer->offsets_size;
+		trd->data.ptr.buffer = (binder_uintptr_t)
+			((uintptr_t)t->buffer->data +
+			binder_alloc_get_user_buffer_offset(&proc->alloc));
+		trd->data.ptr.offsets = trd->data.ptr.buffer +
 					ALIGN(t->buffer->data_size,
 					    sizeof(void *));
 
-		if (put_user(cmd, (uint32_t __user *)ptr))
+		tr.secctx = t->security_ctx;
+		if (t->security_ctx) {
+			cmd = BR_TRANSACTION_SEC_CTX;
+			trsize = sizeof(tr);
+		}
+		if (put_user(cmd, (uint32_t __user *)ptr)) {
+			if (t_from)
+				binder_thread_dec_tmpref(t_from);
 			return -EFAULT;
+		}
 		ptr += sizeof(uint32_t);
-		if (copy_to_user(ptr, &tr, sizeof(tr)))
+		if (copy_to_user(ptr, &tr, trsize)) {
+			if (t_from)
+				binder_thread_dec_tmpref(t_from);
 			return -EFAULT;
-		ptr += sizeof(tr);
+		}
+		ptr += trsize;
 
 		trace_binder_transaction_received(t);
 		binder_stat_br(proc, thread, cmd);
@@ -2465,22 +4258,25 @@
 			     "%d:%d %s %d %d:%d, cmd %d size %zd-%zd ptr %016llx-%016llx\n",
 			     proc->pid, thread->pid,
 			     (cmd == BR_TRANSACTION) ? "BR_TRANSACTION" :
-			     "BR_REPLY",
-			     t->debug_id, t->from ? t->from->proc->pid : 0,
-			     t->from ? t->from->pid : 0, cmd,
+				(cmd == BR_TRANSACTION_SEC_CTX) ?
+				     "BR_TRANSACTION_SEC_CTX" : "BR_REPLY",
+			     t->debug_id, t_from ? t_from->proc->pid : 0,
+			     t_from ? t_from->pid : 0, cmd,
 			     t->buffer->data_size, t->buffer->offsets_size,
-			     (u64)tr.data.ptr.buffer, (u64)tr.data.ptr.offsets);
+			     (u64)trd->data.ptr.buffer,
+			     (u64)trd->data.ptr.offsets);
 
-		list_del(&t->work.entry);
+		if (t_from)
+			binder_thread_dec_tmpref(t_from);
 		t->buffer->allow_user_free = 1;
-		if (cmd == BR_TRANSACTION && !(t->flags & TF_ONE_WAY)) {
+		if (cmd != BR_REPLY && !(t->flags & TF_ONE_WAY)) {
+			binder_inner_proc_lock(thread->proc);
 			t->to_parent = thread->transaction_stack;
 			t->to_thread = thread;
 			thread->transaction_stack = t;
+			binder_inner_proc_unlock(thread->proc);
 		} else {
-			t->buffer->transaction = NULL;
-			kfree(t);
-			binder_stats_deleted(BINDER_STAT_TRANSACTION);
+			binder_free_transaction(t);
 		}
 		break;
 	}
@@ -2488,29 +4284,36 @@
 done:
 
 	*consumed = ptr - buffer;
-	if (proc->requested_threads + proc->ready_threads == 0 &&
+	binder_inner_proc_lock(proc);
+	if (proc->requested_threads == 0 &&
+	    list_empty(&thread->proc->waiting_threads) &&
 	    proc->requested_threads_started < proc->max_threads &&
 	    (thread->looper & (BINDER_LOOPER_STATE_REGISTERED |
 	     BINDER_LOOPER_STATE_ENTERED)) /* the user-space code fails to */
 	     /*spawn a new thread if we leave this out */) {
 		proc->requested_threads++;
+		binder_inner_proc_unlock(proc);
 		binder_debug(BINDER_DEBUG_THREADS,
 			     "%d:%d BR_SPAWN_LOOPER\n",
 			     proc->pid, thread->pid);
 		if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer))
 			return -EFAULT;
 		binder_stat_br(proc, thread, BR_SPAWN_LOOPER);
-	}
+	} else
+		binder_inner_proc_unlock(proc);
 	return 0;
 }
 
-static void binder_release_work(struct list_head *list)
+static void binder_release_work(struct binder_proc *proc,
+				struct list_head *list)
 {
 	struct binder_work *w;
 
-	while (!list_empty(list)) {
-		w = list_first_entry(list, struct binder_work, entry);
-		list_del_init(&w->entry);
+	while (1) {
+		w = binder_dequeue_work_head(proc, list);
+		if (!w)
+			return;
+
 		switch (w->type) {
 		case BINDER_WORK_TRANSACTION: {
 			struct binder_transaction *t;
@@ -2523,11 +4326,17 @@
 				binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
 					"undelivered transaction %d\n",
 					t->debug_id);
-				t->buffer->transaction = NULL;
-				kfree(t);
-				binder_stats_deleted(BINDER_STAT_TRANSACTION);
+				binder_free_transaction(t);
 			}
 		} break;
+		case BINDER_WORK_RETURN_ERROR: {
+			struct binder_error *e = container_of(
+					w, struct binder_error, work);
+
+			binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
+				"undelivered TRANSACTION_ERROR: %u\n",
+				e->cmd);
+		} break;
 		case BINDER_WORK_TRANSACTION_COMPLETE: {
 			binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
 				"undelivered TRANSACTION_COMPLETE\n");
@@ -2554,7 +4363,8 @@
 
 }
 
-static struct binder_thread *binder_get_thread(struct binder_proc *proc)
+static struct binder_thread *binder_get_thread_ilocked(
+		struct binder_proc *proc, struct binder_thread *new_thread)
 {
 	struct binder_thread *thread = NULL;
 	struct rb_node *parent = NULL;
@@ -2569,38 +4379,102 @@
 		else if (current->pid > thread->pid)
 			p = &(*p)->rb_right;
 		else
-			break;
+			return thread;
 	}
-	if (*p == NULL) {
-		thread = kzalloc(sizeof(*thread), GFP_KERNEL);
-		if (thread == NULL)
+	if (!new_thread)
+		return NULL;
+	thread = new_thread;
+	binder_stats_created(BINDER_STAT_THREAD);
+	thread->proc = proc;
+	thread->pid = current->pid;
+	get_task_struct(current);
+	thread->task = current;
+	atomic_set(&thread->tmp_ref, 0);
+	init_waitqueue_head(&thread->wait);
+	INIT_LIST_HEAD(&thread->todo);
+	rb_link_node(&thread->rb_node, parent, p);
+	rb_insert_color(&thread->rb_node, &proc->threads);
+	thread->looper_need_return = true;
+	thread->return_error.work.type = BINDER_WORK_RETURN_ERROR;
+	thread->return_error.cmd = BR_OK;
+	thread->reply_error.work.type = BINDER_WORK_RETURN_ERROR;
+	thread->reply_error.cmd = BR_OK;
+	INIT_LIST_HEAD(&new_thread->waiting_thread_node);
+	return thread;
+}
+
+static struct binder_thread *binder_get_thread(struct binder_proc *proc)
+{
+	struct binder_thread *thread;
+	struct binder_thread *new_thread;
+
+	binder_inner_proc_lock(proc);
+	thread = binder_get_thread_ilocked(proc, NULL);
+	binder_inner_proc_unlock(proc);
+	if (!thread) {
+		new_thread = kzalloc(sizeof(*thread), GFP_KERNEL);
+		if (new_thread == NULL)
 			return NULL;
-		binder_stats_created(BINDER_STAT_THREAD);
-		thread->proc = proc;
-		thread->pid = current->pid;
-		init_waitqueue_head(&thread->wait);
-		INIT_LIST_HEAD(&thread->todo);
-		rb_link_node(&thread->rb_node, parent, p);
-		rb_insert_color(&thread->rb_node, &proc->threads);
-		thread->looper |= BINDER_LOOPER_STATE_NEED_RETURN;
-		thread->return_error = BR_OK;
-		thread->return_error2 = BR_OK;
+		binder_inner_proc_lock(proc);
+		thread = binder_get_thread_ilocked(proc, new_thread);
+		binder_inner_proc_unlock(proc);
+		if (thread != new_thread)
+			kfree(new_thread);
 	}
 	return thread;
 }
 
-static int binder_free_thread(struct binder_proc *proc,
-			      struct binder_thread *thread)
+static void binder_free_proc(struct binder_proc *proc)
+{
+	BUG_ON(!list_empty(&proc->todo));
+	BUG_ON(!list_empty(&proc->delivered_death));
+	binder_alloc_deferred_release(&proc->alloc);
+	put_task_struct(proc->tsk);
+	binder_stats_deleted(BINDER_STAT_PROC);
+	kfree(proc);
+}
+
+static void binder_free_thread(struct binder_thread *thread)
+{
+	BUG_ON(!list_empty(&thread->todo));
+	binder_stats_deleted(BINDER_STAT_THREAD);
+	binder_proc_dec_tmpref(thread->proc);
+	put_task_struct(thread->task);
+	kfree(thread);
+}
+
+static int binder_thread_release(struct binder_proc *proc,
+				 struct binder_thread *thread)
 {
 	struct binder_transaction *t;
 	struct binder_transaction *send_reply = NULL;
 	int active_transactions = 0;
+	struct binder_transaction *last_t = NULL;
 
+	binder_inner_proc_lock(thread->proc);
+	/*
+	 * take a ref on the proc so it survives
+	 * after we remove this thread from proc->threads.
+	 * The corresponding dec is when we actually
+	 * free the thread in binder_free_thread()
+	 */
+	proc->tmp_ref++;
+	/*
+	 * take a ref on this thread to ensure it
+	 * survives while we are releasing it
+	 */
+	atomic_inc(&thread->tmp_ref);
 	rb_erase(&thread->rb_node, &proc->threads);
 	t = thread->transaction_stack;
-	if (t && t->to_thread == thread)
-		send_reply = t;
+	if (t) {
+		spin_lock(&t->lock);
+		if (t->to_thread == thread)
+			send_reply = t;
+	}
+	thread->is_dead = true;
+
 	while (t) {
+		last_t = t;
 		active_transactions++;
 		binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
 			     "release %d:%d transaction %d %s, still active\n",
@@ -2621,12 +4495,16 @@
 			t = t->from_parent;
 		} else
 			BUG();
+		spin_unlock(&last_t->lock);
+		if (t)
+			spin_lock(&t->lock);
 	}
+	binder_inner_proc_unlock(thread->proc);
+
 	if (send_reply)
 		binder_send_failed_reply(send_reply, BR_DEAD_REPLY);
-	binder_release_work(&thread->todo);
-	kfree(thread);
-	binder_stats_deleted(BINDER_STAT_THREAD);
+	binder_release_work(proc, &thread->todo);
+	binder_thread_dec_tmpref(thread);
 	return active_transactions;
 }
 
@@ -2635,34 +4513,23 @@
 {
 	struct binder_proc *proc = filp->private_data;
 	struct binder_thread *thread = NULL;
-	int wait_for_proc_work;
-
-	binder_lock(__func__);
+	bool wait_for_proc_work;
 
 	thread = binder_get_thread(proc);
-	if (!thread) {
-		binder_unlock(__func__);
+	if (!thread)
 		return POLLERR;
-	}
 
-	wait_for_proc_work = thread->transaction_stack == NULL &&
-		list_empty(&thread->todo) && thread->return_error == BR_OK;
+	binder_inner_proc_lock(thread->proc);
+	thread->looper |= BINDER_LOOPER_STATE_POLL;
+	wait_for_proc_work = binder_available_for_proc_work_ilocked(thread);
 
-	binder_unlock(__func__);
+	binder_inner_proc_unlock(thread->proc);
 
-	if (wait_for_proc_work) {
-		if (binder_has_proc_work(proc, thread))
-			return POLLIN;
-		poll_wait(filp, &proc->wait, wait);
-		if (binder_has_proc_work(proc, thread))
-			return POLLIN;
-	} else {
-		if (binder_has_thread_work(thread))
-			return POLLIN;
-		poll_wait(filp, &thread->wait, wait);
-		if (binder_has_thread_work(thread))
-			return POLLIN;
-	}
+	poll_wait(filp, &thread->wait, wait);
+
+	if (binder_has_work(thread, wait_for_proc_work))
+		return POLLIN;
+
 	return 0;
 }
 
@@ -2709,8 +4576,10 @@
 					 &bwr.read_consumed,
 					 filp->f_flags & O_NONBLOCK);
 		trace_binder_read_done(ret);
-		if (!list_empty(&proc->todo))
-			wake_up_interruptible(&proc->wait);
+		binder_inner_proc_lock(proc);
+		if (!binder_worklist_empty_ilocked(&proc->todo))
+			binder_wakeup_proc_ilocked(proc);
+		binder_inner_proc_unlock(proc);
 		if (ret < 0) {
 			if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
 				ret = -EFAULT;
@@ -2730,13 +4599,17 @@
 	return ret;
 }
 
-static int binder_ioctl_set_ctx_mgr(struct file *filp)
+static int binder_ioctl_set_ctx_mgr(struct file *filp,
+				    struct flat_binder_object *fbo)
 {
 	int ret = 0;
 	struct binder_proc *proc = filp->private_data;
+	struct binder_context *context = proc->context;
+	struct binder_node *new_node;
 	kuid_t curr_euid = current_euid();
 
-	if (binder_context_mgr_node != NULL) {
+	mutex_lock(&context->context_mgr_node_lock);
+	if (context->binder_context_mgr_node) {
 		pr_err("BINDER_SET_CONTEXT_MGR already set\n");
 		ret = -EBUSY;
 		goto out;
@@ -2744,31 +4617,60 @@
 	ret = security_binder_set_context_mgr(proc->tsk);
 	if (ret < 0)
 		goto out;
-	if (uid_valid(binder_context_mgr_uid)) {
-		if (!uid_eq(binder_context_mgr_uid, curr_euid)) {
+	if (uid_valid(context->binder_context_mgr_uid)) {
+		if (!uid_eq(context->binder_context_mgr_uid, curr_euid)) {
 			pr_err("BINDER_SET_CONTEXT_MGR bad uid %d != %d\n",
 			       from_kuid(&init_user_ns, curr_euid),
 			       from_kuid(&init_user_ns,
-					binder_context_mgr_uid));
+					 context->binder_context_mgr_uid));
 			ret = -EPERM;
 			goto out;
 		}
 	} else {
-		binder_context_mgr_uid = curr_euid;
+		context->binder_context_mgr_uid = curr_euid;
 	}
-	binder_context_mgr_node = binder_new_node(proc, 0, 0);
-	if (binder_context_mgr_node == NULL) {
+	new_node = binder_new_node(proc, fbo);
+	if (!new_node) {
 		ret = -ENOMEM;
 		goto out;
 	}
-	binder_context_mgr_node->local_weak_refs++;
-	binder_context_mgr_node->local_strong_refs++;
-	binder_context_mgr_node->has_strong_ref = 1;
-	binder_context_mgr_node->has_weak_ref = 1;
+	binder_node_lock(new_node);
+	new_node->local_weak_refs++;
+	new_node->local_strong_refs++;
+	new_node->has_strong_ref = 1;
+	new_node->has_weak_ref = 1;
+	context->binder_context_mgr_node = new_node;
+	binder_node_unlock(new_node);
+	binder_put_node(new_node);
 out:
+	mutex_unlock(&context->context_mgr_node_lock);
 	return ret;
 }
 
+static int binder_ioctl_get_node_debug_info(struct binder_proc *proc,
+				struct binder_node_debug_info *info) {
+	struct rb_node *n;
+	binder_uintptr_t ptr = info->ptr;
+
+	memset(info, 0, sizeof(*info));
+
+	binder_inner_proc_lock(proc);
+	for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) {
+		struct binder_node *node = rb_entry(n, struct binder_node,
+						    rb_node);
+		if (node->ptr > ptr) {
+			info->ptr = node->ptr;
+			info->cookie = node->cookie;
+			info->has_strong_ref = node->has_strong_ref;
+			info->has_weak_ref = node->has_weak_ref;
+			break;
+		}
+	}
+	binder_inner_proc_unlock(proc);
+
+	return 0;
+}
+
 static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	int ret;
@@ -2780,13 +4682,14 @@
 	/*pr_info("binder_ioctl: %d:%d %x %lx\n",
 			proc->pid, current->pid, cmd, arg);*/
 
+	binder_selftest_alloc(&proc->alloc);
+
 	trace_binder_ioctl(cmd, arg);
 
 	ret = wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
 	if (ret)
 		goto err_unlocked;
 
-	binder_lock(__func__);
 	thread = binder_get_thread(proc);
 	if (thread == NULL) {
 		ret = -ENOMEM;
@@ -2799,21 +4702,40 @@
 		if (ret)
 			goto err;
 		break;
-	case BINDER_SET_MAX_THREADS:
-		if (copy_from_user(&proc->max_threads, ubuf, sizeof(proc->max_threads))) {
+	case BINDER_SET_MAX_THREADS: {
+		int max_threads;
+
+		if (copy_from_user(&max_threads, ubuf,
+				   sizeof(max_threads))) {
 			ret = -EINVAL;
 			goto err;
 		}
+		binder_inner_proc_lock(proc);
+		proc->max_threads = max_threads;
+		binder_inner_proc_unlock(proc);
 		break;
+	}
+	case BINDER_SET_CONTEXT_MGR_EXT: {
+		struct flat_binder_object fbo;
+
+		if (copy_from_user(&fbo, ubuf, sizeof(fbo))) {
+			ret = -EINVAL;
+			goto err;
+		}
+		ret = binder_ioctl_set_ctx_mgr(filp, &fbo);
+		if (ret)
+			goto err;
+		break;
+	}
 	case BINDER_SET_CONTEXT_MGR:
-		ret = binder_ioctl_set_ctx_mgr(filp);
+		ret = binder_ioctl_set_ctx_mgr(filp, NULL);
 		if (ret)
 			goto err;
 		break;
 	case BINDER_THREAD_EXIT:
 		binder_debug(BINDER_DEBUG_THREADS, "%d:%d exit\n",
 			     proc->pid, thread->pid);
-		binder_free_thread(proc, thread);
+		binder_thread_release(proc, thread);
 		thread = NULL;
 		break;
 	case BINDER_VERSION: {
@@ -2830,6 +4752,24 @@
 		}
 		break;
 	}
+	case BINDER_GET_NODE_DEBUG_INFO: {
+		struct binder_node_debug_info info;
+
+		if (copy_from_user(&info, ubuf, sizeof(info))) {
+			ret = -EFAULT;
+			goto err;
+		}
+
+		ret = binder_ioctl_get_node_debug_info(proc, &info);
+		if (ret < 0)
+			goto err;
+
+		if (copy_to_user(ubuf, &info, sizeof(info))) {
+			ret = -EFAULT;
+			goto err;
+		}
+		break;
+	}
 	default:
 		ret = -EINVAL;
 		goto err;
@@ -2837,8 +4777,7 @@
 	ret = 0;
 err:
 	if (thread)
-		thread->looper &= ~BINDER_LOOPER_STATE_NEED_RETURN;
-	binder_unlock(__func__);
+		thread->looper_need_return = false;
 	wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
 	if (ret && ret != -ERESTARTSYS)
 		pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret);
@@ -2867,8 +4806,7 @@
 		     proc->pid, vma->vm_start, vma->vm_end,
 		     (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags,
 		     (unsigned long)pgprot_val(vma->vm_page_prot));
-	proc->vma = NULL;
-	proc->vma_vm_mm = NULL;
+	binder_alloc_vma_close(&proc->alloc);
 	binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES);
 }
 
@@ -2886,10 +4824,8 @@
 static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	int ret;
-	struct vm_struct *area;
 	struct binder_proc *proc = filp->private_data;
 	const char *failure_string;
-	struct binder_buffer *buffer;
 
 	if (proc->tsk != current->group_leader)
 		return -EINVAL;
@@ -2898,8 +4834,8 @@
 		vma->vm_end = vma->vm_start + SZ_4M;
 
 	binder_debug(BINDER_DEBUG_OPEN_CLOSE,
-		     "binder_mmap: %d %lx-%lx (%ld K) vma %lx pagep %lx\n",
-		     proc->pid, vma->vm_start, vma->vm_end,
+		     "%s: %d %lx-%lx (%ld K) vma %lx pagep %lx\n",
+		     __func__, proc->pid, vma->vm_start, vma->vm_end,
 		     (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags,
 		     (unsigned long)pgprot_val(vma->vm_page_prot));
 
@@ -2909,75 +4845,17 @@
 		goto err_bad_arg;
 	}
 	vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE;
-
-	mutex_lock(&binder_mmap_lock);
-	if (proc->buffer) {
-		ret = -EBUSY;
-		failure_string = "already mapped";
-		goto err_already_mapped;
-	}
-
-	area = get_vm_area(vma->vm_end - vma->vm_start, VM_IOREMAP);
-	if (area == NULL) {
-		ret = -ENOMEM;
-		failure_string = "get_vm_area";
-		goto err_get_vm_area_failed;
-	}
-	proc->buffer = area->addr;
-	proc->user_buffer_offset = vma->vm_start - (uintptr_t)proc->buffer;
-	mutex_unlock(&binder_mmap_lock);
-
-#ifdef CONFIG_CPU_CACHE_VIPT
-	if (cache_is_vipt_aliasing()) {
-		while (CACHE_COLOUR((vma->vm_start ^ (uint32_t)proc->buffer))) {
-			pr_info("binder_mmap: %d %lx-%lx maps %p bad alignment\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer);
-			vma->vm_start += PAGE_SIZE;
-		}
-	}
-#endif
-	proc->pages = kzalloc(sizeof(proc->pages[0]) * ((vma->vm_end - vma->vm_start) / PAGE_SIZE), GFP_KERNEL);
-	if (proc->pages == NULL) {
-		ret = -ENOMEM;
-		failure_string = "alloc page array";
-		goto err_alloc_pages_failed;
-	}
-	proc->buffer_size = vma->vm_end - vma->vm_start;
-
 	vma->vm_ops = &binder_vm_ops;
 	vma->vm_private_data = proc;
 
-	if (binder_update_page_range(proc, 1, proc->buffer, proc->buffer + PAGE_SIZE, vma)) {
-		ret = -ENOMEM;
-		failure_string = "alloc small buf";
-		goto err_alloc_small_buf_failed;
-	}
-	buffer = proc->buffer;
-	INIT_LIST_HEAD(&proc->buffers);
-	list_add(&buffer->entry, &proc->buffers);
-	buffer->free = 1;
-	binder_insert_free_buffer(proc, buffer);
-	proc->free_async_space = proc->buffer_size / 2;
-	barrier();
+	ret = binder_alloc_mmap_handler(&proc->alloc, vma);
+	if (ret)
+		return ret;
 	mutex_lock(&proc->files_lock);
 	proc->files = get_files_struct(current);
 	mutex_unlock(&proc->files_lock);
-	proc->vma = vma;
-	proc->vma_vm_mm = vma->vm_mm;
-
-	/*pr_info("binder_mmap: %d %lx-%lx maps %p\n",
-		 proc->pid, vma->vm_start, vma->vm_end, proc->buffer);*/
 	return 0;
 
-err_alloc_small_buf_failed:
-	kfree(proc->pages);
-	proc->pages = NULL;
-err_alloc_pages_failed:
-	mutex_lock(&binder_mmap_lock);
-	vfree(proc->buffer);
-	proc->buffer = NULL;
-err_get_vm_area_failed:
-err_already_mapped:
-	mutex_unlock(&binder_mmap_lock);
 err_bad_arg:
 	pr_err("binder_mmap: %d %lx-%lx %s failed %d\n",
 	       proc->pid, vma->vm_start, vma->vm_end, failure_string, ret);
@@ -2987,6 +4865,7 @@
 static int binder_open(struct inode *nodp, struct file *filp)
 {
 	struct binder_proc *proc;
+	struct binder_device *binder_dev;
 
 	binder_debug(BINDER_DEBUG_OPEN_CLOSE, "binder_open: %d:%d\n",
 		     current->group_leader->pid, current->pid);
@@ -2994,29 +4873,50 @@
 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 	if (proc == NULL)
 		return -ENOMEM;
+	spin_lock_init(&proc->inner_lock);
+	spin_lock_init(&proc->outer_lock);
 	get_task_struct(current->group_leader);
 	proc->tsk = current->group_leader;
 	mutex_init(&proc->files_lock);
 	INIT_LIST_HEAD(&proc->todo);
-	init_waitqueue_head(&proc->wait);
-	proc->default_priority = task_nice(current);
+	if (binder_supported_policy(current->policy)) {
+		proc->default_priority.sched_policy = current->policy;
+		proc->default_priority.prio = current->normal_prio;
+	} else {
+		proc->default_priority.sched_policy = SCHED_NORMAL;
+		proc->default_priority.prio = NICE_TO_PRIO(0);
+	}
 
-	binder_lock(__func__);
+	binder_dev = container_of(filp->private_data, struct binder_device,
+				  miscdev);
+	proc->context = &binder_dev->context;
+	binder_alloc_init(&proc->alloc);
 
 	binder_stats_created(BINDER_STAT_PROC);
-	hlist_add_head(&proc->proc_node, &binder_procs);
 	proc->pid = current->group_leader->pid;
 	INIT_LIST_HEAD(&proc->delivered_death);
+	INIT_LIST_HEAD(&proc->waiting_threads);
 	filp->private_data = proc;
 
-	binder_unlock(__func__);
+	mutex_lock(&binder_procs_lock);
+	hlist_add_head(&proc->proc_node, &binder_procs);
+	mutex_unlock(&binder_procs_lock);
 
 	if (binder_debugfs_dir_entry_proc) {
 		char strbuf[11];
 
 		snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
+		/*
+		 * proc debug entries are shared between contexts, so
+		 * this will fail if the process tries to open the driver
+		 * again with a different context. The priting code will
+		 * anyway print all contexts that a given PID has, so this
+		 * is not a problem.
+		 */
 		proc->debugfs_entry = debugfs_create_file(strbuf, S_IRUGO,
-			binder_debugfs_dir_entry_proc, proc, &binder_proc_fops);
+			binder_debugfs_dir_entry_proc,
+			(void *)(unsigned long)proc->pid,
+			&binder_proc_fops);
 	}
 
 	return 0;
@@ -3036,16 +4936,17 @@
 	struct rb_node *n;
 	int wake_count = 0;
 
+	binder_inner_proc_lock(proc);
 	for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) {
 		struct binder_thread *thread = rb_entry(n, struct binder_thread, rb_node);
 
-		thread->looper |= BINDER_LOOPER_STATE_NEED_RETURN;
+		thread->looper_need_return = true;
 		if (thread->looper & BINDER_LOOPER_STATE_WAITING) {
 			wake_up_interruptible(&thread->wait);
 			wake_count++;
 		}
 	}
-	wake_up_interruptible_all(&proc->wait);
+	binder_inner_proc_unlock(proc);
 
 	binder_debug(BINDER_DEBUG_OPEN_CLOSE,
 		     "binder_flush: %d woke %d threads\n", proc->pid,
@@ -3066,13 +4967,21 @@
 {
 	struct binder_ref *ref;
 	int death = 0;
+	struct binder_proc *proc = node->proc;
 
-	list_del_init(&node->work.entry);
-	binder_release_work(&node->async_todo);
+	binder_release_work(proc, &node->async_todo);
 
-	if (hlist_empty(&node->refs)) {
-		kfree(node);
-		binder_stats_deleted(BINDER_STAT_NODE);
+	binder_node_lock(node);
+	binder_inner_proc_lock(proc);
+	binder_dequeue_work_ilocked(&node->work);
+	/*
+	 * The caller must have taken a temporary ref on the node,
+	 */
+	BUG_ON(!node->tmp_refs);
+	if (hlist_empty(&node->refs) && node->tmp_refs == 1) {
+		binder_inner_proc_unlock(proc);
+		binder_node_unlock(node);
+		binder_free_node(node);
 
 		return refs;
 	}
@@ -3080,59 +4989,84 @@
 	node->proc = NULL;
 	node->local_strong_refs = 0;
 	node->local_weak_refs = 0;
+	binder_inner_proc_unlock(proc);
+
+	spin_lock(&binder_dead_nodes_lock);
 	hlist_add_head(&node->dead_node, &binder_dead_nodes);
+	spin_unlock(&binder_dead_nodes_lock);
 
 	hlist_for_each_entry(ref, &node->refs, node_entry) {
 		refs++;
-
-		if (!ref->death)
+		/*
+		 * Need the node lock to synchronize
+		 * with new notification requests and the
+		 * inner lock to synchronize with queued
+		 * death notifications.
+		 */
+		binder_inner_proc_lock(ref->proc);
+		if (!ref->death) {
+			binder_inner_proc_unlock(ref->proc);
 			continue;
+		}
 
 		death++;
 
-		if (list_empty(&ref->death->work.entry)) {
-			ref->death->work.type = BINDER_WORK_DEAD_BINDER;
-			list_add_tail(&ref->death->work.entry,
-				      &ref->proc->todo);
-			wake_up_interruptible(&ref->proc->wait);
-		} else
-			BUG();
+		BUG_ON(!list_empty(&ref->death->work.entry));
+		ref->death->work.type = BINDER_WORK_DEAD_BINDER;
+		binder_enqueue_work_ilocked(&ref->death->work,
+					    &ref->proc->todo);
+		binder_wakeup_proc_ilocked(ref->proc);
+		binder_inner_proc_unlock(ref->proc);
 	}
 
 	binder_debug(BINDER_DEBUG_DEAD_BINDER,
 		     "node %d now dead, refs %d, death %d\n",
 		     node->debug_id, refs, death);
+	binder_node_unlock(node);
+	binder_put_node(node);
 
 	return refs;
 }
 
 static void binder_deferred_release(struct binder_proc *proc)
 {
-	struct binder_transaction *t;
+	struct binder_context *context = proc->context;
 	struct rb_node *n;
-	int threads, nodes, incoming_refs, outgoing_refs, buffers,
-		active_transactions, page_count;
+	int threads, nodes, incoming_refs, outgoing_refs, active_transactions;
 
-	BUG_ON(proc->vma);
 	BUG_ON(proc->files);
 
+	mutex_lock(&binder_procs_lock);
 	hlist_del(&proc->proc_node);
+	mutex_unlock(&binder_procs_lock);
 
-	if (binder_context_mgr_node && binder_context_mgr_node->proc == proc) {
+	mutex_lock(&context->context_mgr_node_lock);
+	if (context->binder_context_mgr_node &&
+	    context->binder_context_mgr_node->proc == proc) {
 		binder_debug(BINDER_DEBUG_DEAD_BINDER,
 			     "%s: %d context_mgr_node gone\n",
 			     __func__, proc->pid);
-		binder_context_mgr_node = NULL;
+		context->binder_context_mgr_node = NULL;
 	}
+	mutex_unlock(&context->context_mgr_node_lock);
+	binder_inner_proc_lock(proc);
+	/*
+	 * Make sure proc stays alive after we
+	 * remove all the threads
+	 */
+	proc->tmp_ref++;
 
+	proc->is_dead = true;
 	threads = 0;
 	active_transactions = 0;
 	while ((n = rb_first(&proc->threads))) {
 		struct binder_thread *thread;
 
 		thread = rb_entry(n, struct binder_thread, rb_node);
+		binder_inner_proc_unlock(proc);
 		threads++;
-		active_transactions += binder_free_thread(proc, thread);
+		active_transactions += binder_thread_release(proc, thread);
+		binder_inner_proc_lock(proc);
 	}
 
 	nodes = 0;
@@ -3142,73 +5076,42 @@
 
 		node = rb_entry(n, struct binder_node, rb_node);
 		nodes++;
+		/*
+		 * take a temporary ref on the node before
+		 * calling binder_node_release() which will either
+		 * kfree() the node or call binder_put_node()
+		 */
+		binder_inc_node_tmpref_ilocked(node);
 		rb_erase(&node->rb_node, &proc->nodes);
+		binder_inner_proc_unlock(proc);
 		incoming_refs = binder_node_release(node, incoming_refs);
+		binder_inner_proc_lock(proc);
 	}
+	binder_inner_proc_unlock(proc);
 
 	outgoing_refs = 0;
+	binder_proc_lock(proc);
 	while ((n = rb_first(&proc->refs_by_desc))) {
 		struct binder_ref *ref;
 
 		ref = rb_entry(n, struct binder_ref, rb_node_desc);
 		outgoing_refs++;
-		binder_delete_ref(ref);
+		binder_cleanup_ref_olocked(ref);
+		binder_proc_unlock(proc);
+		binder_free_ref(ref);
+		binder_proc_lock(proc);
 	}
+	binder_proc_unlock(proc);
 
-	binder_release_work(&proc->todo);
-	binder_release_work(&proc->delivered_death);
-
-	buffers = 0;
-	while ((n = rb_first(&proc->allocated_buffers))) {
-		struct binder_buffer *buffer;
-
-		buffer = rb_entry(n, struct binder_buffer, rb_node);
-
-		t = buffer->transaction;
-		if (t) {
-			t->buffer = NULL;
-			buffer->transaction = NULL;
-			pr_err("release proc %d, transaction %d, not freed\n",
-			       proc->pid, t->debug_id);
-			/*BUG();*/
-		}
-
-		binder_free_buf(proc, buffer);
-		buffers++;
-	}
-
-	binder_stats_deleted(BINDER_STAT_PROC);
-
-	page_count = 0;
-	if (proc->pages) {
-		int i;
-
-		for (i = 0; i < proc->buffer_size / PAGE_SIZE; i++) {
-			void *page_addr;
-
-			if (!proc->pages[i])
-				continue;
-
-			page_addr = proc->buffer + i * PAGE_SIZE;
-			binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-				     "%s: %d: page %d at %p not freed\n",
-				     __func__, proc->pid, i, page_addr);
-			unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
-			__free_page(proc->pages[i]);
-			page_count++;
-		}
-		kfree(proc->pages);
-		vfree(proc->buffer);
-	}
-
-	put_task_struct(proc->tsk);
+	binder_release_work(proc, &proc->todo);
+	binder_release_work(proc, &proc->delivered_death);
 
 	binder_debug(BINDER_DEBUG_OPEN_CLOSE,
-		     "%s: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d, buffers %d, pages %d\n",
+		     "%s: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d\n",
 		     __func__, proc->pid, threads, nodes, incoming_refs,
-		     outgoing_refs, active_transactions, buffers, page_count);
+		     outgoing_refs, active_transactions);
 
-	kfree(proc);
+	binder_proc_dec_tmpref(proc);
 }
 
 static void binder_deferred_func(struct work_struct *work)
@@ -3219,7 +5122,6 @@
 	int defer;
 
 	do {
-		binder_lock(__func__);
 		mutex_lock(&binder_deferred_lock);
 		if (!hlist_empty(&binder_deferred_list)) {
 			proc = hlist_entry(binder_deferred_list.first,
@@ -3248,7 +5150,6 @@
 		if (defer & BINDER_DEFERRED_RELEASE)
 			binder_deferred_release(proc); /* frees proc */
 
-		binder_unlock(__func__);
 		if (files)
 			put_files_struct(files);
 	} while (proc);
@@ -3268,41 +5169,52 @@
 	mutex_unlock(&binder_deferred_lock);
 }
 
-static void print_binder_transaction(struct seq_file *m, const char *prefix,
-				     struct binder_transaction *t)
+static void print_binder_transaction_ilocked(struct seq_file *m,
+					     struct binder_proc *proc,
+					     const char *prefix,
+					     struct binder_transaction *t)
 {
+	struct binder_proc *to_proc;
+	struct binder_buffer *buffer = t->buffer;
+
+	spin_lock(&t->lock);
+	to_proc = t->to_proc;
 	seq_printf(m,
-		   "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d",
+		   "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %d:%d r%d",
 		   prefix, t->debug_id, t,
 		   t->from ? t->from->proc->pid : 0,
 		   t->from ? t->from->pid : 0,
-		   t->to_proc ? t->to_proc->pid : 0,
+		   to_proc ? to_proc->pid : 0,
 		   t->to_thread ? t->to_thread->pid : 0,
-		   t->code, t->flags, t->priority, t->need_reply);
-	if (t->buffer == NULL) {
+		   t->code, t->flags, t->priority.sched_policy,
+		   t->priority.prio, t->need_reply);
+	spin_unlock(&t->lock);
+
+	if (proc != to_proc) {
+		/*
+		 * Can only safely deref buffer if we are holding the
+		 * correct proc inner lock for this node
+		 */
+		seq_puts(m, "\n");
+		return;
+	}
+
+	if (buffer == NULL) {
 		seq_puts(m, " buffer free\n");
 		return;
 	}
-	if (t->buffer->target_node)
-		seq_printf(m, " node %d",
-			   t->buffer->target_node->debug_id);
+	if (buffer->target_node)
+		seq_printf(m, " node %d", buffer->target_node->debug_id);
 	seq_printf(m, " size %zd:%zd data %p\n",
-		   t->buffer->data_size, t->buffer->offsets_size,
-		   t->buffer->data);
-}
-
-static void print_binder_buffer(struct seq_file *m, const char *prefix,
-				struct binder_buffer *buffer)
-{
-	seq_printf(m, "%s %d: %p size %zd:%zd %s\n",
-		   prefix, buffer->debug_id, buffer->data,
 		   buffer->data_size, buffer->offsets_size,
-		   buffer->transaction ? "active" : "delivered");
+		   buffer->data);
 }
 
-static void print_binder_work(struct seq_file *m, const char *prefix,
-			      const char *transaction_prefix,
-			      struct binder_work *w)
+static void print_binder_work_ilocked(struct seq_file *m,
+				     struct binder_proc *proc,
+				     const char *prefix,
+				     const char *transaction_prefix,
+				     struct binder_work *w)
 {
 	struct binder_node *node;
 	struct binder_transaction *t;
@@ -3310,8 +5222,16 @@
 	switch (w->type) {
 	case BINDER_WORK_TRANSACTION:
 		t = container_of(w, struct binder_transaction, work);
-		print_binder_transaction(m, transaction_prefix, t);
+		print_binder_transaction_ilocked(
+				m, proc, transaction_prefix, t);
 		break;
+	case BINDER_WORK_RETURN_ERROR: {
+		struct binder_error *e = container_of(
+				w, struct binder_error, work);
+
+		seq_printf(m, "%stransaction error: %u\n",
+			   prefix, e->cmd);
+	} break;
 	case BINDER_WORK_TRANSACTION_COMPLETE:
 		seq_printf(m, "%stransaction complete\n", prefix);
 		break;
@@ -3336,40 +5256,46 @@
 	}
 }
 
-static void print_binder_thread(struct seq_file *m,
-				struct binder_thread *thread,
-				int print_always)
+static void print_binder_thread_ilocked(struct seq_file *m,
+					struct binder_thread *thread,
+					int print_always)
 {
 	struct binder_transaction *t;
 	struct binder_work *w;
 	size_t start_pos = m->count;
 	size_t header_pos;
 
-	seq_printf(m, "  thread %d: l %02x\n", thread->pid, thread->looper);
+	seq_printf(m, "  thread %d: l %02x need_return %d tr %d\n",
+			thread->pid, thread->looper,
+			thread->looper_need_return,
+			atomic_read(&thread->tmp_ref));
 	header_pos = m->count;
 	t = thread->transaction_stack;
 	while (t) {
 		if (t->from == thread) {
-			print_binder_transaction(m,
-						 "    outgoing transaction", t);
+			print_binder_transaction_ilocked(m, thread->proc,
+					"    outgoing transaction", t);
 			t = t->from_parent;
 		} else if (t->to_thread == thread) {
-			print_binder_transaction(m,
+			print_binder_transaction_ilocked(m, thread->proc,
 						 "    incoming transaction", t);
 			t = t->to_parent;
 		} else {
-			print_binder_transaction(m, "    bad transaction", t);
+			print_binder_transaction_ilocked(m, thread->proc,
+					"    bad transaction", t);
 			t = NULL;
 		}
 	}
 	list_for_each_entry(w, &thread->todo, entry) {
-		print_binder_work(m, "    ", "    pending transaction", w);
+		print_binder_work_ilocked(m, thread->proc, "    ",
+					  "    pending transaction", w);
 	}
 	if (!print_always && m->count == header_pos)
 		m->count = start_pos;
 }
 
-static void print_binder_node(struct seq_file *m, struct binder_node *node)
+static void print_binder_node_nilocked(struct seq_file *m,
+				       struct binder_node *node)
 {
 	struct binder_ref *ref;
 	struct binder_work *w;
@@ -3379,27 +5305,35 @@
 	hlist_for_each_entry(ref, &node->refs, node_entry)
 		count++;
 
-	seq_printf(m, "  node %d: u%016llx c%016llx hs %d hw %d ls %d lw %d is %d iw %d",
+	seq_printf(m, "  node %d: u%016llx c%016llx pri %d:%d hs %d hw %d ls %d lw %d is %d iw %d tr %d",
 		   node->debug_id, (u64)node->ptr, (u64)node->cookie,
+		   node->sched_policy, node->min_priority,
 		   node->has_strong_ref, node->has_weak_ref,
 		   node->local_strong_refs, node->local_weak_refs,
-		   node->internal_strong_refs, count);
+		   node->internal_strong_refs, count, node->tmp_refs);
 	if (count) {
 		seq_puts(m, " proc");
 		hlist_for_each_entry(ref, &node->refs, node_entry)
 			seq_printf(m, " %d", ref->proc->pid);
 	}
 	seq_puts(m, "\n");
-	list_for_each_entry(w, &node->async_todo, entry)
-		print_binder_work(m, "    ",
-				  "    pending async transaction", w);
+	if (node->proc) {
+		list_for_each_entry(w, &node->async_todo, entry)
+			print_binder_work_ilocked(m, node->proc, "    ",
+					  "    pending async transaction", w);
+	}
 }
 
-static void print_binder_ref(struct seq_file *m, struct binder_ref *ref)
+static void print_binder_ref_olocked(struct seq_file *m,
+				     struct binder_ref *ref)
 {
-	seq_printf(m, "  ref %d: desc %d %snode %d s %d w %d d %p\n",
-		   ref->debug_id, ref->desc, ref->node->proc ? "" : "dead ",
-		   ref->node->debug_id, ref->strong, ref->weak, ref->death);
+	binder_node_lock(ref->node);
+	seq_printf(m, "  ref %d: desc %d %snode %d s %d w %d d %pK\n",
+		   ref->data.debug_id, ref->data.desc,
+		   ref->node->proc ? "" : "dead ",
+		   ref->node->debug_id, ref->data.strong,
+		   ref->data.weak, ref->death);
+	binder_node_unlock(ref->node);
 }
 
 static void print_binder_proc(struct seq_file *m,
@@ -3409,35 +5343,60 @@
 	struct rb_node *n;
 	size_t start_pos = m->count;
 	size_t header_pos;
+	struct binder_node *last_node = NULL;
 
 	seq_printf(m, "proc %d\n", proc->pid);
+	seq_printf(m, "context %s\n", proc->context->name);
 	header_pos = m->count;
 
+	binder_inner_proc_lock(proc);
 	for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n))
-		print_binder_thread(m, rb_entry(n, struct binder_thread,
+		print_binder_thread_ilocked(m, rb_entry(n, struct binder_thread,
 						rb_node), print_all);
+
 	for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) {
 		struct binder_node *node = rb_entry(n, struct binder_node,
 						    rb_node);
-		if (print_all || node->has_async_transaction)
-			print_binder_node(m, node);
+		/*
+		 * take a temporary reference on the node so it
+		 * survives and isn't removed from the tree
+		 * while we print it.
+		 */
+		binder_inc_node_tmpref_ilocked(node);
+		/* Need to drop inner lock to take node lock */
+		binder_inner_proc_unlock(proc);
+		if (last_node)
+			binder_put_node(last_node);
+		binder_node_inner_lock(node);
+		print_binder_node_nilocked(m, node);
+		binder_node_inner_unlock(node);
+		last_node = node;
+		binder_inner_proc_lock(proc);
 	}
+	binder_inner_proc_unlock(proc);
+	if (last_node)
+		binder_put_node(last_node);
+
 	if (print_all) {
+		binder_proc_lock(proc);
 		for (n = rb_first(&proc->refs_by_desc);
 		     n != NULL;
 		     n = rb_next(n))
-			print_binder_ref(m, rb_entry(n, struct binder_ref,
-						     rb_node_desc));
+			print_binder_ref_olocked(m, rb_entry(n,
+							    struct binder_ref,
+							    rb_node_desc));
+		binder_proc_unlock(proc);
 	}
-	for (n = rb_first(&proc->allocated_buffers); n != NULL; n = rb_next(n))
-		print_binder_buffer(m, "  buffer",
-				    rb_entry(n, struct binder_buffer, rb_node));
+	binder_alloc_print_allocated(m, &proc->alloc);
+	binder_inner_proc_lock(proc);
 	list_for_each_entry(w, &proc->todo, entry)
-		print_binder_work(m, "  ", "  pending transaction", w);
+		print_binder_work_ilocked(m, proc, "  ",
+					  "  pending transaction", w);
 	list_for_each_entry(w, &proc->delivered_death, entry) {
 		seq_puts(m, "  has delivered dead binder\n");
 		break;
 	}
+	binder_inner_proc_unlock(proc);
 	if (!print_all && m->count == header_pos)
 		m->count = start_pos;
 }
@@ -3480,7 +5439,9 @@
 	"BC_EXIT_LOOPER",
 	"BC_REQUEST_DEATH_NOTIFICATION",
 	"BC_CLEAR_DEATH_NOTIFICATION",
-	"BC_DEAD_BINDER_DONE"
+	"BC_DEAD_BINDER_DONE",
+	"BC_TRANSACTION_SG",
+	"BC_REPLY_SG",
 };
 
 static const char * const binder_objstat_strings[] = {
@@ -3501,17 +5462,21 @@
 	BUILD_BUG_ON(ARRAY_SIZE(stats->bc) !=
 		     ARRAY_SIZE(binder_command_strings));
 	for (i = 0; i < ARRAY_SIZE(stats->bc); i++) {
-		if (stats->bc[i])
+		int temp = atomic_read(&stats->bc[i]);
+
+		if (temp)
 			seq_printf(m, "%s%s: %d\n", prefix,
-				   binder_command_strings[i], stats->bc[i]);
+				   binder_command_strings[i], temp);
 	}
 
 	BUILD_BUG_ON(ARRAY_SIZE(stats->br) !=
 		     ARRAY_SIZE(binder_return_strings));
 	for (i = 0; i < ARRAY_SIZE(stats->br); i++) {
-		if (stats->br[i])
+		int temp = atomic_read(&stats->br[i]);
+
+		if (temp)
 			seq_printf(m, "%s%s: %d\n", prefix,
-				   binder_return_strings[i], stats->br[i]);
+				   binder_return_strings[i], temp);
 	}
 
 	BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) !=
@@ -3519,11 +5484,15 @@
 	BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) !=
 		     ARRAY_SIZE(stats->obj_deleted));
 	for (i = 0; i < ARRAY_SIZE(stats->obj_created); i++) {
-		if (stats->obj_created[i] || stats->obj_deleted[i])
-			seq_printf(m, "%s%s: active %d total %d\n", prefix,
+		int created = atomic_read(&stats->obj_created[i]);
+		int deleted = atomic_read(&stats->obj_deleted[i]);
+
+		if (created || deleted)
+			seq_printf(m, "%s%s: active %d total %d\n",
+				prefix,
 				binder_objstat_strings[i],
-				stats->obj_created[i] - stats->obj_deleted[i],
-				stats->obj_created[i]);
+				created - deleted,
+				created);
 	}
 }
 
@@ -3531,50 +5500,59 @@
 				    struct binder_proc *proc)
 {
 	struct binder_work *w;
+	struct binder_thread *thread;
 	struct rb_node *n;
-	int count, strong, weak;
+	int count, strong, weak, ready_threads;
+	size_t free_async_space =
+		binder_alloc_get_free_async_space(&proc->alloc);
 
 	seq_printf(m, "proc %d\n", proc->pid);
+	seq_printf(m, "context %s\n", proc->context->name);
 	count = 0;
+	ready_threads = 0;
+	binder_inner_proc_lock(proc);
 	for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n))
 		count++;
+
+	list_for_each_entry(thread, &proc->waiting_threads, waiting_thread_node)
+		ready_threads++;
+
 	seq_printf(m, "  threads: %d\n", count);
 	seq_printf(m, "  requested threads: %d+%d/%d\n"
 			"  ready threads %d\n"
 			"  free async space %zd\n", proc->requested_threads,
 			proc->requested_threads_started, proc->max_threads,
-			proc->ready_threads, proc->free_async_space);
+			ready_threads,
+			free_async_space);
 	count = 0;
 	for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n))
 		count++;
+	binder_inner_proc_unlock(proc);
 	seq_printf(m, "  nodes: %d\n", count);
 	count = 0;
 	strong = 0;
 	weak = 0;
+	binder_proc_lock(proc);
 	for (n = rb_first(&proc->refs_by_desc); n != NULL; n = rb_next(n)) {
 		struct binder_ref *ref = rb_entry(n, struct binder_ref,
 						  rb_node_desc);
 		count++;
-		strong += ref->strong;
-		weak += ref->weak;
+		strong += ref->data.strong;
+		weak += ref->data.weak;
 	}
+	binder_proc_unlock(proc);
 	seq_printf(m, "  refs: %d s %d w %d\n", count, strong, weak);
 
-	count = 0;
-	for (n = rb_first(&proc->allocated_buffers); n != NULL; n = rb_next(n))
-		count++;
+	count = binder_alloc_get_allocated_count(&proc->alloc);
 	seq_printf(m, "  buffers: %d\n", count);
 
 	count = 0;
+	binder_inner_proc_lock(proc);
 	list_for_each_entry(w, &proc->todo, entry) {
-		switch (w->type) {
-		case BINDER_WORK_TRANSACTION:
+		if (w->type == BINDER_WORK_TRANSACTION)
 			count++;
-			break;
-		default:
-			break;
-		}
 	}
+	binder_inner_proc_unlock(proc);
 	seq_printf(m, "  pending transactions: %d\n", count);
 
 	print_binder_stats(m, "  ", &proc->stats);
@@ -3585,107 +5563,131 @@
 {
 	struct binder_proc *proc;
 	struct binder_node *node;
-	int do_lock = !binder_debug_no_lock;
-
-	if (do_lock)
-		binder_lock(__func__);
+	struct binder_node *last_node = NULL;
 
 	seq_puts(m, "binder state:\n");
 
+	spin_lock(&binder_dead_nodes_lock);
 	if (!hlist_empty(&binder_dead_nodes))
 		seq_puts(m, "dead nodes:\n");
-	hlist_for_each_entry(node, &binder_dead_nodes, dead_node)
-		print_binder_node(m, node);
+	hlist_for_each_entry(node, &binder_dead_nodes, dead_node) {
+		/*
+		 * take a temporary reference on the node so it
+		 * survives and isn't removed from the list
+		 * while we print it.
+		 */
+		node->tmp_refs++;
+		spin_unlock(&binder_dead_nodes_lock);
+		if (last_node)
+			binder_put_node(last_node);
+		binder_node_lock(node);
+		print_binder_node_nilocked(m, node);
+		binder_node_unlock(node);
+		last_node = node;
+		spin_lock(&binder_dead_nodes_lock);
+	}
+	spin_unlock(&binder_dead_nodes_lock);
+	if (last_node)
+		binder_put_node(last_node);
 
+	mutex_lock(&binder_procs_lock);
 	hlist_for_each_entry(proc, &binder_procs, proc_node)
 		print_binder_proc(m, proc, 1);
-	if (do_lock)
-		binder_unlock(__func__);
+	mutex_unlock(&binder_procs_lock);
+
 	return 0;
 }
 
 static int binder_stats_show(struct seq_file *m, void *unused)
 {
 	struct binder_proc *proc;
-	int do_lock = !binder_debug_no_lock;
-
-	if (do_lock)
-		binder_lock(__func__);
 
 	seq_puts(m, "binder stats:\n");
 
 	print_binder_stats(m, "", &binder_stats);
 
+	mutex_lock(&binder_procs_lock);
 	hlist_for_each_entry(proc, &binder_procs, proc_node)
 		print_binder_proc_stats(m, proc);
-	if (do_lock)
-		binder_unlock(__func__);
+	mutex_unlock(&binder_procs_lock);
+
 	return 0;
 }
 
 static int binder_transactions_show(struct seq_file *m, void *unused)
 {
 	struct binder_proc *proc;
-	int do_lock = !binder_debug_no_lock;
-
-	if (do_lock)
-		binder_lock(__func__);
 
 	seq_puts(m, "binder transactions:\n");
+	mutex_lock(&binder_procs_lock);
 	hlist_for_each_entry(proc, &binder_procs, proc_node)
 		print_binder_proc(m, proc, 0);
-	if (do_lock)
-		binder_unlock(__func__);
+	mutex_unlock(&binder_procs_lock);
+
 	return 0;
 }
 
 static int binder_proc_show(struct seq_file *m, void *unused)
 {
 	struct binder_proc *itr;
-	struct binder_proc *proc = m->private;
-	int do_lock = !binder_debug_no_lock;
-	bool valid_proc = false;
+	int pid = (unsigned long)m->private;
 
-	if (do_lock)
-		binder_lock(__func__);
-
+	mutex_lock(&binder_procs_lock);
 	hlist_for_each_entry(itr, &binder_procs, proc_node) {
-		if (itr == proc) {
-			valid_proc = true;
-			break;
+		if (itr->pid == pid) {
+			seq_puts(m, "binder proc state:\n");
+			print_binder_proc(m, itr, 1);
 		}
 	}
-	if (valid_proc) {
-		seq_puts(m, "binder proc state:\n");
-		print_binder_proc(m, proc, 1);
-	}
-	if (do_lock)
-		binder_unlock(__func__);
+	mutex_unlock(&binder_procs_lock);
+
 	return 0;
 }
 
 static void print_binder_transaction_log_entry(struct seq_file *m,
 					struct binder_transaction_log_entry *e)
 {
+	int debug_id = READ_ONCE(e->debug_id_done);
+	/*
+	 * read barrier to guarantee debug_id_done read before
+	 * we print the log values
+	 */
+	smp_rmb();
 	seq_printf(m,
-		   "%d: %s from %d:%d to %d:%d node %d handle %d size %d:%d\n",
+		   "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d",
 		   e->debug_id, (e->call_type == 2) ? "reply" :
 		   ((e->call_type == 1) ? "async" : "call "), e->from_proc,
-		   e->from_thread, e->to_proc, e->to_thread, e->to_node,
-		   e->target_handle, e->data_size, e->offsets_size);
+		   e->from_thread, e->to_proc, e->to_thread, e->context_name,
+		   e->to_node, e->target_handle, e->data_size, e->offsets_size,
+		   e->return_error, e->return_error_param,
+		   e->return_error_line);
+	/*
+	 * read-barrier to guarantee read of debug_id_done after
+	 * done printing the fields of the entry
+	 */
+	smp_rmb();
+	seq_printf(m, debug_id && debug_id == READ_ONCE(e->debug_id_done) ?
+			"\n" : " (incomplete)\n");
 }
 
 static int binder_transaction_log_show(struct seq_file *m, void *unused)
 {
 	struct binder_transaction_log *log = m->private;
+	unsigned int log_cur = atomic_read(&log->cur);
+	unsigned int count;
+	unsigned int cur;
 	int i;
 
-	if (log->full) {
-		for (i = log->next; i < ARRAY_SIZE(log->entry); i++)
-			print_binder_transaction_log_entry(m, &log->entry[i]);
+	count = log_cur + 1;
+	cur = count < ARRAY_SIZE(log->entry) && !log->full ?
+		0 : count % ARRAY_SIZE(log->entry);
+	if (count > ARRAY_SIZE(log->entry) || log->full)
+		count = ARRAY_SIZE(log->entry);
+	for (i = 0; i < count; i++) {
+		unsigned int index = cur++ % ARRAY_SIZE(log->entry);
+
+		print_binder_transaction_log_entry(m, &log->entry[index]);
 	}
-	for (i = 0; i < log->next; i++)
-		print_binder_transaction_log_entry(m, &log->entry[i]);
 	return 0;
 }
 
@@ -3700,26 +5702,54 @@
 	.release = binder_release,
 };
 
-static struct miscdevice binder_miscdev = {
-	.minor = MISC_DYNAMIC_MINOR,
-	.name = "binder",
-	.fops = &binder_fops
-};
-
 BINDER_DEBUG_ENTRY(state);
 BINDER_DEBUG_ENTRY(stats);
 BINDER_DEBUG_ENTRY(transactions);
 BINDER_DEBUG_ENTRY(transaction_log);
 
+static int __init init_binder_device(const char *name)
+{
+	int ret;
+	struct binder_device *binder_device;
+
+	binder_device = kzalloc(sizeof(*binder_device), GFP_KERNEL);
+	if (!binder_device)
+		return -ENOMEM;
+
+	binder_device->miscdev.fops = &binder_fops;
+	binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
+	binder_device->miscdev.name = name;
+
+	binder_device->context.binder_context_mgr_uid = INVALID_UID;
+	binder_device->context.name = name;
+	mutex_init(&binder_device->context.context_mgr_node_lock);
+
+	ret = misc_register(&binder_device->miscdev);
+	if (ret < 0) {
+		kfree(binder_device);
+		return ret;
+	}
+
+	hlist_add_head(&binder_device->hlist, &binder_devices);
+
+	return ret;
+}
+
 static int __init binder_init(void)
 {
 	int ret;
+	char *device_name, *device_names;
+	struct binder_device *device;
+	struct hlist_node *tmp;
+
+	atomic_set(&binder_transaction_log.cur, ~0U);
+	atomic_set(&binder_transaction_log_failed.cur, ~0U);
 
 	binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL);
 	if (binder_debugfs_dir_entry_root)
 		binder_debugfs_dir_entry_proc = debugfs_create_dir("proc",
 						 binder_debugfs_dir_entry_root);
-	ret = misc_register(&binder_miscdev);
+
 	if (binder_debugfs_dir_entry_root) {
 		debugfs_create_file("state",
 				    S_IRUGO,
@@ -3747,6 +5777,35 @@
 				    &binder_transaction_log_failed,
 				    &binder_transaction_log_fops);
 	}
+
+	/*
+	 * Copy the module_parameter string, because we don't want to
+	 * tokenize it in-place.
+	 */
+	device_names = kzalloc(strlen(binder_devices_param) + 1, GFP_KERNEL);
+	if (!device_names) {
+		ret = -ENOMEM;
+		goto err_alloc_device_names_failed;
+	}
+	strcpy(device_names, binder_devices_param);
+
+	while ((device_name = strsep(&device_names, ","))) {
+		ret = init_binder_device(device_name);
+		if (ret)
+			goto err_init_binder_device_failed;
+	}
+
+	return ret;
+
+err_init_binder_device_failed:
+	hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) {
+		misc_deregister(&device->miscdev);
+		hlist_del(&device->hlist);
+		kfree(device);
+	}
+err_alloc_device_names_failed:
+	debugfs_remove_recursive(binder_debugfs_dir_entry_root);
+
 	return ret;
 }
 
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
new file mode 100644
index 0000000..50c19a9
--- /dev/null
+++ b/drivers/android/binder_alloc.c
@@ -0,0 +1,854 @@
+/* binder_alloc.c
+ *
+ * Android IPC Subsystem
+ *
+ * Copyright (C) 2007-2017 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <asm/cacheflush.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/rtmutex.h>
+#include <linux/rbtree.h>
+#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include "binder_alloc.h"
+#include "binder_trace.h"
+
+#define BINDER_MIN_ALLOC (1 * PAGE_SIZE)
+
+static DEFINE_MUTEX(binder_alloc_mmap_lock);
+
+enum {
+	BINDER_DEBUG_OPEN_CLOSE             = 1U << 1,
+	BINDER_DEBUG_BUFFER_ALLOC           = 1U << 2,
+	BINDER_DEBUG_BUFFER_ALLOC_ASYNC     = 1U << 3,
+};
+static uint32_t binder_alloc_debug_mask;
+
+module_param_named(debug_mask, binder_alloc_debug_mask,
+		   uint, 0644);
+
+#define binder_alloc_debug(mask, x...) \
+	do { \
+		if (binder_alloc_debug_mask & mask) \
+			pr_info(x); \
+	} while (0)
+
+static struct binder_buffer *binder_buffer_next(struct binder_buffer *buffer)
+{
+	return list_entry(buffer->entry.next, struct binder_buffer, entry);
+}
+
+static struct binder_buffer *binder_buffer_prev(struct binder_buffer *buffer)
+{
+	return list_entry(buffer->entry.prev, struct binder_buffer, entry);
+}
+
+static size_t binder_alloc_buffer_size(struct binder_alloc *alloc,
+				       struct binder_buffer *buffer)
+{
+	if (list_is_last(&buffer->entry, &alloc->buffers))
+		return (u8 *)alloc->buffer +
+			alloc->buffer_size - (u8 *)buffer->data;
+	return (u8 *)binder_buffer_next(buffer)->data - (u8 *)buffer->data;
+}
+
+static void binder_insert_free_buffer(struct binder_alloc *alloc,
+				      struct binder_buffer *new_buffer)
+{
+	struct rb_node **p = &alloc->free_buffers.rb_node;
+	struct rb_node *parent = NULL;
+	struct binder_buffer *buffer;
+	size_t buffer_size;
+	size_t new_buffer_size;
+
+	BUG_ON(!new_buffer->free);
+
+	new_buffer_size = binder_alloc_buffer_size(alloc, new_buffer);
+
+	binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+		     "%d: add free buffer, size %zd, at %pK\n",
+		      alloc->pid, new_buffer_size, new_buffer);
+
+	while (*p) {
+		parent = *p;
+		buffer = rb_entry(parent, struct binder_buffer, rb_node);
+		BUG_ON(!buffer->free);
+
+		buffer_size = binder_alloc_buffer_size(alloc, buffer);
+
+		if (new_buffer_size < buffer_size)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+	rb_link_node(&new_buffer->rb_node, parent, p);
+	rb_insert_color(&new_buffer->rb_node, &alloc->free_buffers);
+}
+
+static void binder_insert_allocated_buffer_locked(
+		struct binder_alloc *alloc, struct binder_buffer *new_buffer)
+{
+	struct rb_node **p = &alloc->allocated_buffers.rb_node;
+	struct rb_node *parent = NULL;
+	struct binder_buffer *buffer;
+
+	BUG_ON(new_buffer->free);
+
+	while (*p) {
+		parent = *p;
+		buffer = rb_entry(parent, struct binder_buffer, rb_node);
+		BUG_ON(buffer->free);
+
+		if (new_buffer->data < buffer->data)
+			p = &parent->rb_left;
+		else if (new_buffer->data > buffer->data)
+			p = &parent->rb_right;
+		else
+			BUG();
+	}
+	rb_link_node(&new_buffer->rb_node, parent, p);
+	rb_insert_color(&new_buffer->rb_node, &alloc->allocated_buffers);
+}
+
+static struct binder_buffer *binder_alloc_prepare_to_free_locked(
+		struct binder_alloc *alloc,
+		uintptr_t user_ptr)
+{
+	struct rb_node *n = alloc->allocated_buffers.rb_node;
+	struct binder_buffer *buffer;
+	void *kern_ptr;
+
+	kern_ptr = (void *)(user_ptr - alloc->user_buffer_offset);
+
+	while (n) {
+		buffer = rb_entry(n, struct binder_buffer, rb_node);
+		BUG_ON(buffer->free);
+
+		if (kern_ptr < buffer->data)
+			n = n->rb_left;
+		else if (kern_ptr > buffer->data)
+			n = n->rb_right;
+		else {
+			/*
+			 * Guard against user threads attempting to
+			 * free the buffer when in use by kernel or
+			 * after it's already been freed.
+			 */
+			if (!buffer->allow_user_free)
+				return ERR_PTR(-EPERM);
+			buffer->allow_user_free = 0;
+			return buffer;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * binder_alloc_buffer_lookup() - get buffer given user ptr
+ * @alloc:	binder_alloc for this proc
+ * @user_ptr:	User pointer to buffer data
+ *
+ * Validate userspace pointer to buffer data and return buffer corresponding to
+ * that user pointer. Search the rb tree for buffer that matches user data
+ * pointer.
+ *
+ * Return:	Pointer to buffer or NULL
+ */
+struct binder_buffer *binder_alloc_prepare_to_free(struct binder_alloc *alloc,
+						   uintptr_t user_ptr)
+{
+	struct binder_buffer *buffer;
+
+	mutex_lock(&alloc->mutex);
+	buffer = binder_alloc_prepare_to_free_locked(alloc, user_ptr);
+	mutex_unlock(&alloc->mutex);
+	return buffer;
+}
+
+static int __binder_update_page_range(struct binder_alloc *alloc, int allocate,
+				      void *start, void *end,
+				      struct vm_area_struct *vma)
+{
+	void *page_addr;
+	unsigned long user_page_addr;
+	struct page **page;
+	struct mm_struct *mm;
+
+	binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+		     "%d: %s pages %pK-%pK\n", alloc->pid,
+		     allocate ? "allocate" : "free", start, end);
+
+	if (end <= start)
+		return 0;
+
+	trace_binder_update_page_range(alloc, allocate, start, end);
+
+	if (vma)
+		mm = NULL;
+	else
+		mm = get_task_mm(alloc->tsk);
+
+	if (mm) {
+		down_write(&mm->mmap_sem);
+		vma = alloc->vma;
+		if (vma && mm != alloc->vma_vm_mm) {
+			pr_err("%d: vma mm and task mm mismatch\n",
+				alloc->pid);
+			vma = NULL;
+		}
+	}
+
+	if (allocate == 0)
+		goto free_range;
+
+	if (vma == NULL) {
+		pr_err("%d: binder_alloc_buf failed to map pages in userspace, no vma\n",
+			alloc->pid);
+		goto err_no_vma;
+	}
+
+	for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
+		int ret;
+
+		page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE];
+
+		BUG_ON(*page);
+		*page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+		if (*page == NULL) {
+			pr_err("%d: binder_alloc_buf failed for page at %pK\n",
+				alloc->pid, page_addr);
+			goto err_alloc_page_failed;
+		}
+		ret = map_kernel_range_noflush((unsigned long)page_addr,
+					PAGE_SIZE, PAGE_KERNEL, page);
+		flush_cache_vmap((unsigned long)page_addr,
+				(unsigned long)page_addr + PAGE_SIZE);
+		if (ret != 1) {
+			pr_err("%d: binder_alloc_buf failed to map page at %pK in kernel\n",
+			       alloc->pid, page_addr);
+			goto err_map_kernel_failed;
+		}
+		user_page_addr =
+			(uintptr_t)page_addr + alloc->user_buffer_offset;
+		ret = vm_insert_page(vma, user_page_addr, page[0]);
+		if (ret) {
+			pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n",
+			       alloc->pid, user_page_addr);
+			goto err_vm_insert_page_failed;
+		}
+		/* vm_insert_page does not seem to increment the refcount */
+	}
+	if (mm) {
+		up_write(&mm->mmap_sem);
+		mmput(mm);
+	}
+	return 0;
+
+free_range:
+	for (page_addr = end - PAGE_SIZE; page_addr >= start;
+	     page_addr -= PAGE_SIZE) {
+		page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE];
+		if (vma)
+			zap_page_range(vma, (uintptr_t)page_addr +
+				alloc->user_buffer_offset, PAGE_SIZE, NULL);
+err_vm_insert_page_failed:
+		unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
+err_map_kernel_failed:
+		__free_page(*page);
+		*page = NULL;
+err_alloc_page_failed:
+		;
+	}
+err_no_vma:
+	if (mm) {
+		up_write(&mm->mmap_sem);
+		mmput(mm);
+	}
+	return vma ? -ENOMEM : -ESRCH;
+}
+
+static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
+				    void *start, void *end,
+				    struct vm_area_struct *vma)
+{
+	/*
+	 * For regular updates, move up start if needed since MIN_ALLOC pages
+	 * are always mapped
+	 */
+	if (start - alloc->buffer < BINDER_MIN_ALLOC)
+		start = alloc->buffer + BINDER_MIN_ALLOC;
+
+	return __binder_update_page_range(alloc, allocate, start, end, vma);
+}
+
+struct binder_buffer *binder_alloc_new_buf_locked(struct binder_alloc *alloc,
+						  size_t data_size,
+						  size_t offsets_size,
+						  size_t extra_buffers_size,
+						  int is_async)
+{
+	struct rb_node *n = alloc->free_buffers.rb_node;
+	struct binder_buffer *buffer;
+	size_t buffer_size;
+	struct rb_node *best_fit = NULL;
+	void *has_page_addr;
+	void *end_page_addr;
+	size_t size, data_offsets_size;
+	int ret;
+
+	if (alloc->vma == NULL) {
+		pr_err("%d: binder_alloc_buf, no vma\n",
+		       alloc->pid);
+		return ERR_PTR(-ESRCH);
+	}
+
+	data_offsets_size = ALIGN(data_size, sizeof(void *)) +
+		ALIGN(offsets_size, sizeof(void *));
+
+	if (data_offsets_size < data_size || data_offsets_size < offsets_size) {
+		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+				"%d: got transaction with invalid size %zd-%zd\n",
+				alloc->pid, data_size, offsets_size);
+		return ERR_PTR(-EINVAL);
+	}
+	size = data_offsets_size + ALIGN(extra_buffers_size, sizeof(void *));
+	if (size < data_offsets_size || size < extra_buffers_size) {
+		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+				"%d: got transaction with invalid extra_buffers_size %zd\n",
+				alloc->pid, extra_buffers_size);
+		return ERR_PTR(-EINVAL);
+	}
+	if (is_async &&
+	    alloc->free_async_space < size + sizeof(struct binder_buffer)) {
+		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+			     "%d: binder_alloc_buf size %zd failed, no async space left\n",
+			      alloc->pid, size);
+		return ERR_PTR(-ENOSPC);
+	}
+
+	/* Pad 0-size buffers so they get assigned unique addresses */
+	size = max(size, sizeof(void *));
+
+	while (n) {
+		buffer = rb_entry(n, struct binder_buffer, rb_node);
+		BUG_ON(!buffer->free);
+		buffer_size = binder_alloc_buffer_size(alloc, buffer);
+
+		if (size < buffer_size) {
+			best_fit = n;
+			n = n->rb_left;
+		} else if (size > buffer_size)
+			n = n->rb_right;
+		else {
+			best_fit = n;
+			break;
+		}
+	}
+	if (best_fit == NULL) {
+		size_t allocated_buffers = 0;
+		size_t largest_alloc_size = 0;
+		size_t total_alloc_size = 0;
+		size_t free_buffers = 0;
+		size_t largest_free_size = 0;
+		size_t total_free_size = 0;
+
+		for (n = rb_first(&alloc->allocated_buffers); n != NULL;
+		     n = rb_next(n)) {
+			buffer = rb_entry(n, struct binder_buffer, rb_node);
+			buffer_size = binder_alloc_buffer_size(alloc, buffer);
+			allocated_buffers++;
+			total_alloc_size += buffer_size;
+			if (buffer_size > largest_alloc_size)
+				largest_alloc_size = buffer_size;
+		}
+		for (n = rb_first(&alloc->free_buffers); n != NULL;
+		     n = rb_next(n)) {
+			buffer = rb_entry(n, struct binder_buffer, rb_node);
+			buffer_size = binder_alloc_buffer_size(alloc, buffer);
+			free_buffers++;
+			total_free_size += buffer_size;
+			if (buffer_size > largest_free_size)
+				largest_free_size = buffer_size;
+		}
+		pr_err("%d: binder_alloc_buf size %zd failed, no address space\n",
+			alloc->pid, size);
+		pr_err("allocated: %zd (num: %zd largest: %zd), free: %zd (num: %zd largest: %zd)\n",
+		       total_alloc_size, allocated_buffers, largest_alloc_size,
+		       total_free_size, free_buffers, largest_free_size);
+		return ERR_PTR(-ENOSPC);
+	}
+	if (n == NULL) {
+		buffer = rb_entry(best_fit, struct binder_buffer, rb_node);
+		buffer_size = binder_alloc_buffer_size(alloc, buffer);
+	}
+
+	binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+		     "%d: binder_alloc_buf size %zd got buffer %pK size %zd\n",
+		      alloc->pid, size, buffer, buffer_size);
+
+	has_page_addr =
+		(void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK);
+	WARN_ON(n && buffer_size != size);
+	end_page_addr =
+		(void *)PAGE_ALIGN((uintptr_t)buffer->data + size);
+	if (end_page_addr > has_page_addr)
+		end_page_addr = has_page_addr;
+	ret = binder_update_page_range(alloc, 1,
+	    (void *)PAGE_ALIGN((uintptr_t)buffer->data), end_page_addr, NULL);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (buffer_size != size) {
+		struct binder_buffer *new_buffer;
+
+		new_buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+		if (!new_buffer) {
+			pr_err("%s: %d failed to alloc new buffer struct\n",
+			       __func__, alloc->pid);
+			goto err_alloc_buf_struct_failed;
+		}
+		new_buffer->data = (u8 *)buffer->data + size;
+		list_add(&new_buffer->entry, &buffer->entry);
+		new_buffer->free = 1;
+		binder_insert_free_buffer(alloc, new_buffer);
+	}
+
+	rb_erase(best_fit, &alloc->free_buffers);
+	buffer->free = 0;
+	buffer->allow_user_free = 0;
+	binder_insert_allocated_buffer_locked(alloc, buffer);
+	binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+		     "%d: binder_alloc_buf size %zd got %pK\n",
+		      alloc->pid, size, buffer);
+	buffer->data_size = data_size;
+	buffer->offsets_size = offsets_size;
+	buffer->async_transaction = is_async;
+	buffer->extra_buffers_size = extra_buffers_size;
+	if (is_async) {
+		alloc->free_async_space -= size + sizeof(struct binder_buffer);
+		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
+			     "%d: binder_alloc_buf size %zd async free %zd\n",
+			      alloc->pid, size, alloc->free_async_space);
+	}
+	return buffer;
+
+err_alloc_buf_struct_failed:
+	binder_update_page_range(alloc, 0,
+				 (void *)PAGE_ALIGN((uintptr_t)buffer->data),
+				 end_page_addr, NULL);
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * binder_alloc_new_buf() - Allocate a new binder buffer
+ * @alloc:              binder_alloc for this proc
+ * @data_size:          size of user data buffer
+ * @offsets_size:       user specified buffer offset
+ * @extra_buffers_size: size of extra space for meta-data (eg, security context)
+ * @is_async:           buffer for async transaction
+ *
+ * Allocate a new buffer given the requested sizes. Returns
+ * the kernel version of the buffer pointer. The size allocated
+ * is the sum of the three given sizes (each rounded up to
+ * pointer-sized boundary)
+ *
+ * Return:	The allocated buffer or %NULL if error
+ */
+struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
+					   size_t data_size,
+					   size_t offsets_size,
+					   size_t extra_buffers_size,
+					   int is_async)
+{
+	struct binder_buffer *buffer;
+
+	mutex_lock(&alloc->mutex);
+	buffer = binder_alloc_new_buf_locked(alloc, data_size, offsets_size,
+					     extra_buffers_size, is_async);
+	mutex_unlock(&alloc->mutex);
+	return buffer;
+}
+
+static void *buffer_start_page(struct binder_buffer *buffer)
+{
+	return (void *)((uintptr_t)buffer->data & PAGE_MASK);
+}
+
+static void *prev_buffer_end_page(struct binder_buffer *buffer)
+{
+	return (void *)(((uintptr_t)(buffer->data) - 1) & PAGE_MASK);
+}
+
+static void binder_delete_free_buffer(struct binder_alloc *alloc,
+				      struct binder_buffer *buffer)
+{
+	struct binder_buffer *prev, *next = NULL;
+	bool to_free = true;
+	BUG_ON(alloc->buffers.next == &buffer->entry);
+	prev = binder_buffer_prev(buffer);
+	BUG_ON(!prev->free);
+	if (prev_buffer_end_page(prev) == buffer_start_page(buffer)) {
+		to_free = false;
+		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+				   "%d: merge free, buffer %pK share page with %pK\n",
+				   alloc->pid, buffer->data, prev->data);
+	}
+
+	if (!list_is_last(&buffer->entry, &alloc->buffers)) {
+		next = binder_buffer_next(buffer);
+		if (buffer_start_page(next) == buffer_start_page(buffer)) {
+			to_free = false;
+			binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+					   "%d: merge free, buffer %pK share page with %pK\n",
+					   alloc->pid,
+					   buffer->data,
+					   next->data);
+		}
+	}
+
+	if (PAGE_ALIGNED(buffer->data)) {
+		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+				   "%d: merge free, buffer start %pK is page aligned\n",
+				   alloc->pid, buffer->data);
+		to_free = false;
+	}
+
+	if (to_free) {
+		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+				   "%d: merge free, buffer %pK do not share page with %pK or %pK\n",
+				   alloc->pid, buffer->data,
+				   prev->data, next ? next->data : NULL);
+		binder_update_page_range(alloc, 0, buffer_start_page(buffer),
+					 buffer_start_page(buffer) + PAGE_SIZE,
+					 NULL);
+	}
+	list_del(&buffer->entry);
+	kfree(buffer);
+}
+
+static void binder_free_buf_locked(struct binder_alloc *alloc,
+				   struct binder_buffer *buffer)
+{
+	size_t size, buffer_size;
+
+	buffer_size = binder_alloc_buffer_size(alloc, buffer);
+
+	size = ALIGN(buffer->data_size, sizeof(void *)) +
+		ALIGN(buffer->offsets_size, sizeof(void *)) +
+		ALIGN(buffer->extra_buffers_size, sizeof(void *));
+
+	binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+		     "%d: binder_free_buf %pK size %zd buffer_size %zd\n",
+		      alloc->pid, buffer, size, buffer_size);
+
+	BUG_ON(buffer->free);
+	BUG_ON(size > buffer_size);
+	BUG_ON(buffer->transaction != NULL);
+	BUG_ON(buffer->data < alloc->buffer);
+	BUG_ON(buffer->data > alloc->buffer + alloc->buffer_size);
+
+	if (buffer->async_transaction) {
+		alloc->free_async_space += size + sizeof(struct binder_buffer);
+
+		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
+			     "%d: binder_free_buf size %zd async free %zd\n",
+			      alloc->pid, size, alloc->free_async_space);
+	}
+
+	binder_update_page_range(alloc, 0,
+		(void *)PAGE_ALIGN((uintptr_t)buffer->data),
+		(void *)(((uintptr_t)buffer->data + buffer_size) & PAGE_MASK),
+		NULL);
+
+	rb_erase(&buffer->rb_node, &alloc->allocated_buffers);
+	buffer->free = 1;
+	if (!list_is_last(&buffer->entry, &alloc->buffers)) {
+		struct binder_buffer *next = binder_buffer_next(buffer);
+
+		if (next->free) {
+			rb_erase(&next->rb_node, &alloc->free_buffers);
+			binder_delete_free_buffer(alloc, next);
+		}
+	}
+	if (alloc->buffers.next != &buffer->entry) {
+		struct binder_buffer *prev = binder_buffer_prev(buffer);
+
+		if (prev->free) {
+			binder_delete_free_buffer(alloc, buffer);
+			rb_erase(&prev->rb_node, &alloc->free_buffers);
+			buffer = prev;
+		}
+	}
+	binder_insert_free_buffer(alloc, buffer);
+}
+
+/**
+ * binder_alloc_free_buf() - free a binder buffer
+ * @alloc:	binder_alloc for this proc
+ * @buffer:	kernel pointer to buffer
+ *
+ * Free the buffer allocated via binder_alloc_new_buffer()
+ */
+void binder_alloc_free_buf(struct binder_alloc *alloc,
+			    struct binder_buffer *buffer)
+{
+	mutex_lock(&alloc->mutex);
+	binder_free_buf_locked(alloc, buffer);
+	mutex_unlock(&alloc->mutex);
+}
+
+/**
+ * binder_alloc_mmap_handler() - map virtual address space for proc
+ * @alloc:	alloc structure for this proc
+ * @vma:	vma passed to mmap()
+ *
+ * Called by binder_mmap() to initialize the space specified in
+ * vma for allocating binder buffers
+ *
+ * Return:
+ *      0 = success
+ *      -EBUSY = address space already mapped
+ *      -ENOMEM = failed to map memory to given address space
+ */
+int binder_alloc_mmap_handler(struct binder_alloc *alloc,
+			      struct vm_area_struct *vma)
+{
+	int ret;
+	struct vm_struct *area;
+	const char *failure_string;
+	struct binder_buffer *buffer;
+
+	mutex_lock(&binder_alloc_mmap_lock);
+	if (alloc->buffer) {
+		ret = -EBUSY;
+		failure_string = "already mapped";
+		goto err_already_mapped;
+	}
+
+	area = get_vm_area(vma->vm_end - vma->vm_start, VM_IOREMAP);
+	if (area == NULL) {
+		ret = -ENOMEM;
+		failure_string = "get_vm_area";
+		goto err_get_vm_area_failed;
+	}
+	alloc->buffer = area->addr;
+	alloc->user_buffer_offset =
+		vma->vm_start - (uintptr_t)alloc->buffer;
+	mutex_unlock(&binder_alloc_mmap_lock);
+
+#ifdef CONFIG_CPU_CACHE_VIPT
+	if (cache_is_vipt_aliasing()) {
+		while (CACHE_COLOUR(
+				(vma->vm_start ^ (uint32_t)alloc->buffer))) {
+			pr_info("%s: %d %lx-%lx maps %pK bad alignment\n",
+				__func__, alloc->pid, vma->vm_start,
+				vma->vm_end, alloc->buffer);
+			vma->vm_start += PAGE_SIZE;
+		}
+	}
+#endif
+	alloc->pages = kzalloc(sizeof(alloc->pages[0]) *
+				   ((vma->vm_end - vma->vm_start) / PAGE_SIZE),
+			       GFP_KERNEL);
+	if (alloc->pages == NULL) {
+		ret = -ENOMEM;
+		failure_string = "alloc page array";
+		goto err_alloc_pages_failed;
+	}
+	alloc->buffer_size = vma->vm_end - vma->vm_start;
+
+	buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+	if (!buffer) {
+		ret = -ENOMEM;
+		failure_string = "alloc buffer struct";
+		goto err_alloc_buf_struct_failed;
+	}
+
+	if (__binder_update_page_range(alloc, 1, alloc->buffer,
+				       alloc->buffer + BINDER_MIN_ALLOC, vma)) {
+		ret = -ENOMEM;
+		failure_string = "alloc small buf";
+		goto err_alloc_small_buf_failed;
+	}
+	buffer->data = alloc->buffer;
+	list_add(&buffer->entry, &alloc->buffers);
+	buffer->free = 1;
+	binder_insert_free_buffer(alloc, buffer);
+	alloc->free_async_space = alloc->buffer_size / 2;
+	barrier();
+	alloc->vma = vma;
+	alloc->vma_vm_mm = vma->vm_mm;
+
+	return 0;
+
+err_alloc_small_buf_failed:
+	kfree(buffer);
+err_alloc_buf_struct_failed:
+	kfree(alloc->pages);
+	alloc->pages = NULL;
+err_alloc_pages_failed:
+	mutex_lock(&binder_alloc_mmap_lock);
+	vfree(alloc->buffer);
+	alloc->buffer = NULL;
+err_get_vm_area_failed:
+err_already_mapped:
+	mutex_unlock(&binder_alloc_mmap_lock);
+	pr_err("%s: %d %lx-%lx %s failed %d\n", __func__,
+	       alloc->pid, vma->vm_start, vma->vm_end, failure_string, ret);
+	return ret;
+}
+
+
+void binder_alloc_deferred_release(struct binder_alloc *alloc)
+{
+	struct rb_node *n;
+	int buffers, page_count;
+	struct binder_buffer *buffer;
+
+	BUG_ON(alloc->vma);
+
+	buffers = 0;
+	mutex_lock(&alloc->mutex);
+	while ((n = rb_first(&alloc->allocated_buffers))) {
+		buffer = rb_entry(n, struct binder_buffer, rb_node);
+
+		/* Transaction should already have been freed */
+		BUG_ON(buffer->transaction);
+
+		binder_free_buf_locked(alloc, buffer);
+		buffers++;
+	}
+
+	while (!list_empty(&alloc->buffers)) {
+		buffer = list_first_entry(&alloc->buffers,
+					  struct binder_buffer, entry);
+		WARN_ON(!buffer->free);
+
+		list_del(&buffer->entry);
+		WARN_ON_ONCE(!list_empty(&alloc->buffers));
+		kfree(buffer);
+	}
+
+	page_count = 0;
+	if (alloc->pages) {
+		int i;
+
+		for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
+			void *page_addr;
+
+			if (!alloc->pages[i])
+				continue;
+
+			page_addr = alloc->buffer + i * PAGE_SIZE;
+			binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+				     "%s: %d: page %d at %pK not freed\n",
+				     __func__, alloc->pid, i, page_addr);
+			unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
+			__free_page(alloc->pages[i]);
+			page_count++;
+		}
+		kfree(alloc->pages);
+		vfree(alloc->buffer);
+	}
+	mutex_unlock(&alloc->mutex);
+
+	binder_alloc_debug(BINDER_DEBUG_OPEN_CLOSE,
+		     "%s: %d buffers %d, pages %d\n",
+		     __func__, alloc->pid, buffers, page_count);
+}
+
+static void print_binder_buffer(struct seq_file *m, const char *prefix,
+				struct binder_buffer *buffer)
+{
+	seq_printf(m, "%s %d: %pK size %zd:%zd:%zd %s\n",
+		   prefix, buffer->debug_id, buffer->data,
+		   buffer->data_size, buffer->offsets_size,
+		   buffer->extra_buffers_size,
+		   buffer->transaction ? "active" : "delivered");
+}
+
+/**
+ * binder_alloc_print_allocated() - print buffer info
+ * @m:     seq_file for output via seq_printf()
+ * @alloc: binder_alloc for this proc
+ *
+ * Prints information about every buffer associated with
+ * the binder_alloc state to the given seq_file
+ */
+void binder_alloc_print_allocated(struct seq_file *m,
+				  struct binder_alloc *alloc)
+{
+	struct rb_node *n;
+
+	mutex_lock(&alloc->mutex);
+	for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n))
+		print_binder_buffer(m, "  buffer",
+				    rb_entry(n, struct binder_buffer, rb_node));
+	mutex_unlock(&alloc->mutex);
+}
+
+/**
+ * binder_alloc_get_allocated_count() - return count of buffers
+ * @alloc: binder_alloc for this proc
+ *
+ * Return: count of allocated buffers
+ */
+int binder_alloc_get_allocated_count(struct binder_alloc *alloc)
+{
+	struct rb_node *n;
+	int count = 0;
+
+	mutex_lock(&alloc->mutex);
+	for (n = rb_first(&alloc->allocated_buffers); n != NULL; n = rb_next(n))
+		count++;
+	mutex_unlock(&alloc->mutex);
+	return count;
+}
+
+
+/**
+ * binder_alloc_vma_close() - invalidate address space
+ * @alloc: binder_alloc for this proc
+ *
+ * Called from binder_vma_close() when releasing address space.
+ * Clears alloc->vma to prevent new incoming transactions from
+ * allocating more buffers.
+ */
+void binder_alloc_vma_close(struct binder_alloc *alloc)
+{
+	WRITE_ONCE(alloc->vma, NULL);
+	WRITE_ONCE(alloc->vma_vm_mm, NULL);
+}
+
+/**
+ * binder_alloc_init() - called by binder_open() for per-proc initialization
+ * @alloc: binder_alloc for this proc
+ *
+ * Called from binder_open() to initialize binder_alloc fields for
+ * new binder proc
+ */
+void binder_alloc_init(struct binder_alloc *alloc)
+{
+	alloc->tsk = current->group_leader;
+	alloc->pid = current->group_leader->pid;
+	mutex_init(&alloc->mutex);
+	INIT_LIST_HEAD(&alloc->buffers);
+}
+
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
new file mode 100644
index 0000000..395d9b5
--- /dev/null
+++ b/drivers/android/binder_alloc.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2017 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_BINDER_ALLOC_H
+#define _LINUX_BINDER_ALLOC_H
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/rtmutex.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+
+struct binder_transaction;
+
+/**
+ * struct binder_buffer - buffer used for binder transactions
+ * @entry:              entry alloc->buffers
+ * @rb_node:            node for allocated_buffers/free_buffers rb trees
+ * @free:               true if buffer is free
+ * @allow_user_free:    describe the second member of struct blah,
+ * @async_transaction:  describe the second member of struct blah,
+ * @debug_id:           describe the second member of struct blah,
+ * @transaction:        describe the second member of struct blah,
+ * @target_node:        describe the second member of struct blah,
+ * @data_size:          describe the second member of struct blah,
+ * @offsets_size:       describe the second member of struct blah,
+ * @extra_buffers_size: describe the second member of struct blah,
+ * @data:i              describe the second member of struct blah,
+ *
+ * Bookkeeping structure for binder transaction buffers
+ */
+struct binder_buffer {
+	struct list_head entry; /* free and allocated entries by address */
+	struct rb_node rb_node; /* free entry by size or allocated entry */
+				/* by address */
+	unsigned free:1;
+	unsigned allow_user_free:1;
+	unsigned async_transaction:1;
+	unsigned debug_id:29;
+
+	struct binder_transaction *transaction;
+
+	struct binder_node *target_node;
+	size_t data_size;
+	size_t offsets_size;
+	size_t extra_buffers_size;
+	void *data;
+};
+
+/**
+ * struct binder_alloc - per-binder proc state for binder allocator
+ * @vma:                vm_area_struct passed to mmap_handler
+ *                      (invarient after mmap)
+ * @tsk:                tid for task that called init for this proc
+ *                      (invariant after init)
+ * @vma_vm_mm:          copy of vma->vm_mm (invarient after mmap)
+ * @buffer:             base of per-proc address space mapped via mmap
+ * @user_buffer_offset: offset between user and kernel VAs for buffer
+ * @buffers:            list of all buffers for this proc
+ * @free_buffers:       rb tree of buffers available for allocation
+ *                      sorted by size
+ * @allocated_buffers:  rb tree of allocated buffers sorted by address
+ * @free_async_space:   VA space available for async buffers. This is
+ *                      initialized at mmap time to 1/2 the full VA space
+ * @pages:              array of physical page addresses for each
+ *                      page of mmap'd space
+ * @buffer_size:        size of address space specified via mmap
+ * @pid:                pid for associated binder_proc (invariant after init)
+ *
+ * Bookkeeping structure for per-proc address space management for binder
+ * buffers. It is normally initialized during binder_init() and binder_mmap()
+ * calls. The address space is used for both user-visible buffers and for
+ * struct binder_buffer objects used to track the user buffers
+ */
+struct binder_alloc {
+	struct mutex mutex;
+	struct task_struct *tsk;
+	struct vm_area_struct *vma;
+	struct mm_struct *vma_vm_mm;
+	void *buffer;
+	ptrdiff_t user_buffer_offset;
+	struct list_head buffers;
+	struct rb_root free_buffers;
+	struct rb_root allocated_buffers;
+	size_t free_async_space;
+	struct page **pages;
+	size_t buffer_size;
+	uint32_t buffer_free;
+	int pid;
+};
+
+#ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST
+void binder_selftest_alloc(struct binder_alloc *alloc);
+#else
+static inline void binder_selftest_alloc(struct binder_alloc *alloc) {}
+#endif
+extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
+						  size_t data_size,
+						  size_t offsets_size,
+						  size_t extra_buffers_size,
+						  int is_async);
+extern void binder_alloc_init(struct binder_alloc *alloc);
+extern void binder_alloc_vma_close(struct binder_alloc *alloc);
+extern struct binder_buffer *
+binder_alloc_prepare_to_free(struct binder_alloc *alloc,
+			     uintptr_t user_ptr);
+extern void binder_alloc_free_buf(struct binder_alloc *alloc,
+				  struct binder_buffer *buffer);
+extern int binder_alloc_mmap_handler(struct binder_alloc *alloc,
+				     struct vm_area_struct *vma);
+extern void binder_alloc_deferred_release(struct binder_alloc *alloc);
+extern int binder_alloc_get_allocated_count(struct binder_alloc *alloc);
+extern void binder_alloc_print_allocated(struct seq_file *m,
+					 struct binder_alloc *alloc);
+
+/**
+ * binder_alloc_get_free_async_space() - get free space available for async
+ * @alloc:	binder_alloc for this proc
+ *
+ * Return:	the bytes remaining in the address-space for async transactions
+ */
+static inline size_t
+binder_alloc_get_free_async_space(struct binder_alloc *alloc)
+{
+	size_t free_async_space;
+
+	mutex_lock(&alloc->mutex);
+	free_async_space = alloc->free_async_space;
+	mutex_unlock(&alloc->mutex);
+	return free_async_space;
+}
+
+/**
+ * binder_alloc_get_user_buffer_offset() - get offset between kernel/user addrs
+ * @alloc:	binder_alloc for this proc
+ *
+ * Return:	the offset between kernel and user-space addresses to use for
+ * virtual address conversion
+ */
+static inline ptrdiff_t
+binder_alloc_get_user_buffer_offset(struct binder_alloc *alloc)
+{
+	/*
+	 * user_buffer_offset is constant if vma is set and
+	 * undefined if vma is not set. It is possible to
+	 * get here with !alloc->vma if the target process
+	 * is dying while a transaction is being initiated.
+	 * Returning the old value is ok in this case and
+	 * the transaction will fail.
+	 */
+	return alloc->user_buffer_offset;
+}
+
+#endif /* _LINUX_BINDER_ALLOC_H */
+
diff --git a/drivers/android/binder_alloc_selftest.c b/drivers/android/binder_alloc_selftest.c
new file mode 100644
index 0000000..0bf72079
--- /dev/null
+++ b/drivers/android/binder_alloc_selftest.c
@@ -0,0 +1,270 @@
+/* binder_alloc_selftest.c
+ *
+ * Android IPC Subsystem
+ *
+ * Copyright (C) 2017 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/mm_types.h>
+#include <linux/err.h>
+#include "binder_alloc.h"
+
+#define BUFFER_NUM 5
+#define BUFFER_MIN_SIZE (PAGE_SIZE / 8)
+
+static bool binder_selftest_run = true;
+static int binder_selftest_failures;
+static DEFINE_MUTEX(binder_selftest_lock);
+
+/**
+ * enum buf_end_align_type - Page alignment of a buffer
+ * end with regard to the end of the previous buffer.
+ *
+ * In the pictures below, buf2 refers to the buffer we
+ * are aligning. buf1 refers to previous buffer by addr.
+ * Symbol [ means the start of a buffer, ] means the end
+ * of a buffer, and | means page boundaries.
+ */
+enum buf_end_align_type {
+	/**
+	 * @SAME_PAGE_UNALIGNED: The end of this buffer is on
+	 * the same page as the end of the previous buffer and
+	 * is not page aligned. Examples:
+	 * buf1 ][ buf2 ][ ...
+	 * buf1 ]|[ buf2 ][ ...
+	 */
+	SAME_PAGE_UNALIGNED = 0,
+	/**
+	 * @SAME_PAGE_ALIGNED: When the end of the previous buffer
+	 * is not page aligned, the end of this buffer is on the
+	 * same page as the end of the previous buffer and is page
+	 * aligned. When the previous buffer is page aligned, the
+	 * end of this buffer is aligned to the next page boundary.
+	 * Examples:
+	 * buf1 ][ buf2 ]| ...
+	 * buf1 ]|[ buf2 ]| ...
+	 */
+	SAME_PAGE_ALIGNED,
+	/**
+	 * @NEXT_PAGE_UNALIGNED: The end of this buffer is on
+	 * the page next to the end of the previous buffer and
+	 * is not page aligned. Examples:
+	 * buf1 ][ buf2 | buf2 ][ ...
+	 * buf1 ]|[ buf2 | buf2 ][ ...
+	 */
+	NEXT_PAGE_UNALIGNED,
+	/**
+	 * @NEXT_PAGE_ALIGNED: The end of this buffer is on
+	 * the page next to the end of the previous buffer and
+	 * is page aligned. Examples:
+	 * buf1 ][ buf2 | buf2 ]| ...
+	 * buf1 ]|[ buf2 | buf2 ]| ...
+	 */
+	NEXT_PAGE_ALIGNED,
+	/**
+	 * @NEXT_NEXT_UNALIGNED: The end of this buffer is on
+	 * the page that follows the page after the end of the
+	 * previous buffer and is not page aligned. Examples:
+	 * buf1 ][ buf2 | buf2 | buf2 ][ ...
+	 * buf1 ]|[ buf2 | buf2 | buf2 ][ ...
+	 */
+	NEXT_NEXT_UNALIGNED,
+	LOOP_END,
+};
+
+static void pr_err_size_seq(size_t *sizes, int *seq)
+{
+	int i;
+
+	pr_err("alloc sizes: ");
+	for (i = 0; i < BUFFER_NUM; i++)
+		pr_cont("[%zu]", sizes[i]);
+	pr_cont("\n");
+	pr_err("free seq: ");
+	for (i = 0; i < BUFFER_NUM; i++)
+		pr_cont("[%d]", seq[i]);
+	pr_cont("\n");
+}
+
+static bool check_buffer_pages_allocated(struct binder_alloc *alloc,
+					 struct binder_buffer *buffer,
+					 size_t size)
+{
+	void *page_addr, *end;
+	int page_index;
+
+	end = (void *)PAGE_ALIGN((uintptr_t)buffer->data + size);
+	page_addr = buffer->data;
+	for (; page_addr < end; page_addr += PAGE_SIZE) {
+		page_index = (page_addr - alloc->buffer) / PAGE_SIZE;
+		if (!alloc->pages[page_index]) {
+			pr_err("incorrect alloc state at page index %d\n",
+			       page_index);
+			return false;
+		}
+	}
+	return true;
+}
+
+static void binder_selftest_alloc_buf(struct binder_alloc *alloc,
+				      struct binder_buffer *buffers[],
+				      size_t *sizes, int *seq)
+{
+	int i;
+
+	for (i = 0; i < BUFFER_NUM; i++) {
+		buffers[i] = binder_alloc_new_buf(alloc, sizes[i], 0, 0, 0);
+		if (IS_ERR(buffers[i]) ||
+		    !check_buffer_pages_allocated(alloc, buffers[i],
+						  sizes[i])) {
+			pr_err_size_seq(sizes, seq);
+			binder_selftest_failures++;
+		}
+	}
+}
+
+static void binder_selftest_free_buf(struct binder_alloc *alloc,
+				     struct binder_buffer *buffers[],
+				     size_t *sizes, int *seq)
+{
+	int i;
+
+	for (i = 0; i < BUFFER_NUM; i++)
+		binder_alloc_free_buf(alloc, buffers[seq[i]]);
+
+	for (i = 0; i < (alloc->buffer_size / PAGE_SIZE); i++) {
+		if ((!alloc->pages[i]) == (i == 0)) {
+			pr_err("incorrect free state at page index %d\n", i);
+			binder_selftest_failures++;
+		}
+	}
+}
+
+static void binder_selftest_alloc_free(struct binder_alloc *alloc,
+				       size_t *sizes, int *seq)
+{
+	struct binder_buffer *buffers[BUFFER_NUM];
+
+	binder_selftest_alloc_buf(alloc, buffers, sizes, seq);
+	binder_selftest_free_buf(alloc, buffers, sizes, seq);
+}
+
+static bool is_dup(int *seq, int index, int val)
+{
+	int i;
+
+	for (i = 0; i < index; i++) {
+		if (seq[i] == val)
+			return true;
+	}
+	return false;
+}
+
+/* Generate BUFFER_NUM factorial free orders. */
+static void binder_selftest_free_seq(struct binder_alloc *alloc,
+				     size_t *sizes, int *seq, int index)
+{
+	int i;
+
+	if (index == BUFFER_NUM) {
+		binder_selftest_alloc_free(alloc, sizes, seq);
+		return;
+	}
+	for (i = 0; i < BUFFER_NUM; i++) {
+		if (is_dup(seq, index, i))
+			continue;
+		seq[index] = i;
+		binder_selftest_free_seq(alloc, sizes, seq, index + 1);
+	}
+}
+
+static void binder_selftest_alloc_size(struct binder_alloc *alloc,
+				       size_t *end_offset)
+{
+	int i;
+	int seq[BUFFER_NUM] = {0};
+	size_t front_sizes[BUFFER_NUM];
+	size_t back_sizes[BUFFER_NUM];
+	size_t last_offset, offset = 0;
+
+	for (i = 0; i < BUFFER_NUM; i++) {
+		last_offset = offset;
+		offset = end_offset[i];
+		front_sizes[i] = offset - last_offset;
+		back_sizes[BUFFER_NUM - i - 1] = front_sizes[i];
+	}
+	/*
+	 * Buffers share the first or last few pages.
+	 * Only BUFFER_NUM - 1 buffer sizes are adjustable since
+	 * we need one giant buffer before getting to the last page.
+	 */
+	back_sizes[0] += alloc->buffer_size - end_offset[BUFFER_NUM - 1];
+	binder_selftest_free_seq(alloc, front_sizes, seq, 0);
+	binder_selftest_free_seq(alloc, back_sizes, seq, 0);
+}
+
+static void binder_selftest_alloc_offset(struct binder_alloc *alloc,
+					 size_t *end_offset, int index)
+{
+	int align;
+	size_t end, prev;
+
+	if (index == BUFFER_NUM) {
+		binder_selftest_alloc_size(alloc, end_offset);
+		return;
+	}
+	prev = index == 0 ? 0 : end_offset[index - 1];
+	end = prev;
+
+	BUILD_BUG_ON(BUFFER_MIN_SIZE * BUFFER_NUM >= PAGE_SIZE);
+
+	for (align = SAME_PAGE_UNALIGNED; align < LOOP_END; align++) {
+		if (align % 2)
+			end = ALIGN(end, PAGE_SIZE);
+		else
+			end += BUFFER_MIN_SIZE;
+		end_offset[index] = end;
+		binder_selftest_alloc_offset(alloc, end_offset, index + 1);
+	}
+}
+
+/**
+ * binder_selftest_alloc() - Test alloc and free of buffer pages.
+ * @alloc: Pointer to alloc struct.
+ *
+ * Allocate BUFFER_NUM buffers to cover all page alignment cases,
+ * then free them in all orders possible. Check that pages are
+ * allocated after buffer alloc and freed after freeing buffer.
+ */
+void binder_selftest_alloc(struct binder_alloc *alloc)
+{
+	size_t end_offset[BUFFER_NUM];
+
+	if (!binder_selftest_run)
+		return;
+	mutex_lock(&binder_selftest_lock);
+	if (!binder_selftest_run || !alloc->vma)
+		goto done;
+	pr_info("STARTED\n");
+	binder_selftest_alloc_offset(alloc, end_offset, 0);
+	binder_selftest_run = false;
+	if (binder_selftest_failures > 0)
+		pr_info("%d tests FAILED\n", binder_selftest_failures);
+	else
+		pr_info("PASSED\n");
+
+done:
+	mutex_unlock(&binder_selftest_lock);
+}
diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h
index 7f20f3d..7967db1 100644
--- a/drivers/android/binder_trace.h
+++ b/drivers/android/binder_trace.h
@@ -23,7 +23,8 @@
 struct binder_buffer;
 struct binder_node;
 struct binder_proc;
-struct binder_ref;
+struct binder_alloc;
+struct binder_ref_data;
 struct binder_thread;
 struct binder_transaction;
 
@@ -146,8 +147,8 @@
 
 TRACE_EVENT(binder_transaction_node_to_ref,
 	TP_PROTO(struct binder_transaction *t, struct binder_node *node,
-		 struct binder_ref *ref),
-	TP_ARGS(t, node, ref),
+		 struct binder_ref_data *rdata),
+	TP_ARGS(t, node, rdata),
 
 	TP_STRUCT__entry(
 		__field(int, debug_id)
@@ -160,8 +161,8 @@
 		__entry->debug_id = t->debug_id;
 		__entry->node_debug_id = node->debug_id;
 		__entry->node_ptr = node->ptr;
-		__entry->ref_debug_id = ref->debug_id;
-		__entry->ref_desc = ref->desc;
+		__entry->ref_debug_id = rdata->debug_id;
+		__entry->ref_desc = rdata->desc;
 	),
 	TP_printk("transaction=%d node=%d src_ptr=0x%016llx ==> dest_ref=%d dest_desc=%d",
 		  __entry->debug_id, __entry->node_debug_id,
@@ -170,8 +171,9 @@
 );
 
 TRACE_EVENT(binder_transaction_ref_to_node,
-	TP_PROTO(struct binder_transaction *t, struct binder_ref *ref),
-	TP_ARGS(t, ref),
+	TP_PROTO(struct binder_transaction *t, struct binder_node *node,
+		 struct binder_ref_data *rdata),
+	TP_ARGS(t, node, rdata),
 
 	TP_STRUCT__entry(
 		__field(int, debug_id)
@@ -182,10 +184,10 @@
 	),
 	TP_fast_assign(
 		__entry->debug_id = t->debug_id;
-		__entry->ref_debug_id = ref->debug_id;
-		__entry->ref_desc = ref->desc;
-		__entry->node_debug_id = ref->node->debug_id;
-		__entry->node_ptr = ref->node->ptr;
+		__entry->ref_debug_id = rdata->debug_id;
+		__entry->ref_desc = rdata->desc;
+		__entry->node_debug_id = node->debug_id;
+		__entry->node_ptr = node->ptr;
 	),
 	TP_printk("transaction=%d node=%d src_ref=%d src_desc=%d ==> dest_ptr=0x%016llx",
 		  __entry->debug_id, __entry->node_debug_id,
@@ -194,9 +196,10 @@
 );
 
 TRACE_EVENT(binder_transaction_ref_to_ref,
-	TP_PROTO(struct binder_transaction *t, struct binder_ref *src_ref,
-		 struct binder_ref *dest_ref),
-	TP_ARGS(t, src_ref, dest_ref),
+	TP_PROTO(struct binder_transaction *t, struct binder_node *node,
+		 struct binder_ref_data *src_ref,
+		 struct binder_ref_data *dest_ref),
+	TP_ARGS(t, node, src_ref, dest_ref),
 
 	TP_STRUCT__entry(
 		__field(int, debug_id)
@@ -208,7 +211,7 @@
 	),
 	TP_fast_assign(
 		__entry->debug_id = t->debug_id;
-		__entry->node_debug_id = src_ref->node->debug_id;
+		__entry->node_debug_id = node->debug_id;
 		__entry->src_ref_debug_id = src_ref->debug_id;
 		__entry->src_ref_desc = src_ref->desc;
 		__entry->dest_ref_debug_id = dest_ref->debug_id;
@@ -268,9 +271,9 @@
 	TP_ARGS(buffer));
 
 TRACE_EVENT(binder_update_page_range,
-	TP_PROTO(struct binder_proc *proc, bool allocate,
+	TP_PROTO(struct binder_alloc *alloc, bool allocate,
 		 void *start, void *end),
-	TP_ARGS(proc, allocate, start, end),
+	TP_ARGS(alloc, allocate, start, end),
 	TP_STRUCT__entry(
 		__field(int, proc)
 		__field(bool, allocate)
@@ -278,9 +281,9 @@
 		__field(size_t, size)
 	),
 	TP_fast_assign(
-		__entry->proc = proc->pid;
+		__entry->proc = alloc->pid;
 		__entry->allocate = allocate;
-		__entry->offset = start - proc->buffer;
+		__entry->offset = start - alloc->buffer;
 		__entry->size = end - start;
 	),
 	TP_printk("proc=%d allocate=%d offset=%zu size=%zu",
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 9851721..574d08f 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -33,6 +33,7 @@
 #include <linux/cpufreq.h>
 #include <linux/cpuidle.h>
 #include <linux/timer.h>
+#include <linux/wakeup_reason.h>
 
 #include "../base.h"
 #include "power.h"
@@ -1353,6 +1354,7 @@
 	pm_callback_t callback = NULL;
 	char *info = NULL;
 	int error = 0;
+	char suspend_abort[MAX_SUSPEND_ABORT_LEN];
 	DECLARE_DPM_WATCHDOG_ON_STACK(wd);
 
 	TRACE_DEVICE(dev);
@@ -1375,6 +1377,9 @@
 		pm_wakeup_event(dev, 0);
 
 	if (pm_wakeup_pending()) {
+		pm_get_active_wakeup_sources(suspend_abort,
+			MAX_SUSPEND_ABORT_LEN);
+		log_suspend_abort_reason(suspend_abort);
 		dev->power.direct_complete = false;
 		async_error = -EBUSY;
 		goto Complete;
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 9f51a16..fd1b5c8 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -15,6 +15,7 @@
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
 #include <linux/pm_wakeirq.h>
+#include <linux/types.h>
 #include <trace/events/power.h>
 
 #include "power.h"
@@ -810,6 +811,37 @@
 }
 EXPORT_SYMBOL_GPL(pm_wakeup_event);
 
+void pm_get_active_wakeup_sources(char *pending_wakeup_source, size_t max)
+{
+	struct wakeup_source *ws, *last_active_ws = NULL;
+	int len = 0;
+	bool active = false;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+		if (ws->active && len < max) {
+			if (!active)
+				len += scnprintf(pending_wakeup_source, max,
+						"Pending Wakeup Sources: ");
+			len += scnprintf(pending_wakeup_source + len, max - len,
+				"%s ", ws->name);
+			active = true;
+		} else if (!active &&
+			   (!last_active_ws ||
+			    ktime_to_ns(ws->last_time) >
+			    ktime_to_ns(last_active_ws->last_time))) {
+			last_active_ws = ws;
+		}
+	}
+	if (!active && last_active_ws) {
+		scnprintf(pending_wakeup_source, max,
+				"Last active Wakeup Source: %s",
+				last_active_ws->name);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(pm_get_active_wakeup_sources);
+
 void pm_print_active_wakeup_sources(void)
 {
 	struct wakeup_source *ws;
@@ -1017,7 +1049,7 @@
 		active_time = ktime_set(0, 0);
 	}
 
-	seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n",
+	seq_printf(m, "%-32s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n",
 		   ws->name, active_count, ws->event_count,
 		   ws->wakeup_count, ws->expire_count,
 		   ktime_to_ms(active_time), ktime_to_ms(total_time),
@@ -1038,7 +1070,7 @@
 	struct wakeup_source *ws;
 	int srcuidx;
 
-	seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t"
+	seq_puts(m, "name\t\t\t\t\tactive_count\tevent_count\twakeup_count\t"
 		"expire_count\tactive_since\ttotal_time\tmax_time\t"
 		"last_change\tprevent_suspend_time\n");
 
diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c
index 8d98a32..96c34a9 100644
--- a/drivers/base/syscore.c
+++ b/drivers/base/syscore.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/suspend.h>
 #include <trace/events/power.h>
+#include <linux/wakeup_reason.h>
 
 static LIST_HEAD(syscore_ops_list);
 static DEFINE_MUTEX(syscore_ops_lock);
@@ -75,6 +76,8 @@
 	return 0;
 
  err_out:
+	log_suspend_abort_reason("System core suspend callback %pF failed",
+		ops->suspend);
 	pr_err("PM: System core suspend callback %pF failed.\n", ops->suspend);
 
 	list_for_each_entry_continue(ops, &syscore_ops_list, node)
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index e2c6e43..0bed22f 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -305,6 +305,14 @@
 	  This must be disabled for hardware validation purposes to detect any
 	  hardware anomalies of missing events.
 
+config ARM_ARCH_TIMER_VCT_ACCESS
+	bool "Support for ARM architected timer virtual counter access in userspace"
+	default !ARM64
+	depends on ARM_ARCH_TIMER
+	help
+	  This option enables support for reading the ARM architected timer's
+	  virtual counter in userspace.
+
 config FSL_ERRATUM_A008585
 	bool "Workaround for Freescale/NXP Erratum A-008585"
 	default y
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index a2503db..e3bc592 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -449,7 +449,10 @@
 			| ARCH_TIMER_USR_PCT_ACCESS_EN);
 
 	/* Enable user access to the virtual counter */
-	cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN;
+	if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_VCT_ACCESS))
+		cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN;
+	else
+		cntkctl &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
 
 	arch_timer_set_cntkctl(cntkctl);
 }
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index cac26fb..bc6afa3 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -102,6 +102,24 @@
 	  governor. If unsure have a look at the help section of the
 	  driver. Fallback governor will be the performance governor.
 
+config CPU_FREQ_DEFAULT_GOV_SCHED
+	bool "sched"
+	select CPU_FREQ_GOV_SCHED
+	help
+	  Use the CPUfreq governor 'sched' as default. This scales
+	  cpu frequency using CPU utilization estimates from the
+	  scheduler.
+
+config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+	bool "interactive"
+	select CPU_FREQ_GOV_INTERACTIVE
+	select CPU_FREQ_GOV_PERFORMANCE
+	help
+	  Use the CPUFreq governor 'interactive' as default. This allows
+	  you to get a full dynamic cpu frequency capable system by simply
+	  loading your cpufreq low-level hardware driver, using the
+	  'interactive' governor for latency-sensitive workloads.
+
 config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
 	bool "schedutil"
 	depends on SMP
@@ -193,6 +211,39 @@
 
 	  If in doubt, say N.
 
+config CPU_FREQ_GOV_SCHED
+	bool "'sched' cpufreq governor"
+	depends on CPU_FREQ
+	depends on SMP
+	select CPU_FREQ_GOV_COMMON
+	help
+	  'sched' - this governor scales cpu frequency from the
+	  scheduler as a function of cpu capacity utilization. It does
+	  not evaluate utilization on a periodic basis (as ondemand
+	  does) but instead is event-driven by the scheduler.
+
+	  If in doubt, say N.
+
+config CPU_FREQ_GOV_INTERACTIVE
+	tristate "'interactive' cpufreq policy governor"
+	depends on CPU_FREQ
+	select CPU_FREQ_GOV_ATTR_SET
+	select IRQ_WORK
+	help
+	  'interactive' - This driver adds a dynamic cpufreq policy governor
+	  designed for latency-sensitive workloads.
+
+	  This governor attempts to reduce the latency of clock
+	  increases so that the system is more responsive to
+	  interactive workloads.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cpufreq_interactive.
+
+	  For details, take a look at linux/Documentation/cpu-freq.
+
+	  If in doubt, say N.
+
 config CPU_FREQ_GOV_SCHEDUTIL
 	bool "'schedutil' cpufreq policy governor"
 	depends on CPU_FREQ && SMP
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 0a9b6a09..f0c9905 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -10,6 +10,7 @@
 obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE)	+= cpufreq_userspace.o
 obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND)	+= cpufreq_ondemand.o
 obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o
+obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE)	+= cpufreq_interactive.o
 obj-$(CONFIG_CPU_FREQ_GOV_COMMON)		+= cpufreq_governor.o
 obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET)	+= cpufreq_governor_attr_set.o
 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a38a23f..35884ae 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -29,6 +29,9 @@
 #include <linux/suspend.h>
 #include <linux/syscore_ops.h>
 #include <linux/tick.h>
+#ifdef CONFIG_SMP
+#include <linux/sched.h>
+#endif
 #include <trace/events/power.h>
 
 static LIST_HEAD(cpufreq_policy_list);
@@ -117,6 +120,12 @@
 }
 EXPORT_SYMBOL_GPL(have_governor_per_policy);
 
+bool cpufreq_driver_is_slow(void)
+{
+	return !(cpufreq_driver->flags & CPUFREQ_DRIVER_FAST);
+}
+EXPORT_SYMBOL_GPL(cpufreq_driver_is_slow);
+
 struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
 {
 	if (have_governor_per_policy())
@@ -301,6 +310,50 @@
 #endif
 }
 
+/*********************************************************************
+ *               FREQUENCY INVARIANT CPU CAPACITY                    *
+ *********************************************************************/
+
+static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
+static DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
+
+static void
+scale_freq_capacity(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs)
+{
+	unsigned long cur = freqs ? freqs->new : policy->cur;
+	unsigned long scale = (cur << SCHED_CAPACITY_SHIFT) / policy->max;
+	struct cpufreq_cpuinfo *cpuinfo = &policy->cpuinfo;
+	int cpu;
+
+	pr_debug("cpus %*pbl cur/cur max freq %lu/%u kHz freq scale %lu\n",
+		 cpumask_pr_args(policy->cpus), cur, policy->max, scale);
+
+	for_each_cpu(cpu, policy->cpus)
+		per_cpu(freq_scale, cpu) = scale;
+
+	if (freqs)
+		return;
+
+	scale = (policy->max << SCHED_CAPACITY_SHIFT) / cpuinfo->max_freq;
+
+	pr_debug("cpus %*pbl cur max/max freq %u/%u kHz max freq scale %lu\n",
+		 cpumask_pr_args(policy->cpus), policy->max, cpuinfo->max_freq,
+		 scale);
+
+	for_each_cpu(cpu, policy->cpus)
+		per_cpu(max_freq_scale, cpu) = scale;
+}
+
+unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu)
+{
+	return per_cpu(freq_scale, cpu);
+}
+
+unsigned long cpufreq_scale_max_freq_capacity(int cpu)
+{
+	return per_cpu(max_freq_scale, cpu);
+}
+
 static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
 		struct cpufreq_freqs *freqs, unsigned int state)
 {
@@ -378,6 +431,9 @@
 void cpufreq_freq_transition_begin(struct cpufreq_policy *policy,
 		struct cpufreq_freqs *freqs)
 {
+#ifdef CONFIG_SMP
+	int cpu;
+#endif
 
 	/*
 	 * Catch double invocations of _begin() which lead to self-deadlock.
@@ -405,6 +461,12 @@
 
 	spin_unlock(&policy->transition_lock);
 
+	scale_freq_capacity(policy, freqs);
+#ifdef CONFIG_SMP
+	for_each_cpu(cpu, policy->cpus)
+		trace_cpu_capacity(capacity_curr_of(cpu), cpu);
+#endif
+
 	cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE);
 }
 EXPORT_SYMBOL_GPL(cpufreq_freq_transition_begin);
@@ -2200,8 +2262,11 @@
 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 			CPUFREQ_NOTIFY, new_policy);
 
+	scale_freq_capacity(new_policy, NULL);
+
 	policy->min = new_policy->min;
 	policy->max = new_policy->max;
+	trace_cpu_frequency_limits(policy->max, policy->min, policy->cpu);
 
 	policy->cached_target_freq = UINT_MAX;
 
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 00a7435..0fe2518 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -302,7 +302,10 @@
 	dbs_info->requested_freq = policy->cur;
 }
 
-static struct dbs_governor cs_governor = {
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
+static
+#endif
+struct dbs_governor cs_governor = {
 	.gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"),
 	.kobj_type = { .default_attrs = cs_attributes },
 	.gov_dbs_timer = cs_dbs_timer,
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
new file mode 100644
index 0000000..5a77d91
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -0,0 +1,1411 @@
+/*
+ * drivers/cpufreq/cpufreq_interactive.c
+ *
+ * Copyright (C) 2010-2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Author: Mike Chan (mike@android.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/irq_work.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
+#include <linux/tick.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/cpufreq_interactive.h>
+
+#define gov_attr_ro(_name)						\
+static struct governor_attr _name =					\
+__ATTR(_name, 0444, show_##_name, NULL)
+
+#define gov_attr_wo(_name)						\
+static struct governor_attr _name =					\
+__ATTR(_name, 0200, NULL, store_##_name)
+
+#define gov_attr_rw(_name)						\
+static struct governor_attr _name =					\
+__ATTR(_name, 0644, show_##_name, store_##_name)
+
+/* Separate instance required for each 'interactive' directory in sysfs */
+struct interactive_tunables {
+	struct gov_attr_set attr_set;
+
+	/* Hi speed to bump to from lo speed when load burst (default max) */
+	unsigned int hispeed_freq;
+
+	/* Go to hi speed when CPU load at or above this value. */
+#define DEFAULT_GO_HISPEED_LOAD 99
+	unsigned long go_hispeed_load;
+
+	/* Target load. Lower values result in higher CPU speeds. */
+	spinlock_t target_loads_lock;
+	unsigned int *target_loads;
+	int ntarget_loads;
+
+	/*
+	 * The minimum amount of time to spend at a frequency before we can ramp
+	 * down.
+	 */
+#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC)
+	unsigned long min_sample_time;
+
+	/* The sample rate of the timer used to increase frequency */
+	unsigned long sampling_rate;
+
+	/*
+	 * Wait this long before raising speed above hispeed, by default a
+	 * single timer interval.
+	 */
+	spinlock_t above_hispeed_delay_lock;
+	unsigned int *above_hispeed_delay;
+	int nabove_hispeed_delay;
+
+	/* Non-zero means indefinite speed boost active */
+	int boost;
+	/* Duration of a boot pulse in usecs */
+	int boostpulse_duration;
+	/* End time of boost pulse in ktime converted to usecs */
+	u64 boostpulse_endtime;
+	bool boosted;
+
+	/*
+	 * Max additional time to wait in idle, beyond sampling_rate, at speeds
+	 * above minimum before wakeup to reduce speed, or -1 if unnecessary.
+	 */
+#define DEFAULT_TIMER_SLACK (4 * DEFAULT_SAMPLING_RATE)
+	unsigned long timer_slack_delay;
+	unsigned long timer_slack;
+	bool io_is_busy;
+};
+
+/* Separate instance required for each 'struct cpufreq_policy' */
+struct interactive_policy {
+	struct cpufreq_policy *policy;
+	struct interactive_tunables *tunables;
+	struct list_head tunables_hook;
+};
+
+/* Separate instance required for each CPU */
+struct interactive_cpu {
+	struct update_util_data update_util;
+	struct interactive_policy *ipolicy;
+
+	struct irq_work irq_work;
+	u64 last_sample_time;
+	unsigned long next_sample_jiffies;
+	bool work_in_progress;
+
+	struct rw_semaphore enable_sem;
+	struct timer_list slack_timer;
+
+	spinlock_t load_lock; /* protects the next 4 fields */
+	u64 time_in_idle;
+	u64 time_in_idle_timestamp;
+	u64 cputime_speedadj;
+	u64 cputime_speedadj_timestamp;
+
+	spinlock_t target_freq_lock; /*protects target freq */
+	unsigned int target_freq;
+
+	unsigned int floor_freq;
+	u64 pol_floor_val_time; /* policy floor_validate_time */
+	u64 loc_floor_val_time; /* per-cpu floor_validate_time */
+	u64 pol_hispeed_val_time; /* policy hispeed_validate_time */
+	u64 loc_hispeed_val_time; /* per-cpu hispeed_validate_time */
+};
+
+static DEFINE_PER_CPU(struct interactive_cpu, interactive_cpu);
+
+/* Realtime thread handles frequency scaling */
+static struct task_struct *speedchange_task;
+static cpumask_t speedchange_cpumask;
+static spinlock_t speedchange_cpumask_lock;
+
+/* Target load. Lower values result in higher CPU speeds. */
+#define DEFAULT_TARGET_LOAD 90
+static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD};
+
+#define DEFAULT_SAMPLING_RATE (20 * USEC_PER_MSEC)
+#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_SAMPLING_RATE
+static unsigned int default_above_hispeed_delay[] = {
+	DEFAULT_ABOVE_HISPEED_DELAY
+};
+
+/* Iterate over interactive policies for tunables */
+#define for_each_ipolicy(__ip)	\
+	list_for_each_entry(__ip, &tunables->attr_set.policy_list, tunables_hook)
+
+static struct interactive_tunables *global_tunables;
+static DEFINE_MUTEX(global_tunables_lock);
+
+static inline void update_slack_delay(struct interactive_tunables *tunables)
+{
+	tunables->timer_slack_delay = usecs_to_jiffies(tunables->timer_slack +
+						       tunables->sampling_rate);
+}
+
+static bool timer_slack_required(struct interactive_cpu *icpu)
+{
+	struct interactive_policy *ipolicy = icpu->ipolicy;
+	struct interactive_tunables *tunables = ipolicy->tunables;
+
+	if (tunables->timer_slack < 0)
+		return false;
+
+	if (icpu->target_freq > ipolicy->policy->min)
+		return true;
+
+	return false;
+}
+
+static void gov_slack_timer_start(struct interactive_cpu *icpu, int cpu)
+{
+	struct interactive_tunables *tunables = icpu->ipolicy->tunables;
+
+	icpu->slack_timer.expires = jiffies + tunables->timer_slack_delay;
+	add_timer_on(&icpu->slack_timer, cpu);
+}
+
+static void gov_slack_timer_modify(struct interactive_cpu *icpu)
+{
+	struct interactive_tunables *tunables = icpu->ipolicy->tunables;
+
+	mod_timer(&icpu->slack_timer, jiffies + tunables->timer_slack_delay);
+}
+
+static void slack_timer_resched(struct interactive_cpu *icpu, int cpu,
+				bool modify)
+{
+	struct interactive_tunables *tunables = icpu->ipolicy->tunables;
+	unsigned long flags;
+
+	spin_lock_irqsave(&icpu->load_lock, flags);
+
+	icpu->time_in_idle = get_cpu_idle_time(cpu,
+					       &icpu->time_in_idle_timestamp,
+					       tunables->io_is_busy);
+	icpu->cputime_speedadj = 0;
+	icpu->cputime_speedadj_timestamp = icpu->time_in_idle_timestamp;
+
+	if (timer_slack_required(icpu)) {
+		if (modify)
+			gov_slack_timer_modify(icpu);
+		else
+			gov_slack_timer_start(icpu, cpu);
+	}
+
+	spin_unlock_irqrestore(&icpu->load_lock, flags);
+}
+
+static unsigned int
+freq_to_above_hispeed_delay(struct interactive_tunables *tunables,
+			    unsigned int freq)
+{
+	unsigned long flags;
+	unsigned int ret;
+	int i;
+
+	spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
+
+	for (i = 0; i < tunables->nabove_hispeed_delay - 1 &&
+	     freq >= tunables->above_hispeed_delay[i + 1]; i += 2)
+		;
+
+	ret = tunables->above_hispeed_delay[i];
+	spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
+
+	return ret;
+}
+
+static unsigned int freq_to_targetload(struct interactive_tunables *tunables,
+				       unsigned int freq)
+{
+	unsigned long flags;
+	unsigned int ret;
+	int i;
+
+	spin_lock_irqsave(&tunables->target_loads_lock, flags);
+
+	for (i = 0; i < tunables->ntarget_loads - 1 &&
+	     freq >= tunables->target_loads[i + 1]; i += 2)
+		;
+
+	ret = tunables->target_loads[i];
+	spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
+	return ret;
+}
+
+/*
+ * If increasing frequencies never map to a lower target load then
+ * choose_freq() will find the minimum frequency that does not exceed its
+ * target load given the current load.
+ */
+static unsigned int choose_freq(struct interactive_cpu *icpu,
+				unsigned int loadadjfreq)
+{
+	struct cpufreq_policy *policy = icpu->ipolicy->policy;
+	struct cpufreq_frequency_table *freq_table = policy->freq_table;
+	unsigned int prevfreq, freqmin = 0, freqmax = UINT_MAX, tl;
+	unsigned int freq = policy->cur;
+	int index;
+
+	do {
+		prevfreq = freq;
+		tl = freq_to_targetload(icpu->ipolicy->tunables, freq);
+
+		/*
+		 * Find the lowest frequency where the computed load is less
+		 * than or equal to the target load.
+		 */
+
+		index = cpufreq_frequency_table_target(policy, loadadjfreq / tl,
+						       CPUFREQ_RELATION_L);
+
+		freq = freq_table[index].frequency;
+
+		if (freq > prevfreq) {
+			/* The previous frequency is too low */
+			freqmin = prevfreq;
+
+			if (freq < freqmax)
+				continue;
+
+			/* Find highest frequency that is less than freqmax */
+			index = cpufreq_frequency_table_target(policy,
+					freqmax - 1, CPUFREQ_RELATION_H);
+
+			freq = freq_table[index].frequency;
+
+			if (freq == freqmin) {
+				/*
+				 * The first frequency below freqmax has already
+				 * been found to be too low. freqmax is the
+				 * lowest speed we found that is fast enough.
+				 */
+				freq = freqmax;
+				break;
+			}
+		} else if (freq < prevfreq) {
+			/* The previous frequency is high enough. */
+			freqmax = prevfreq;
+
+			if (freq > freqmin)
+				continue;
+
+			/* Find lowest frequency that is higher than freqmin */
+			index = cpufreq_frequency_table_target(policy,
+					freqmin + 1, CPUFREQ_RELATION_L);
+
+			freq = freq_table[index].frequency;
+
+			/*
+			 * If freqmax is the first frequency above
+			 * freqmin then we have already found that
+			 * this speed is fast enough.
+			 */
+			if (freq == freqmax)
+				break;
+		}
+
+		/* If same frequency chosen as previous then done. */
+	} while (freq != prevfreq);
+
+	return freq;
+}
+
+static u64 update_load(struct interactive_cpu *icpu, int cpu)
+{
+	struct interactive_tunables *tunables = icpu->ipolicy->tunables;
+	u64 now_idle, now, active_time, delta_idle, delta_time;
+
+	now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy);
+	delta_idle = (now_idle - icpu->time_in_idle);
+	delta_time = (now - icpu->time_in_idle_timestamp);
+
+	if (delta_time <= delta_idle)
+		active_time = 0;
+	else
+		active_time = delta_time - delta_idle;
+
+	icpu->cputime_speedadj += active_time * icpu->ipolicy->policy->cur;
+
+	icpu->time_in_idle = now_idle;
+	icpu->time_in_idle_timestamp = now;
+
+	return now;
+}
+
+/* Re-evaluate load to see if a frequency change is required or not */
+static void eval_target_freq(struct interactive_cpu *icpu)
+{
+	struct interactive_tunables *tunables = icpu->ipolicy->tunables;
+	struct cpufreq_policy *policy = icpu->ipolicy->policy;
+	struct cpufreq_frequency_table *freq_table = policy->freq_table;
+	u64 cputime_speedadj, now, max_fvtime;
+	unsigned int new_freq, loadadjfreq, index, delta_time;
+	unsigned long flags;
+	int cpu_load;
+	int cpu = smp_processor_id();
+
+	spin_lock_irqsave(&icpu->load_lock, flags);
+	now = update_load(icpu, smp_processor_id());
+	delta_time = (unsigned int)(now - icpu->cputime_speedadj_timestamp);
+	cputime_speedadj = icpu->cputime_speedadj;
+	spin_unlock_irqrestore(&icpu->load_lock, flags);
+
+	if (WARN_ON_ONCE(!delta_time))
+		return;
+
+	spin_lock_irqsave(&icpu->target_freq_lock, flags);
+	do_div(cputime_speedadj, delta_time);
+	loadadjfreq = (unsigned int)cputime_speedadj * 100;
+	cpu_load = loadadjfreq / policy->cur;
+	tunables->boosted = tunables->boost ||
+			    now < tunables->boostpulse_endtime;
+
+	if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) {
+		if (policy->cur < tunables->hispeed_freq) {
+			new_freq = tunables->hispeed_freq;
+		} else {
+			new_freq = choose_freq(icpu, loadadjfreq);
+
+			if (new_freq < tunables->hispeed_freq)
+				new_freq = tunables->hispeed_freq;
+		}
+	} else {
+		new_freq = choose_freq(icpu, loadadjfreq);
+		if (new_freq > tunables->hispeed_freq &&
+		    policy->cur < tunables->hispeed_freq)
+			new_freq = tunables->hispeed_freq;
+	}
+
+	if (policy->cur >= tunables->hispeed_freq &&
+	    new_freq > policy->cur &&
+	    now - icpu->pol_hispeed_val_time < freq_to_above_hispeed_delay(tunables, policy->cur)) {
+		trace_cpufreq_interactive_notyet(cpu, cpu_load,
+				icpu->target_freq, policy->cur, new_freq);
+		goto exit;
+	}
+
+	icpu->loc_hispeed_val_time = now;
+
+	index = cpufreq_frequency_table_target(policy, new_freq,
+					       CPUFREQ_RELATION_L);
+	new_freq = freq_table[index].frequency;
+
+	/*
+	 * Do not scale below floor_freq unless we have been at or above the
+	 * floor frequency for the minimum sample time since last validated.
+	 */
+	max_fvtime = max(icpu->pol_floor_val_time, icpu->loc_floor_val_time);
+	if (new_freq < icpu->floor_freq && icpu->target_freq >= policy->cur) {
+		if (now - max_fvtime < tunables->min_sample_time) {
+			trace_cpufreq_interactive_notyet(cpu, cpu_load,
+				icpu->target_freq, policy->cur, new_freq);
+			goto exit;
+		}
+	}
+
+	/*
+	 * Update the timestamp for checking whether speed has been held at
+	 * or above the selected frequency for a minimum of min_sample_time,
+	 * if not boosted to hispeed_freq.  If boosted to hispeed_freq then we
+	 * allow the speed to drop as soon as the boostpulse duration expires
+	 * (or the indefinite boost is turned off).
+	 */
+
+	if (!tunables->boosted || new_freq > tunables->hispeed_freq) {
+		icpu->floor_freq = new_freq;
+		if (icpu->target_freq >= policy->cur || new_freq >= policy->cur)
+			icpu->loc_floor_val_time = now;
+	}
+
+	if (icpu->target_freq == new_freq &&
+	    icpu->target_freq <= policy->cur) {
+		trace_cpufreq_interactive_already(cpu, cpu_load,
+			icpu->target_freq, policy->cur, new_freq);
+		goto exit;
+	}
+
+	trace_cpufreq_interactive_target(cpu, cpu_load, icpu->target_freq,
+					 policy->cur, new_freq);
+
+	icpu->target_freq = new_freq;
+	spin_unlock_irqrestore(&icpu->target_freq_lock, flags);
+
+	spin_lock_irqsave(&speedchange_cpumask_lock, flags);
+	cpumask_set_cpu(cpu, &speedchange_cpumask);
+	spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
+
+	wake_up_process(speedchange_task);
+	return;
+
+exit:
+	spin_unlock_irqrestore(&icpu->target_freq_lock, flags);
+}
+
+static void cpufreq_interactive_update(struct interactive_cpu *icpu)
+{
+	eval_target_freq(icpu);
+	slack_timer_resched(icpu, smp_processor_id(), true);
+}
+
+static void cpufreq_interactive_idle_end(void)
+{
+	struct interactive_cpu *icpu = &per_cpu(interactive_cpu,
+						smp_processor_id());
+
+	if (!down_read_trylock(&icpu->enable_sem))
+		return;
+
+	if (icpu->ipolicy) {
+		/*
+		 * We haven't sampled load for more than sampling_rate time, do
+		 * it right now.
+		 */
+		if (time_after_eq(jiffies, icpu->next_sample_jiffies))
+			cpufreq_interactive_update(icpu);
+	}
+
+	up_read(&icpu->enable_sem);
+}
+
+static void cpufreq_interactive_get_policy_info(struct cpufreq_policy *policy,
+						unsigned int *pmax_freq,
+						u64 *phvt, u64 *pfvt)
+{
+	struct interactive_cpu *icpu;
+	u64 hvt = ~0ULL, fvt = 0;
+	unsigned int max_freq = 0, i;
+
+	for_each_cpu(i, policy->cpus) {
+		icpu = &per_cpu(interactive_cpu, i);
+
+		fvt = max(fvt, icpu->loc_floor_val_time);
+		if (icpu->target_freq > max_freq) {
+			max_freq = icpu->target_freq;
+			hvt = icpu->loc_hispeed_val_time;
+		} else if (icpu->target_freq == max_freq) {
+			hvt = min(hvt, icpu->loc_hispeed_val_time);
+		}
+	}
+
+	*pmax_freq = max_freq;
+	*phvt = hvt;
+	*pfvt = fvt;
+}
+
+static void cpufreq_interactive_adjust_cpu(unsigned int cpu,
+					   struct cpufreq_policy *policy)
+{
+	struct interactive_cpu *icpu;
+	u64 hvt, fvt;
+	unsigned int max_freq;
+	int i;
+
+	cpufreq_interactive_get_policy_info(policy, &max_freq, &hvt, &fvt);
+
+	for_each_cpu(i, policy->cpus) {
+		icpu = &per_cpu(interactive_cpu, i);
+		icpu->pol_floor_val_time = fvt;
+	}
+
+	if (max_freq != policy->cur) {
+		__cpufreq_driver_target(policy, max_freq, CPUFREQ_RELATION_H);
+		for_each_cpu(i, policy->cpus) {
+			icpu = &per_cpu(interactive_cpu, i);
+			icpu->pol_hispeed_val_time = hvt;
+		}
+	}
+
+	trace_cpufreq_interactive_setspeed(cpu, max_freq, policy->cur);
+}
+
+static int cpufreq_interactive_speedchange_task(void *data)
+{
+	unsigned int cpu;
+	cpumask_t tmp_mask;
+	unsigned long flags;
+
+again:
+	set_current_state(TASK_INTERRUPTIBLE);
+	spin_lock_irqsave(&speedchange_cpumask_lock, flags);
+
+	if (cpumask_empty(&speedchange_cpumask)) {
+		spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
+		schedule();
+
+		if (kthread_should_stop())
+			return 0;
+
+		spin_lock_irqsave(&speedchange_cpumask_lock, flags);
+	}
+
+	set_current_state(TASK_RUNNING);
+	tmp_mask = speedchange_cpumask;
+	cpumask_clear(&speedchange_cpumask);
+	spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
+
+	for_each_cpu(cpu, &tmp_mask) {
+		struct interactive_cpu *icpu = &per_cpu(interactive_cpu, cpu);
+		struct cpufreq_policy *policy;
+
+		if (unlikely(!down_read_trylock(&icpu->enable_sem)))
+			continue;
+
+		if (likely(icpu->ipolicy)) {
+			policy = icpu->ipolicy->policy;
+			cpufreq_interactive_adjust_cpu(cpu, policy);
+		}
+
+		up_read(&icpu->enable_sem);
+	}
+
+	goto again;
+}
+
+static void cpufreq_interactive_boost(struct interactive_tunables *tunables)
+{
+	struct interactive_policy *ipolicy;
+	struct cpufreq_policy *policy;
+	struct interactive_cpu *icpu;
+	unsigned long flags[2];
+	bool wakeup = false;
+	int i;
+
+	tunables->boosted = true;
+
+	spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]);
+
+	for_each_ipolicy(ipolicy) {
+		policy = ipolicy->policy;
+
+		for_each_cpu(i, policy->cpus) {
+			icpu = &per_cpu(interactive_cpu, i);
+
+			if (!down_read_trylock(&icpu->enable_sem))
+				continue;
+
+			if (!icpu->ipolicy) {
+				up_read(&icpu->enable_sem);
+				continue;
+			}
+
+			spin_lock_irqsave(&icpu->target_freq_lock, flags[1]);
+			if (icpu->target_freq < tunables->hispeed_freq) {
+				icpu->target_freq = tunables->hispeed_freq;
+				cpumask_set_cpu(i, &speedchange_cpumask);
+				icpu->pol_hispeed_val_time = ktime_to_us(ktime_get());
+				wakeup = true;
+			}
+			spin_unlock_irqrestore(&icpu->target_freq_lock, flags[1]);
+
+			up_read(&icpu->enable_sem);
+		}
+	}
+
+	spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]);
+
+	if (wakeup)
+		wake_up_process(speedchange_task);
+}
+
+static int cpufreq_interactive_notifier(struct notifier_block *nb,
+					unsigned long val, void *data)
+{
+	struct cpufreq_freqs *freq = data;
+	struct interactive_cpu *icpu = &per_cpu(interactive_cpu, freq->cpu);
+	unsigned long flags;
+
+	if (val != CPUFREQ_POSTCHANGE)
+		return 0;
+
+	if (!down_read_trylock(&icpu->enable_sem))
+		return 0;
+
+	if (!icpu->ipolicy) {
+		up_read(&icpu->enable_sem);
+		return 0;
+	}
+
+	spin_lock_irqsave(&icpu->load_lock, flags);
+	update_load(icpu, freq->cpu);
+	spin_unlock_irqrestore(&icpu->load_lock, flags);
+
+	up_read(&icpu->enable_sem);
+
+	return 0;
+}
+
+static struct notifier_block cpufreq_notifier_block = {
+	.notifier_call = cpufreq_interactive_notifier,
+};
+
+static unsigned int *get_tokenized_data(const char *buf, int *num_tokens)
+{
+	const char *cp = buf;
+	int ntokens = 1, i = 0;
+	unsigned int *tokenized_data;
+	int err = -EINVAL;
+
+	while ((cp = strpbrk(cp + 1, " :")))
+		ntokens++;
+
+	if (!(ntokens & 0x1))
+		goto err;
+
+	tokenized_data = kcalloc(ntokens, sizeof(*tokenized_data), GFP_KERNEL);
+	if (!tokenized_data) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	cp = buf;
+	while (i < ntokens) {
+		if (kstrtouint(cp, 0, &tokenized_data[i++]) < 0)
+			goto err_kfree;
+
+		cp = strpbrk(cp, " :");
+		if (!cp)
+			break;
+		cp++;
+	}
+
+	if (i != ntokens)
+		goto err_kfree;
+
+	*num_tokens = ntokens;
+	return tokenized_data;
+
+err_kfree:
+	kfree(tokenized_data);
+err:
+	return ERR_PTR(err);
+}
+
+/* Interactive governor sysfs interface */
+static struct interactive_tunables *to_tunables(struct gov_attr_set *attr_set)
+{
+	return container_of(attr_set, struct interactive_tunables, attr_set);
+}
+
+#define show_one(file_name, type)					\
+static ssize_t show_##file_name(struct gov_attr_set *attr_set, char *buf) \
+{									\
+	struct interactive_tunables *tunables = to_tunables(attr_set);	\
+	return sprintf(buf, type "\n", tunables->file_name);		\
+}
+
+static ssize_t show_target_loads(struct gov_attr_set *attr_set, char *buf)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned long flags;
+	ssize_t ret = 0;
+	int i;
+
+	spin_lock_irqsave(&tunables->target_loads_lock, flags);
+
+	for (i = 0; i < tunables->ntarget_loads; i++)
+		ret += sprintf(buf + ret, "%u%s", tunables->target_loads[i],
+			       i & 0x1 ? ":" : " ");
+
+	sprintf(buf + ret - 1, "\n");
+	spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
+
+	return ret;
+}
+
+static ssize_t store_target_loads(struct gov_attr_set *attr_set,
+				  const char *buf, size_t count)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned int *new_target_loads;
+	unsigned long flags;
+	int ntokens;
+
+	new_target_loads = get_tokenized_data(buf, &ntokens);
+	if (IS_ERR(new_target_loads))
+		return PTR_ERR(new_target_loads);
+
+	spin_lock_irqsave(&tunables->target_loads_lock, flags);
+	if (tunables->target_loads != default_target_loads)
+		kfree(tunables->target_loads);
+	tunables->target_loads = new_target_loads;
+	tunables->ntarget_loads = ntokens;
+	spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
+
+	return count;
+}
+
+static ssize_t show_above_hispeed_delay(struct gov_attr_set *attr_set,
+					char *buf)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned long flags;
+	ssize_t ret = 0;
+	int i;
+
+	spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
+
+	for (i = 0; i < tunables->nabove_hispeed_delay; i++)
+		ret += sprintf(buf + ret, "%u%s",
+			       tunables->above_hispeed_delay[i],
+			       i & 0x1 ? ":" : " ");
+
+	sprintf(buf + ret - 1, "\n");
+	spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
+
+	return ret;
+}
+
+static ssize_t store_above_hispeed_delay(struct gov_attr_set *attr_set,
+					 const char *buf, size_t count)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned int *new_above_hispeed_delay = NULL;
+	unsigned long flags;
+	int ntokens;
+
+	new_above_hispeed_delay = get_tokenized_data(buf, &ntokens);
+	if (IS_ERR(new_above_hispeed_delay))
+		return PTR_ERR(new_above_hispeed_delay);
+
+	spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
+	if (tunables->above_hispeed_delay != default_above_hispeed_delay)
+		kfree(tunables->above_hispeed_delay);
+	tunables->above_hispeed_delay = new_above_hispeed_delay;
+	tunables->nabove_hispeed_delay = ntokens;
+	spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
+
+	return count;
+}
+
+static ssize_t store_hispeed_freq(struct gov_attr_set *attr_set,
+				  const char *buf, size_t count)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned long int val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	tunables->hispeed_freq = val;
+
+	return count;
+}
+
+static ssize_t store_go_hispeed_load(struct gov_attr_set *attr_set,
+				     const char *buf, size_t count)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	tunables->go_hispeed_load = val;
+
+	return count;
+}
+
+static ssize_t store_min_sample_time(struct gov_attr_set *attr_set,
+				     const char *buf, size_t count)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	tunables->min_sample_time = val;
+
+	return count;
+}
+
+static ssize_t show_timer_rate(struct gov_attr_set *attr_set, char *buf)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+
+	return sprintf(buf, "%lu\n", tunables->sampling_rate);
+}
+
+static ssize_t store_timer_rate(struct gov_attr_set *attr_set, const char *buf,
+				size_t count)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned long val, val_round;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	val_round = jiffies_to_usecs(usecs_to_jiffies(val));
+	if (val != val_round)
+		pr_warn("timer_rate not aligned to jiffy. Rounded up to %lu\n",
+			val_round);
+
+	tunables->sampling_rate = val_round;
+
+	return count;
+}
+
+static ssize_t store_timer_slack(struct gov_attr_set *attr_set, const char *buf,
+				 size_t count)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	tunables->timer_slack = val;
+	update_slack_delay(tunables);
+
+	return count;
+}
+
+static ssize_t store_boost(struct gov_attr_set *attr_set, const char *buf,
+			   size_t count)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	tunables->boost = val;
+
+	if (tunables->boost) {
+		trace_cpufreq_interactive_boost("on");
+		if (!tunables->boosted)
+			cpufreq_interactive_boost(tunables);
+	} else {
+		tunables->boostpulse_endtime = ktime_to_us(ktime_get());
+		trace_cpufreq_interactive_unboost("off");
+	}
+
+	return count;
+}
+
+static ssize_t store_boostpulse(struct gov_attr_set *attr_set, const char *buf,
+				size_t count)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	tunables->boostpulse_endtime = ktime_to_us(ktime_get()) +
+					tunables->boostpulse_duration;
+	trace_cpufreq_interactive_boost("pulse");
+	if (!tunables->boosted)
+		cpufreq_interactive_boost(tunables);
+
+	return count;
+}
+
+static ssize_t store_boostpulse_duration(struct gov_attr_set *attr_set,
+					 const char *buf, size_t count)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	tunables->boostpulse_duration = val;
+
+	return count;
+}
+
+static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf,
+				size_t count)
+{
+	struct interactive_tunables *tunables = to_tunables(attr_set);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	tunables->io_is_busy = val;
+
+	return count;
+}
+
+show_one(hispeed_freq, "%u");
+show_one(go_hispeed_load, "%lu");
+show_one(min_sample_time, "%lu");
+show_one(timer_slack, "%lu");
+show_one(boost, "%u");
+show_one(boostpulse_duration, "%u");
+show_one(io_is_busy, "%u");
+
+gov_attr_rw(target_loads);
+gov_attr_rw(above_hispeed_delay);
+gov_attr_rw(hispeed_freq);
+gov_attr_rw(go_hispeed_load);
+gov_attr_rw(min_sample_time);
+gov_attr_rw(timer_rate);
+gov_attr_rw(timer_slack);
+gov_attr_rw(boost);
+gov_attr_wo(boostpulse);
+gov_attr_rw(boostpulse_duration);
+gov_attr_rw(io_is_busy);
+
+static struct attribute *interactive_attributes[] = {
+	&target_loads.attr,
+	&above_hispeed_delay.attr,
+	&hispeed_freq.attr,
+	&go_hispeed_load.attr,
+	&min_sample_time.attr,
+	&timer_rate.attr,
+	&timer_slack.attr,
+	&boost.attr,
+	&boostpulse.attr,
+	&boostpulse_duration.attr,
+	&io_is_busy.attr,
+	NULL
+};
+
+static struct kobj_type interactive_tunables_ktype = {
+	.default_attrs = interactive_attributes,
+	.sysfs_ops = &governor_sysfs_ops,
+};
+
+static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
+					     unsigned long val, void *data)
+{
+	if (val == IDLE_END)
+		cpufreq_interactive_idle_end();
+
+	return 0;
+}
+
+static struct notifier_block cpufreq_interactive_idle_nb = {
+	.notifier_call = cpufreq_interactive_idle_notifier,
+};
+
+/* Interactive Governor callbacks */
+struct interactive_governor {
+	struct cpufreq_governor gov;
+	unsigned int usage_count;
+};
+
+static struct interactive_governor interactive_gov;
+
+#define CPU_FREQ_GOV_INTERACTIVE	(&interactive_gov.gov)
+
+static void irq_work(struct irq_work *irq_work)
+{
+	struct interactive_cpu *icpu = container_of(irq_work, struct
+						    interactive_cpu, irq_work);
+
+	cpufreq_interactive_update(icpu);
+	icpu->work_in_progress = false;
+}
+
+static void update_util_handler(struct update_util_data *data, u64 time,
+				unsigned int flags)
+{
+	struct interactive_cpu *icpu = container_of(data,
+					struct interactive_cpu, update_util);
+	struct interactive_policy *ipolicy = icpu->ipolicy;
+	struct interactive_tunables *tunables = ipolicy->tunables;
+	u64 delta_ns;
+
+	/*
+	 * The irq-work may not be allowed to be queued up right now.
+	 * Possible reasons:
+	 * - Work has already been queued up or is in progress.
+	 * - It is too early (too little time from the previous sample).
+	 */
+	if (icpu->work_in_progress)
+		return;
+
+	delta_ns = time - icpu->last_sample_time;
+	if ((s64)delta_ns < tunables->sampling_rate * NSEC_PER_USEC)
+		return;
+
+	icpu->last_sample_time = time;
+	icpu->next_sample_jiffies = usecs_to_jiffies(tunables->sampling_rate) +
+				    jiffies;
+
+	icpu->work_in_progress = true;
+	irq_work_queue(&icpu->irq_work);
+}
+
+static void gov_set_update_util(struct interactive_policy *ipolicy)
+{
+	struct cpufreq_policy *policy = ipolicy->policy;
+	struct interactive_cpu *icpu;
+	int cpu;
+
+	for_each_cpu(cpu, policy->cpus) {
+		icpu = &per_cpu(interactive_cpu, cpu);
+
+		icpu->last_sample_time = 0;
+		icpu->next_sample_jiffies = 0;
+		cpufreq_add_update_util_hook(cpu, &icpu->update_util,
+					     update_util_handler);
+	}
+}
+
+static inline void gov_clear_update_util(struct cpufreq_policy *policy)
+{
+	int i;
+
+	for_each_cpu(i, policy->cpus)
+		cpufreq_remove_update_util_hook(i);
+
+	synchronize_sched();
+}
+
+static void icpu_cancel_work(struct interactive_cpu *icpu)
+{
+	irq_work_sync(&icpu->irq_work);
+	icpu->work_in_progress = false;
+	del_timer_sync(&icpu->slack_timer);
+}
+
+static struct interactive_policy *
+interactive_policy_alloc(struct cpufreq_policy *policy)
+{
+	struct interactive_policy *ipolicy;
+
+	ipolicy = kzalloc(sizeof(*ipolicy), GFP_KERNEL);
+	if (!ipolicy)
+		return NULL;
+
+	ipolicy->policy = policy;
+
+	return ipolicy;
+}
+
+static void interactive_policy_free(struct interactive_policy *ipolicy)
+{
+	kfree(ipolicy);
+}
+
+static struct interactive_tunables *
+interactive_tunables_alloc(struct interactive_policy *ipolicy)
+{
+	struct interactive_tunables *tunables;
+
+	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
+	if (!tunables)
+		return NULL;
+
+	gov_attr_set_init(&tunables->attr_set, &ipolicy->tunables_hook);
+	if (!have_governor_per_policy())
+		global_tunables = tunables;
+
+	ipolicy->tunables = tunables;
+
+	return tunables;
+}
+
+static void interactive_tunables_free(struct interactive_tunables *tunables)
+{
+	if (!have_governor_per_policy())
+		global_tunables = NULL;
+
+	kfree(tunables);
+}
+
+int cpufreq_interactive_init(struct cpufreq_policy *policy)
+{
+	struct interactive_policy *ipolicy;
+	struct interactive_tunables *tunables;
+	int ret;
+
+	/* State should be equivalent to EXIT */
+	if (policy->governor_data)
+		return -EBUSY;
+
+	ipolicy = interactive_policy_alloc(policy);
+	if (!ipolicy)
+		return -ENOMEM;
+
+	mutex_lock(&global_tunables_lock);
+
+	if (global_tunables) {
+		if (WARN_ON(have_governor_per_policy())) {
+			ret = -EINVAL;
+			goto free_int_policy;
+		}
+
+		policy->governor_data = ipolicy;
+		ipolicy->tunables = global_tunables;
+
+		gov_attr_set_get(&global_tunables->attr_set,
+				 &ipolicy->tunables_hook);
+		goto out;
+	}
+
+	tunables = interactive_tunables_alloc(ipolicy);
+	if (!tunables) {
+		ret = -ENOMEM;
+		goto free_int_policy;
+	}
+
+	tunables->hispeed_freq = policy->max;
+	tunables->above_hispeed_delay = default_above_hispeed_delay;
+	tunables->nabove_hispeed_delay =
+		ARRAY_SIZE(default_above_hispeed_delay);
+	tunables->go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;
+	tunables->target_loads = default_target_loads;
+	tunables->ntarget_loads = ARRAY_SIZE(default_target_loads);
+	tunables->min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
+	tunables->boostpulse_duration = DEFAULT_MIN_SAMPLE_TIME;
+	tunables->sampling_rate = DEFAULT_SAMPLING_RATE;
+	tunables->timer_slack = DEFAULT_TIMER_SLACK;
+	update_slack_delay(tunables);
+
+	spin_lock_init(&tunables->target_loads_lock);
+	spin_lock_init(&tunables->above_hispeed_delay_lock);
+
+	policy->governor_data = ipolicy;
+
+	ret = kobject_init_and_add(&tunables->attr_set.kobj,
+				   &interactive_tunables_ktype,
+				   get_governor_parent_kobj(policy), "%s",
+				   interactive_gov.gov.name);
+	if (ret)
+		goto fail;
+
+	/* One time initialization for governor */
+	if (!interactive_gov.usage_count++) {
+		idle_notifier_register(&cpufreq_interactive_idle_nb);
+		cpufreq_register_notifier(&cpufreq_notifier_block,
+					  CPUFREQ_TRANSITION_NOTIFIER);
+	}
+
+ out:
+	mutex_unlock(&global_tunables_lock);
+	return 0;
+
+ fail:
+	policy->governor_data = NULL;
+	interactive_tunables_free(tunables);
+
+ free_int_policy:
+	mutex_unlock(&global_tunables_lock);
+
+	interactive_policy_free(ipolicy);
+	pr_err("governor initialization failed (%d)\n", ret);
+
+	return ret;
+}
+
+void cpufreq_interactive_exit(struct cpufreq_policy *policy)
+{
+	struct interactive_policy *ipolicy = policy->governor_data;
+	struct interactive_tunables *tunables = ipolicy->tunables;
+	unsigned int count;
+
+	mutex_lock(&global_tunables_lock);
+
+	/* Last policy using the governor ? */
+	if (!--interactive_gov.usage_count) {
+		cpufreq_unregister_notifier(&cpufreq_notifier_block,
+					    CPUFREQ_TRANSITION_NOTIFIER);
+		idle_notifier_unregister(&cpufreq_interactive_idle_nb);
+	}
+
+	count = gov_attr_set_put(&tunables->attr_set, &ipolicy->tunables_hook);
+	policy->governor_data = NULL;
+	if (!count)
+		interactive_tunables_free(tunables);
+
+	mutex_unlock(&global_tunables_lock);
+
+	interactive_policy_free(ipolicy);
+}
+
+int cpufreq_interactive_start(struct cpufreq_policy *policy)
+{
+	struct interactive_policy *ipolicy = policy->governor_data;
+	struct interactive_cpu *icpu;
+	unsigned int cpu;
+
+	for_each_cpu(cpu, policy->cpus) {
+		icpu = &per_cpu(interactive_cpu, cpu);
+
+		icpu->target_freq = policy->cur;
+		icpu->floor_freq = icpu->target_freq;
+		icpu->pol_floor_val_time = ktime_to_us(ktime_get());
+		icpu->loc_floor_val_time = icpu->pol_floor_val_time;
+		icpu->pol_hispeed_val_time = icpu->pol_floor_val_time;
+		icpu->loc_hispeed_val_time = icpu->pol_floor_val_time;
+
+		down_write(&icpu->enable_sem);
+		icpu->ipolicy = ipolicy;
+		up_write(&icpu->enable_sem);
+
+		slack_timer_resched(icpu, cpu, false);
+	}
+
+	gov_set_update_util(ipolicy);
+	return 0;
+}
+
+void cpufreq_interactive_stop(struct cpufreq_policy *policy)
+{
+	struct interactive_policy *ipolicy = policy->governor_data;
+	struct interactive_cpu *icpu;
+	unsigned int cpu;
+
+	gov_clear_update_util(ipolicy->policy);
+
+	for_each_cpu(cpu, policy->cpus) {
+		icpu = &per_cpu(interactive_cpu, cpu);
+
+		icpu_cancel_work(icpu);
+
+		down_write(&icpu->enable_sem);
+		icpu->ipolicy = NULL;
+		up_write(&icpu->enable_sem);
+	}
+}
+
+void cpufreq_interactive_limits(struct cpufreq_policy *policy)
+{
+	struct interactive_cpu *icpu;
+	unsigned int cpu;
+	unsigned long flags;
+
+	cpufreq_policy_apply_limits(policy);
+
+	for_each_cpu(cpu, policy->cpus) {
+		icpu = &per_cpu(interactive_cpu, cpu);
+
+		spin_lock_irqsave(&icpu->target_freq_lock, flags);
+
+		if (policy->max < icpu->target_freq)
+			icpu->target_freq = policy->max;
+		else if (policy->min > icpu->target_freq)
+			icpu->target_freq = policy->min;
+
+		spin_unlock_irqrestore(&icpu->target_freq_lock, flags);
+	}
+}
+
+static struct interactive_governor interactive_gov = {
+	.gov = {
+		.name			= "interactive",
+		.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
+		.owner			= THIS_MODULE,
+		.init			= cpufreq_interactive_init,
+		.exit			= cpufreq_interactive_exit,
+		.start			= cpufreq_interactive_start,
+		.stop			= cpufreq_interactive_stop,
+		.limits			= cpufreq_interactive_limits,
+	}
+};
+
+static void cpufreq_interactive_nop_timer(unsigned long data)
+{
+	/*
+	 * The purpose of slack-timer is to wake up the CPU from IDLE, in order
+	 * to decrease its frequency if it is not set to minimum already.
+	 *
+	 * This is important for platforms where CPU with higher frequencies
+	 * consume higher power even at IDLE.
+	 */
+}
+
+static int __init cpufreq_interactive_gov_init(void)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+	struct interactive_cpu *icpu;
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		icpu = &per_cpu(interactive_cpu, cpu);
+
+		init_irq_work(&icpu->irq_work, irq_work);
+		spin_lock_init(&icpu->load_lock);
+		spin_lock_init(&icpu->target_freq_lock);
+		init_rwsem(&icpu->enable_sem);
+
+		/* Initialize per-cpu slack-timer */
+		init_timer_pinned(&icpu->slack_timer);
+		icpu->slack_timer.function = cpufreq_interactive_nop_timer;
+	}
+
+	spin_lock_init(&speedchange_cpumask_lock);
+	speedchange_task = kthread_create(cpufreq_interactive_speedchange_task,
+					  NULL, "cfinteractive");
+	if (IS_ERR(speedchange_task))
+		return PTR_ERR(speedchange_task);
+
+	sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, &param);
+	get_task_struct(speedchange_task);
+
+	/* wake up so the thread does not look hung to the freezer */
+	wake_up_process(speedchange_task);
+
+	return cpufreq_register_governor(CPU_FREQ_GOV_INTERACTIVE);
+}
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+struct cpufreq_governor *cpufreq_default_governor(void)
+{
+	return CPU_FREQ_GOV_INTERACTIVE;
+}
+
+fs_initcall(cpufreq_interactive_gov_init);
+#else
+module_init(cpufreq_interactive_gov_init);
+#endif
+
+static void __exit cpufreq_interactive_gov_exit(void)
+{
+	cpufreq_unregister_governor(CPU_FREQ_GOV_INTERACTIVE);
+	kthread_stop(speedchange_task);
+	put_task_struct(speedchange_task);
+}
+module_exit(cpufreq_interactive_gov_exit);
+
+MODULE_AUTHOR("Mike Chan <mike@android.com>");
+MODULE_DESCRIPTION("'cpufreq_interactive' - A dynamic cpufreq governor for Latency sensitive workloads");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c
index dafb679..399428e 100644
--- a/drivers/cpufreq/cpufreq_performance.c
+++ b/drivers/cpufreq/cpufreq_performance.c
@@ -22,7 +22,10 @@
 	__cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
 }
 
-static struct cpufreq_governor cpufreq_gov_performance = {
+#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_performance = {
 	.name		= "performance",
 	.owner		= THIS_MODULE,
 	.limits		= cpufreq_gov_performance_limits,
diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c
index 78a6510..5daa500 100644
--- a/drivers/cpufreq/cpufreq_powersave.c
+++ b/drivers/cpufreq/cpufreq_powersave.c
@@ -22,7 +22,10 @@
 	__cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);
 }
 
-static struct cpufreq_governor cpufreq_gov_powersave = {
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_powersave = {
 	.name		= "powersave",
 	.limits		= cpufreq_gov_powersave_limits,
 	.owner		= THIS_MODULE,
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c
index bd897e3..765166d 100644
--- a/drivers/cpufreq/cpufreq_userspace.c
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -118,7 +118,10 @@
 	mutex_unlock(&userspace_mutex);
 }
 
-static struct cpufreq_governor cpufreq_gov_userspace = {
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_userspace = {
 	.name		= "userspace",
 	.init		= cpufreq_userspace_policy_init,
 	.exit		= cpufreq_userspace_policy_exit,
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 35237c8..439f460 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -193,7 +193,7 @@
 	}
 
 	/* Take note of the planned idle state. */
-	sched_idle_set_state(target_state);
+	sched_idle_set_state(target_state, index);
 
 	trace_cpu_idle_rcuidle(index, dev->cpu);
 	time_start = ns_to_ktime(local_clock());
@@ -206,7 +206,7 @@
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
 	/* The cpu is no longer idle or about to enter idle. */
-	sched_idle_set_state(NULL);
+	sched_idle_set_state(NULL, -1);
 
 	if (broadcast) {
 		if (WARN_ON_ONCE(!irqs_disabled()))
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 03d38c2..65bb6fd 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -178,7 +178,12 @@
 
 	/* for higher loadavg, we are more reluctant */
 
-	mult += 2 * get_loadavg(load);
+	/*
+	 * this doesn't work as intended - it is almost always 0, but can
+	 * sometimes, depending on workload, spike very high into the hundreds
+	 * even when the average cpu load is under 10%.
+	 */
+	/* mult += 2 * get_loadavg(); */
 
 	/* for IO wait tasks (per cpu!) we add 5x each */
 	mult += 10 * nr_iowaiters;
diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
index 04bf298..883b3be 100644
--- a/drivers/dma-buf/fence.c
+++ b/drivers/dma-buf/fence.c
@@ -68,6 +68,8 @@
 	struct fence_cb *cur, *tmp;
 	int ret = 0;
 
+	lockdep_assert_held(fence->lock);
+
 	if (WARN_ON(!fence))
 		return -EINVAL;
 
@@ -159,9 +161,6 @@
 	if (WARN_ON(timeout < 0))
 		return -EINVAL;
 
-	if (timeout == 0)
-		return fence_is_signaled(fence);
-
 	trace_fence_wait_start(fence);
 	ret = fence->ops->wait(fence, intr, timeout);
 	trace_fence_wait_end(fence);
@@ -329,8 +328,12 @@
 	spin_lock_irqsave(fence->lock, flags);
 
 	ret = !list_empty(&cb->node);
-	if (ret)
+	if (ret) {
 		list_del_init(&cb->node);
+		if (list_empty(&fence->cb_list))
+			if (fence->ops->disable_signaling)
+				fence->ops->disable_signaling(fence);
+	}
 
 	spin_unlock_irqrestore(fence->lock, flags);
 
diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 723d8af..82f35a4 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -280,18 +280,24 @@
 				      unsigned *pshared_count,
 				      struct fence ***pshared)
 {
-	unsigned shared_count = 0;
-	unsigned retry = 1;
-	struct fence **shared = NULL, *fence_excl = NULL;
-	int ret = 0;
+	struct fence **shared = NULL;
+	struct fence *fence_excl;
+	unsigned int shared_count;
+	int ret = 1;
 
-	while (retry) {
+	do {
 		struct reservation_object_list *fobj;
 		unsigned seq;
+		unsigned int i;
 
-		seq = read_seqcount_begin(&obj->seq);
+		shared_count = i = 0;
 
 		rcu_read_lock();
+		seq = read_seqcount_begin(&obj->seq);
+
+		fence_excl = rcu_dereference(obj->fence_excl);
+		if (fence_excl && !fence_get_rcu(fence_excl))
+			goto unlock;
 
 		fobj = rcu_dereference(obj->fence);
 		if (fobj) {
@@ -309,52 +315,37 @@
 				}
 
 				ret = -ENOMEM;
-				shared_count = 0;
 				break;
 			}
 			shared = nshared;
-			memcpy(shared, fobj->shared, sz);
 			shared_count = fobj->shared_count;
-		} else
-			shared_count = 0;
-		fence_excl = rcu_dereference(obj->fence_excl);
-
-		retry = read_seqcount_retry(&obj->seq, seq);
-		if (retry)
-			goto unlock;
-
-		if (!fence_excl || fence_get_rcu(fence_excl)) {
-			unsigned i;
 
 			for (i = 0; i < shared_count; ++i) {
-				if (fence_get_rcu(shared[i]))
-					continue;
-
-				/* uh oh, refcount failed, abort and retry */
-				while (i--)
-					fence_put(shared[i]);
-
-				if (fence_excl) {
-					fence_put(fence_excl);
-					fence_excl = NULL;
-				}
-
-				retry = 1;
-				break;
+				shared[i] = rcu_dereference(fobj->shared[i]);
+				if (!fence_get_rcu(shared[i]))
+					break;
 			}
-		} else
-			retry = 1;
+		}
 
+		if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) {
+			while (i--)
+				fence_put(shared[i]);
+			fence_put(fence_excl);
+			goto unlock;
+		}
+
+		ret = 0;
 unlock:
 		rcu_read_unlock();
-	}
-	*pshared_count = shared_count;
-	if (shared_count)
-		*pshared = shared;
-	else {
-		*pshared = NULL;
+	} while (ret);
+
+	if (!shared_count) {
 		kfree(shared);
+		shared = NULL;
 	}
+
+	*pshared_count = shared_count;
+	*pshared = shared;
 	*pfence_excl = fence_excl;
 
 	return ret;
@@ -379,10 +370,7 @@
 {
 	struct fence *fence;
 	unsigned seq, shared_count, i = 0;
-	long ret = timeout;
-
-	if (!timeout)
-		return reservation_object_test_signaled_rcu(obj, wait_all);
+	long ret = timeout ? timeout : 1;
 
 retry:
 	fence = NULL;
@@ -397,9 +385,6 @@
 		if (fobj)
 			shared_count = fobj->shared_count;
 
-		if (read_seqcount_retry(&obj->seq, seq))
-			goto unlock_retry;
-
 		for (i = 0; i < shared_count; ++i) {
 			struct fence *lfence = rcu_dereference(fobj->shared[i]);
 
@@ -422,9 +407,6 @@
 	if (!shared_count) {
 		struct fence *fence_excl = rcu_dereference(obj->fence_excl);
 
-		if (read_seqcount_retry(&obj->seq, seq))
-			goto unlock_retry;
-
 		if (fence_excl &&
 		    !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence_excl->flags)) {
 			if (!fence_get_rcu(fence_excl))
@@ -439,6 +421,11 @@
 
 	rcu_read_unlock();
 	if (fence) {
+		if (read_seqcount_retry(&obj->seq, seq)) {
+			fence_put(fence);
+			goto retry;
+		}
+
 		ret = fence_wait_timeout(fence, intr, ret);
 		fence_put(fence);
 		if (ret > 0 && wait_all && (i + 1 < shared_count))
@@ -484,12 +471,13 @@
 					  bool test_all)
 {
 	unsigned seq, shared_count;
-	int ret = true;
+	int ret;
 
+	rcu_read_lock();
 retry:
+	ret = true;
 	shared_count = 0;
 	seq = read_seqcount_begin(&obj->seq);
-	rcu_read_lock();
 
 	if (test_all) {
 		unsigned i;
@@ -500,46 +488,35 @@
 		if (fobj)
 			shared_count = fobj->shared_count;
 
-		if (read_seqcount_retry(&obj->seq, seq))
-			goto unlock_retry;
-
 		for (i = 0; i < shared_count; ++i) {
 			struct fence *fence = rcu_dereference(fobj->shared[i]);
 
 			ret = reservation_object_test_signaled_single(fence);
 			if (ret < 0)
-				goto unlock_retry;
+				goto retry;
 			else if (!ret)
 				break;
 		}
 
-		/*
-		 * There could be a read_seqcount_retry here, but nothing cares
-		 * about whether it's the old or newer fence pointers that are
-		 * signaled. That race could still have happened after checking
-		 * read_seqcount_retry. If you care, use ww_mutex_lock.
-		 */
+		if (read_seqcount_retry(&obj->seq, seq))
+			goto retry;
 	}
 
 	if (!shared_count) {
 		struct fence *fence_excl = rcu_dereference(obj->fence_excl);
 
-		if (read_seqcount_retry(&obj->seq, seq))
-			goto unlock_retry;
-
 		if (fence_excl) {
 			ret = reservation_object_test_signaled_single(
 								fence_excl);
 			if (ret < 0)
-				goto unlock_retry;
+				goto retry;
+
+			if (read_seqcount_retry(&obj->seq, seq))
+				goto retry;
 		}
 	}
 
 	rcu_read_unlock();
 	return ret;
-
-unlock_retry:
-	rcu_read_unlock();
-	goto retry;
 }
 EXPORT_SYMBOL_GPL(reservation_object_test_signaled_rcu);
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 4f35114..9dc86d3 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -169,6 +169,13 @@
 	return true;
 }
 
+static void timeline_fence_disable_signaling(struct fence *fence)
+{
+	struct sync_pt *pt = container_of(fence, struct sync_pt, base);
+
+	list_del_init(&pt->link);
+}
+
 static void timeline_fence_value_str(struct fence *fence,
 				    char *str, int size)
 {
@@ -187,6 +194,7 @@
 	.get_driver_name = timeline_fence_get_driver_name,
 	.get_timeline_name = timeline_fence_get_timeline_name,
 	.enable_signaling = timeline_fence_enable_signaling,
+	.disable_signaling = timeline_fence_disable_signaling,
 	.signaled = timeline_fence_signaled,
 	.wait = fence_default_wait,
 	.release = timeline_fence_release,
@@ -360,8 +368,8 @@
 	}
 
 	sync_file = sync_file_create(&pt->base);
+	fence_put(&pt->base);
 	if (!sync_file) {
-		fence_put(&pt->base);
 		err = -ENOMEM;
 		goto err;
 	}
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index f0c374d..c835f62 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -299,10 +299,9 @@
 
 	poll_wait(file, &sync_file->wq, wait);
 
-	if (!poll_does_not_wait(wait) &&
-	    !test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
+	if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
 		if (fence_add_callback(sync_file->fence, &sync_file->cb,
-				       fence_check_cb_func) < 0)
+					   fence_check_cb_func) < 0)
 			wake_up_all(&sync_file->wq);
 	}
 
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 993aa56..7ec09ea8 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -18,8 +18,6 @@
 
 #include "efistub.h"
 
-bool __nokaslr;
-
 static int efi_get_secureboot(efi_system_table_t *sys_table_arg)
 {
 	static efi_char16_t const sb_var_name[] = {
@@ -268,18 +266,6 @@
 		goto fail;
 	}
 
-	/* check whether 'nokaslr' was passed on the command line */
-	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
-		static const u8 default_cmdline[] = CONFIG_CMDLINE;
-		const u8 *str, *cmdline = cmdline_ptr;
-
-		if (IS_ENABLED(CONFIG_CMDLINE_FORCE))
-			cmdline = default_cmdline;
-		str = strstr(cmdline, "nokaslr");
-		if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
-			__nokaslr = true;
-	}
-
 	si = setup_graphics(sys_table);
 
 	status = handle_kernel_image(sys_table, image_addr, &image_size,
@@ -291,9 +277,13 @@
 		goto fail_free_cmdline;
 	}
 
-	status = efi_parse_options(cmdline_ptr);
-	if (status != EFI_SUCCESS)
-		pr_efi_err(sys_table, "Failed to parse EFI cmdline options\n");
+	if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
+	    IS_ENABLED(CONFIG_CMDLINE_FORCE) ||
+	    cmdline_size == 0)
+		efi_parse_options(CONFIG_CMDLINE);
+
+	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && cmdline_size > 0)
+		efi_parse_options(cmdline_ptr);
 
 	secure_boot = efi_get_secureboot(sys_table);
 	if (secure_boot > 0)
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index 959d9b8..f7a6970 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -24,8 +24,6 @@
 
 #include "efistub.h"
 
-extern bool __nokaslr;
-
 efi_status_t check_platform_features(efi_system_table_t *sys_table_arg)
 {
 	u64 tg;
@@ -60,7 +58,7 @@
 	u64 phys_seed = 0;
 
 	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
-		if (!__nokaslr) {
+		if (!nokaslr()) {
 			status = efi_get_random_bytes(sys_table_arg,
 						      sizeof(phys_seed),
 						      (u8 *)&phys_seed);
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 09d10dc..d984332 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -41,6 +41,13 @@
 #define EFI_ALLOC_ALIGN		EFI_PAGE_SIZE
 #endif
 
+static int __section(.data) __nokaslr;
+
+int __pure nokaslr(void)
+{
+	return __nokaslr;
+}
+
 #define EFI_MMAP_NR_SLACK_SLOTS	8
 
 struct file_info {
@@ -351,10 +358,14 @@
  * environments, first in the early boot environment of the EFI boot
  * stub, and subsequently during the kernel boot.
  */
-efi_status_t efi_parse_options(char *cmdline)
+efi_status_t efi_parse_options(char const *cmdline)
 {
 	char *str;
 
+	str = strstr(cmdline, "nokaslr");
+	if (str == cmdline || (str && str > cmdline && *(str - 1) == ' '))
+		__nokaslr = 1;
+
 	/*
 	 * Currently, the only efi= option we look for is 'nochunk', which
 	 * is intended to work around known issues on certain x86 UEFI
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index fac6799..d0e5aca 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -15,6 +15,8 @@
  */
 #undef __init
 
+extern int __pure nokaslr(void);
+
 void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
 
 efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image,
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 483059a..43cb33d 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -12,6 +12,7 @@
 	select I2C
 	select I2C_ALGOBIT
 	select DMA_SHARED_BUFFER
+	select SYNC_FILE
 	help
 	  Kernel-level support for the Direct Rendering Infrastructure (DRI)
 	  introduced in XFree86 4.0. If you say Y here, you need to select
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index dd6fff1..d6f57c4 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -30,6 +30,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_mode.h>
 #include <drm/drm_plane_helper.h>
+#include <linux/sync_file.h>
 
 #include "drm_crtc_internal.h"
 
@@ -292,6 +293,23 @@
 }
 EXPORT_SYMBOL(drm_atomic_get_crtc_state);
 
+static void set_out_fence_for_crtc(struct drm_atomic_state *state,
+				   struct drm_crtc *crtc, s32 __user *fence_ptr)
+{
+	state->crtcs[drm_crtc_index(crtc)].out_fence_ptr = fence_ptr;
+}
+
+static s32 __user *get_out_fence_for_crtc(struct drm_atomic_state *state,
+					  struct drm_crtc *crtc)
+{
+	s32 __user *fence_ptr;
+
+	fence_ptr = state->crtcs[drm_crtc_index(crtc)].out_fence_ptr;
+	state->crtcs[drm_crtc_index(crtc)].out_fence_ptr = NULL;
+
+	return fence_ptr;
+}
+
 /**
  * drm_atomic_set_mode_for_crtc - set mode for CRTC
  * @state: the CRTC whose incoming state to update
@@ -496,6 +514,16 @@
 					&replaced);
 		state->color_mgmt_changed |= replaced;
 		return ret;
+	} else if (property == config->prop_out_fence_ptr) {
+		s32 __user *fence_ptr = u64_to_user_ptr(val);
+
+		if (!fence_ptr)
+			return 0;
+
+		if (put_user(-1, fence_ptr))
+			return -EFAULT;
+
+		set_out_fence_for_crtc(state->state, crtc, fence_ptr);
 	} else if (crtc->funcs->atomic_set_property)
 		return crtc->funcs->atomic_set_property(crtc, state, property, val);
 	else
@@ -538,6 +566,8 @@
 		*val = (state->ctm) ? state->ctm->base.id : 0;
 	else if (property == config->gamma_lut_property)
 		*val = (state->gamma_lut) ? state->gamma_lut->base.id : 0;
+	else if (property == config->prop_out_fence_ptr)
+		*val = 0;
 	else if (crtc->funcs->atomic_get_property)
 		return crtc->funcs->atomic_get_property(crtc, state, property, val);
 	else
@@ -693,6 +723,17 @@
 		drm_atomic_set_fb_for_plane(state, fb);
 		if (fb)
 			drm_framebuffer_unreference(fb);
+	} else if (property == config->prop_in_fence_fd) {
+		if (state->fence)
+			return -EINVAL;
+
+		if (U642I64(val) == -1)
+			return 0;
+
+		state->fence = sync_file_get_fence(val);
+		if (!state->fence)
+			return -EINVAL;
+
 	} else if (property == config->prop_crtc_id) {
 		struct drm_crtc *crtc = drm_crtc_find(dev, val);
 		return drm_atomic_set_crtc_for_plane(state, crtc);
@@ -752,6 +793,8 @@
 
 	if (property == config->prop_fb_id) {
 		*val = (state->fb) ? state->fb->base.id : 0;
+	} else if (property == config->prop_in_fence_fd) {
+		*val = -1;
 	} else if (property == config->prop_crtc_id) {
 		*val = (state->crtc) ? state->crtc->base.id : 0;
 	} else if (property == config->prop_crtc_x) {
@@ -1154,6 +1197,36 @@
 EXPORT_SYMBOL(drm_atomic_set_fb_for_plane);
 
 /**
+ * drm_atomic_set_fence_for_plane - set fence for plane
+ * @plane_state: atomic state object for the plane
+ * @fence: fence to use for the plane
+ *
+ * Helper to setup the plane_state fence in case it is not set yet.
+ * By using this drivers doesn't need to worry if the user choose
+ * implicit or explicit fencing.
+ *
+ * This function will not set the fence to the state if it was set
+ * via explicit fencing interfaces on the atomic ioctl. It will
+ * all drope the reference to the fence as we not storing it
+ * anywhere.
+ *
+ * Otherwise, if plane_state->fence is not set this function we
+ * just set it with the received implict fence.
+ */
+void
+drm_atomic_set_fence_for_plane(struct drm_plane_state *plane_state,
+			       struct fence *fence)
+{
+	if (plane_state->fence) {
+		fence_put(fence);
+		return;
+	}
+
+	plane_state->fence = fence;
+}
+EXPORT_SYMBOL(drm_atomic_set_fence_for_plane);
+
+/**
  * drm_atomic_set_crtc_for_connector - set crtc for connector
  * @conn_state: atomic state object for the connector
  * @crtc: crtc to use for the connector
@@ -1472,11 +1545,9 @@
  */
 
 static struct drm_pending_vblank_event *create_vblank_event(
-		struct drm_device *dev, struct drm_file *file_priv,
-		struct fence *fence, uint64_t user_data)
+		struct drm_device *dev, uint64_t user_data)
 {
 	struct drm_pending_vblank_event *e = NULL;
-	int ret;
 
 	e = kzalloc(sizeof *e, GFP_KERNEL);
 	if (!e)
@@ -1486,17 +1557,6 @@
 	e->event.base.length = sizeof(e->event);
 	e->event.user_data = user_data;
 
-	if (file_priv) {
-		ret = drm_event_reserve_init(dev, file_priv, &e->base,
-					     &e->event.base);
-		if (ret) {
-			kfree(e);
-			return NULL;
-		}
-	}
-
-	e->base.fence = fence;
-
 	return e;
 }
 
@@ -1601,6 +1661,206 @@
 }
 EXPORT_SYMBOL(drm_atomic_clean_old_fb);
 
+/**
+ * DOC: explicit fencing properties
+ *
+ * Explicit fencing allows userspace to control the buffer synchronization
+ * between devices. A Fence or a group of fences are transfered to/from
+ * userspace using Sync File fds and there are two DRM properties for that.
+ * IN_FENCE_FD on each DRM Plane to send fences to the kernel and
+ * OUT_FENCE_PTR on each DRM CRTC to receive fences from the kernel.
+ *
+ * As a contrast, with implicit fencing the kernel keeps track of any
+ * ongoing rendering, and automatically ensures that the atomic update waits
+ * for any pending rendering to complete. For shared buffers represented with
+ * a struct &dma_buf this is tracked in &reservation_object structures.
+ * Implicit syncing is how Linux traditionally worked (e.g. DRI2/3 on X.org),
+ * whereas explicit fencing is what Android wants.
+ *
+ * "IN_FENCE_FD”:
+ *	Use this property to pass a fence that DRM should wait on before
+ *	proceeding with the Atomic Commit request and show the framebuffer for
+ *	the plane on the screen. The fence can be either a normal fence or a
+ *	merged one, the sync_file framework will handle both cases and use a
+ *	fence_array if a merged fence is received. Passing -1 here means no
+ *	fences to wait on.
+ *
+ *	If the Atomic Commit request has the DRM_MODE_ATOMIC_TEST_ONLY flag
+ *	it will only check if the Sync File is a valid one.
+ *
+ *	On the driver side the fence is stored on the @fence parameter of
+ *	struct &drm_plane_state. Drivers which also support implicit fencing
+ *	should set the implicit fence using drm_atomic_set_fence_for_plane(),
+ *	to make sure there's consistent behaviour between drivers in precedence
+ *	of implicit vs. explicit fencing.
+ *
+ * "OUT_FENCE_PTR”:
+ *	Use this property to pass a file descriptor pointer to DRM. Once the
+ *	Atomic Commit request call returns OUT_FENCE_PTR will be filled with
+ *	the file descriptor number of a Sync File. This Sync File contains the
+ *	CRTC fence that will be signaled when all framebuffers present on the
+ *	Atomic Commit * request for that given CRTC are scanned out on the
+ *	screen.
+ *
+ *	The Atomic Commit request fails if a invalid pointer is passed. If the
+ *	Atomic Commit request fails for any other reason the out fence fd
+ *	returned will be -1. On a Atomic Commit with the
+ *	DRM_MODE_ATOMIC_TEST_ONLY flag the out fence will also be set to -1.
+ *
+ *	Note that out-fences don't have a special interface to drivers and are
+ *	internally represented by a struct &drm_pending_vblank_event in struct
+ *	&drm_crtc_state, which is also used by the nonblocking atomic commit
+ *	helpers and for the DRM event handling for existing userspace.
+ */
+
+struct drm_out_fence_state {
+	s32 __user *out_fence_ptr;
+	struct sync_file *sync_file;
+	int fd;
+};
+
+static int setup_out_fence(struct drm_out_fence_state *fence_state,
+			   struct fence *fence)
+{
+	fence_state->fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fence_state->fd < 0)
+		return fence_state->fd;
+
+	if (put_user(fence_state->fd, fence_state->out_fence_ptr))
+		return -EFAULT;
+
+	fence_state->sync_file = sync_file_create(fence);
+	if (!fence_state->sync_file)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int prepare_crtc_signaling(struct drm_device *dev,
+				  struct drm_atomic_state *state,
+				  struct drm_mode_atomic *arg,
+				  struct drm_file *file_priv,
+				  struct drm_out_fence_state **fence_state,
+				  unsigned int *num_fences)
+{
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	int i, ret;
+
+	if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY)
+		return 0;
+
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		s32 __user *fence_ptr;
+
+		fence_ptr = get_out_fence_for_crtc(crtc_state->state, crtc);
+
+		if (arg->flags & DRM_MODE_PAGE_FLIP_EVENT || fence_ptr) {
+			struct drm_pending_vblank_event *e;
+
+			e = create_vblank_event(dev, arg->user_data);
+			if (!e)
+				return -ENOMEM;
+
+			crtc_state->event = e;
+		}
+
+		if (arg->flags & DRM_MODE_PAGE_FLIP_EVENT) {
+			struct drm_pending_vblank_event *e = crtc_state->event;
+
+			if (!file_priv)
+				continue;
+
+			ret = drm_event_reserve_init(dev, file_priv, &e->base,
+						     &e->event.base);
+			if (ret) {
+				kfree(e);
+				crtc_state->event = NULL;
+				return ret;
+			}
+		}
+
+		if (fence_ptr) {
+			struct fence *fence;
+			struct drm_out_fence_state *f;
+
+			f = krealloc(*fence_state, sizeof(**fence_state) *
+				     (*num_fences + 1), GFP_KERNEL);
+			if (!f)
+				return -ENOMEM;
+
+			memset(&f[*num_fences], 0, sizeof(*f));
+
+			f[*num_fences].out_fence_ptr = fence_ptr;
+			*fence_state = f;
+
+			fence = drm_crtc_create_fence(crtc);
+			if (!fence)
+				return -ENOMEM;
+
+			ret = setup_out_fence(&f[(*num_fences)++], fence);
+			if (ret) {
+				fence_put(fence);
+				return ret;
+			}
+
+			crtc_state->event->base.fence = fence;
+		}
+	}
+
+	return 0;
+}
+
+static void complete_crtc_signaling(struct drm_device *dev,
+				    struct drm_atomic_state *state,
+				    struct drm_out_fence_state *fence_state,
+				    unsigned int num_fences,
+				    bool install_fds)
+{
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	int i;
+
+	if (install_fds) {
+		for (i = 0; i < num_fences; i++)
+			fd_install(fence_state[i].fd,
+				   fence_state[i].sync_file->file);
+
+		kfree(fence_state);
+		return;
+	}
+
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		struct drm_pending_vblank_event *event = crtc_state->event;
+		/*
+		 * Free the allocated event. drm_atomic_helper_setup_commit
+		 * can allocate an event too, so only free it if it's ours
+		 * to prevent a double free in drm_atomic_state_clear.
+		 */
+		if (event && (event->base.fence || event->base.file_priv)) {
+			drm_event_cancel_free(dev, &event->base);
+			crtc_state->event = NULL;
+		}
+	}
+
+	if (!fence_state)
+		return;
+
+	for (i = 0; i < num_fences; i++) {
+		if (fence_state[i].sync_file)
+			fput(fence_state[i].sync_file->file);
+		if (fence_state[i].fd >= 0)
+			put_unused_fd(fence_state[i].fd);
+
+		/* If this fails log error to the user */
+		if (fence_state[i].out_fence_ptr &&
+		    put_user(-1, fence_state[i].out_fence_ptr))
+			DRM_DEBUG_ATOMIC("Couldn't clear out_fence_ptr\n");
+	}
+
+	kfree(fence_state);
+}
+
 int drm_mode_atomic_ioctl(struct drm_device *dev,
 			  void *data, struct drm_file *file_priv)
 {
@@ -1613,11 +1873,10 @@
 	struct drm_atomic_state *state;
 	struct drm_modeset_acquire_ctx ctx;
 	struct drm_plane *plane;
-	struct drm_crtc *crtc;
-	struct drm_crtc_state *crtc_state;
+	struct drm_out_fence_state *fence_state = NULL;
 	unsigned plane_mask;
 	int ret = 0;
-	unsigned int i, j;
+	unsigned int i, j, num_fences = 0;
 
 	/* disallow for drivers not supporting atomic: */
 	if (!drm_core_check_feature(dev, DRIVER_ATOMIC))
@@ -1732,20 +1991,10 @@
 		drm_mode_object_unreference(obj);
 	}
 
-	if (arg->flags & DRM_MODE_PAGE_FLIP_EVENT) {
-		for_each_crtc_in_state(state, crtc, crtc_state, i) {
-			struct drm_pending_vblank_event *e;
-
-			e = create_vblank_event(dev, file_priv, NULL,
-						arg->user_data);
-			if (!e) {
-				ret = -ENOMEM;
-				goto out;
-			}
-
-			crtc_state->event = e;
-		}
-	}
+	ret = prepare_crtc_signaling(dev, state, arg, file_priv, &fence_state,
+				     &num_fences);
+	if (ret)
+		goto out;
 
 	if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) {
 		/*
@@ -1762,20 +2011,7 @@
 out:
 	drm_atomic_clean_old_fb(dev, plane_mask, ret);
 
-	if (ret && arg->flags & DRM_MODE_PAGE_FLIP_EVENT) {
-		/*
-		 * Free the allocated event. drm_atomic_helper_setup_commit
-		 * can allocate an event too, so only free it if it's ours
-		 * to prevent a double free in drm_atomic_state_clear.
-		 */
-		for_each_crtc_in_state(state, crtc, crtc_state, i) {
-			struct drm_pending_vblank_event *event = crtc_state->event;
-			if (event && (event->base.fence || event->base.file_priv)) {
-				drm_event_cancel_free(dev, &event->base);
-				crtc_state->event = NULL;
-			}
-		}
-	}
+	complete_crtc_signaling(dev, state, fence_state, num_fences, !ret);
 
 	if (ret == -EDEADLK) {
 		drm_atomic_state_clear(state);
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 50acd79..f34b4e8 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -3166,6 +3166,9 @@
 {
 	if (state->fb)
 		drm_framebuffer_unreference(state->fb);
+
+	if (state->fence)
+		fence_put(state->fence);
 }
 EXPORT_SYMBOL(__drm_atomic_helper_plane_destroy_state);
 
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 2d7bedf..79b3d52 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -33,6 +33,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/fence.h>
 #include <drm/drmP.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
@@ -141,6 +142,54 @@
 	}
 }
 
+static const struct fence_ops drm_crtc_fence_ops;
+
+static struct drm_crtc *fence_to_crtc(struct fence *fence)
+{
+	BUG_ON(fence->ops != &drm_crtc_fence_ops);
+	return container_of(fence->lock, struct drm_crtc, fence_lock);
+}
+
+static const char *drm_crtc_fence_get_driver_name(struct fence *fence)
+{
+	struct drm_crtc *crtc = fence_to_crtc(fence);
+
+	return crtc->dev->driver->name;
+}
+
+static const char *drm_crtc_fence_get_timeline_name(struct fence *fence)
+{
+	struct drm_crtc *crtc = fence_to_crtc(fence);
+
+	return crtc->timeline_name;
+}
+
+static bool drm_crtc_fence_enable_signaling(struct fence *fence)
+{
+	return true;
+}
+
+static const struct fence_ops drm_crtc_fence_ops = {
+	.get_driver_name = drm_crtc_fence_get_driver_name,
+	.get_timeline_name = drm_crtc_fence_get_timeline_name,
+	.enable_signaling = drm_crtc_fence_enable_signaling,
+	.wait = fence_default_wait,
+};
+
+struct fence *drm_crtc_create_fence(struct drm_crtc *crtc)
+{
+	struct fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	fence_init(fence, &drm_crtc_fence_ops, &crtc->fence_lock,
+		       crtc->fence_context, ++crtc->fence_seqno);
+
+	return fence;
+}
+
 /**
  * drm_crtc_init_with_planes - Initialise a new CRTC object with
  *    specified primary and cursor planes.
@@ -198,6 +247,11 @@
 		return -ENOMEM;
 	}
 
+	crtc->fence_context = fence_context_alloc(1);
+	spin_lock_init(&crtc->fence_lock);
+	snprintf(crtc->timeline_name, sizeof(crtc->timeline_name),
+		 "CRTC:%d-%s", crtc->base.id, crtc->name);
+
 	crtc->base.properties = &crtc->properties;
 
 	list_add_tail(&crtc->head, &config->crtc_list);
@@ -213,6 +267,8 @@
 	if (drm_core_check_feature(dev, DRIVER_ATOMIC)) {
 		drm_object_attach_property(&crtc->base, config->prop_active, 0);
 		drm_object_attach_property(&crtc->base, config->prop_mode_id, 0);
+		drm_object_attach_property(&crtc->base,
+					   config->prop_out_fence_ptr, 0);
 	}
 
 	return 0;
@@ -365,6 +421,18 @@
 		return -ENOMEM;
 	dev->mode_config.prop_fb_id = prop;
 
+	prop = drm_property_create_signed_range(dev, DRM_MODE_PROP_ATOMIC,
+			"IN_FENCE_FD", -1, INT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	dev->mode_config.prop_in_fence_fd = prop;
+
+	prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC,
+			"OUT_FENCE_PTR", 0, U64_MAX);
+	if (!prop)
+		return -ENOMEM;
+	dev->mode_config.prop_out_fence_ptr = prop;
+
 	prop = drm_property_create_object(dev, DRM_MODE_PROP_ATOMIC,
 			"CRTC_ID", DRM_MODE_OBJECT_CRTC);
 	if (!prop)
diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
index c48ba02..df2b51a 100644
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@@ -41,6 +41,8 @@
 			    const struct drm_display_mode *mode,
 			    const struct drm_framebuffer *fb);
 
+struct fence *drm_crtc_create_fence(struct drm_crtc *crtc);
+
 void drm_fb_release(struct drm_file *file_priv);
 
 /* dumb buffer support IOCTLs */
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index 1fd6eac..52629b6 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -18,13 +18,16 @@
  */
 
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_fb_cma_helper.h>
+#include <linux/dma-buf.h>
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
+#include <linux/reservation.h>
 
 #define DEFAULT_FBDEFIO_DELAY_MS 50
 
@@ -265,6 +268,38 @@
 }
 EXPORT_SYMBOL_GPL(drm_fb_cma_get_gem_obj);
 
+/**
+ * drm_fb_cma_prepare_fb() - Prepare CMA framebuffer
+ * @plane: Which plane
+ * @state: Plane state attach fence to
+ *
+ * This should be put into prepare_fb hook of struct &drm_plane_helper_funcs .
+ *
+ * This function checks if the plane FB has an dma-buf attached, extracts
+ * the exclusive fence and attaches it to plane state for the atomic helper
+ * to wait on.
+ *
+ * There is no need for cleanup_fb for CMA based framebuffer drivers.
+ */
+int drm_fb_cma_prepare_fb(struct drm_plane *plane,
+			  struct drm_plane_state *state)
+{
+	struct dma_buf *dma_buf;
+	struct fence *fence;
+
+	if ((plane->state->fb == state->fb) || !state->fb)
+		return 0;
+
+	dma_buf = drm_fb_cma_get_gem_obj(state->fb, 0)->base.dma_buf;
+	if (dma_buf) {
+		fence = reservation_object_get_excl_rcu(dma_buf->resv);
+		drm_atomic_set_fence_for_plane(state, fence);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(drm_fb_cma_prepare_fb);
+
 #ifdef CONFIG_DEBUG_FS
 static void drm_fb_cma_describe(struct drm_framebuffer *fb, struct seq_file *m)
 {
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index c37b7b5..129b80b 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -664,6 +664,10 @@
 		list_del(&p->pending_link);
 	}
 	spin_unlock_irqrestore(&dev->event_lock, flags);
+
+	if (p->fence)
+		fence_put(p->fence);
+
 	kfree(p);
 }
 EXPORT_SYMBOL(drm_event_cancel_free);
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 249c0ae..3957ef8 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -137,6 +137,7 @@
 
 	if (drm_core_check_feature(dev, DRIVER_ATOMIC)) {
 		drm_object_attach_property(&plane->base, config->prop_fb_id, 0);
+		drm_object_attach_property(&plane->base, config->prop_in_fence_fd, -1);
 		drm_object_attach_property(&plane->base, config->prop_crtc_id, 0);
 		drm_object_attach_property(&plane->base, config->prop_crtc_x, 0);
 		drm_object_attach_property(&plane->base, config->prop_crtc_y, 0);
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
index 83bf997..5e67e8b 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
@@ -218,9 +218,10 @@
 
 	mdp5_state = kmemdup(to_mdp5_plane_state(plane->state),
 			sizeof(*mdp5_state), GFP_KERNEL);
+	if (!mdp5_state)
+		return NULL;
 
-	if (mdp5_state && mdp5_state->base.fb)
-		drm_framebuffer_reference(mdp5_state->base.fb);
+	__drm_atomic_helper_plane_duplicate_state(plane, &mdp5_state->base);
 
 	mdp5_state->mode_changed = false;
 	mdp5_state->pending = false;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 52a2a1a..5a18408 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1606,7 +1606,14 @@
 int ttm_bo_wait(struct ttm_buffer_object *bo,
 		bool interruptible, bool no_wait)
 {
-	long timeout = no_wait ? 0 : 15 * HZ;
+	long timeout = 15 * HZ;
+
+	if (no_wait) {
+		if (reservation_object_test_signaled_rcu(bo->resv, true))
+			return 0;
+		else
+			return -EBUSY;
+	}
 
 	timeout = reservation_object_wait_timeout_rcu(bo->resv, true,
 						      interruptible, timeout);
diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index d02ee53..7256647 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -30,6 +30,8 @@
 #define UHID_NAME	"uhid"
 #define UHID_BUFSIZE	32
 
+static DEFINE_MUTEX(uhid_open_mutex);
+
 struct uhid_device {
 	struct mutex devlock;
 	bool running;
@@ -144,15 +146,26 @@
 static int uhid_hid_open(struct hid_device *hid)
 {
 	struct uhid_device *uhid = hid->driver_data;
+	int retval = 0;
 
-	return uhid_queue_event(uhid, UHID_OPEN);
+	mutex_lock(&uhid_open_mutex);
+	if (!hid->open++) {
+		retval = uhid_queue_event(uhid, UHID_OPEN);
+		if (retval)
+			hid->open--;
+	}
+	mutex_unlock(&uhid_open_mutex);
+	return retval;
 }
 
 static void uhid_hid_close(struct hid_device *hid)
 {
 	struct uhid_device *uhid = hid->driver_data;
 
-	uhid_queue_event(uhid, UHID_CLOSE);
+	mutex_lock(&uhid_open_mutex);
+	if (!--hid->open)
+		uhid_queue_event(uhid, UHID_CLOSE);
+	mutex_unlock(&uhid_open_mutex);
 }
 
 static int uhid_hid_parse(struct hid_device *hid)
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index 6261874..34ffa02 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -187,6 +187,19 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called apm-power.
 
+config INPUT_KEYRESET
+	bool "Reset key"
+	depends on INPUT
+	select INPUT_KEYCOMBO
+	---help---
+	  Say Y here if you want to reboot when some keys are pressed;
+
+config INPUT_KEYCOMBO
+	bool "Key combo"
+	depends on INPUT
+	---help---
+	  Say Y here if you want to take action when some keys are pressed;
+
 comment "Input Device Drivers"
 
 source "drivers/input/keyboard/Kconfig"
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 595820b..6a3281c 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -26,5 +26,7 @@
 obj-$(CONFIG_INPUT_MISC)	+= misc/
 
 obj-$(CONFIG_INPUT_APMPOWER)	+= apm-power.o
+obj-$(CONFIG_INPUT_KEYRESET)	+= keyreset.o
+obj-$(CONFIG_INPUT_KEYCOMBO)	+= keycombo.o
 
 obj-$(CONFIG_RMI4_CORE)		+= rmi4/
diff --git a/drivers/input/keyboard/goldfish_events.c b/drivers/input/keyboard/goldfish_events.c
index f6e643b..c877e56 100644
--- a/drivers/input/keyboard/goldfish_events.c
+++ b/drivers/input/keyboard/goldfish_events.c
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/types.h>
 #include <linux/input.h>
+#include <linux/input/mt.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
@@ -24,6 +25,8 @@
 #include <linux/io.h>
 #include <linux/acpi.h>
 
+#define GOLDFISH_MAX_FINGERS 5
+
 enum {
 	REG_READ        = 0x00,
 	REG_SET_PAGE    = 0x00,
@@ -52,7 +55,21 @@
 	value = __raw_readl(edev->addr + REG_READ);
 
 	input_event(edev->input, type, code, value);
-	input_sync(edev->input);
+	// Send an extra (EV_SYN, SYN_REPORT, 0x0) event
+	// if a key was pressed. Some keyboard device
+        // drivers may only send the EV_KEY event and
+        // not EV_SYN.
+        // Note that sending an extra SYN_REPORT is not
+        // necessary nor correct protocol with other
+        // devices such as touchscreens, which will send
+        // their own SYN_REPORT's when sufficient event
+        // information has been collected (e.g., for
+        // touchscreens, when pressure and X/Y coordinates
+	// have been received). Hence, we will only send
+	// this extra SYN_REPORT if type == EV_KEY.
+	if (type == EV_KEY) {
+		input_sync(edev->input);
+	}
 	return IRQ_HANDLED;
 }
 
@@ -154,6 +171,15 @@
 
 	input_dev->name = edev->name;
 	input_dev->id.bustype = BUS_HOST;
+	// Set the Goldfish Device to be multi-touch.
+	// In the Ranchu kernel, there is multi-touch-specific
+	// code for handling ABS_MT_SLOT events.
+	// See drivers/input/input.c:input_handle_abs_event.
+	// If we do not issue input_mt_init_slots,
+        // the kernel will filter out needed ABS_MT_SLOT
+        // events when we touch the screen in more than one place,
+        // preventing multi-touch with more than one finger from working.
+	input_mt_init_slots(input_dev, GOLDFISH_MAX_FINGERS, 0);
 
 	events_import_bits(edev, input_dev->evbit, EV_SYN, EV_MAX);
 	events_import_bits(edev, input_dev->keybit, EV_KEY, KEY_MAX);
diff --git a/drivers/input/keycombo.c b/drivers/input/keycombo.c
new file mode 100644
index 0000000..2fba451
--- /dev/null
+++ b/drivers/input/keycombo.c
@@ -0,0 +1,261 @@
+/* drivers/input/keycombo.c
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/input.h>
+#include <linux/keycombo.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+struct keycombo_state {
+	struct input_handler input_handler;
+	unsigned long keybit[BITS_TO_LONGS(KEY_CNT)];
+	unsigned long upbit[BITS_TO_LONGS(KEY_CNT)];
+	unsigned long key[BITS_TO_LONGS(KEY_CNT)];
+	spinlock_t lock;
+	struct  workqueue_struct *wq;
+	int key_down_target;
+	int key_down;
+	int key_up;
+	struct delayed_work key_down_work;
+	int delay;
+	struct work_struct key_up_work;
+	void (*key_up_fn)(void *);
+	void (*key_down_fn)(void *);
+	void *priv;
+	int key_is_down;
+	struct wakeup_source combo_held_wake_source;
+	struct wakeup_source combo_up_wake_source;
+};
+
+static void do_key_down(struct work_struct *work)
+{
+	struct delayed_work *dwork = container_of(work, struct delayed_work,
+									work);
+	struct keycombo_state *state = container_of(dwork,
+					struct keycombo_state, key_down_work);
+	if (state->key_down_fn)
+		state->key_down_fn(state->priv);
+}
+
+static void do_key_up(struct work_struct *work)
+{
+	struct keycombo_state *state = container_of(work, struct keycombo_state,
+								key_up_work);
+	if (state->key_up_fn)
+		state->key_up_fn(state->priv);
+	__pm_relax(&state->combo_up_wake_source);
+}
+
+static void keycombo_event(struct input_handle *handle, unsigned int type,
+		unsigned int code, int value)
+{
+	unsigned long flags;
+	struct keycombo_state *state = handle->private;
+
+	if (type != EV_KEY)
+		return;
+
+	if (code >= KEY_MAX)
+		return;
+
+	if (!test_bit(code, state->keybit))
+		return;
+
+	spin_lock_irqsave(&state->lock, flags);
+	if (!test_bit(code, state->key) == !value)
+		goto done;
+	__change_bit(code, state->key);
+	if (test_bit(code, state->upbit)) {
+		if (value)
+			state->key_up++;
+		else
+			state->key_up--;
+	} else {
+		if (value)
+			state->key_down++;
+		else
+			state->key_down--;
+	}
+	if (state->key_down == state->key_down_target && state->key_up == 0) {
+		__pm_stay_awake(&state->combo_held_wake_source);
+		state->key_is_down = 1;
+		if (queue_delayed_work(state->wq, &state->key_down_work,
+								state->delay))
+			pr_debug("Key down work already queued!");
+	} else if (state->key_is_down) {
+		if (!cancel_delayed_work(&state->key_down_work)) {
+			__pm_stay_awake(&state->combo_up_wake_source);
+			queue_work(state->wq, &state->key_up_work);
+		}
+		__pm_relax(&state->combo_held_wake_source);
+		state->key_is_down = 0;
+	}
+done:
+	spin_unlock_irqrestore(&state->lock, flags);
+}
+
+static int keycombo_connect(struct input_handler *handler,
+		struct input_dev *dev,
+		const struct input_device_id *id)
+{
+	int i;
+	int ret;
+	struct input_handle *handle;
+	struct keycombo_state *state =
+		container_of(handler, struct keycombo_state, input_handler);
+	for (i = 0; i < KEY_MAX; i++) {
+		if (test_bit(i, state->keybit) && test_bit(i, dev->keybit))
+			break;
+	}
+	if (i == KEY_MAX)
+		return -ENODEV;
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (!handle)
+		return -ENOMEM;
+
+	handle->dev = dev;
+	handle->handler = handler;
+	handle->name = KEYCOMBO_NAME;
+	handle->private = state;
+
+	ret = input_register_handle(handle);
+	if (ret)
+		goto err_input_register_handle;
+
+	ret = input_open_device(handle);
+	if (ret)
+		goto err_input_open_device;
+
+	return 0;
+
+err_input_open_device:
+	input_unregister_handle(handle);
+err_input_register_handle:
+	kfree(handle);
+	return ret;
+}
+
+static void keycombo_disconnect(struct input_handle *handle)
+{
+	input_close_device(handle);
+	input_unregister_handle(handle);
+	kfree(handle);
+}
+
+static const struct input_device_id keycombo_ids[] = {
+		{
+				.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
+				.evbit = { BIT_MASK(EV_KEY) },
+		},
+		{ },
+};
+MODULE_DEVICE_TABLE(input, keycombo_ids);
+
+static int keycombo_probe(struct platform_device *pdev)
+{
+	int ret;
+	int key, *keyp;
+	struct keycombo_state *state;
+	struct keycombo_platform_data *pdata = pdev->dev.platform_data;
+
+	if (!pdata)
+		return -EINVAL;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	spin_lock_init(&state->lock);
+	keyp = pdata->keys_down;
+	while ((key = *keyp++)) {
+		if (key >= KEY_MAX)
+			continue;
+		state->key_down_target++;
+		__set_bit(key, state->keybit);
+	}
+	if (pdata->keys_up) {
+		keyp = pdata->keys_up;
+		while ((key = *keyp++)) {
+			if (key >= KEY_MAX)
+				continue;
+			__set_bit(key, state->keybit);
+			__set_bit(key, state->upbit);
+		}
+	}
+
+	state->wq = alloc_ordered_workqueue("keycombo", 0);
+	if (!state->wq)
+		return -ENOMEM;
+
+	state->priv = pdata->priv;
+
+	if (pdata->key_down_fn)
+		state->key_down_fn = pdata->key_down_fn;
+	INIT_DELAYED_WORK(&state->key_down_work, do_key_down);
+
+	if (pdata->key_up_fn)
+		state->key_up_fn = pdata->key_up_fn;
+	INIT_WORK(&state->key_up_work, do_key_up);
+
+	wakeup_source_init(&state->combo_held_wake_source, "key combo");
+	wakeup_source_init(&state->combo_up_wake_source, "key combo up");
+	state->delay = msecs_to_jiffies(pdata->key_down_delay);
+
+	state->input_handler.event = keycombo_event;
+	state->input_handler.connect = keycombo_connect;
+	state->input_handler.disconnect = keycombo_disconnect;
+	state->input_handler.name = KEYCOMBO_NAME;
+	state->input_handler.id_table = keycombo_ids;
+	ret = input_register_handler(&state->input_handler);
+	if (ret) {
+		kfree(state);
+		return ret;
+	}
+	platform_set_drvdata(pdev, state);
+	return 0;
+}
+
+int keycombo_remove(struct platform_device *pdev)
+{
+	struct keycombo_state *state = platform_get_drvdata(pdev);
+	input_unregister_handler(&state->input_handler);
+	destroy_workqueue(state->wq);
+	kfree(state);
+	return 0;
+}
+
+
+struct platform_driver keycombo_driver = {
+		.driver.name = KEYCOMBO_NAME,
+		.probe = keycombo_probe,
+		.remove = keycombo_remove,
+};
+
+static int __init keycombo_init(void)
+{
+	return platform_driver_register(&keycombo_driver);
+}
+
+static void __exit keycombo_exit(void)
+{
+	return platform_driver_unregister(&keycombo_driver);
+}
+
+module_init(keycombo_init);
+module_exit(keycombo_exit);
diff --git a/drivers/input/keyreset.c b/drivers/input/keyreset.c
new file mode 100644
index 0000000..7e5222ae
--- /dev/null
+++ b/drivers/input/keyreset.c
@@ -0,0 +1,144 @@
+/* drivers/input/keyreset.c
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/input.h>
+#include <linux/keyreset.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/keycombo.h>
+
+struct keyreset_state {
+	int restart_requested;
+	int (*reset_fn)(void);
+	struct platform_device *pdev_child;
+	struct work_struct restart_work;
+};
+
+static void do_restart(struct work_struct *unused)
+{
+	orderly_reboot();
+}
+
+static void do_reset_fn(void *priv)
+{
+	struct keyreset_state *state = priv;
+	if (state->restart_requested)
+		panic("keyboard reset failed, %d", state->restart_requested);
+	if (state->reset_fn) {
+		state->restart_requested = state->reset_fn();
+	} else {
+		pr_info("keyboard reset\n");
+		schedule_work(&state->restart_work);
+		state->restart_requested = 1;
+	}
+}
+
+static int keyreset_probe(struct platform_device *pdev)
+{
+	int ret = -ENOMEM;
+	struct keycombo_platform_data *pdata_child;
+	struct keyreset_platform_data *pdata = pdev->dev.platform_data;
+	int up_size = 0, down_size = 0, size;
+	int key, *keyp;
+	struct keyreset_state *state;
+
+	if (!pdata)
+		return -EINVAL;
+	state = devm_kzalloc(&pdev->dev, sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	state->pdev_child = platform_device_alloc(KEYCOMBO_NAME,
+							PLATFORM_DEVID_AUTO);
+	if (!state->pdev_child)
+		return -ENOMEM;
+	state->pdev_child->dev.parent = &pdev->dev;
+	INIT_WORK(&state->restart_work, do_restart);
+
+	keyp = pdata->keys_down;
+	while ((key = *keyp++)) {
+		if (key >= KEY_MAX)
+			continue;
+		down_size++;
+	}
+	if (pdata->keys_up) {
+		keyp = pdata->keys_up;
+		while ((key = *keyp++)) {
+			if (key >= KEY_MAX)
+				continue;
+			up_size++;
+		}
+	}
+	size = sizeof(struct keycombo_platform_data)
+			+ sizeof(int) * (down_size + 1);
+	pdata_child = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+	if (!pdata_child)
+		goto error;
+	memcpy(pdata_child->keys_down, pdata->keys_down,
+						sizeof(int) * down_size);
+	if (up_size > 0) {
+		pdata_child->keys_up = devm_kzalloc(&pdev->dev, up_size + 1,
+								GFP_KERNEL);
+		if (!pdata_child->keys_up)
+			goto error;
+		memcpy(pdata_child->keys_up, pdata->keys_up,
+							sizeof(int) * up_size);
+		if (!pdata_child->keys_up)
+			goto error;
+	}
+	state->reset_fn = pdata->reset_fn;
+	pdata_child->key_down_fn = do_reset_fn;
+	pdata_child->priv = state;
+	pdata_child->key_down_delay = pdata->key_down_delay;
+	ret = platform_device_add_data(state->pdev_child, pdata_child, size);
+	if (ret)
+		goto error;
+	platform_set_drvdata(pdev, state);
+	return platform_device_add(state->pdev_child);
+error:
+	platform_device_put(state->pdev_child);
+	return ret;
+}
+
+int keyreset_remove(struct platform_device *pdev)
+{
+	struct keyreset_state *state = platform_get_drvdata(pdev);
+	platform_device_put(state->pdev_child);
+	return 0;
+}
+
+
+struct platform_driver keyreset_driver = {
+	.driver.name = KEYRESET_NAME,
+	.probe = keyreset_probe,
+	.remove = keyreset_remove,
+};
+
+static int __init keyreset_init(void)
+{
+	return platform_driver_register(&keyreset_driver);
+}
+
+static void __exit keyreset_exit(void)
+{
+	return platform_driver_unregister(&keyreset_driver);
+}
+
+module_init(keyreset_init);
+module_exit(keyreset_exit);
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 7ffb614..94360fe 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -367,6 +367,17 @@
 	  To compile this driver as a module, choose M here: the module will be
 	  called ati_remote2.
 
+config INPUT_KEYCHORD
+	tristate "Key chord input driver support"
+	help
+	  Say Y here if you want to enable the key chord driver
+	  accessible at /dev/keychord.  This driver can be used
+	  for receiving notifications when client specified key
+	  combinations are pressed.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called keychord.
+
 config INPUT_KEYSPAN_REMOTE
 	tristate "Keyspan DMR USB remote control"
 	depends on USB_ARCH_HAS_HCD
@@ -535,6 +546,11 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called sgi_btns.
 
+config INPUT_GPIO
+	tristate "GPIO driver support"
+	help
+	  Say Y here if you want to support gpio based keys, wheels etc...
+
 config HP_SDC_RTC
 	tristate "HP SDC Real Time Clock"
 	depends on (GSC || HP300) && SERIO
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index 0b6d025..64bf231 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -36,10 +36,12 @@
 obj-$(CONFIG_INPUT_GPIO_BEEPER)		+= gpio-beeper.o
 obj-$(CONFIG_INPUT_GPIO_TILT_POLLED)	+= gpio_tilt_polled.o
 obj-$(CONFIG_INPUT_GPIO_DECODER)	+= gpio_decoder.o
+obj-$(CONFIG_INPUT_GPIO)		+= gpio_event.o gpio_matrix.o gpio_input.o gpio_output.o gpio_axis.o
 obj-$(CONFIG_INPUT_HISI_POWERKEY)	+= hisi_powerkey.o
 obj-$(CONFIG_HP_SDC_RTC)		+= hp_sdc_rtc.o
 obj-$(CONFIG_INPUT_IMS_PCU)		+= ims-pcu.o
 obj-$(CONFIG_INPUT_IXP4XX_BEEPER)	+= ixp4xx-beeper.o
+obj-$(CONFIG_INPUT_KEYCHORD)		+= keychord.o
 obj-$(CONFIG_INPUT_KEYSPAN_REMOTE)	+= keyspan_remote.o
 obj-$(CONFIG_INPUT_KXTJ9)		+= kxtj9.o
 obj-$(CONFIG_INPUT_M68K_BEEP)		+= m68kspkr.o
diff --git a/drivers/input/misc/gpio_axis.c b/drivers/input/misc/gpio_axis.c
new file mode 100644
index 0000000..0acf4a5
--- /dev/null
+++ b/drivers/input/misc/gpio_axis.c
@@ -0,0 +1,192 @@
+/* drivers/input/misc/gpio_axis.c
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/gpio_event.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+
+struct gpio_axis_state {
+	struct gpio_event_input_devs *input_devs;
+	struct gpio_event_axis_info *info;
+	uint32_t pos;
+};
+
+uint16_t gpio_axis_4bit_gray_map_table[] = {
+	[0x0] = 0x0, [0x1] = 0x1, /* 0000 0001 */
+	[0x3] = 0x2, [0x2] = 0x3, /* 0011 0010 */
+	[0x6] = 0x4, [0x7] = 0x5, /* 0110 0111 */
+	[0x5] = 0x6, [0x4] = 0x7, /* 0101 0100 */
+	[0xc] = 0x8, [0xd] = 0x9, /* 1100 1101 */
+	[0xf] = 0xa, [0xe] = 0xb, /* 1111 1110 */
+	[0xa] = 0xc, [0xb] = 0xd, /* 1010 1011 */
+	[0x9] = 0xe, [0x8] = 0xf, /* 1001 1000 */
+};
+uint16_t gpio_axis_4bit_gray_map(struct gpio_event_axis_info *info, uint16_t in)
+{
+	return gpio_axis_4bit_gray_map_table[in];
+}
+
+uint16_t gpio_axis_5bit_singletrack_map_table[] = {
+	[0x10] = 0x00, [0x14] = 0x01, [0x1c] = 0x02, /*     10000 10100 11100 */
+	[0x1e] = 0x03, [0x1a] = 0x04, [0x18] = 0x05, /*     11110 11010 11000 */
+	[0x08] = 0x06, [0x0a] = 0x07, [0x0e] = 0x08, /*    01000 01010 01110  */
+	[0x0f] = 0x09, [0x0d] = 0x0a, [0x0c] = 0x0b, /*    01111 01101 01100  */
+	[0x04] = 0x0c, [0x05] = 0x0d, [0x07] = 0x0e, /*   00100 00101 00111   */
+	[0x17] = 0x0f, [0x16] = 0x10, [0x06] = 0x11, /*   10111 10110 00110   */
+	[0x02] = 0x12, [0x12] = 0x13, [0x13] = 0x14, /*  00010 10010 10011    */
+	[0x1b] = 0x15, [0x0b] = 0x16, [0x03] = 0x17, /*  11011 01011 00011    */
+	[0x01] = 0x18, [0x09] = 0x19, [0x19] = 0x1a, /* 00001 01001 11001     */
+	[0x1d] = 0x1b, [0x15] = 0x1c, [0x11] = 0x1d, /* 11101 10101 10001     */
+};
+uint16_t gpio_axis_5bit_singletrack_map(
+	struct gpio_event_axis_info *info, uint16_t in)
+{
+	return gpio_axis_5bit_singletrack_map_table[in];
+}
+
+static void gpio_event_update_axis(struct gpio_axis_state *as, int report)
+{
+	struct gpio_event_axis_info *ai = as->info;
+	int i;
+	int change;
+	uint16_t state = 0;
+	uint16_t pos;
+	uint16_t old_pos = as->pos;
+	for (i = ai->count - 1; i >= 0; i--)
+		state = (state << 1) | gpio_get_value(ai->gpio[i]);
+	pos = ai->map(ai, state);
+	if (ai->flags & GPIOEAF_PRINT_RAW)
+		pr_info("axis %d-%d raw %x, pos %d -> %d\n",
+			ai->type, ai->code, state, old_pos, pos);
+	if (report && pos != old_pos) {
+		if (ai->type == EV_REL) {
+			change = (ai->decoded_size + pos - old_pos) %
+				  ai->decoded_size;
+			if (change > ai->decoded_size / 2)
+				change -= ai->decoded_size;
+			if (change == ai->decoded_size / 2) {
+				if (ai->flags & GPIOEAF_PRINT_EVENT)
+					pr_info("axis %d-%d unknown direction, "
+						"pos %d -> %d\n", ai->type,
+						ai->code, old_pos, pos);
+				change = 0; /* no closest direction */
+			}
+			if (ai->flags & GPIOEAF_PRINT_EVENT)
+				pr_info("axis %d-%d change %d\n",
+					ai->type, ai->code, change);
+			input_report_rel(as->input_devs->dev[ai->dev],
+						ai->code, change);
+		} else {
+			if (ai->flags & GPIOEAF_PRINT_EVENT)
+				pr_info("axis %d-%d now %d\n",
+					ai->type, ai->code, pos);
+			input_event(as->input_devs->dev[ai->dev],
+					ai->type, ai->code, pos);
+		}
+		input_sync(as->input_devs->dev[ai->dev]);
+	}
+	as->pos = pos;
+}
+
+static irqreturn_t gpio_axis_irq_handler(int irq, void *dev_id)
+{
+	struct gpio_axis_state *as = dev_id;
+	gpio_event_update_axis(as, 1);
+	return IRQ_HANDLED;
+}
+
+int gpio_event_axis_func(struct gpio_event_input_devs *input_devs,
+			 struct gpio_event_info *info, void **data, int func)
+{
+	int ret;
+	int i;
+	int irq;
+	struct gpio_event_axis_info *ai;
+	struct gpio_axis_state *as;
+
+	ai = container_of(info, struct gpio_event_axis_info, info);
+	if (func == GPIO_EVENT_FUNC_SUSPEND) {
+		for (i = 0; i < ai->count; i++)
+			disable_irq(gpio_to_irq(ai->gpio[i]));
+		return 0;
+	}
+	if (func == GPIO_EVENT_FUNC_RESUME) {
+		for (i = 0; i < ai->count; i++)
+			enable_irq(gpio_to_irq(ai->gpio[i]));
+		return 0;
+	}
+
+	if (func == GPIO_EVENT_FUNC_INIT) {
+		*data = as = kmalloc(sizeof(*as), GFP_KERNEL);
+		if (as == NULL) {
+			ret = -ENOMEM;
+			goto err_alloc_axis_state_failed;
+		}
+		as->input_devs = input_devs;
+		as->info = ai;
+		if (ai->dev >= input_devs->count) {
+			pr_err("gpio_event_axis: bad device index %d >= %d "
+				"for %d:%d\n", ai->dev, input_devs->count,
+				ai->type, ai->code);
+			ret = -EINVAL;
+			goto err_bad_device_index;
+		}
+
+		input_set_capability(input_devs->dev[ai->dev],
+				     ai->type, ai->code);
+		if (ai->type == EV_ABS) {
+			input_set_abs_params(input_devs->dev[ai->dev], ai->code,
+					     0, ai->decoded_size - 1, 0, 0);
+		}
+		for (i = 0; i < ai->count; i++) {
+			ret = gpio_request(ai->gpio[i], "gpio_event_axis");
+			if (ret < 0)
+				goto err_request_gpio_failed;
+			ret = gpio_direction_input(ai->gpio[i]);
+			if (ret < 0)
+				goto err_gpio_direction_input_failed;
+			ret = irq = gpio_to_irq(ai->gpio[i]);
+			if (ret < 0)
+				goto err_get_irq_num_failed;
+			ret = request_irq(irq, gpio_axis_irq_handler,
+					  IRQF_TRIGGER_RISING |
+					  IRQF_TRIGGER_FALLING,
+					  "gpio_event_axis", as);
+			if (ret < 0)
+				goto err_request_irq_failed;
+		}
+		gpio_event_update_axis(as, 0);
+		return 0;
+	}
+
+	ret = 0;
+	as = *data;
+	for (i = ai->count - 1; i >= 0; i--) {
+		free_irq(gpio_to_irq(ai->gpio[i]), as);
+err_request_irq_failed:
+err_get_irq_num_failed:
+err_gpio_direction_input_failed:
+		gpio_free(ai->gpio[i]);
+err_request_gpio_failed:
+		;
+	}
+err_bad_device_index:
+	kfree(as);
+	*data = NULL;
+err_alloc_axis_state_failed:
+	return ret;
+}
diff --git a/drivers/input/misc/gpio_event.c b/drivers/input/misc/gpio_event.c
new file mode 100644
index 0000000..90f07eb
--- /dev/null
+++ b/drivers/input/misc/gpio_event.c
@@ -0,0 +1,228 @@
+/* drivers/input/misc/gpio_event.c
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/input.h>
+#include <linux/gpio_event.h>
+#include <linux/hrtimer.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+struct gpio_event {
+	struct gpio_event_input_devs *input_devs;
+	const struct gpio_event_platform_data *info;
+	void *state[0];
+};
+
+static int gpio_input_event(
+	struct input_dev *dev, unsigned int type, unsigned int code, int value)
+{
+	int i;
+	int devnr;
+	int ret = 0;
+	int tmp_ret;
+	struct gpio_event_info **ii;
+	struct gpio_event *ip = input_get_drvdata(dev);
+
+	for (devnr = 0; devnr < ip->input_devs->count; devnr++)
+		if (ip->input_devs->dev[devnr] == dev)
+			break;
+	if (devnr == ip->input_devs->count) {
+		pr_err("gpio_input_event: unknown device %p\n", dev);
+		return -EIO;
+	}
+
+	for (i = 0, ii = ip->info->info; i < ip->info->info_count; i++, ii++) {
+		if ((*ii)->event) {
+			tmp_ret = (*ii)->event(ip->input_devs, *ii,
+						&ip->state[i],
+						devnr, type, code, value);
+			if (tmp_ret)
+				ret = tmp_ret;
+		}
+	}
+	return ret;
+}
+
+static int gpio_event_call_all_func(struct gpio_event *ip, int func)
+{
+	int i;
+	int ret;
+	struct gpio_event_info **ii;
+
+	if (func == GPIO_EVENT_FUNC_INIT || func == GPIO_EVENT_FUNC_RESUME) {
+		ii = ip->info->info;
+		for (i = 0; i < ip->info->info_count; i++, ii++) {
+			if ((*ii)->func == NULL) {
+				ret = -ENODEV;
+				pr_err("gpio_event_probe: Incomplete pdata, "
+					"no function\n");
+				goto err_no_func;
+			}
+			if (func == GPIO_EVENT_FUNC_RESUME && (*ii)->no_suspend)
+				continue;
+			ret = (*ii)->func(ip->input_devs, *ii, &ip->state[i],
+					  func);
+			if (ret) {
+				pr_err("gpio_event_probe: function failed\n");
+				goto err_func_failed;
+			}
+		}
+		return 0;
+	}
+
+	ret = 0;
+	i = ip->info->info_count;
+	ii = ip->info->info + i;
+	while (i > 0) {
+		i--;
+		ii--;
+		if ((func & ~1) == GPIO_EVENT_FUNC_SUSPEND && (*ii)->no_suspend)
+			continue;
+		(*ii)->func(ip->input_devs, *ii, &ip->state[i], func & ~1);
+err_func_failed:
+err_no_func:
+		;
+	}
+	return ret;
+}
+
+static void __maybe_unused gpio_event_suspend(struct gpio_event *ip)
+{
+	gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_SUSPEND);
+	if (ip->info->power)
+		ip->info->power(ip->info, 0);
+}
+
+static void __maybe_unused gpio_event_resume(struct gpio_event *ip)
+{
+	if (ip->info->power)
+		ip->info->power(ip->info, 1);
+	gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_RESUME);
+}
+
+static int gpio_event_probe(struct platform_device *pdev)
+{
+	int err;
+	struct gpio_event *ip;
+	struct gpio_event_platform_data *event_info;
+	int dev_count = 1;
+	int i;
+	int registered = 0;
+
+	event_info = pdev->dev.platform_data;
+	if (event_info == NULL) {
+		pr_err("gpio_event_probe: No pdata\n");
+		return -ENODEV;
+	}
+	if ((!event_info->name && !event_info->names[0]) ||
+	    !event_info->info || !event_info->info_count) {
+		pr_err("gpio_event_probe: Incomplete pdata\n");
+		return -ENODEV;
+	}
+	if (!event_info->name)
+		while (event_info->names[dev_count])
+			dev_count++;
+	ip = kzalloc(sizeof(*ip) +
+		     sizeof(ip->state[0]) * event_info->info_count +
+		     sizeof(*ip->input_devs) +
+		     sizeof(ip->input_devs->dev[0]) * dev_count, GFP_KERNEL);
+	if (ip == NULL) {
+		err = -ENOMEM;
+		pr_err("gpio_event_probe: Failed to allocate private data\n");
+		goto err_kp_alloc_failed;
+	}
+	ip->input_devs = (void*)&ip->state[event_info->info_count];
+	platform_set_drvdata(pdev, ip);
+
+	for (i = 0; i < dev_count; i++) {
+		struct input_dev *input_dev = input_allocate_device();
+		if (input_dev == NULL) {
+			err = -ENOMEM;
+			pr_err("gpio_event_probe: "
+				"Failed to allocate input device\n");
+			goto err_input_dev_alloc_failed;
+		}
+		input_set_drvdata(input_dev, ip);
+		input_dev->name = event_info->name ?
+					event_info->name : event_info->names[i];
+		input_dev->event = gpio_input_event;
+		ip->input_devs->dev[i] = input_dev;
+	}
+	ip->input_devs->count = dev_count;
+	ip->info = event_info;
+	if (event_info->power)
+		ip->info->power(ip->info, 1);
+
+	err = gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_INIT);
+	if (err)
+		goto err_call_all_func_failed;
+
+	for (i = 0; i < dev_count; i++) {
+		err = input_register_device(ip->input_devs->dev[i]);
+		if (err) {
+			pr_err("gpio_event_probe: Unable to register %s "
+				"input device\n", ip->input_devs->dev[i]->name);
+			goto err_input_register_device_failed;
+		}
+		registered++;
+	}
+
+	return 0;
+
+err_input_register_device_failed:
+	gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_UNINIT);
+err_call_all_func_failed:
+	if (event_info->power)
+		ip->info->power(ip->info, 0);
+	for (i = 0; i < registered; i++)
+		input_unregister_device(ip->input_devs->dev[i]);
+	for (i = dev_count - 1; i >= registered; i--) {
+		input_free_device(ip->input_devs->dev[i]);
+err_input_dev_alloc_failed:
+		;
+	}
+	kfree(ip);
+err_kp_alloc_failed:
+	return err;
+}
+
+static int gpio_event_remove(struct platform_device *pdev)
+{
+	struct gpio_event *ip = platform_get_drvdata(pdev);
+	int i;
+
+	gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_UNINIT);
+	if (ip->info->power)
+		ip->info->power(ip->info, 0);
+	for (i = 0; i < ip->input_devs->count; i++)
+		input_unregister_device(ip->input_devs->dev[i]);
+	kfree(ip);
+	return 0;
+}
+
+static struct platform_driver gpio_event_driver = {
+	.probe		= gpio_event_probe,
+	.remove		= gpio_event_remove,
+	.driver		= {
+		.name	= GPIO_EVENT_DEV_NAME,
+	},
+};
+
+module_platform_driver(gpio_event_driver);
+
+MODULE_DESCRIPTION("GPIO Event Driver");
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/input/misc/gpio_input.c b/drivers/input/misc/gpio_input.c
new file mode 100644
index 0000000..eefd027
--- /dev/null
+++ b/drivers/input/misc/gpio_input.c
@@ -0,0 +1,390 @@
+/* drivers/input/misc/gpio_input.c
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/gpio_event.h>
+#include <linux/hrtimer.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/pm_wakeup.h>
+
+enum {
+	DEBOUNCE_UNSTABLE     = BIT(0),	/* Got irq, while debouncing */
+	DEBOUNCE_PRESSED      = BIT(1),
+	DEBOUNCE_NOTPRESSED   = BIT(2),
+	DEBOUNCE_WAIT_IRQ     = BIT(3),	/* Stable irq state */
+	DEBOUNCE_POLL         = BIT(4),	/* Stable polling state */
+
+	DEBOUNCE_UNKNOWN =
+		DEBOUNCE_PRESSED | DEBOUNCE_NOTPRESSED,
+};
+
+struct gpio_key_state {
+	struct gpio_input_state *ds;
+	uint8_t debounce;
+};
+
+struct gpio_input_state {
+	struct gpio_event_input_devs *input_devs;
+	const struct gpio_event_input_info *info;
+	struct hrtimer timer;
+	int use_irq;
+	int debounce_count;
+	spinlock_t irq_lock;
+	struct wakeup_source *ws;
+	struct gpio_key_state key_state[0];
+};
+
+static enum hrtimer_restart gpio_event_input_timer_func(struct hrtimer *timer)
+{
+	int i;
+	int pressed;
+	struct gpio_input_state *ds =
+		container_of(timer, struct gpio_input_state, timer);
+	unsigned gpio_flags = ds->info->flags;
+	unsigned npolarity;
+	int nkeys = ds->info->keymap_size;
+	const struct gpio_event_direct_entry *key_entry;
+	struct gpio_key_state *key_state;
+	unsigned long irqflags;
+	uint8_t debounce;
+	bool sync_needed;
+
+#if 0
+	key_entry = kp->keys_info->keymap;
+	key_state = kp->key_state;
+	for (i = 0; i < nkeys; i++, key_entry++, key_state++)
+		pr_info("gpio_read_detect_status %d %d\n", key_entry->gpio,
+			gpio_read_detect_status(key_entry->gpio));
+#endif
+	key_entry = ds->info->keymap;
+	key_state = ds->key_state;
+	sync_needed = false;
+	spin_lock_irqsave(&ds->irq_lock, irqflags);
+	for (i = 0; i < nkeys; i++, key_entry++, key_state++) {
+		debounce = key_state->debounce;
+		if (debounce & DEBOUNCE_WAIT_IRQ)
+			continue;
+		if (key_state->debounce & DEBOUNCE_UNSTABLE) {
+			debounce = key_state->debounce = DEBOUNCE_UNKNOWN;
+			enable_irq(gpio_to_irq(key_entry->gpio));
+			if (gpio_flags & GPIOEDF_PRINT_KEY_UNSTABLE)
+				pr_info("gpio_keys_scan_keys: key %x-%x, %d "
+					"(%d) continue debounce\n",
+					ds->info->type, key_entry->code,
+					i, key_entry->gpio);
+		}
+		npolarity = !(gpio_flags & GPIOEDF_ACTIVE_HIGH);
+		pressed = gpio_get_value(key_entry->gpio) ^ npolarity;
+		if (debounce & DEBOUNCE_POLL) {
+			if (pressed == !(debounce & DEBOUNCE_PRESSED)) {
+				ds->debounce_count++;
+				key_state->debounce = DEBOUNCE_UNKNOWN;
+				if (gpio_flags & GPIOEDF_PRINT_KEY_DEBOUNCE)
+					pr_info("gpio_keys_scan_keys: key %x-"
+						"%x, %d (%d) start debounce\n",
+						ds->info->type, key_entry->code,
+						i, key_entry->gpio);
+			}
+			continue;
+		}
+		if (pressed && (debounce & DEBOUNCE_NOTPRESSED)) {
+			if (gpio_flags & GPIOEDF_PRINT_KEY_DEBOUNCE)
+				pr_info("gpio_keys_scan_keys: key %x-%x, %d "
+					"(%d) debounce pressed 1\n",
+					ds->info->type, key_entry->code,
+					i, key_entry->gpio);
+			key_state->debounce = DEBOUNCE_PRESSED;
+			continue;
+		}
+		if (!pressed && (debounce & DEBOUNCE_PRESSED)) {
+			if (gpio_flags & GPIOEDF_PRINT_KEY_DEBOUNCE)
+				pr_info("gpio_keys_scan_keys: key %x-%x, %d "
+					"(%d) debounce pressed 0\n",
+					ds->info->type, key_entry->code,
+					i, key_entry->gpio);
+			key_state->debounce = DEBOUNCE_NOTPRESSED;
+			continue;
+		}
+		/* key is stable */
+		ds->debounce_count--;
+		if (ds->use_irq)
+			key_state->debounce |= DEBOUNCE_WAIT_IRQ;
+		else
+			key_state->debounce |= DEBOUNCE_POLL;
+		if (gpio_flags & GPIOEDF_PRINT_KEYS)
+			pr_info("gpio_keys_scan_keys: key %x-%x, %d (%d) "
+				"changed to %d\n", ds->info->type,
+				key_entry->code, i, key_entry->gpio, pressed);
+		input_event(ds->input_devs->dev[key_entry->dev], ds->info->type,
+			    key_entry->code, pressed);
+		sync_needed = true;
+	}
+	if (sync_needed) {
+		for (i = 0; i < ds->input_devs->count; i++)
+			input_sync(ds->input_devs->dev[i]);
+	}
+
+#if 0
+	key_entry = kp->keys_info->keymap;
+	key_state = kp->key_state;
+	for (i = 0; i < nkeys; i++, key_entry++, key_state++) {
+		pr_info("gpio_read_detect_status %d %d\n", key_entry->gpio,
+			gpio_read_detect_status(key_entry->gpio));
+	}
+#endif
+
+	if (ds->debounce_count)
+		hrtimer_start(timer, ds->info->debounce_time, HRTIMER_MODE_REL);
+	else if (!ds->use_irq)
+		hrtimer_start(timer, ds->info->poll_time, HRTIMER_MODE_REL);
+	else
+		__pm_relax(ds->ws);
+
+	spin_unlock_irqrestore(&ds->irq_lock, irqflags);
+
+	return HRTIMER_NORESTART;
+}
+
+static irqreturn_t gpio_event_input_irq_handler(int irq, void *dev_id)
+{
+	struct gpio_key_state *ks = dev_id;
+	struct gpio_input_state *ds = ks->ds;
+	int keymap_index = ks - ds->key_state;
+	const struct gpio_event_direct_entry *key_entry;
+	unsigned long irqflags;
+	int pressed;
+
+	if (!ds->use_irq)
+		return IRQ_HANDLED;
+
+	key_entry = &ds->info->keymap[keymap_index];
+
+	if (ds->info->debounce_time.tv64) {
+		spin_lock_irqsave(&ds->irq_lock, irqflags);
+		if (ks->debounce & DEBOUNCE_WAIT_IRQ) {
+			ks->debounce = DEBOUNCE_UNKNOWN;
+			if (ds->debounce_count++ == 0) {
+				__pm_stay_awake(ds->ws);
+				hrtimer_start(
+					&ds->timer, ds->info->debounce_time,
+					HRTIMER_MODE_REL);
+			}
+			if (ds->info->flags & GPIOEDF_PRINT_KEY_DEBOUNCE)
+				pr_info("gpio_event_input_irq_handler: "
+					"key %x-%x, %d (%d) start debounce\n",
+					ds->info->type, key_entry->code,
+					keymap_index, key_entry->gpio);
+		} else {
+			disable_irq_nosync(irq);
+			ks->debounce = DEBOUNCE_UNSTABLE;
+		}
+		spin_unlock_irqrestore(&ds->irq_lock, irqflags);
+	} else {
+		pressed = gpio_get_value(key_entry->gpio) ^
+			!(ds->info->flags & GPIOEDF_ACTIVE_HIGH);
+		if (ds->info->flags & GPIOEDF_PRINT_KEYS)
+			pr_info("gpio_event_input_irq_handler: key %x-%x, %d "
+				"(%d) changed to %d\n",
+				ds->info->type, key_entry->code, keymap_index,
+				key_entry->gpio, pressed);
+		input_event(ds->input_devs->dev[key_entry->dev], ds->info->type,
+			    key_entry->code, pressed);
+		input_sync(ds->input_devs->dev[key_entry->dev]);
+	}
+	return IRQ_HANDLED;
+}
+
+static int gpio_event_input_request_irqs(struct gpio_input_state *ds)
+{
+	int i;
+	int err;
+	unsigned int irq;
+	unsigned long req_flags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING;
+
+	for (i = 0; i < ds->info->keymap_size; i++) {
+		err = irq = gpio_to_irq(ds->info->keymap[i].gpio);
+		if (err < 0)
+			goto err_gpio_get_irq_num_failed;
+		err = request_irq(irq, gpio_event_input_irq_handler,
+				  req_flags, "gpio_keys", &ds->key_state[i]);
+		if (err) {
+			pr_err("gpio_event_input_request_irqs: request_irq "
+				"failed for input %d, irq %d\n",
+				ds->info->keymap[i].gpio, irq);
+			goto err_request_irq_failed;
+		}
+		if (ds->info->info.no_suspend) {
+			err = enable_irq_wake(irq);
+			if (err) {
+				pr_err("gpio_event_input_request_irqs: "
+					"enable_irq_wake failed for input %d, "
+					"irq %d\n",
+					ds->info->keymap[i].gpio, irq);
+				goto err_enable_irq_wake_failed;
+			}
+		}
+	}
+	return 0;
+
+	for (i = ds->info->keymap_size - 1; i >= 0; i--) {
+		irq = gpio_to_irq(ds->info->keymap[i].gpio);
+		if (ds->info->info.no_suspend)
+			disable_irq_wake(irq);
+err_enable_irq_wake_failed:
+		free_irq(irq, &ds->key_state[i]);
+err_request_irq_failed:
+err_gpio_get_irq_num_failed:
+		;
+	}
+	return err;
+}
+
+int gpio_event_input_func(struct gpio_event_input_devs *input_devs,
+			struct gpio_event_info *info, void **data, int func)
+{
+	int ret;
+	int i;
+	unsigned long irqflags;
+	struct gpio_event_input_info *di;
+	struct gpio_input_state *ds = *data;
+	char *wlname;
+
+	di = container_of(info, struct gpio_event_input_info, info);
+
+	if (func == GPIO_EVENT_FUNC_SUSPEND) {
+		if (ds->use_irq)
+			for (i = 0; i < di->keymap_size; i++)
+				disable_irq(gpio_to_irq(di->keymap[i].gpio));
+		hrtimer_cancel(&ds->timer);
+		return 0;
+	}
+	if (func == GPIO_EVENT_FUNC_RESUME) {
+		spin_lock_irqsave(&ds->irq_lock, irqflags);
+		if (ds->use_irq)
+			for (i = 0; i < di->keymap_size; i++)
+				enable_irq(gpio_to_irq(di->keymap[i].gpio));
+		hrtimer_start(&ds->timer, ktime_set(0, 0), HRTIMER_MODE_REL);
+		spin_unlock_irqrestore(&ds->irq_lock, irqflags);
+		return 0;
+	}
+
+	if (func == GPIO_EVENT_FUNC_INIT) {
+		if (ktime_to_ns(di->poll_time) <= 0)
+			di->poll_time = ktime_set(0, 20 * NSEC_PER_MSEC);
+
+		*data = ds = kzalloc(sizeof(*ds) + sizeof(ds->key_state[0]) *
+					di->keymap_size, GFP_KERNEL);
+		if (ds == NULL) {
+			ret = -ENOMEM;
+			pr_err("gpio_event_input_func: "
+				"Failed to allocate private data\n");
+			goto err_ds_alloc_failed;
+		}
+		ds->debounce_count = di->keymap_size;
+		ds->input_devs = input_devs;
+		ds->info = di;
+		wlname = kasprintf(GFP_KERNEL, "gpio_input:%s%s",
+				   input_devs->dev[0]->name,
+				   (input_devs->count > 1) ? "..." : "");
+
+		ds->ws = wakeup_source_register(wlname);
+		kfree(wlname);
+		if (!ds->ws) {
+			ret = -ENOMEM;
+			pr_err("gpio_event_input_func: "
+				"Failed to allocate wakeup source\n");
+			goto err_ws_failed;
+		}
+
+		spin_lock_init(&ds->irq_lock);
+
+		for (i = 0; i < di->keymap_size; i++) {
+			int dev = di->keymap[i].dev;
+			if (dev >= input_devs->count) {
+				pr_err("gpio_event_input_func: bad device "
+					"index %d >= %d for key code %d\n",
+					dev, input_devs->count,
+					di->keymap[i].code);
+				ret = -EINVAL;
+				goto err_bad_keymap;
+			}
+			input_set_capability(input_devs->dev[dev], di->type,
+					     di->keymap[i].code);
+			ds->key_state[i].ds = ds;
+			ds->key_state[i].debounce = DEBOUNCE_UNKNOWN;
+		}
+
+		for (i = 0; i < di->keymap_size; i++) {
+			ret = gpio_request(di->keymap[i].gpio, "gpio_kp_in");
+			if (ret) {
+				pr_err("gpio_event_input_func: gpio_request "
+					"failed for %d\n", di->keymap[i].gpio);
+				goto err_gpio_request_failed;
+			}
+			ret = gpio_direction_input(di->keymap[i].gpio);
+			if (ret) {
+				pr_err("gpio_event_input_func: "
+					"gpio_direction_input failed for %d\n",
+					di->keymap[i].gpio);
+				goto err_gpio_configure_failed;
+			}
+		}
+
+		ret = gpio_event_input_request_irqs(ds);
+
+		spin_lock_irqsave(&ds->irq_lock, irqflags);
+		ds->use_irq = ret == 0;
+
+		pr_info("GPIO Input Driver: Start gpio inputs for %s%s in %s "
+			"mode\n", input_devs->dev[0]->name,
+			(input_devs->count > 1) ? "..." : "",
+			ret == 0 ? "interrupt" : "polling");
+
+		hrtimer_init(&ds->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		ds->timer.function = gpio_event_input_timer_func;
+		hrtimer_start(&ds->timer, ktime_set(0, 0), HRTIMER_MODE_REL);
+		spin_unlock_irqrestore(&ds->irq_lock, irqflags);
+		return 0;
+	}
+
+	ret = 0;
+	spin_lock_irqsave(&ds->irq_lock, irqflags);
+	hrtimer_cancel(&ds->timer);
+	if (ds->use_irq) {
+		for (i = di->keymap_size - 1; i >= 0; i--) {
+			int irq = gpio_to_irq(di->keymap[i].gpio);
+			if (ds->info->info.no_suspend)
+				disable_irq_wake(irq);
+			free_irq(irq, &ds->key_state[i]);
+		}
+	}
+	spin_unlock_irqrestore(&ds->irq_lock, irqflags);
+
+	for (i = di->keymap_size - 1; i >= 0; i--) {
+err_gpio_configure_failed:
+		gpio_free(di->keymap[i].gpio);
+err_gpio_request_failed:
+		;
+	}
+err_bad_keymap:
+	wakeup_source_unregister(ds->ws);
+err_ws_failed:
+	kfree(ds);
+err_ds_alloc_failed:
+	return ret;
+}
diff --git a/drivers/input/misc/gpio_matrix.c b/drivers/input/misc/gpio_matrix.c
new file mode 100644
index 0000000..08769dd
--- /dev/null
+++ b/drivers/input/misc/gpio_matrix.c
@@ -0,0 +1,440 @@
+/* drivers/input/misc/gpio_matrix.c
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/gpio_event.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+
+struct gpio_kp {
+	struct gpio_event_input_devs *input_devs;
+	struct gpio_event_matrix_info *keypad_info;
+	struct hrtimer timer;
+	struct wakeup_source wake_src;
+	int current_output;
+	unsigned int use_irq:1;
+	unsigned int key_state_changed:1;
+	unsigned int last_key_state_changed:1;
+	unsigned int some_keys_pressed:2;
+	unsigned int disabled_irq:1;
+	unsigned long keys_pressed[0];
+};
+
+static void clear_phantom_key(struct gpio_kp *kp, int out, int in)
+{
+	struct gpio_event_matrix_info *mi = kp->keypad_info;
+	int key_index = out * mi->ninputs + in;
+	unsigned short keyentry = mi->keymap[key_index];
+	unsigned short keycode = keyentry & MATRIX_KEY_MASK;
+	unsigned short dev = keyentry >> MATRIX_CODE_BITS;
+
+	if (!test_bit(keycode, kp->input_devs->dev[dev]->key)) {
+		if (mi->flags & GPIOKPF_PRINT_PHANTOM_KEYS)
+			pr_info("gpiomatrix: phantom key %x, %d-%d (%d-%d) "
+				"cleared\n", keycode, out, in,
+				mi->output_gpios[out], mi->input_gpios[in]);
+		__clear_bit(key_index, kp->keys_pressed);
+	} else {
+		if (mi->flags & GPIOKPF_PRINT_PHANTOM_KEYS)
+			pr_info("gpiomatrix: phantom key %x, %d-%d (%d-%d) "
+				"not cleared\n", keycode, out, in,
+				mi->output_gpios[out], mi->input_gpios[in]);
+	}
+}
+
+static int restore_keys_for_input(struct gpio_kp *kp, int out, int in)
+{
+	int rv = 0;
+	int key_index;
+
+	key_index = out * kp->keypad_info->ninputs + in;
+	while (out < kp->keypad_info->noutputs) {
+		if (test_bit(key_index, kp->keys_pressed)) {
+			rv = 1;
+			clear_phantom_key(kp, out, in);
+		}
+		key_index += kp->keypad_info->ninputs;
+		out++;
+	}
+	return rv;
+}
+
+static void remove_phantom_keys(struct gpio_kp *kp)
+{
+	int out, in, inp;
+	int key_index;
+
+	if (kp->some_keys_pressed < 3)
+		return;
+
+	for (out = 0; out < kp->keypad_info->noutputs; out++) {
+		inp = -1;
+		key_index = out * kp->keypad_info->ninputs;
+		for (in = 0; in < kp->keypad_info->ninputs; in++, key_index++) {
+			if (test_bit(key_index, kp->keys_pressed)) {
+				if (inp == -1) {
+					inp = in;
+					continue;
+				}
+				if (inp >= 0) {
+					if (!restore_keys_for_input(kp, out + 1,
+									inp))
+						break;
+					clear_phantom_key(kp, out, inp);
+					inp = -2;
+				}
+				restore_keys_for_input(kp, out, in);
+			}
+		}
+	}
+}
+
+static void report_key(struct gpio_kp *kp, int key_index, int out, int in)
+{
+	struct gpio_event_matrix_info *mi = kp->keypad_info;
+	int pressed = test_bit(key_index, kp->keys_pressed);
+	unsigned short keyentry = mi->keymap[key_index];
+	unsigned short keycode = keyentry & MATRIX_KEY_MASK;
+	unsigned short dev = keyentry >> MATRIX_CODE_BITS;
+
+	if (pressed != test_bit(keycode, kp->input_devs->dev[dev]->key)) {
+		if (keycode == KEY_RESERVED) {
+			if (mi->flags & GPIOKPF_PRINT_UNMAPPED_KEYS)
+				pr_info("gpiomatrix: unmapped key, %d-%d "
+					"(%d-%d) changed to %d\n",
+					out, in, mi->output_gpios[out],
+					mi->input_gpios[in], pressed);
+		} else {
+			if (mi->flags & GPIOKPF_PRINT_MAPPED_KEYS)
+				pr_info("gpiomatrix: key %x, %d-%d (%d-%d) "
+					"changed to %d\n", keycode,
+					out, in, mi->output_gpios[out],
+					mi->input_gpios[in], pressed);
+			input_report_key(kp->input_devs->dev[dev], keycode, pressed);
+		}
+	}
+}
+
+static void report_sync(struct gpio_kp *kp)
+{
+	int i;
+
+	for (i = 0; i < kp->input_devs->count; i++)
+		input_sync(kp->input_devs->dev[i]);
+}
+
+static enum hrtimer_restart gpio_keypad_timer_func(struct hrtimer *timer)
+{
+	int out, in;
+	int key_index;
+	int gpio;
+	struct gpio_kp *kp = container_of(timer, struct gpio_kp, timer);
+	struct gpio_event_matrix_info *mi = kp->keypad_info;
+	unsigned gpio_keypad_flags = mi->flags;
+	unsigned polarity = !!(gpio_keypad_flags & GPIOKPF_ACTIVE_HIGH);
+
+	out = kp->current_output;
+	if (out == mi->noutputs) {
+		out = 0;
+		kp->last_key_state_changed = kp->key_state_changed;
+		kp->key_state_changed = 0;
+		kp->some_keys_pressed = 0;
+	} else {
+		key_index = out * mi->ninputs;
+		for (in = 0; in < mi->ninputs; in++, key_index++) {
+			gpio = mi->input_gpios[in];
+			if (gpio_get_value(gpio) ^ !polarity) {
+				if (kp->some_keys_pressed < 3)
+					kp->some_keys_pressed++;
+				kp->key_state_changed |= !__test_and_set_bit(
+						key_index, kp->keys_pressed);
+			} else
+				kp->key_state_changed |= __test_and_clear_bit(
+						key_index, kp->keys_pressed);
+		}
+		gpio = mi->output_gpios[out];
+		if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE)
+			gpio_set_value(gpio, !polarity);
+		else
+			gpio_direction_input(gpio);
+		out++;
+	}
+	kp->current_output = out;
+	if (out < mi->noutputs) {
+		gpio = mi->output_gpios[out];
+		if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE)
+			gpio_set_value(gpio, polarity);
+		else
+			gpio_direction_output(gpio, polarity);
+		hrtimer_start(timer, mi->settle_time, HRTIMER_MODE_REL);
+		return HRTIMER_NORESTART;
+	}
+	if (gpio_keypad_flags & GPIOKPF_DEBOUNCE) {
+		if (kp->key_state_changed) {
+			hrtimer_start(&kp->timer, mi->debounce_delay,
+				      HRTIMER_MODE_REL);
+			return HRTIMER_NORESTART;
+		}
+		kp->key_state_changed = kp->last_key_state_changed;
+	}
+	if (kp->key_state_changed) {
+		if (gpio_keypad_flags & GPIOKPF_REMOVE_SOME_PHANTOM_KEYS)
+			remove_phantom_keys(kp);
+		key_index = 0;
+		for (out = 0; out < mi->noutputs; out++)
+			for (in = 0; in < mi->ninputs; in++, key_index++)
+				report_key(kp, key_index, out, in);
+		report_sync(kp);
+	}
+	if (!kp->use_irq || kp->some_keys_pressed) {
+		hrtimer_start(timer, mi->poll_time, HRTIMER_MODE_REL);
+		return HRTIMER_NORESTART;
+	}
+
+	/* No keys are pressed, reenable interrupt */
+	for (out = 0; out < mi->noutputs; out++) {
+		if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE)
+			gpio_set_value(mi->output_gpios[out], polarity);
+		else
+			gpio_direction_output(mi->output_gpios[out], polarity);
+	}
+	for (in = 0; in < mi->ninputs; in++)
+		enable_irq(gpio_to_irq(mi->input_gpios[in]));
+	__pm_relax(&kp->wake_src);
+	return HRTIMER_NORESTART;
+}
+
+static irqreturn_t gpio_keypad_irq_handler(int irq_in, void *dev_id)
+{
+	int i;
+	struct gpio_kp *kp = dev_id;
+	struct gpio_event_matrix_info *mi = kp->keypad_info;
+	unsigned gpio_keypad_flags = mi->flags;
+
+	if (!kp->use_irq) {
+		/* ignore interrupt while registering the handler */
+		kp->disabled_irq = 1;
+		disable_irq_nosync(irq_in);
+		return IRQ_HANDLED;
+	}
+
+	for (i = 0; i < mi->ninputs; i++)
+		disable_irq_nosync(gpio_to_irq(mi->input_gpios[i]));
+	for (i = 0; i < mi->noutputs; i++) {
+		if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE)
+			gpio_set_value(mi->output_gpios[i],
+				!(gpio_keypad_flags & GPIOKPF_ACTIVE_HIGH));
+		else
+			gpio_direction_input(mi->output_gpios[i]);
+	}
+	__pm_stay_awake(&kp->wake_src);
+	hrtimer_start(&kp->timer, ktime_set(0, 0), HRTIMER_MODE_REL);
+	return IRQ_HANDLED;
+}
+
+static int gpio_keypad_request_irqs(struct gpio_kp *kp)
+{
+	int i;
+	int err;
+	unsigned int irq;
+	unsigned long request_flags;
+	struct gpio_event_matrix_info *mi = kp->keypad_info;
+
+	switch (mi->flags & (GPIOKPF_ACTIVE_HIGH|GPIOKPF_LEVEL_TRIGGERED_IRQ)) {
+	default:
+		request_flags = IRQF_TRIGGER_FALLING;
+		break;
+	case GPIOKPF_ACTIVE_HIGH:
+		request_flags = IRQF_TRIGGER_RISING;
+		break;
+	case GPIOKPF_LEVEL_TRIGGERED_IRQ:
+		request_flags = IRQF_TRIGGER_LOW;
+		break;
+	case GPIOKPF_LEVEL_TRIGGERED_IRQ | GPIOKPF_ACTIVE_HIGH:
+		request_flags = IRQF_TRIGGER_HIGH;
+		break;
+	}
+
+	for (i = 0; i < mi->ninputs; i++) {
+		err = irq = gpio_to_irq(mi->input_gpios[i]);
+		if (err < 0)
+			goto err_gpio_get_irq_num_failed;
+		err = request_irq(irq, gpio_keypad_irq_handler, request_flags,
+				  "gpio_kp", kp);
+		if (err) {
+			pr_err("gpiomatrix: request_irq failed for input %d, "
+				"irq %d\n", mi->input_gpios[i], irq);
+			goto err_request_irq_failed;
+		}
+		err = enable_irq_wake(irq);
+		if (err) {
+			pr_err("gpiomatrix: set_irq_wake failed for input %d, "
+				"irq %d\n", mi->input_gpios[i], irq);
+		}
+		disable_irq(irq);
+		if (kp->disabled_irq) {
+			kp->disabled_irq = 0;
+			enable_irq(irq);
+		}
+	}
+	return 0;
+
+	for (i = mi->noutputs - 1; i >= 0; i--) {
+		free_irq(gpio_to_irq(mi->input_gpios[i]), kp);
+err_request_irq_failed:
+err_gpio_get_irq_num_failed:
+		;
+	}
+	return err;
+}
+
+int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs,
+	struct gpio_event_info *info, void **data, int func)
+{
+	int i;
+	int err;
+	int key_count;
+	struct gpio_kp *kp;
+	struct gpio_event_matrix_info *mi;
+
+	mi = container_of(info, struct gpio_event_matrix_info, info);
+	if (func == GPIO_EVENT_FUNC_SUSPEND || func == GPIO_EVENT_FUNC_RESUME) {
+		/* TODO: disable scanning */
+		return 0;
+	}
+
+	if (func == GPIO_EVENT_FUNC_INIT) {
+		if (mi->keymap == NULL ||
+		   mi->input_gpios == NULL ||
+		   mi->output_gpios == NULL) {
+			err = -ENODEV;
+			pr_err("gpiomatrix: Incomplete pdata\n");
+			goto err_invalid_platform_data;
+		}
+		key_count = mi->ninputs * mi->noutputs;
+
+		*data = kp = kzalloc(sizeof(*kp) + sizeof(kp->keys_pressed[0]) *
+				     BITS_TO_LONGS(key_count), GFP_KERNEL);
+		if (kp == NULL) {
+			err = -ENOMEM;
+			pr_err("gpiomatrix: Failed to allocate private data\n");
+			goto err_kp_alloc_failed;
+		}
+		kp->input_devs = input_devs;
+		kp->keypad_info = mi;
+		for (i = 0; i < key_count; i++) {
+			unsigned short keyentry = mi->keymap[i];
+			unsigned short keycode = keyentry & MATRIX_KEY_MASK;
+			unsigned short dev = keyentry >> MATRIX_CODE_BITS;
+			if (dev >= input_devs->count) {
+				pr_err("gpiomatrix: bad device index %d >= "
+					"%d for key code %d\n",
+					dev, input_devs->count, keycode);
+				err = -EINVAL;
+				goto err_bad_keymap;
+			}
+			if (keycode && keycode <= KEY_MAX)
+				input_set_capability(input_devs->dev[dev],
+							EV_KEY, keycode);
+		}
+
+		for (i = 0; i < mi->noutputs; i++) {
+			err = gpio_request(mi->output_gpios[i], "gpio_kp_out");
+			if (err) {
+				pr_err("gpiomatrix: gpio_request failed for "
+					"output %d\n", mi->output_gpios[i]);
+				goto err_request_output_gpio_failed;
+			}
+			if (gpio_cansleep(mi->output_gpios[i])) {
+				pr_err("gpiomatrix: unsupported output gpio %d,"
+					" can sleep\n", mi->output_gpios[i]);
+				err = -EINVAL;
+				goto err_output_gpio_configure_failed;
+			}
+			if (mi->flags & GPIOKPF_DRIVE_INACTIVE)
+				err = gpio_direction_output(mi->output_gpios[i],
+					!(mi->flags & GPIOKPF_ACTIVE_HIGH));
+			else
+				err = gpio_direction_input(mi->output_gpios[i]);
+			if (err) {
+				pr_err("gpiomatrix: gpio_configure failed for "
+					"output %d\n", mi->output_gpios[i]);
+				goto err_output_gpio_configure_failed;
+			}
+		}
+		for (i = 0; i < mi->ninputs; i++) {
+			err = gpio_request(mi->input_gpios[i], "gpio_kp_in");
+			if (err) {
+				pr_err("gpiomatrix: gpio_request failed for "
+					"input %d\n", mi->input_gpios[i]);
+				goto err_request_input_gpio_failed;
+			}
+			err = gpio_direction_input(mi->input_gpios[i]);
+			if (err) {
+				pr_err("gpiomatrix: gpio_direction_input failed"
+					" for input %d\n", mi->input_gpios[i]);
+				goto err_gpio_direction_input_failed;
+			}
+		}
+		kp->current_output = mi->noutputs;
+		kp->key_state_changed = 1;
+
+		hrtimer_init(&kp->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		kp->timer.function = gpio_keypad_timer_func;
+		wakeup_source_init(&kp->wake_src, "gpio_kp");
+		err = gpio_keypad_request_irqs(kp);
+		kp->use_irq = err == 0;
+
+		pr_info("GPIO Matrix Keypad Driver: Start keypad matrix for "
+			"%s%s in %s mode\n", input_devs->dev[0]->name,
+			(input_devs->count > 1) ? "..." : "",
+			kp->use_irq ? "interrupt" : "polling");
+
+		if (kp->use_irq)
+			__pm_stay_awake(&kp->wake_src);
+		hrtimer_start(&kp->timer, ktime_set(0, 0), HRTIMER_MODE_REL);
+
+		return 0;
+	}
+
+	err = 0;
+	kp = *data;
+
+	if (kp->use_irq)
+		for (i = mi->noutputs - 1; i >= 0; i--)
+			free_irq(gpio_to_irq(mi->input_gpios[i]), kp);
+
+	hrtimer_cancel(&kp->timer);
+	wakeup_source_trash(&kp->wake_src);
+	for (i = mi->noutputs - 1; i >= 0; i--) {
+err_gpio_direction_input_failed:
+		gpio_free(mi->input_gpios[i]);
+err_request_input_gpio_failed:
+		;
+	}
+	for (i = mi->noutputs - 1; i >= 0; i--) {
+err_output_gpio_configure_failed:
+		gpio_free(mi->output_gpios[i]);
+err_request_output_gpio_failed:
+		;
+	}
+err_bad_keymap:
+	kfree(kp);
+err_kp_alloc_failed:
+err_invalid_platform_data:
+	return err;
+}
diff --git a/drivers/input/misc/gpio_output.c b/drivers/input/misc/gpio_output.c
new file mode 100644
index 0000000..2aac2fa
--- /dev/null
+++ b/drivers/input/misc/gpio_output.c
@@ -0,0 +1,97 @@
+/* drivers/input/misc/gpio_output.c
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/gpio_event.h>
+
+int gpio_event_output_event(
+	struct gpio_event_input_devs *input_devs, struct gpio_event_info *info,
+	void **data, unsigned int dev, unsigned int type,
+	unsigned int code, int value)
+{
+	int i;
+	struct gpio_event_output_info *oi;
+	oi = container_of(info, struct gpio_event_output_info, info);
+	if (type != oi->type)
+		return 0;
+	if (!(oi->flags & GPIOEDF_ACTIVE_HIGH))
+		value = !value;
+	for (i = 0; i < oi->keymap_size; i++)
+		if (dev == oi->keymap[i].dev && code == oi->keymap[i].code)
+			gpio_set_value(oi->keymap[i].gpio, value);
+	return 0;
+}
+
+int gpio_event_output_func(
+	struct gpio_event_input_devs *input_devs, struct gpio_event_info *info,
+	void **data, int func)
+{
+	int ret;
+	int i;
+	struct gpio_event_output_info *oi;
+	oi = container_of(info, struct gpio_event_output_info, info);
+
+	if (func == GPIO_EVENT_FUNC_SUSPEND || func == GPIO_EVENT_FUNC_RESUME)
+		return 0;
+
+	if (func == GPIO_EVENT_FUNC_INIT) {
+		int output_level = !(oi->flags & GPIOEDF_ACTIVE_HIGH);
+
+		for (i = 0; i < oi->keymap_size; i++) {
+			int dev = oi->keymap[i].dev;
+			if (dev >= input_devs->count) {
+				pr_err("gpio_event_output_func: bad device "
+					"index %d >= %d for key code %d\n",
+					dev, input_devs->count,
+					oi->keymap[i].code);
+				ret = -EINVAL;
+				goto err_bad_keymap;
+			}
+			input_set_capability(input_devs->dev[dev], oi->type,
+					     oi->keymap[i].code);
+		}
+
+		for (i = 0; i < oi->keymap_size; i++) {
+			ret = gpio_request(oi->keymap[i].gpio,
+					   "gpio_event_output");
+			if (ret) {
+				pr_err("gpio_event_output_func: gpio_request "
+					"failed for %d\n", oi->keymap[i].gpio);
+				goto err_gpio_request_failed;
+			}
+			ret = gpio_direction_output(oi->keymap[i].gpio,
+						    output_level);
+			if (ret) {
+				pr_err("gpio_event_output_func: "
+					"gpio_direction_output failed for %d\n",
+					oi->keymap[i].gpio);
+				goto err_gpio_direction_output_failed;
+			}
+		}
+		return 0;
+	}
+
+	ret = 0;
+	for (i = oi->keymap_size - 1; i >= 0; i--) {
+err_gpio_direction_output_failed:
+		gpio_free(oi->keymap[i].gpio);
+err_gpio_request_failed:
+		;
+	}
+err_bad_keymap:
+	return ret;
+}
+
diff --git a/drivers/input/misc/keychord.c b/drivers/input/misc/keychord.c
new file mode 100644
index 0000000..fdcc146
--- /dev/null
+++ b/drivers/input/misc/keychord.c
@@ -0,0 +1,467 @@
+/*
+ *  drivers/input/misc/keychord.c
+ *
+ * Copyright (C) 2008 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+*/
+
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/keychord.h>
+#include <linux/sched.h>
+
+#define KEYCHORD_NAME		"keychord"
+#define BUFFER_SIZE			16
+
+MODULE_AUTHOR("Mike Lockwood <lockwood@android.com>");
+MODULE_DESCRIPTION("Key chord input driver");
+MODULE_SUPPORTED_DEVICE("keychord");
+MODULE_LICENSE("GPL");
+
+#define NEXT_KEYCHORD(kc) ((struct input_keychord *) \
+		((char *)kc + sizeof(struct input_keychord) + \
+		kc->count * sizeof(kc->keycodes[0])))
+
+struct keychord_device {
+	struct input_handler	input_handler;
+	int			registered;
+
+	/* list of keychords to monitor */
+	struct input_keychord	*keychords;
+	int			keychord_count;
+
+	/* bitmask of keys contained in our keychords */
+	unsigned long keybit[BITS_TO_LONGS(KEY_CNT)];
+	/* current state of the keys */
+	unsigned long keystate[BITS_TO_LONGS(KEY_CNT)];
+	/* number of keys that are currently pressed */
+	int key_down;
+
+	/* second input_device_id is needed for null termination */
+	struct input_device_id  device_ids[2];
+
+	spinlock_t		lock;
+	wait_queue_head_t	waitq;
+	unsigned char		head;
+	unsigned char		tail;
+	__u16			buff[BUFFER_SIZE];
+	/* Bit to serialize writes to this device */
+#define KEYCHORD_BUSY			0x01
+	unsigned long		flags;
+	wait_queue_head_t	write_waitq;
+};
+
+static int check_keychord(struct keychord_device *kdev,
+		struct input_keychord *keychord)
+{
+	int i;
+
+	if (keychord->count != kdev->key_down)
+		return 0;
+
+	for (i = 0; i < keychord->count; i++) {
+		if (!test_bit(keychord->keycodes[i], kdev->keystate))
+			return 0;
+	}
+
+	/* we have a match */
+	return 1;
+}
+
+static void keychord_event(struct input_handle *handle, unsigned int type,
+			   unsigned int code, int value)
+{
+	struct keychord_device *kdev = handle->private;
+	struct input_keychord *keychord;
+	unsigned long flags;
+	int i, got_chord = 0;
+
+	if (type != EV_KEY || code >= KEY_MAX)
+		return;
+
+	spin_lock_irqsave(&kdev->lock, flags);
+	/* do nothing if key state did not change */
+	if (!test_bit(code, kdev->keystate) == !value)
+		goto done;
+	__change_bit(code, kdev->keystate);
+	if (value)
+		kdev->key_down++;
+	else
+		kdev->key_down--;
+
+	/* don't notify on key up */
+	if (!value)
+		goto done;
+	/* ignore this event if it is not one of the keys we are monitoring */
+	if (!test_bit(code, kdev->keybit))
+		goto done;
+
+	keychord = kdev->keychords;
+	if (!keychord)
+		goto done;
+
+	/* check to see if the keyboard state matches any keychords */
+	for (i = 0; i < kdev->keychord_count; i++) {
+		if (check_keychord(kdev, keychord)) {
+			kdev->buff[kdev->head] = keychord->id;
+			kdev->head = (kdev->head + 1) % BUFFER_SIZE;
+			got_chord = 1;
+			break;
+		}
+		/* skip to next keychord */
+		keychord = NEXT_KEYCHORD(keychord);
+	}
+
+done:
+	spin_unlock_irqrestore(&kdev->lock, flags);
+
+	if (got_chord) {
+		pr_info("keychord: got keychord id %d. Any tasks: %d\n",
+			keychord->id,
+			!list_empty_careful(&kdev->waitq.task_list));
+		wake_up_interruptible(&kdev->waitq);
+	}
+}
+
+static int keychord_connect(struct input_handler *handler,
+					  struct input_dev *dev,
+					  const struct input_device_id *id)
+{
+	int i, ret;
+	struct input_handle *handle;
+	struct keychord_device *kdev =
+		container_of(handler, struct keychord_device, input_handler);
+
+	/*
+	 * ignore this input device if it does not contain any keycodes
+	 * that we are monitoring
+	 */
+	for (i = 0; i < KEY_MAX; i++) {
+		if (test_bit(i, kdev->keybit) && test_bit(i, dev->keybit))
+			break;
+	}
+	if (i == KEY_MAX)
+		return -ENODEV;
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (!handle)
+		return -ENOMEM;
+
+	handle->dev = dev;
+	handle->handler = handler;
+	handle->name = KEYCHORD_NAME;
+	handle->private = kdev;
+
+	ret = input_register_handle(handle);
+	if (ret)
+		goto err_input_register_handle;
+
+	ret = input_open_device(handle);
+	if (ret)
+		goto err_input_open_device;
+
+	pr_info("keychord: using input dev %s for fevent\n", dev->name);
+	return 0;
+
+err_input_open_device:
+	input_unregister_handle(handle);
+err_input_register_handle:
+	kfree(handle);
+	return ret;
+}
+
+static void keychord_disconnect(struct input_handle *handle)
+{
+	input_close_device(handle);
+	input_unregister_handle(handle);
+	kfree(handle);
+}
+
+/*
+ * keychord_read is used to read keychord events from the driver
+ */
+static ssize_t keychord_read(struct file *file, char __user *buffer,
+		size_t count, loff_t *ppos)
+{
+	struct keychord_device *kdev = file->private_data;
+	__u16   id;
+	int retval;
+	unsigned long flags;
+
+	if (count < sizeof(id))
+		return -EINVAL;
+	count = sizeof(id);
+
+	if (kdev->head == kdev->tail && (file->f_flags & O_NONBLOCK))
+		return -EAGAIN;
+
+	retval = wait_event_interruptible(kdev->waitq,
+			kdev->head != kdev->tail);
+	if (retval)
+		return retval;
+
+	spin_lock_irqsave(&kdev->lock, flags);
+	/* pop a keychord ID off the queue */
+	id = kdev->buff[kdev->tail];
+	kdev->tail = (kdev->tail + 1) % BUFFER_SIZE;
+	spin_unlock_irqrestore(&kdev->lock, flags);
+
+	if (copy_to_user(buffer, &id, count))
+		return -EFAULT;
+
+	return count;
+}
+
+/*
+ * serializes writes on a device. can use mutex_lock_interruptible()
+ * for this particular use case as well - a matter of preference.
+ */
+static int
+keychord_write_lock(struct keychord_device *kdev)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kdev->lock, flags);
+	while (kdev->flags & KEYCHORD_BUSY) {
+		spin_unlock_irqrestore(&kdev->lock, flags);
+		ret = wait_event_interruptible(kdev->write_waitq,
+			       ((kdev->flags & KEYCHORD_BUSY) == 0));
+		if (ret)
+			return ret;
+		spin_lock_irqsave(&kdev->lock, flags);
+	}
+	kdev->flags |= KEYCHORD_BUSY;
+	spin_unlock_irqrestore(&kdev->lock, flags);
+	return 0;
+}
+
+static void
+keychord_write_unlock(struct keychord_device *kdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kdev->lock, flags);
+	kdev->flags &= ~KEYCHORD_BUSY;
+	spin_unlock_irqrestore(&kdev->lock, flags);
+	wake_up_interruptible(&kdev->write_waitq);
+}
+
+/*
+ * keychord_write is used to configure the driver
+ */
+static ssize_t keychord_write(struct file *file, const char __user *buffer,
+		size_t count, loff_t *ppos)
+{
+	struct keychord_device *kdev = file->private_data;
+	struct input_keychord *keychords = 0;
+	struct input_keychord *keychord;
+	int ret, i, key;
+	unsigned long flags;
+	size_t resid = count;
+	size_t key_bytes;
+
+	if (count < sizeof(struct input_keychord))
+		return -EINVAL;
+	keychords = kzalloc(count, GFP_KERNEL);
+	if (!keychords)
+		return -ENOMEM;
+
+	/* read list of keychords from userspace */
+	if (copy_from_user(keychords, buffer, count)) {
+		kfree(keychords);
+		return -EFAULT;
+	}
+
+	/*
+	 * Serialize writes to this device to prevent various races.
+	 * 1) writers racing here could do duplicate input_unregister_handler()
+	 *    calls, resulting in attempting to unlink a node from a list that
+	 *    does not exist.
+	 * 2) writers racing here could do duplicate input_register_handler() calls
+	 *    below, resulting in a duplicate insertion of a node into the list.
+	 * 3) a double kfree of keychords can occur (in the event that
+	 *    input_register_handler() fails below.
+	 */
+	ret = keychord_write_lock(kdev);
+	if (ret) {
+		kfree(keychords);
+		return ret;
+	}
+
+	/* unregister handler before changing configuration */
+	if (kdev->registered) {
+		input_unregister_handler(&kdev->input_handler);
+		kdev->registered = 0;
+	}
+
+	spin_lock_irqsave(&kdev->lock, flags);
+	/* clear any existing configuration */
+	kfree(kdev->keychords);
+	kdev->keychords = 0;
+	kdev->keychord_count = 0;
+	kdev->key_down = 0;
+	memset(kdev->keybit, 0, sizeof(kdev->keybit));
+	memset(kdev->keystate, 0, sizeof(kdev->keystate));
+	kdev->head = kdev->tail = 0;
+
+	keychord = keychords;
+
+	while (resid > 0) {
+		/* Is the entire keychord entry header present ? */
+		if (resid < sizeof(struct input_keychord)) {
+			pr_err("keychord: Insufficient bytes present for header %zu\n",
+			       resid);
+			goto err_unlock_return;
+		}
+		resid -= sizeof(struct input_keychord);
+		if (keychord->count <= 0) {
+			pr_err("keychord: invalid keycode count %d\n",
+				keychord->count);
+			goto err_unlock_return;
+		}
+		key_bytes = keychord->count * sizeof(keychord->keycodes[0]);
+		/* Do we have all the expected keycodes ? */
+		if (resid < key_bytes) {
+			pr_err("keychord: Insufficient bytes present for keycount %zu\n",
+			       resid);
+			goto err_unlock_return;
+		}
+		resid -= key_bytes;
+
+		if (keychord->version != KEYCHORD_VERSION) {
+			pr_err("keychord: unsupported version %d\n",
+				keychord->version);
+			goto err_unlock_return;
+		}
+
+		/* keep track of the keys we are monitoring in keybit */
+		for (i = 0; i < keychord->count; i++) {
+			key = keychord->keycodes[i];
+			if (key < 0 || key >= KEY_CNT) {
+				pr_err("keychord: keycode %d out of range\n",
+					key);
+				goto err_unlock_return;
+			}
+			__set_bit(key, kdev->keybit);
+		}
+
+		kdev->keychord_count++;
+		keychord = NEXT_KEYCHORD(keychord);
+	}
+
+	kdev->keychords = keychords;
+	spin_unlock_irqrestore(&kdev->lock, flags);
+
+	ret = input_register_handler(&kdev->input_handler);
+	if (ret) {
+		kfree(keychords);
+		kdev->keychords = 0;
+		keychord_write_unlock(kdev);
+		return ret;
+	}
+	kdev->registered = 1;
+
+	keychord_write_unlock(kdev);
+
+	return count;
+
+err_unlock_return:
+	spin_unlock_irqrestore(&kdev->lock, flags);
+	kfree(keychords);
+	keychord_write_unlock(kdev);
+	return -EINVAL;
+}
+
+static unsigned int keychord_poll(struct file *file, poll_table *wait)
+{
+	struct keychord_device *kdev = file->private_data;
+
+	poll_wait(file, &kdev->waitq, wait);
+
+	if (kdev->head != kdev->tail)
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+static int keychord_open(struct inode *inode, struct file *file)
+{
+	struct keychord_device *kdev;
+
+	kdev = kzalloc(sizeof(struct keychord_device), GFP_KERNEL);
+	if (!kdev)
+		return -ENOMEM;
+
+	spin_lock_init(&kdev->lock);
+	init_waitqueue_head(&kdev->waitq);
+	init_waitqueue_head(&kdev->write_waitq);
+
+	kdev->input_handler.event = keychord_event;
+	kdev->input_handler.connect = keychord_connect;
+	kdev->input_handler.disconnect = keychord_disconnect;
+	kdev->input_handler.name = KEYCHORD_NAME;
+	kdev->input_handler.id_table = kdev->device_ids;
+
+	kdev->device_ids[0].flags = INPUT_DEVICE_ID_MATCH_EVBIT;
+	__set_bit(EV_KEY, kdev->device_ids[0].evbit);
+
+	file->private_data = kdev;
+
+	return 0;
+}
+
+static int keychord_release(struct inode *inode, struct file *file)
+{
+	struct keychord_device *kdev = file->private_data;
+
+	if (kdev->registered)
+		input_unregister_handler(&kdev->input_handler);
+	kfree(kdev->keychords);
+	kfree(kdev);
+
+	return 0;
+}
+
+static const struct file_operations keychord_fops = {
+	.owner		= THIS_MODULE,
+	.open		= keychord_open,
+	.release	= keychord_release,
+	.read		= keychord_read,
+	.write		= keychord_write,
+	.poll		= keychord_poll,
+};
+
+static struct miscdevice keychord_misc = {
+	.fops		= &keychord_fops,
+	.name		= KEYCHORD_NAME,
+	.minor		= MISC_DYNAMIC_MINOR,
+};
+
+static int __init keychord_init(void)
+{
+	return misc_register(&keychord_misc);
+}
+
+static void __exit keychord_exit(void)
+{
+	misc_deregister(&keychord_misc);
+}
+
+module_init(keychord_init);
+module_exit(keychord_exit);
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 197e29d..e7b8f49 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -459,6 +459,21 @@
 
 	  If unsure, say N.
 
+config DM_VERITY_HASH_PREFETCH_MIN_SIZE_128
+	bool "Prefetch size 128"
+
+config DM_VERITY_HASH_PREFETCH_MIN_SIZE
+	int "Verity hash prefetch minimum size"
+	depends on DM_VERITY
+	range 1 4096
+	default 128 if DM_VERITY_HASH_PREFETCH_MIN_SIZE_128
+	default 1
+	---help---
+	  This sets minimum number of hash blocks to prefetch for dm-verity.
+	  For devices like eMMC, having larger prefetch size like 128 can improve
+	  performance with increased memory consumption for keeping more hashes
+	  in RAM.
+
 config DM_VERITY_FEC
 	bool "Verity forward error correction support"
 	depends on DM_VERITY
@@ -501,4 +516,22 @@
 
 	  If unsure, say N.
 
+config DM_ANDROID_VERITY
+	bool "Android verity target support"
+	depends on DM_VERITY=y
+	depends on X509_CERTIFICATE_PARSER
+	depends on SYSTEM_TRUSTED_KEYRING
+	depends on PUBLIC_KEY_ALGO_RSA
+	depends on KEYS
+	depends on ASYMMETRIC_KEY_TYPE
+	depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
+	depends on MD_LINEAR=y
+	select DM_VERITY_HASH_PREFETCH_MIN_SIZE_128
+	---help---
+	  This device-mapper target is virtually a VERITY target. This
+	  target is setup by reading the metadata contents piggybacked
+	  to the actual data blocks in the block device. The signature
+	  of the metadata contents are verified against the key included
+	  in the system keyring. Upon success, the underlying verity
+	  target is setup.
 endif # MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 3cbda1a..f26ce41 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -59,6 +59,7 @@
 obj-$(CONFIG_DM_CACHE_CLEANER)	+= dm-cache-cleaner.o
 obj-$(CONFIG_DM_ERA)		+= dm-era.o
 obj-$(CONFIG_DM_LOG_WRITES)	+= dm-log-writes.o
+obj-$(CONFIG_DM_ANDROID_VERITY) += dm-android-verity.o
 
 ifeq ($(CONFIG_DM_UEVENT),y)
 dm-mod-objs			+= dm-uevent.o
diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c
new file mode 100644
index 0000000..eb4bdf6
--- /dev/null
+++ b/drivers/md/dm-android-verity.c
@@ -0,0 +1,947 @@
+/*
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/buffer_head.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/device-mapper.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/key.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/of.h>
+#include <linux/reboot.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+
+#include <asm/setup.h>
+#include <crypto/hash.h>
+#include <crypto/public_key.h>
+#include <crypto/sha.h>
+#include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
+
+#include "dm-verity.h"
+#include "dm-android-verity.h"
+
+static char verifiedbootstate[VERITY_COMMANDLINE_PARAM_LENGTH];
+static char veritymode[VERITY_COMMANDLINE_PARAM_LENGTH];
+static char veritykeyid[VERITY_DEFAULT_KEY_ID_LENGTH];
+static char buildvariant[BUILD_VARIANT];
+
+static bool target_added;
+static bool verity_enabled = true;
+struct dentry *debug_dir;
+static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv);
+
+static struct target_type android_verity_target = {
+	.name                   = "android-verity",
+	.version                = {1, 0, 0},
+	.module                 = THIS_MODULE,
+	.ctr                    = android_verity_ctr,
+	.dtr                    = verity_dtr,
+	.map                    = verity_map,
+	.status                 = verity_status,
+	.prepare_ioctl          = verity_prepare_ioctl,
+	.iterate_devices        = verity_iterate_devices,
+	.io_hints               = verity_io_hints,
+};
+
+static int __init verified_boot_state_param(char *line)
+{
+	strlcpy(verifiedbootstate, line, sizeof(verifiedbootstate));
+	return 1;
+}
+
+__setup("androidboot.verifiedbootstate=", verified_boot_state_param);
+
+static int __init verity_mode_param(char *line)
+{
+	strlcpy(veritymode, line, sizeof(veritymode));
+	return 1;
+}
+
+__setup("androidboot.veritymode=", verity_mode_param);
+
+static int __init verity_keyid_param(char *line)
+{
+	strlcpy(veritykeyid, line, sizeof(veritykeyid));
+	return 1;
+}
+
+__setup("veritykeyid=", verity_keyid_param);
+
+static int __init verity_buildvariant(char *line)
+{
+	strlcpy(buildvariant, line, sizeof(buildvariant));
+	return 1;
+}
+
+__setup("buildvariant=", verity_buildvariant);
+
+static inline bool default_verity_key_id(void)
+{
+	return veritykeyid[0] != '\0';
+}
+
+static inline bool is_eng(void)
+{
+	static const char typeeng[]  = "eng";
+
+	return !strncmp(buildvariant, typeeng, sizeof(typeeng));
+}
+
+static inline bool is_userdebug(void)
+{
+	static const char typeuserdebug[]  = "userdebug";
+
+	return !strncmp(buildvariant, typeuserdebug, sizeof(typeuserdebug));
+}
+
+static inline bool is_unlocked(void)
+{
+	static const char unlocked[] = "orange";
+
+	return !strncmp(verifiedbootstate, unlocked, sizeof(unlocked));
+}
+
+static int table_extract_mpi_array(struct public_key_signature *pks,
+				const void *data, size_t len)
+{
+	MPI mpi = mpi_read_raw_data(data, len);
+
+	if (!mpi) {
+		DMERR("Error while allocating mpi array");
+		return -ENOMEM;
+	}
+
+	pks->mpi[0] = mpi;
+	pks->nr_mpi = 1;
+	return 0;
+}
+
+static struct public_key_signature *table_make_digest(
+						enum hash_algo hash,
+						const void *table,
+						unsigned long table_len)
+{
+	struct public_key_signature *pks = NULL;
+	struct crypto_shash *tfm;
+	struct shash_desc *desc;
+	size_t digest_size, desc_size;
+	int ret;
+
+	/* Allocate the hashing algorithm we're going to need and find out how
+	 * big the hash operational data will be.
+	 */
+	tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0);
+	if (IS_ERR(tfm))
+		return ERR_CAST(tfm);
+
+	desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
+	digest_size = crypto_shash_digestsize(tfm);
+
+	/* We allocate the hash operational data storage on the end of out
+	 * context data and the digest output buffer on the end of that.
+	 */
+	ret = -ENOMEM;
+	pks = kzalloc(digest_size + sizeof(*pks) + desc_size, GFP_KERNEL);
+	if (!pks)
+		goto error;
+
+	pks->pkey_hash_algo = hash;
+	pks->digest = (u8 *)pks + sizeof(*pks) + desc_size;
+	pks->digest_size = digest_size;
+
+	desc = (struct shash_desc *)(pks + 1);
+	desc->tfm = tfm;
+	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	ret = crypto_shash_init(desc);
+	if (ret < 0)
+		goto error;
+
+	ret = crypto_shash_finup(desc, table, table_len, pks->digest);
+	if (ret < 0)
+		goto error;
+
+	crypto_free_shash(tfm);
+	return pks;
+
+error:
+	kfree(pks);
+	crypto_free_shash(tfm);
+	return ERR_PTR(ret);
+}
+
+static int read_block_dev(struct bio_read *payload, struct block_device *bdev,
+		sector_t offset, int length)
+{
+	struct bio *bio;
+	int err = 0, i;
+
+	payload->number_of_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+
+	bio = bio_alloc(GFP_KERNEL, payload->number_of_pages);
+	if (!bio) {
+		DMERR("Error while allocating bio");
+		return -ENOMEM;
+	}
+
+	bio->bi_bdev = bdev;
+	bio->bi_iter.bi_sector = offset;
+
+	payload->page_io = kzalloc(sizeof(struct page *) *
+		payload->number_of_pages, GFP_KERNEL);
+	if (!payload->page_io) {
+		DMERR("page_io array alloc failed");
+		err = -ENOMEM;
+		goto free_bio;
+	}
+
+	for (i = 0; i < payload->number_of_pages; i++) {
+		payload->page_io[i] = alloc_page(GFP_KERNEL);
+		if (!payload->page_io[i]) {
+			DMERR("alloc_page failed");
+			err = -ENOMEM;
+			goto free_pages;
+		}
+		if (!bio_add_page(bio, payload->page_io[i], PAGE_SIZE, 0)) {
+			DMERR("bio_add_page error");
+			err = -EIO;
+			goto free_pages;
+		}
+	}
+
+	if (!submit_bio_wait(READ, bio))
+		/* success */
+		goto free_bio;
+	DMERR("bio read failed");
+	err = -EIO;
+
+free_pages:
+	for (i = 0; i < payload->number_of_pages; i++)
+		if (payload->page_io[i])
+			__free_page(payload->page_io[i]);
+	kfree(payload->page_io);
+free_bio:
+	bio_put(bio);
+	return err;
+}
+
+static inline u64 fec_div_round_up(u64 x, u64 y)
+{
+	u64 remainder;
+
+	return div64_u64_rem(x, y, &remainder) +
+		(remainder > 0 ? 1 : 0);
+}
+
+static inline void populate_fec_metadata(struct fec_header *header,
+				struct fec_ecc_metadata *ecc)
+{
+	ecc->blocks = fec_div_round_up(le64_to_cpu(header->inp_size),
+			FEC_BLOCK_SIZE);
+	ecc->roots = le32_to_cpu(header->roots);
+	ecc->start = le64_to_cpu(header->inp_size);
+}
+
+static inline int validate_fec_header(struct fec_header *header, u64 offset)
+{
+	/* move offset to make the sanity check work for backup header
+	 * as well. */
+	offset -= offset % FEC_BLOCK_SIZE;
+	if (le32_to_cpu(header->magic) != FEC_MAGIC ||
+		le32_to_cpu(header->version) != FEC_VERSION ||
+		le32_to_cpu(header->size) != sizeof(struct fec_header) ||
+		le32_to_cpu(header->roots) == 0 ||
+		le32_to_cpu(header->roots) >= FEC_RSM)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int extract_fec_header(dev_t dev, struct fec_header *fec,
+				struct fec_ecc_metadata *ecc)
+{
+	u64 device_size;
+	struct bio_read payload;
+	int i, err = 0;
+	struct block_device *bdev;
+
+	bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL);
+
+	if (IS_ERR_OR_NULL(bdev)) {
+		DMERR("bdev get error");
+		return PTR_ERR(bdev);
+	}
+
+	device_size = i_size_read(bdev->bd_inode);
+
+	/* fec metadata size is a power of 2 and PAGE_SIZE
+	 * is a power of 2 as well.
+	 */
+	BUG_ON(FEC_BLOCK_SIZE > PAGE_SIZE);
+	/* 512 byte sector alignment */
+	BUG_ON(((device_size - FEC_BLOCK_SIZE) % (1 << SECTOR_SHIFT)) != 0);
+
+	err = read_block_dev(&payload, bdev, (device_size -
+		FEC_BLOCK_SIZE) / (1 << SECTOR_SHIFT), FEC_BLOCK_SIZE);
+	if (err) {
+		DMERR("Error while reading verity metadata");
+		goto error;
+	}
+
+	BUG_ON(sizeof(struct fec_header) > PAGE_SIZE);
+	memcpy(fec, page_address(payload.page_io[0]),
+			sizeof(*fec));
+
+	ecc->valid = true;
+	if (validate_fec_header(fec, device_size - FEC_BLOCK_SIZE)) {
+		/* Try the backup header */
+		memcpy(fec, page_address(payload.page_io[0]) + FEC_BLOCK_SIZE
+			- sizeof(*fec) ,
+			sizeof(*fec));
+		if (validate_fec_header(fec, device_size -
+			sizeof(struct fec_header)))
+			ecc->valid = false;
+	}
+
+	if (ecc->valid)
+		populate_fec_metadata(fec, ecc);
+
+	for (i = 0; i < payload.number_of_pages; i++)
+		__free_page(payload.page_io[i]);
+	kfree(payload.page_io);
+
+error:
+	blkdev_put(bdev, FMODE_READ);
+	return err;
+}
+static void find_metadata_offset(struct fec_header *fec,
+		struct block_device *bdev, u64 *metadata_offset)
+{
+	u64 device_size;
+
+	device_size = i_size_read(bdev->bd_inode);
+
+	if (le32_to_cpu(fec->magic) == FEC_MAGIC)
+		*metadata_offset = le64_to_cpu(fec->inp_size) -
+					VERITY_METADATA_SIZE;
+	else
+		*metadata_offset = device_size - VERITY_METADATA_SIZE;
+}
+
+static int find_size(dev_t dev, u64 *device_size)
+{
+	struct block_device *bdev;
+
+	bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL);
+	if (IS_ERR_OR_NULL(bdev)) {
+		DMERR("blkdev_get_by_dev failed");
+		return PTR_ERR(bdev);
+	}
+
+	*device_size = i_size_read(bdev->bd_inode);
+	*device_size >>= SECTOR_SHIFT;
+
+	DMINFO("blkdev size in sectors: %llu", *device_size);
+	blkdev_put(bdev, FMODE_READ);
+	return 0;
+}
+
+static int verify_header(struct android_metadata_header *header)
+{
+	int retval = -EINVAL;
+
+	if (is_userdebug() && le32_to_cpu(header->magic_number) ==
+			VERITY_METADATA_MAGIC_DISABLE)
+		return VERITY_STATE_DISABLE;
+
+	if (!(le32_to_cpu(header->magic_number) ==
+			VERITY_METADATA_MAGIC_NUMBER) ||
+			(le32_to_cpu(header->magic_number) ==
+			VERITY_METADATA_MAGIC_DISABLE)) {
+		DMERR("Incorrect magic number");
+		return retval;
+	}
+
+	if (le32_to_cpu(header->protocol_version) !=
+			VERITY_METADATA_VERSION) {
+		DMERR("Unsupported version %u",
+			le32_to_cpu(header->protocol_version));
+		return retval;
+	}
+
+	return 0;
+}
+
+static int extract_metadata(dev_t dev, struct fec_header *fec,
+				struct android_metadata **metadata,
+				bool *verity_enabled)
+{
+	struct block_device *bdev;
+	struct android_metadata_header *header;
+	int i;
+	u32 table_length, copy_length, offset;
+	u64 metadata_offset;
+	struct bio_read payload;
+	int err = 0;
+
+	bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL);
+
+	if (IS_ERR_OR_NULL(bdev)) {
+		DMERR("blkdev_get_by_dev failed");
+		return -ENODEV;
+	}
+
+	find_metadata_offset(fec, bdev, &metadata_offset);
+
+	/* Verity metadata size is a power of 2 and PAGE_SIZE
+	 * is a power of 2 as well.
+	 * PAGE_SIZE is also a multiple of 512 bytes.
+	*/
+	if (VERITY_METADATA_SIZE > PAGE_SIZE)
+		BUG_ON(VERITY_METADATA_SIZE % PAGE_SIZE != 0);
+	/* 512 byte sector alignment */
+	BUG_ON(metadata_offset % (1 << SECTOR_SHIFT) != 0);
+
+	err = read_block_dev(&payload, bdev, metadata_offset /
+		(1 << SECTOR_SHIFT), VERITY_METADATA_SIZE);
+	if (err) {
+		DMERR("Error while reading verity metadata");
+		goto blkdev_release;
+	}
+
+	header = kzalloc(sizeof(*header), GFP_KERNEL);
+	if (!header) {
+		DMERR("kzalloc failed for header");
+		err = -ENOMEM;
+		goto free_payload;
+	}
+
+	memcpy(header, page_address(payload.page_io[0]),
+		sizeof(*header));
+
+	DMINFO("bio magic_number:%u protocol_version:%d table_length:%u",
+		le32_to_cpu(header->magic_number),
+		le32_to_cpu(header->protocol_version),
+		le32_to_cpu(header->table_length));
+
+	err = verify_header(header);
+
+	if (err == VERITY_STATE_DISABLE) {
+		DMERR("Mounting root with verity disabled");
+		*verity_enabled = false;
+		/* we would still have to read the metadata to figure out
+		 * the data blocks size. Or may be could map the entire
+		 * partition similar to mounting the device.
+		 *
+		 * Reset error as well as the verity_enabled flag is changed.
+		 */
+		err = 0;
+	} else if (err)
+		goto free_header;
+
+	*metadata = kzalloc(sizeof(**metadata), GFP_KERNEL);
+	if (!*metadata) {
+		DMERR("kzalloc for metadata failed");
+		err = -ENOMEM;
+		goto free_header;
+	}
+
+	(*metadata)->header = header;
+	table_length = le32_to_cpu(header->table_length);
+
+	if (table_length == 0 ||
+		table_length > (VERITY_METADATA_SIZE -
+			sizeof(struct android_metadata_header))) {
+		DMERR("table_length too long");
+		err = -EINVAL;
+		goto free_metadata;
+	}
+
+	(*metadata)->verity_table = kzalloc(table_length + 1, GFP_KERNEL);
+
+	if (!(*metadata)->verity_table) {
+		DMERR("kzalloc verity_table failed");
+		err = -ENOMEM;
+		goto free_metadata;
+	}
+
+	if (sizeof(struct android_metadata_header) +
+			table_length <= PAGE_SIZE) {
+		memcpy((*metadata)->verity_table,
+			page_address(payload.page_io[0])
+			+ sizeof(struct android_metadata_header),
+			table_length);
+	} else {
+		copy_length = PAGE_SIZE -
+			sizeof(struct android_metadata_header);
+		memcpy((*metadata)->verity_table,
+			page_address(payload.page_io[0])
+			+ sizeof(struct android_metadata_header),
+			copy_length);
+		table_length -= copy_length;
+		offset = copy_length;
+		i = 1;
+		while (table_length != 0) {
+			if (table_length > PAGE_SIZE) {
+				memcpy((*metadata)->verity_table + offset,
+					page_address(payload.page_io[i]),
+					PAGE_SIZE);
+				offset += PAGE_SIZE;
+				table_length -= PAGE_SIZE;
+			} else {
+				memcpy((*metadata)->verity_table + offset,
+					page_address(payload.page_io[i]),
+					table_length);
+				table_length = 0;
+			}
+			i++;
+		}
+	}
+	(*metadata)->verity_table[table_length] = '\0';
+
+	DMINFO("verity_table: %s", (*metadata)->verity_table);
+	goto free_payload;
+
+free_metadata:
+	kfree(*metadata);
+free_header:
+	kfree(header);
+free_payload:
+	for (i = 0; i < payload.number_of_pages; i++)
+		if (payload.page_io[i])
+			__free_page(payload.page_io[i]);
+	kfree(payload.page_io);
+blkdev_release:
+	blkdev_put(bdev, FMODE_READ);
+	return err;
+}
+
+/* helper functions to extract properties from dts */
+const char *find_dt_value(const char *name)
+{
+	struct device_node *firmware;
+	const char *value;
+
+	firmware = of_find_node_by_path("/firmware/android");
+	if (!firmware)
+		return NULL;
+	value = of_get_property(firmware, name, NULL);
+	of_node_put(firmware);
+
+	return value;
+}
+
+static int verity_mode(void)
+{
+	static const char enforcing[] = "enforcing";
+	static const char verified_mode_prop[] = "veritymode";
+	const char *value;
+
+	value = find_dt_value(verified_mode_prop);
+	if (!value)
+		value = veritymode;
+	if (!strncmp(value, enforcing, sizeof(enforcing) - 1))
+		return DM_VERITY_MODE_RESTART;
+
+	return DM_VERITY_MODE_EIO;
+}
+
+static int verify_verity_signature(char *key_id,
+		struct android_metadata *metadata)
+{
+	key_ref_t key_ref;
+	struct key *key;
+	struct public_key_signature *pks = NULL;
+	int retval = -EINVAL;
+
+	key_ref = keyring_search(make_key_ref(system_trusted_keyring, 1),
+		&key_type_asymmetric, key_id);
+
+	if (IS_ERR(key_ref)) {
+		DMERR("keyring: key not found");
+		return -ENOKEY;
+	}
+
+	key = key_ref_to_ptr(key_ref);
+
+	pks = table_make_digest(HASH_ALGO_SHA256,
+			(const void *)metadata->verity_table,
+			le32_to_cpu(metadata->header->table_length));
+
+	if (IS_ERR(pks)) {
+		DMERR("hashing failed");
+		retval = PTR_ERR(pks);
+		pks = NULL;
+		goto error;
+	}
+
+	retval = table_extract_mpi_array(pks, &metadata->header->signature[0],
+				RSANUMBYTES);
+	if (retval < 0) {
+		DMERR("Error extracting mpi %d", retval);
+		goto error;
+	}
+
+	retval = verify_signature(key, pks);
+	mpi_free(pks->rsa.s);
+error:
+	kfree(pks);
+	key_put(key);
+
+	return retval;
+}
+
+static void handle_error(void)
+{
+	int mode = verity_mode();
+	if (mode == DM_VERITY_MODE_RESTART) {
+		DMERR("triggering restart");
+		kernel_restart("dm-verity device corrupted");
+	} else {
+		DMERR("Mounting verity root failed");
+	}
+}
+
+static inline bool test_mult_overflow(sector_t a, u32 b)
+{
+	sector_t r = (sector_t)~0ULL;
+
+	sector_div(r, b);
+	return a > r;
+}
+
+static int add_as_linear_device(struct dm_target *ti, char *dev)
+{
+	/*Move to linear mapping defines*/
+	char *linear_table_args[DM_LINEAR_ARGS] = {dev,
+					DM_LINEAR_TARGET_OFFSET};
+	int err = 0;
+
+	android_verity_target.dtr = dm_linear_dtr,
+	android_verity_target.map = dm_linear_map,
+	android_verity_target.status = dm_linear_status,
+	android_verity_target.prepare_ioctl = dm_linear_prepare_ioctl,
+	android_verity_target.iterate_devices = dm_linear_iterate_devices,
+        android_verity_target.direct_access = dm_linear_direct_access,
+	android_verity_target.io_hints = NULL;
+
+	set_disk_ro(dm_disk(dm_table_get_md(ti->table)), 0);
+
+	err = dm_linear_ctr(ti, DM_LINEAR_ARGS, linear_table_args);
+
+	if (!err) {
+		DMINFO("Added android-verity as a linear target");
+		target_added = true;
+	} else
+		DMERR("Failed to add android-verity as linear target");
+
+	return err;
+}
+
+static int create_linear_device(struct dm_target *ti, dev_t dev,
+				char *target_device)
+{
+	u64 device_size = 0;
+	int err = find_size(dev, &device_size);
+
+	if (err) {
+		DMERR("error finding bdev size");
+		handle_error();
+		return err;
+	}
+
+	ti->len = device_size;
+	err = add_as_linear_device(ti, target_device);
+	if (err) {
+		handle_error();
+		return err;
+	}
+	verity_enabled = false;
+	return 0;
+}
+
+/*
+ * Target parameters:
+ *	<key id>	Key id of the public key in the system keyring.
+ *			Verity metadata's signature would be verified against
+ *			this. If the key id contains spaces, replace them
+ *			with '#'.
+ *	<block device>	The block device for which dm-verity is being setup.
+ */
+static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+	dev_t uninitialized_var(dev);
+	struct android_metadata *metadata = NULL;
+	int err = 0, i, mode;
+	char *key_id, *table_ptr, dummy, *target_device,
+	*verity_table_args[VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS];
+	/* One for specifying number of opt args and one for mode */
+	sector_t data_sectors;
+	u32 data_block_size;
+	unsigned int no_of_args = VERITY_TABLE_ARGS + 2 + VERITY_TABLE_OPT_FEC_ARGS;
+	struct fec_header uninitialized_var(fec);
+	struct fec_ecc_metadata uninitialized_var(ecc);
+	char buf[FEC_ARG_LENGTH], *buf_ptr;
+	unsigned long long tmpll;
+
+	if (argc == 1) {
+		/* Use the default keyid */
+		if (default_verity_key_id())
+			key_id = veritykeyid;
+		else if (!is_eng()) {
+			DMERR("veritykeyid= is not set");
+			handle_error();
+			return -EINVAL;
+		}
+	} else if (argc == 2)
+		key_id = argv[1];
+	else {
+		DMERR("Incorrect number of arguments");
+		handle_error();
+		return -EINVAL;
+	}
+
+	target_device = argv[0];
+
+	dev = name_to_dev_t(target_device);
+	if (!dev) {
+		DMERR("no dev found for %s", target_device);
+		handle_error();
+		return -EINVAL;
+	}
+
+	if (is_eng())
+		return create_linear_device(ti, dev, target_device);
+
+	strreplace(key_id, '#', ' ');
+
+	DMINFO("key:%s dev:%s", key_id, target_device);
+
+	if (extract_fec_header(dev, &fec, &ecc)) {
+		DMERR("Error while extracting fec header");
+		handle_error();
+		return -EINVAL;
+	}
+
+	err = extract_metadata(dev, &fec, &metadata, &verity_enabled);
+
+	if (err) {
+		/* Allow invalid metadata when the device is unlocked */
+		if (is_unlocked()) {
+			DMWARN("Allow invalid metadata when unlocked");
+			return create_linear_device(ti, dev, target_device);
+		}
+		DMERR("Error while extracting metadata");
+		handle_error();
+		goto free_metadata;
+	}
+
+	if (verity_enabled) {
+		err = verify_verity_signature(key_id, metadata);
+
+		if (err) {
+			DMERR("Signature verification failed");
+			handle_error();
+			goto free_metadata;
+		} else
+			DMINFO("Signature verification success");
+	}
+
+	table_ptr = metadata->verity_table;
+
+	for (i = 0; i < VERITY_TABLE_ARGS; i++) {
+		verity_table_args[i] = strsep(&table_ptr, " ");
+		if (verity_table_args[i] == NULL)
+			break;
+	}
+
+	if (i != VERITY_TABLE_ARGS) {
+		DMERR("Verity table not in the expected format");
+		err = -EINVAL;
+		handle_error();
+		goto free_metadata;
+	}
+
+	if (sscanf(verity_table_args[5], "%llu%c", &tmpll, &dummy)
+							!= 1) {
+		DMERR("Verity table not in the expected format");
+		handle_error();
+		err = -EINVAL;
+		goto free_metadata;
+	}
+
+	if (tmpll > ULONG_MAX) {
+		DMERR("<num_data_blocks> too large. Forgot to turn on CONFIG_LBDAF?");
+		handle_error();
+		err = -EINVAL;
+		goto free_metadata;
+	}
+
+	data_sectors = tmpll;
+
+	if (sscanf(verity_table_args[3], "%u%c", &data_block_size, &dummy)
+								!= 1) {
+		DMERR("Verity table not in the expected format");
+		handle_error();
+		err = -EINVAL;
+		goto free_metadata;
+	}
+
+	if (test_mult_overflow(data_sectors, data_block_size >>
+							SECTOR_SHIFT)) {
+		DMERR("data_sectors too large");
+		handle_error();
+		err = -EOVERFLOW;
+		goto free_metadata;
+	}
+
+	data_sectors *= data_block_size >> SECTOR_SHIFT;
+	DMINFO("Data sectors %llu", (unsigned long long)data_sectors);
+
+	/* update target length */
+	ti->len = data_sectors;
+
+	/* Setup linear target and free */
+	if (!verity_enabled) {
+		err = add_as_linear_device(ti, target_device);
+		goto free_metadata;
+	}
+
+	/*substitute data_dev and hash_dev*/
+	verity_table_args[1] = target_device;
+	verity_table_args[2] = target_device;
+
+	mode = verity_mode();
+
+	if (ecc.valid && IS_BUILTIN(CONFIG_DM_VERITY_FEC)) {
+		if (mode) {
+			err = snprintf(buf, FEC_ARG_LENGTH,
+				"%u %s " VERITY_TABLE_OPT_FEC_FORMAT,
+				1 + VERITY_TABLE_OPT_FEC_ARGS,
+				mode == DM_VERITY_MODE_RESTART ?
+					VERITY_TABLE_OPT_RESTART :
+					VERITY_TABLE_OPT_LOGGING,
+				target_device,
+				ecc.start / FEC_BLOCK_SIZE, ecc.blocks,
+				ecc.roots);
+		} else {
+			err = snprintf(buf, FEC_ARG_LENGTH,
+				"%u " VERITY_TABLE_OPT_FEC_FORMAT,
+				VERITY_TABLE_OPT_FEC_ARGS, target_device,
+				ecc.start / FEC_BLOCK_SIZE, ecc.blocks,
+				ecc.roots);
+		}
+	} else if (mode) {
+		err = snprintf(buf, FEC_ARG_LENGTH,
+			"2 " VERITY_TABLE_OPT_IGNZERO " %s",
+			mode == DM_VERITY_MODE_RESTART ?
+			VERITY_TABLE_OPT_RESTART : VERITY_TABLE_OPT_LOGGING);
+	} else {
+		err = snprintf(buf, FEC_ARG_LENGTH, "1 %s",
+				 "ignore_zero_blocks");
+	}
+
+	if (err < 0 || err >= FEC_ARG_LENGTH)
+		goto free_metadata;
+
+	buf_ptr = buf;
+
+	for (i = VERITY_TABLE_ARGS; i < (VERITY_TABLE_ARGS +
+		VERITY_TABLE_OPT_FEC_ARGS + 2); i++) {
+		verity_table_args[i] = strsep(&buf_ptr, " ");
+		if (verity_table_args[i] == NULL) {
+			no_of_args = i;
+			break;
+		}
+	}
+
+	err = verity_ctr(ti, no_of_args, verity_table_args);
+
+	if (err)
+		DMERR("android-verity failed to mount as verity target");
+	else {
+		target_added = true;
+		DMINFO("android-verity mounted as verity target");
+	}
+
+free_metadata:
+	if (metadata) {
+		kfree(metadata->header);
+		kfree(metadata->verity_table);
+	}
+	kfree(metadata);
+	return err;
+}
+
+static int __init dm_android_verity_init(void)
+{
+	int r;
+	struct dentry *file;
+
+	r = dm_register_target(&android_verity_target);
+	if (r < 0)
+		DMERR("register failed %d", r);
+
+	/* Tracks the status of the last added target */
+	debug_dir = debugfs_create_dir("android_verity", NULL);
+
+	if (IS_ERR_OR_NULL(debug_dir)) {
+		DMERR("Cannot create android_verity debugfs directory: %ld",
+			PTR_ERR(debug_dir));
+		goto end;
+	}
+
+	file = debugfs_create_bool("target_added", S_IRUGO, debug_dir,
+				&target_added);
+
+	if (IS_ERR_OR_NULL(file)) {
+		DMERR("Cannot create android_verity debugfs directory: %ld",
+			PTR_ERR(debug_dir));
+		debugfs_remove_recursive(debug_dir);
+		goto end;
+	}
+
+	file = debugfs_create_bool("verity_enabled", S_IRUGO, debug_dir,
+				&verity_enabled);
+
+	if (IS_ERR_OR_NULL(file)) {
+		DMERR("Cannot create android_verity debugfs directory: %ld",
+			PTR_ERR(debug_dir));
+		debugfs_remove_recursive(debug_dir);
+	}
+
+end:
+	return r;
+}
+
+static void __exit dm_android_verity_exit(void)
+{
+	if (!IS_ERR_OR_NULL(debug_dir))
+		debugfs_remove_recursive(debug_dir);
+
+	dm_unregister_target(&android_verity_target);
+}
+
+module_init(dm_android_verity_init);
+module_exit(dm_android_verity_exit);
diff --git a/drivers/md/dm-android-verity.h b/drivers/md/dm-android-verity.h
new file mode 100644
index 0000000..c8d7ab64
--- /dev/null
+++ b/drivers/md/dm-android-verity.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef DM_ANDROID_VERITY_H
+#define DM_ANDROID_VERITY_H
+
+#include <crypto/sha.h>
+
+#define RSANUMBYTES 256
+#define VERITY_METADATA_MAGIC_NUMBER 0xb001b001
+#define VERITY_METADATA_MAGIC_DISABLE 0x46464f56
+#define VERITY_METADATA_VERSION 0
+#define VERITY_STATE_DISABLE 1
+#define DATA_BLOCK_SIZE (4 * 1024)
+#define VERITY_METADATA_SIZE (8 * DATA_BLOCK_SIZE)
+#define VERITY_TABLE_ARGS 10
+#define VERITY_COMMANDLINE_PARAM_LENGTH 20
+#define BUILD_VARIANT 20
+
+/*
+ * <subject>:<sha1-id> is the format for the identifier.
+ * subject can either be the Common Name(CN) + Organization Name(O) or
+ * just the CN if the it is prefixed with O
+ * From https://tools.ietf.org/html/rfc5280#appendix-A
+ * ub-organization-name-length INTEGER ::= 64
+ * ub-common-name-length INTEGER ::= 64
+ *
+ * http://lxr.free-electrons.com/source/crypto/asymmetric_keys/x509_cert_parser.c?v=3.9#L278
+ * ctx->o_size + 2 + ctx->cn_size + 1
+ * + 41 characters for ":" and sha1 id
+ * 64 + 2 + 64 + 1 + 1 + 40 (172)
+ * setting VERITY_DEFAULT_KEY_ID_LENGTH to 200 characters.
+ */
+#define VERITY_DEFAULT_KEY_ID_LENGTH 200
+
+#define FEC_MAGIC 0xFECFECFE
+#define FEC_BLOCK_SIZE (4 * 1024)
+#define FEC_VERSION 0
+#define FEC_RSM 255
+#define FEC_ARG_LENGTH 300
+
+#define VERITY_TABLE_OPT_RESTART "restart_on_corruption"
+#define VERITY_TABLE_OPT_LOGGING "ignore_corruption"
+#define VERITY_TABLE_OPT_IGNZERO "ignore_zero_blocks"
+
+#define VERITY_TABLE_OPT_FEC_FORMAT \
+	"use_fec_from_device %s fec_start %llu fec_blocks %llu fec_roots %u ignore_zero_blocks"
+#define VERITY_TABLE_OPT_FEC_ARGS 9
+
+#define VERITY_DEBUG 0
+
+#define DM_MSG_PREFIX                   "android-verity"
+
+#define DM_LINEAR_ARGS 2
+#define DM_LINEAR_TARGET_OFFSET "0"
+
+/*
+ * There can be two formats.
+ * if fec is present
+ * <data_blocks> <verity_tree> <verity_metdata_32K><fec_data><fec_data_4K>
+ * if fec is not present
+ * <data_blocks> <verity_tree> <verity_metdata_32K>
+ */
+struct fec_header {
+	__le32 magic;
+	__le32 version;
+	__le32 size;
+	__le32 roots;
+	__le32 fec_size;
+	__le64 inp_size;
+	u8 hash[SHA256_DIGEST_SIZE];
+} __attribute__((packed));
+
+struct android_metadata_header {
+	__le32 magic_number;
+	__le32 protocol_version;
+	char signature[RSANUMBYTES];
+	__le32 table_length;
+};
+
+struct android_metadata {
+	struct android_metadata_header *header;
+	char *verity_table;
+};
+
+struct fec_ecc_metadata {
+	bool valid;
+	u32 roots;
+	u64 blocks;
+	u64 rounds;
+	u64 start;
+};
+
+struct bio_read {
+	struct page **page_io;
+	int number_of_pages;
+};
+
+extern struct target_type linear_target;
+
+extern void dm_linear_dtr(struct dm_target *ti);
+extern int dm_linear_map(struct dm_target *ti, struct bio *bio);
+extern void dm_linear_status(struct dm_target *ti, status_type_t type,
+			unsigned status_flags, char *result, unsigned maxlen);
+extern int dm_linear_prepare_ioctl(struct dm_target *ti,
+                struct block_device **bdev, fmode_t *mode);
+extern int dm_linear_iterate_devices(struct dm_target *ti,
+			iterate_devices_callout_fn fn, void *data);
+extern int dm_linear_ctr(struct dm_target *ti, unsigned int argc, char **argv);
+extern long dm_linear_direct_access(struct dm_target *ti, sector_t sector,
+                                 void **kaddr, pfn_t *pfn, long size);
+#endif /* DM_ANDROID_VERITY_H */
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 0aedd0e..7a5b75f 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1866,16 +1866,24 @@
 	}
 
 	ret = -ENOMEM;
-	cc->io_queue = alloc_workqueue("kcryptd_io", WQ_MEM_RECLAIM, 1);
+	cc->io_queue = alloc_workqueue("kcryptd_io",
+				       WQ_HIGHPRI |
+				       WQ_MEM_RECLAIM,
+				       1);
 	if (!cc->io_queue) {
 		ti->error = "Couldn't create kcryptd io queue";
 		goto bad;
 	}
 
 	if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
-		cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+		cc->crypt_queue = alloc_workqueue("kcryptd",
+						  WQ_HIGHPRI |
+						  WQ_MEM_RECLAIM, 1);
 	else
-		cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
+		cc->crypt_queue = alloc_workqueue("kcryptd",
+						  WQ_HIGHPRI |
+						  WQ_MEM_RECLAIM |
+						  WQ_UNBOUND,
 						  num_online_cpus());
 	if (!cc->crypt_queue) {
 		ti->error = "Couldn't create kcryptd queue";
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 6964b252..446d76e 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1921,6 +1921,45 @@
 	dm_hash_exit();
 }
 
+
+/**
+ * dm_ioctl_export - Permanently export a mapped device via the ioctl interface
+ * @md: Pointer to mapped_device
+ * @name: Buffer (size DM_NAME_LEN) for name
+ * @uuid: Buffer (size DM_UUID_LEN) for uuid or NULL if not desired
+ */
+int dm_ioctl_export(struct mapped_device *md, const char *name,
+		    const char *uuid)
+{
+	int r = 0;
+	struct hash_cell *hc;
+
+	if (!md) {
+		r = -ENXIO;
+		goto out;
+	}
+
+	/* The name and uuid can only be set once. */
+	mutex_lock(&dm_hash_cells_mutex);
+	hc = dm_get_mdptr(md);
+	mutex_unlock(&dm_hash_cells_mutex);
+	if (hc) {
+		DMERR("%s: already exported", dm_device_name(md));
+		r = -ENXIO;
+		goto out;
+	}
+
+	r = dm_hash_insert(name, uuid, md);
+	if (r) {
+		DMERR("%s: could not bind to '%s'", dm_device_name(md), name);
+		goto out;
+	}
+
+	/* Let udev know we've changed. */
+	dm_kobject_uevent(md, KOBJ_CHANGE, dm_get_event_nr(md));
+out:
+	return r;
+}
 /**
  * dm_copy_name_and_uuid - Copy mapped device name & uuid into supplied buffers
  * @md: Pointer to mapped_device
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 4788b0b..4ad62d6 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -25,7 +25,7 @@
 /*
  * Construct a linear mapping: <dev_path> <offset>
  */
-static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+int dm_linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
 	struct linear_c *lc;
 	unsigned long long tmp;
@@ -66,14 +66,16 @@
 	kfree(lc);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(dm_linear_ctr);
 
-static void linear_dtr(struct dm_target *ti)
+void dm_linear_dtr(struct dm_target *ti)
 {
 	struct linear_c *lc = (struct linear_c *) ti->private;
 
 	dm_put_device(ti, lc->dev);
 	kfree(lc);
 }
+EXPORT_SYMBOL_GPL(dm_linear_dtr);
 
 static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector)
 {
@@ -92,14 +94,15 @@
 			linear_map_sector(ti, bio->bi_iter.bi_sector);
 }
 
-static int linear_map(struct dm_target *ti, struct bio *bio)
+int dm_linear_map(struct dm_target *ti, struct bio *bio)
 {
 	linear_map_bio(ti, bio);
 
 	return DM_MAPIO_REMAPPED;
 }
+EXPORT_SYMBOL_GPL(dm_linear_map);
 
-static void linear_status(struct dm_target *ti, status_type_t type,
+void dm_linear_status(struct dm_target *ti, status_type_t type,
 			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct linear_c *lc = (struct linear_c *) ti->private;
@@ -115,8 +118,9 @@
 		break;
 	}
 }
+EXPORT_SYMBOL_GPL(dm_linear_status);
 
-static int linear_prepare_ioctl(struct dm_target *ti,
+int dm_linear_prepare_ioctl(struct dm_target *ti,
 		struct block_device **bdev, fmode_t *mode)
 {
 	struct linear_c *lc = (struct linear_c *) ti->private;
@@ -132,16 +136,18 @@
 		return 1;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(dm_linear_prepare_ioctl);
 
-static int linear_iterate_devices(struct dm_target *ti,
+int dm_linear_iterate_devices(struct dm_target *ti,
 				  iterate_devices_callout_fn fn, void *data)
 {
 	struct linear_c *lc = ti->private;
 
 	return fn(ti, lc->dev, lc->start, ti->len, data);
 }
+EXPORT_SYMBOL_GPL(dm_linear_iterate_devices);
 
-static long linear_direct_access(struct dm_target *ti, sector_t sector,
+long dm_linear_direct_access(struct dm_target *ti, sector_t sector,
 				 void **kaddr, pfn_t *pfn, long size)
 {
 	struct linear_c *lc = ti->private;
@@ -158,18 +164,19 @@
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(dm_linear_direct_access);
 
 static struct target_type linear_target = {
 	.name   = "linear",
 	.version = {1, 3, 0},
 	.module = THIS_MODULE,
-	.ctr    = linear_ctr,
-	.dtr    = linear_dtr,
-	.map    = linear_map,
-	.status = linear_status,
-	.prepare_ioctl = linear_prepare_ioctl,
-	.iterate_devices = linear_iterate_devices,
-	.direct_access = linear_direct_access,
+	.ctr    = dm_linear_ctr,
+	.dtr    = dm_linear_dtr,
+	.map    = dm_linear_map,
+	.status = dm_linear_status,
+	.prepare_ioctl = dm_linear_prepare_ioctl,
+	.iterate_devices = dm_linear_iterate_devices,
+	.direct_access = dm_linear_direct_access,
 };
 
 int __init dm_linear_init(void)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5ac239d..d837a28 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -11,6 +11,7 @@
 #include <linux/vmalloc.h>
 #include <linux/blkdev.h>
 #include <linux/namei.h>
+#include <linux/mount.h>
 #include <linux/ctype.h>
 #include <linux/string.h>
 #include <linux/slab.h>
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 78f3601..3b62315 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -11,6 +11,7 @@
 
 #include "dm-verity-fec.h"
 #include <linux/math64.h>
+#include <linux/sysfs.h>
 
 #define DM_MSG_PREFIX	"verity-fec"
 
@@ -175,9 +176,11 @@
 	if (r < 0 && neras)
 		DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
 			    v->data_dev->name, (unsigned long long)rsb, r);
-	else if (r > 0)
+	else if (r > 0) {
 		DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
 			     v->data_dev->name, (unsigned long long)rsb, r);
+		atomic_add_unless(&v->fec->corrected, 1, INT_MAX);
+	}
 
 	return r;
 }
@@ -556,6 +559,7 @@
 void verity_fec_dtr(struct dm_verity *v)
 {
 	struct dm_verity_fec *f = v->fec;
+	struct kobject *kobj = &f->kobj_holder.kobj;
 
 	if (!verity_fec_is_enabled(v))
 		goto out;
@@ -572,6 +576,12 @@
 
 	if (f->dev)
 		dm_put_device(v->ti, f->dev);
+
+	if (kobj->state_initialized) {
+		kobject_put(kobj);
+		wait_for_completion(dm_get_completion_from_kobject(kobj));
+	}
+
 out:
 	kfree(f);
 	v->fec = NULL;
@@ -660,6 +670,28 @@
 	return 0;
 }
 
+static ssize_t corrected_show(struct kobject *kobj, struct kobj_attribute *attr,
+			      char *buf)
+{
+	struct dm_verity_fec *f = container_of(kobj, struct dm_verity_fec,
+					       kobj_holder.kobj);
+
+	return sprintf(buf, "%d\n", atomic_read(&f->corrected));
+}
+
+static struct kobj_attribute attr_corrected = __ATTR_RO(corrected);
+
+static struct attribute *fec_attrs[] = {
+	&attr_corrected.attr,
+	NULL
+};
+
+static struct kobj_type fec_ktype = {
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_attrs = fec_attrs,
+	.release = dm_kobject_release
+};
+
 /*
  * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr.
  */
@@ -683,8 +715,10 @@
  */
 int verity_fec_ctr(struct dm_verity *v)
 {
+	int r;
 	struct dm_verity_fec *f = v->fec;
 	struct dm_target *ti = v->ti;
+	struct mapped_device *md = dm_table_get_md(ti->table);
 	u64 hash_blocks;
 
 	if (!verity_fec_is_enabled(v)) {
@@ -692,6 +726,16 @@
 		return 0;
 	}
 
+	/* Create a kobject and sysfs attributes */
+	init_completion(&f->kobj_holder.completion);
+
+	r = kobject_init_and_add(&f->kobj_holder.kobj, &fec_ktype,
+				 &disk_to_dev(dm_disk(md))->kobj, "%s", "fec");
+	if (r) {
+		ti->error = "Cannot create kobject";
+		return r;
+	}
+
 	/*
 	 * FEC is computed over data blocks, possible metadata, and
 	 * hash blocks. In other words, FEC covers total of fec_blocks
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h
index bb31ce8..4db0cae 100644
--- a/drivers/md/dm-verity-fec.h
+++ b/drivers/md/dm-verity-fec.h
@@ -12,6 +12,8 @@
 #ifndef DM_VERITY_FEC_H
 #define DM_VERITY_FEC_H
 
+#include "dm.h"
+#include "dm-core.h"
 #include "dm-verity.h"
 #include <linux/rslib.h>
 
@@ -51,6 +53,8 @@
 	mempool_t *extra_pool;	/* mempool for extra buffers */
 	mempool_t *output_pool;	/* mempool for output */
 	struct kmem_cache *cache;	/* cache for buffers */
+	atomic_t corrected;		/* corrected errors */
+	struct dm_kobject_holder kobj_holder;	/* for sysfs attributes */
 };
 
 /* per-bio data */
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 0aba34a..5d0a996 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -501,6 +501,7 @@
 		container_of(work, struct dm_verity_prefetch_work, work);
 	struct dm_verity *v = pw->v;
 	int i;
+	sector_t prefetch_size;
 
 	for (i = v->levels - 2; i >= 0; i--) {
 		sector_t hash_block_start;
@@ -523,8 +524,14 @@
 				hash_block_end = v->hash_blocks - 1;
 		}
 no_prefetch_cluster:
+		// for emmc, it is more efficient to send bigger read
+		prefetch_size = max((sector_t)CONFIG_DM_VERITY_HASH_PREFETCH_MIN_SIZE,
+			hash_block_end - hash_block_start + 1);
+		if ((hash_block_start + prefetch_size) >= (v->hash_start + v->hash_blocks)) {
+			prefetch_size = hash_block_end - hash_block_start + 1;
+		}
 		dm_bufio_prefetch(v->bufio, hash_block_start,
-				  hash_block_end - hash_block_start + 1);
+				  prefetch_size);
 	}
 
 	kfree(pw);
@@ -551,7 +558,7 @@
  * Bio map function. It allocates dm_verity_io structure and bio vector and
  * fills them. Then it issues prefetches and the I/O.
  */
-static int verity_map(struct dm_target *ti, struct bio *bio)
+int verity_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_verity *v = ti->private;
 	struct dm_verity_io *io;
@@ -592,11 +599,12 @@
 
 	return DM_MAPIO_SUBMITTED;
 }
+EXPORT_SYMBOL_GPL(verity_map);
 
 /*
  * Status: V (valid) or C (corruption found)
  */
-static void verity_status(struct dm_target *ti, status_type_t type,
+void verity_status(struct dm_target *ti, status_type_t type,
 			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct dm_verity *v = ti->private;
@@ -655,8 +663,9 @@
 		break;
 	}
 }
+EXPORT_SYMBOL_GPL(verity_status);
 
-static int verity_prepare_ioctl(struct dm_target *ti,
+int verity_prepare_ioctl(struct dm_target *ti,
 		struct block_device **bdev, fmode_t *mode)
 {
 	struct dm_verity *v = ti->private;
@@ -668,16 +677,18 @@
 		return 1;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(verity_prepare_ioctl);
 
-static int verity_iterate_devices(struct dm_target *ti,
+int verity_iterate_devices(struct dm_target *ti,
 				  iterate_devices_callout_fn fn, void *data)
 {
 	struct dm_verity *v = ti->private;
 
 	return fn(ti, v->data_dev, v->data_start, ti->len, data);
 }
+EXPORT_SYMBOL_GPL(verity_iterate_devices);
 
-static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits)
+void verity_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct dm_verity *v = ti->private;
 
@@ -689,8 +700,9 @@
 
 	blk_limits_io_min(limits, limits->logical_block_size);
 }
+EXPORT_SYMBOL_GPL(verity_io_hints);
 
-static void verity_dtr(struct dm_target *ti)
+void verity_dtr(struct dm_target *ti)
 {
 	struct dm_verity *v = ti->private;
 
@@ -719,6 +731,7 @@
 
 	kfree(v);
 }
+EXPORT_SYMBOL_GPL(verity_dtr);
 
 static int verity_alloc_zero_digest(struct dm_verity *v)
 {
@@ -817,7 +830,7 @@
  *	<digest>
  *	<salt>		Hex string or "-" if no salt.
  */
-static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
+int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 {
 	struct dm_verity *v;
 	struct dm_arg_set as;
@@ -1053,6 +1066,7 @@
 
 	return r;
 }
+EXPORT_SYMBOL_GPL(verity_ctr);
 
 static struct target_type verity_target = {
 	.name		= "verity",
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index fb419f4..75effca 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -126,4 +126,14 @@
 extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
 				 sector_t block, u8 *digest, bool *is_zero);
 
+extern void verity_status(struct dm_target *ti, status_type_t type,
+			unsigned status_flags, char *result, unsigned maxlen);
+extern int verity_prepare_ioctl(struct dm_target *ti,
+                struct block_device **bdev, fmode_t *mode);
+extern int verity_iterate_devices(struct dm_target *ti,
+				iterate_devices_callout_fn fn, void *data);
+extern void verity_io_hints(struct dm_target *ti, struct queue_limits *limits);
+extern void verity_dtr(struct dm_target *ti);
+extern int verity_ctr(struct dm_target *ti, unsigned argc, char **argv);
+extern int verity_map(struct dm_target *ti, struct bio *bio);
 #endif /* DM_VERITY_H */
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index f0aad08..ed25f30 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -80,8 +80,6 @@
 unsigned dm_get_md_type(struct mapped_device *md);
 struct target_type *dm_get_immutable_target_type(struct mapped_device *md);
 
-int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
-
 /*
  * To check the return value from dm_table_find_target().
  */
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 64971ba..9360e6e 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -766,6 +766,27 @@
 	  An empty message will only clear the display at driver init time. Any other
 	  printf()-formatted message is valid with newline and escape codes.
 
+config UID_SYS_STATS
+	bool "Per-UID statistics"
+	depends on PROFILING && TASK_XACCT && TASK_IO_ACCOUNTING
+	help
+	  Per UID based cpu time statistics exported to /proc/uid_cputime
+	  Per UID based io statistics exported to /proc/uid_io
+	  Per UID based procstat control in /proc/uid_procstat
+
+config UID_SYS_STATS_DEBUG
+	bool "Per-TASK statistics"
+	depends on UID_SYS_STATS
+	default n
+	help
+	  Per TASK based io statistics exported to /proc/uid_io
+
+config MEMORY_STATE_TIME
+	tristate "Memory freq/bandwidth time statistics"
+	depends on PROFILING
+	help
+	  Memory time statistics exported to /sys/kernel/memory_state_time
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 2bf79ba..234754db 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -54,6 +54,9 @@
 obj-$(CONFIG_CXL_BASE)		+= cxl/
 obj-$(CONFIG_PANEL)             += panel.o
 
+obj-$(CONFIG_UID_SYS_STATS)	+= uid_sys_stats.o
+obj-$(CONFIG_MEMORY_STATE_TIME)	+= memory_state_time.o
+
 lkdtm-$(CONFIG_LKDTM)		+= lkdtm_core.o
 lkdtm-$(CONFIG_LKDTM)		+= lkdtm_bugs.o
 lkdtm-$(CONFIG_LKDTM)		+= lkdtm_heap.o
diff --git a/drivers/misc/memory_state_time.c b/drivers/misc/memory_state_time.c
new file mode 100644
index 0000000..ba94dcf
--- /dev/null
+++ b/drivers/misc/memory_state_time.c
@@ -0,0 +1,462 @@
+/* drivers/misc/memory_state_time.c
+ *
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/hashtable.h>
+#include <linux/kconfig.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/memory-state-time.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/time.h>
+#include <linux/timekeeping.h>
+#include <linux/workqueue.h>
+
+#define KERNEL_ATTR_RO(_name) \
+static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
+#define KERNEL_ATTR_RW(_name) \
+static struct kobj_attribute _name##_attr = \
+	__ATTR(_name, 0644, _name##_show, _name##_store)
+
+#define FREQ_HASH_BITS 4
+DECLARE_HASHTABLE(freq_hash_table, FREQ_HASH_BITS);
+
+static DEFINE_MUTEX(mem_lock);
+
+#define TAG "memory_state_time"
+#define BW_NODE "/soc/memory-state-time"
+#define FREQ_TBL "freq-tbl"
+#define BW_TBL "bw-buckets"
+#define NUM_SOURCES "num-sources"
+
+#define LOWEST_FREQ 2
+
+static int curr_bw;
+static int curr_freq;
+static u32 *bw_buckets;
+static u32 *freq_buckets;
+static int num_freqs;
+static int num_buckets;
+static int registered_bw_sources;
+static u64 last_update;
+static bool init_success;
+static struct workqueue_struct *memory_wq;
+static u32 num_sources = 10;
+static int *bandwidths;
+
+struct freq_entry {
+	int freq;
+	u64 *buckets; /* Bandwidth buckets. */
+	struct hlist_node hash;
+};
+
+struct queue_container {
+	struct work_struct update_state;
+	int value;
+	u64 time_now;
+	int id;
+	struct mutex *lock;
+};
+
+static int find_bucket(int bw)
+{
+	int i;
+
+	if (bw_buckets != NULL) {
+		for (i = 0; i < num_buckets; i++) {
+			if (bw_buckets[i] > bw) {
+				pr_debug("Found bucket %d for bandwidth %d\n",
+					i, bw);
+				return i;
+			}
+		}
+		return num_buckets - 1;
+	}
+	return 0;
+}
+
+static u64 get_time_diff(u64 time_now)
+{
+	u64 ms;
+
+	ms = time_now - last_update;
+	last_update = time_now;
+	return ms;
+}
+
+static ssize_t show_stat_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	int i, j;
+	int len = 0;
+	struct freq_entry *freq_entry;
+
+	for (i = 0; i < num_freqs; i++) {
+		hash_for_each_possible(freq_hash_table, freq_entry, hash,
+				freq_buckets[i]) {
+			if (freq_entry->freq == freq_buckets[i]) {
+				len += scnprintf(buf + len, PAGE_SIZE - len,
+						"%d ", freq_buckets[i]);
+				if (len >= PAGE_SIZE)
+					break;
+				for (j = 0; j < num_buckets; j++) {
+					len += scnprintf(buf + len,
+							PAGE_SIZE - len,
+							"%llu ",
+							freq_entry->buckets[j]);
+				}
+				len += scnprintf(buf + len, PAGE_SIZE - len,
+						"\n");
+			}
+		}
+	}
+	pr_debug("Current Time: %llu\n", ktime_get_boot_ns());
+	return len;
+}
+KERNEL_ATTR_RO(show_stat);
+
+static void update_table(u64 time_now)
+{
+	struct freq_entry *freq_entry;
+
+	pr_debug("Last known bw %d freq %d\n", curr_bw, curr_freq);
+	hash_for_each_possible(freq_hash_table, freq_entry, hash, curr_freq) {
+		if (curr_freq == freq_entry->freq) {
+			freq_entry->buckets[find_bucket(curr_bw)]
+					+= get_time_diff(time_now);
+			break;
+		}
+	}
+}
+
+static bool freq_exists(int freq)
+{
+	int i;
+
+	for (i = 0; i < num_freqs; i++) {
+		if (freq == freq_buckets[i])
+			return true;
+	}
+	return false;
+}
+
+static int calculate_total_bw(int bw, int index)
+{
+	int i;
+	int total_bw = 0;
+
+	pr_debug("memory_state_time New bw %d for id %d\n", bw, index);
+	bandwidths[index] = bw;
+	for (i = 0; i < registered_bw_sources; i++)
+		total_bw += bandwidths[i];
+	return total_bw;
+}
+
+static void freq_update_do_work(struct work_struct *work)
+{
+	struct queue_container *freq_state_update
+			= container_of(work, struct queue_container,
+			update_state);
+	if (freq_state_update) {
+		mutex_lock(&mem_lock);
+		update_table(freq_state_update->time_now);
+		curr_freq = freq_state_update->value;
+		mutex_unlock(&mem_lock);
+		kfree(freq_state_update);
+	}
+}
+
+static void bw_update_do_work(struct work_struct *work)
+{
+	struct queue_container *bw_state_update
+			= container_of(work, struct queue_container,
+			update_state);
+	if (bw_state_update) {
+		mutex_lock(&mem_lock);
+		update_table(bw_state_update->time_now);
+		curr_bw = calculate_total_bw(bw_state_update->value,
+				bw_state_update->id);
+		mutex_unlock(&mem_lock);
+		kfree(bw_state_update);
+	}
+}
+
+static void memory_state_freq_update(struct memory_state_update_block *ub,
+		int value)
+{
+	if (IS_ENABLED(CONFIG_MEMORY_STATE_TIME)) {
+		if (freq_exists(value) && init_success) {
+			struct queue_container *freq_container
+				= kmalloc(sizeof(struct queue_container),
+				GFP_KERNEL);
+			if (!freq_container)
+				return;
+			INIT_WORK(&freq_container->update_state,
+					freq_update_do_work);
+			freq_container->time_now = ktime_get_boot_ns();
+			freq_container->value = value;
+			pr_debug("Scheduling freq update in work queue\n");
+			queue_work(memory_wq, &freq_container->update_state);
+		} else {
+			pr_debug("Freq does not exist.\n");
+		}
+	}
+}
+
+static void memory_state_bw_update(struct memory_state_update_block *ub,
+		int value)
+{
+	if (IS_ENABLED(CONFIG_MEMORY_STATE_TIME)) {
+		if (init_success) {
+			struct queue_container *bw_container
+				= kmalloc(sizeof(struct queue_container),
+				GFP_KERNEL);
+			if (!bw_container)
+				return;
+			INIT_WORK(&bw_container->update_state,
+					bw_update_do_work);
+			bw_container->time_now = ktime_get_boot_ns();
+			bw_container->value = value;
+			bw_container->id = ub->id;
+			pr_debug("Scheduling bandwidth update in work queue\n");
+			queue_work(memory_wq, &bw_container->update_state);
+		}
+	}
+}
+
+struct memory_state_update_block *memory_state_register_frequency_source(void)
+{
+	struct memory_state_update_block *block;
+
+	if (IS_ENABLED(CONFIG_MEMORY_STATE_TIME)) {
+		pr_debug("Allocating frequency source\n");
+		block = kmalloc(sizeof(struct memory_state_update_block),
+					GFP_KERNEL);
+		if (!block)
+			return NULL;
+		block->update_call = memory_state_freq_update;
+		return block;
+	}
+	pr_err("Config option disabled.\n");
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(memory_state_register_frequency_source);
+
+struct memory_state_update_block *memory_state_register_bandwidth_source(void)
+{
+	struct memory_state_update_block *block;
+
+	if (IS_ENABLED(CONFIG_MEMORY_STATE_TIME)) {
+		pr_debug("Allocating bandwidth source %d\n",
+				registered_bw_sources);
+		block = kmalloc(sizeof(struct memory_state_update_block),
+					GFP_KERNEL);
+		if (!block)
+			return NULL;
+		block->update_call = memory_state_bw_update;
+		if (registered_bw_sources < num_sources) {
+			block->id = registered_bw_sources++;
+		} else {
+			pr_err("Unable to allocate source; max number reached\n");
+			kfree(block);
+			return NULL;
+		}
+		return block;
+	}
+	pr_err("Config option disabled.\n");
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(memory_state_register_bandwidth_source);
+
+/* Buckets are designated by their maximum.
+ * Returns the buckets decided by the capability of the device.
+ */
+static int get_bw_buckets(struct device *dev)
+{
+	int ret, lenb;
+	struct device_node *node = dev->of_node;
+
+	of_property_read_u32(node, NUM_SOURCES, &num_sources);
+	if (!of_find_property(node, BW_TBL, &lenb)) {
+		pr_err("Missing %s property\n", BW_TBL);
+		return -ENODATA;
+	}
+
+	bandwidths = devm_kzalloc(dev,
+			sizeof(*bandwidths) * num_sources, GFP_KERNEL);
+	if (!bandwidths)
+		return -ENOMEM;
+	lenb /= sizeof(*bw_buckets);
+	bw_buckets = devm_kzalloc(dev, lenb * sizeof(*bw_buckets),
+			GFP_KERNEL);
+	if (!bw_buckets) {
+		devm_kfree(dev, bandwidths);
+		return -ENOMEM;
+	}
+	ret = of_property_read_u32_array(node, BW_TBL, bw_buckets,
+			lenb);
+	if (ret < 0) {
+		devm_kfree(dev, bandwidths);
+		devm_kfree(dev, bw_buckets);
+		pr_err("Unable to read bandwidth table from device tree.\n");
+		return ret;
+	}
+
+	curr_bw = 0;
+	num_buckets = lenb;
+	return 0;
+}
+
+/* Adds struct freq_entry nodes to the hashtable for each compatible frequency.
+ * Returns the supported number of frequencies.
+ */
+static int freq_buckets_init(struct device *dev)
+{
+	struct freq_entry *freq_entry;
+	int i;
+	int ret, lenf;
+	struct device_node *node = dev->of_node;
+
+	if (!of_find_property(node, FREQ_TBL, &lenf)) {
+		pr_err("Missing %s property\n", FREQ_TBL);
+		return -ENODATA;
+	}
+
+	lenf /= sizeof(*freq_buckets);
+	freq_buckets = devm_kzalloc(dev, lenf * sizeof(*freq_buckets),
+			GFP_KERNEL);
+	if (!freq_buckets)
+		return -ENOMEM;
+	pr_debug("freqs found len %d\n", lenf);
+	ret = of_property_read_u32_array(node, FREQ_TBL, freq_buckets,
+			lenf);
+	if (ret < 0) {
+		devm_kfree(dev, freq_buckets);
+		pr_err("Unable to read frequency table from device tree.\n");
+		return ret;
+	}
+	pr_debug("ret freq %d\n", ret);
+
+	num_freqs = lenf;
+	curr_freq = freq_buckets[LOWEST_FREQ];
+
+	for (i = 0; i < num_freqs; i++) {
+		freq_entry = devm_kzalloc(dev, sizeof(struct freq_entry),
+				GFP_KERNEL);
+		if (!freq_entry)
+			return -ENOMEM;
+		freq_entry->buckets = devm_kzalloc(dev, sizeof(u64)*num_buckets,
+				GFP_KERNEL);
+		if (!freq_entry->buckets) {
+			devm_kfree(dev, freq_entry);
+			return -ENOMEM;
+		}
+		pr_debug("memory_state_time Adding freq to ht %d\n",
+				freq_buckets[i]);
+		freq_entry->freq = freq_buckets[i];
+		hash_add(freq_hash_table, &freq_entry->hash, freq_buckets[i]);
+	}
+	return 0;
+}
+
+struct kobject *memory_kobj;
+EXPORT_SYMBOL_GPL(memory_kobj);
+
+static struct attribute *memory_attrs[] = {
+	&show_stat_attr.attr,
+	NULL
+};
+
+static struct attribute_group memory_attr_group = {
+	.attrs = memory_attrs,
+};
+
+static int memory_state_time_probe(struct platform_device *pdev)
+{
+	int error;
+
+	error = get_bw_buckets(&pdev->dev);
+	if (error)
+		return error;
+	error = freq_buckets_init(&pdev->dev);
+	if (error)
+		return error;
+	last_update = ktime_get_boot_ns();
+	init_success = true;
+
+	pr_debug("memory_state_time initialized with num_freqs %d\n",
+			num_freqs);
+	return 0;
+}
+
+static const struct of_device_id match_table[] = {
+	{ .compatible = "memory-state-time" },
+	{}
+};
+
+static struct platform_driver memory_state_time_driver = {
+	.probe = memory_state_time_probe,
+	.driver = {
+		.name = "memory-state-time",
+		.of_match_table = match_table,
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init memory_state_time_init(void)
+{
+	int error;
+
+	hash_init(freq_hash_table);
+	memory_wq = create_singlethread_workqueue("memory_wq");
+	if (!memory_wq) {
+		pr_err("Unable to create workqueue.\n");
+		return -EINVAL;
+	}
+	/*
+	 * Create sys/kernel directory for memory_state_time.
+	 */
+	memory_kobj = kobject_create_and_add(TAG, kernel_kobj);
+	if (!memory_kobj) {
+		pr_err("Unable to allocate memory_kobj for sysfs directory.\n");
+		error = -ENOMEM;
+		goto wq;
+	}
+	error = sysfs_create_group(memory_kobj, &memory_attr_group);
+	if (error) {
+		pr_err("Unable to create sysfs folder.\n");
+		goto kobj;
+	}
+
+	error = platform_driver_register(&memory_state_time_driver);
+	if (error) {
+		pr_err("Unable to register memory_state_time platform driver.\n");
+		goto group;
+	}
+	return 0;
+
+group:	sysfs_remove_group(memory_kobj, &memory_attr_group);
+kobj:	kobject_put(memory_kobj);
+wq:	destroy_workqueue(memory_wq);
+	return error;
+}
+module_init(memory_state_time_init);
diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c
new file mode 100644
index 0000000..456406b
--- /dev/null
+++ b/drivers/misc/uid_sys_stats.c
@@ -0,0 +1,701 @@
+/* drivers/misc/uid_cputime.c
+ *
+ * Copyright (C) 2014 - 2015 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/err.h>
+#include <linux/hashtable.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/profile.h>
+#include <linux/rtmutex.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+
+#define UID_HASH_BITS	10
+DECLARE_HASHTABLE(hash_table, UID_HASH_BITS);
+
+static DEFINE_RT_MUTEX(uid_lock);
+static struct proc_dir_entry *cpu_parent;
+static struct proc_dir_entry *io_parent;
+static struct proc_dir_entry *proc_parent;
+
+struct io_stats {
+	u64 read_bytes;
+	u64 write_bytes;
+	u64 rchar;
+	u64 wchar;
+	u64 fsync;
+};
+
+#define UID_STATE_FOREGROUND	0
+#define UID_STATE_BACKGROUND	1
+#define UID_STATE_BUCKET_SIZE	2
+
+#define UID_STATE_TOTAL_CURR	2
+#define UID_STATE_TOTAL_LAST	3
+#define UID_STATE_DEAD_TASKS	4
+#define UID_STATE_SIZE		5
+
+#define MAX_TASK_COMM_LEN 256
+
+struct task_entry {
+	char comm[MAX_TASK_COMM_LEN];
+	pid_t pid;
+	struct io_stats io[UID_STATE_SIZE];
+	struct hlist_node hash;
+};
+
+struct uid_entry {
+	uid_t uid;
+	cputime_t utime;
+	cputime_t stime;
+	cputime_t active_utime;
+	cputime_t active_stime;
+	int state;
+	struct io_stats io[UID_STATE_SIZE];
+	struct hlist_node hash;
+#ifdef CONFIG_UID_SYS_STATS_DEBUG
+	DECLARE_HASHTABLE(task_entries, UID_HASH_BITS);
+#endif
+};
+
+static u64 compute_write_bytes(struct task_struct *task)
+{
+	if (task->ioac.write_bytes <= task->ioac.cancelled_write_bytes)
+		return 0;
+
+	return task->ioac.write_bytes - task->ioac.cancelled_write_bytes;
+}
+
+static void compute_io_bucket_stats(struct io_stats *io_bucket,
+					struct io_stats *io_curr,
+					struct io_stats *io_last,
+					struct io_stats *io_dead)
+{
+	/* tasks could switch to another uid group, but its io_last in the
+	 * previous uid group could still be positive.
+	 * therefore before each update, do an overflow check first
+	 */
+	int64_t delta;
+
+	delta = io_curr->read_bytes + io_dead->read_bytes -
+		io_last->read_bytes;
+	io_bucket->read_bytes += delta > 0 ? delta : 0;
+	delta = io_curr->write_bytes + io_dead->write_bytes -
+		io_last->write_bytes;
+	io_bucket->write_bytes += delta > 0 ? delta : 0;
+	delta = io_curr->rchar + io_dead->rchar - io_last->rchar;
+	io_bucket->rchar += delta > 0 ? delta : 0;
+	delta = io_curr->wchar + io_dead->wchar - io_last->wchar;
+	io_bucket->wchar += delta > 0 ? delta : 0;
+	delta = io_curr->fsync + io_dead->fsync - io_last->fsync;
+	io_bucket->fsync += delta > 0 ? delta : 0;
+
+	io_last->read_bytes = io_curr->read_bytes;
+	io_last->write_bytes = io_curr->write_bytes;
+	io_last->rchar = io_curr->rchar;
+	io_last->wchar = io_curr->wchar;
+	io_last->fsync = io_curr->fsync;
+
+	memset(io_dead, 0, sizeof(struct io_stats));
+}
+
+#ifdef CONFIG_UID_SYS_STATS_DEBUG
+static void get_full_task_comm(struct task_entry *task_entry,
+		struct task_struct *task)
+{
+	int i = 0, offset = 0, len = 0;
+	/* save one byte for terminating null character */
+	int unused_len = MAX_TASK_COMM_LEN - TASK_COMM_LEN - 1;
+	char buf[unused_len];
+	struct mm_struct *mm = task->mm;
+
+	/* fill the first TASK_COMM_LEN bytes with thread name */
+	__get_task_comm(task_entry->comm, TASK_COMM_LEN, task);
+	i = strlen(task_entry->comm);
+	while (i < TASK_COMM_LEN)
+		task_entry->comm[i++] = ' ';
+
+	/* next the executable file name */
+	if (mm) {
+		down_read(&mm->mmap_sem);
+		if (mm->exe_file) {
+			char *pathname = d_path(&mm->exe_file->f_path, buf,
+					unused_len);
+
+			if (!IS_ERR(pathname)) {
+				len = strlcpy(task_entry->comm + i, pathname,
+						unused_len);
+				i += len;
+				task_entry->comm[i++] = ' ';
+				unused_len--;
+			}
+		}
+		up_read(&mm->mmap_sem);
+	}
+	unused_len -= len;
+
+	/* fill the rest with command line argument
+	 * replace each null or new line character
+	 * between args in argv with whitespace */
+	len = get_cmdline(task, buf, unused_len);
+	while (offset < len) {
+		if (buf[offset] != '\0' && buf[offset] != '\n')
+			task_entry->comm[i++] = buf[offset];
+		else
+			task_entry->comm[i++] = ' ';
+		offset++;
+	}
+
+	/* get rid of trailing whitespaces in case when arg is memset to
+	 * zero before being reset in userspace
+	 */
+	while (task_entry->comm[i-1] == ' ')
+		i--;
+	task_entry->comm[i] = '\0';
+}
+
+static struct task_entry *find_task_entry(struct uid_entry *uid_entry,
+		struct task_struct *task)
+{
+	struct task_entry *task_entry;
+
+	hash_for_each_possible(uid_entry->task_entries, task_entry, hash,
+			task->pid) {
+		if (task->pid == task_entry->pid) {
+			/* if thread name changed, update the entire command */
+			int len = strnchr(task_entry->comm, ' ', TASK_COMM_LEN)
+				- task_entry->comm;
+
+			if (strncmp(task_entry->comm, task->comm, len))
+				get_full_task_comm(task_entry, task);
+			return task_entry;
+		}
+	}
+	return NULL;
+}
+
+static struct task_entry *find_or_register_task(struct uid_entry *uid_entry,
+		struct task_struct *task)
+{
+	struct task_entry *task_entry;
+	pid_t pid = task->pid;
+
+	task_entry = find_task_entry(uid_entry, task);
+	if (task_entry)
+		return task_entry;
+
+	task_entry = kzalloc(sizeof(struct task_entry), GFP_ATOMIC);
+	if (!task_entry)
+		return NULL;
+
+	get_full_task_comm(task_entry, task);
+
+	task_entry->pid = pid;
+	hash_add(uid_entry->task_entries, &task_entry->hash, (unsigned int)pid);
+
+	return task_entry;
+}
+
+static void remove_uid_tasks(struct uid_entry *uid_entry)
+{
+	struct task_entry *task_entry;
+	unsigned long bkt_task;
+	struct hlist_node *tmp_task;
+
+	hash_for_each_safe(uid_entry->task_entries, bkt_task,
+			tmp_task, task_entry, hash) {
+		hash_del(&task_entry->hash);
+		kfree(task_entry);
+	}
+}
+
+static void set_io_uid_tasks_zero(struct uid_entry *uid_entry)
+{
+	struct task_entry *task_entry;
+	unsigned long bkt_task;
+
+	hash_for_each(uid_entry->task_entries, bkt_task, task_entry, hash) {
+		memset(&task_entry->io[UID_STATE_TOTAL_CURR], 0,
+			sizeof(struct io_stats));
+	}
+}
+
+static void add_uid_tasks_io_stats(struct uid_entry *uid_entry,
+		struct task_struct *task, int slot)
+{
+	struct task_entry *task_entry = find_or_register_task(uid_entry, task);
+	struct io_stats *task_io_slot = &task_entry->io[slot];
+
+	task_io_slot->read_bytes += task->ioac.read_bytes;
+	task_io_slot->write_bytes += compute_write_bytes(task);
+	task_io_slot->rchar += task->ioac.rchar;
+	task_io_slot->wchar += task->ioac.wchar;
+	task_io_slot->fsync += task->ioac.syscfs;
+}
+
+static void compute_io_uid_tasks(struct uid_entry *uid_entry)
+{
+	struct task_entry *task_entry;
+	unsigned long bkt_task;
+
+	hash_for_each(uid_entry->task_entries, bkt_task, task_entry, hash) {
+		compute_io_bucket_stats(&task_entry->io[uid_entry->state],
+					&task_entry->io[UID_STATE_TOTAL_CURR],
+					&task_entry->io[UID_STATE_TOTAL_LAST],
+					&task_entry->io[UID_STATE_DEAD_TASKS]);
+	}
+}
+
+static void show_io_uid_tasks(struct seq_file *m, struct uid_entry *uid_entry)
+{
+	struct task_entry *task_entry;
+	unsigned long bkt_task;
+
+	hash_for_each(uid_entry->task_entries, bkt_task, task_entry, hash) {
+		/* Separated by comma because space exists in task comm */
+		seq_printf(m, "task,%s,%lu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n",
+				task_entry->comm,
+				(unsigned long)task_entry->pid,
+				task_entry->io[UID_STATE_FOREGROUND].rchar,
+				task_entry->io[UID_STATE_FOREGROUND].wchar,
+				task_entry->io[UID_STATE_FOREGROUND].read_bytes,
+				task_entry->io[UID_STATE_FOREGROUND].write_bytes,
+				task_entry->io[UID_STATE_BACKGROUND].rchar,
+				task_entry->io[UID_STATE_BACKGROUND].wchar,
+				task_entry->io[UID_STATE_BACKGROUND].read_bytes,
+				task_entry->io[UID_STATE_BACKGROUND].write_bytes,
+				task_entry->io[UID_STATE_FOREGROUND].fsync,
+				task_entry->io[UID_STATE_BACKGROUND].fsync);
+	}
+}
+#else
+static void remove_uid_tasks(struct uid_entry *uid_entry) {};
+static void set_io_uid_tasks_zero(struct uid_entry *uid_entry) {};
+static void add_uid_tasks_io_stats(struct uid_entry *uid_entry,
+		struct task_struct *task, int slot) {};
+static void compute_io_uid_tasks(struct uid_entry *uid_entry) {};
+static void show_io_uid_tasks(struct seq_file *m,
+		struct uid_entry *uid_entry) {}
+#endif
+
+static struct uid_entry *find_uid_entry(uid_t uid)
+{
+	struct uid_entry *uid_entry;
+	hash_for_each_possible(hash_table, uid_entry, hash, uid) {
+		if (uid_entry->uid == uid)
+			return uid_entry;
+	}
+	return NULL;
+}
+
+static struct uid_entry *find_or_register_uid(uid_t uid)
+{
+	struct uid_entry *uid_entry;
+
+	uid_entry = find_uid_entry(uid);
+	if (uid_entry)
+		return uid_entry;
+
+	uid_entry = kzalloc(sizeof(struct uid_entry), GFP_ATOMIC);
+	if (!uid_entry)
+		return NULL;
+
+	uid_entry->uid = uid;
+#ifdef CONFIG_UID_SYS_STATS_DEBUG
+	hash_init(uid_entry->task_entries);
+#endif
+	hash_add(hash_table, &uid_entry->hash, uid);
+
+	return uid_entry;
+}
+
+static int uid_cputime_show(struct seq_file *m, void *v)
+{
+	struct uid_entry *uid_entry = NULL;
+	struct task_struct *task, *temp;
+	struct user_namespace *user_ns = current_user_ns();
+	cputime_t utime;
+	cputime_t stime;
+	unsigned long bkt;
+	uid_t uid;
+
+	rt_mutex_lock(&uid_lock);
+
+	hash_for_each(hash_table, bkt, uid_entry, hash) {
+		uid_entry->active_stime = 0;
+		uid_entry->active_utime = 0;
+	}
+
+	read_lock(&tasklist_lock);
+	do_each_thread(temp, task) {
+		uid = from_kuid_munged(user_ns, task_uid(task));
+		if (!uid_entry || uid_entry->uid != uid)
+			uid_entry = find_or_register_uid(uid);
+		if (!uid_entry) {
+			read_unlock(&tasklist_lock);
+			rt_mutex_unlock(&uid_lock);
+			pr_err("%s: failed to find the uid_entry for uid %d\n",
+				__func__, uid);
+			return -ENOMEM;
+		}
+		task_cputime_adjusted(task, &utime, &stime);
+		uid_entry->active_utime += utime;
+		uid_entry->active_stime += stime;
+	} while_each_thread(temp, task);
+	read_unlock(&tasklist_lock);
+
+	hash_for_each(hash_table, bkt, uid_entry, hash) {
+		cputime_t total_utime = uid_entry->utime +
+							uid_entry->active_utime;
+		cputime_t total_stime = uid_entry->stime +
+							uid_entry->active_stime;
+		seq_printf(m, "%d: %llu %llu\n", uid_entry->uid,
+			(unsigned long long)jiffies_to_msecs(
+				cputime_to_jiffies(total_utime)) * USEC_PER_MSEC,
+			(unsigned long long)jiffies_to_msecs(
+				cputime_to_jiffies(total_stime)) * USEC_PER_MSEC);
+	}
+
+	rt_mutex_unlock(&uid_lock);
+	return 0;
+}
+
+static int uid_cputime_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, uid_cputime_show, PDE_DATA(inode));
+}
+
+static const struct file_operations uid_cputime_fops = {
+	.open		= uid_cputime_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int uid_remove_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, NULL, NULL);
+}
+
+static ssize_t uid_remove_write(struct file *file,
+			const char __user *buffer, size_t count, loff_t *ppos)
+{
+	struct uid_entry *uid_entry;
+	struct hlist_node *tmp;
+	char uids[128];
+	char *start_uid, *end_uid = NULL;
+	long int uid_start = 0, uid_end = 0;
+
+	if (count >= sizeof(uids))
+		count = sizeof(uids) - 1;
+
+	if (copy_from_user(uids, buffer, count))
+		return -EFAULT;
+
+	uids[count] = '\0';
+	end_uid = uids;
+	start_uid = strsep(&end_uid, "-");
+
+	if (!start_uid || !end_uid)
+		return -EINVAL;
+
+	if (kstrtol(start_uid, 10, &uid_start) != 0 ||
+		kstrtol(end_uid, 10, &uid_end) != 0) {
+		return -EINVAL;
+	}
+	rt_mutex_lock(&uid_lock);
+
+	for (; uid_start <= uid_end; uid_start++) {
+		hash_for_each_possible_safe(hash_table, uid_entry, tmp,
+							hash, (uid_t)uid_start) {
+			if (uid_start == uid_entry->uid) {
+				remove_uid_tasks(uid_entry);
+				hash_del(&uid_entry->hash);
+				kfree(uid_entry);
+			}
+		}
+	}
+
+	rt_mutex_unlock(&uid_lock);
+	return count;
+}
+
+static const struct file_operations uid_remove_fops = {
+	.open		= uid_remove_open,
+	.release	= single_release,
+	.write		= uid_remove_write,
+};
+
+
+static void add_uid_io_stats(struct uid_entry *uid_entry,
+			struct task_struct *task, int slot)
+{
+	struct io_stats *io_slot = &uid_entry->io[slot];
+
+	io_slot->read_bytes += task->ioac.read_bytes;
+	io_slot->write_bytes += compute_write_bytes(task);
+	io_slot->rchar += task->ioac.rchar;
+	io_slot->wchar += task->ioac.wchar;
+	io_slot->fsync += task->ioac.syscfs;
+
+	add_uid_tasks_io_stats(uid_entry, task, slot);
+}
+
+static void update_io_stats_all_locked(void)
+{
+	struct uid_entry *uid_entry = NULL;
+	struct task_struct *task, *temp;
+	struct user_namespace *user_ns = current_user_ns();
+	unsigned long bkt;
+	uid_t uid;
+
+	hash_for_each(hash_table, bkt, uid_entry, hash) {
+		memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0,
+			sizeof(struct io_stats));
+		set_io_uid_tasks_zero(uid_entry);
+	}
+
+	rcu_read_lock();
+	do_each_thread(temp, task) {
+		uid = from_kuid_munged(user_ns, task_uid(task));
+		if (!uid_entry || uid_entry->uid != uid)
+			uid_entry = find_or_register_uid(uid);
+		if (!uid_entry)
+			continue;
+		add_uid_io_stats(uid_entry, task, UID_STATE_TOTAL_CURR);
+	} while_each_thread(temp, task);
+	rcu_read_unlock();
+
+	hash_for_each(hash_table, bkt, uid_entry, hash) {
+		compute_io_bucket_stats(&uid_entry->io[uid_entry->state],
+					&uid_entry->io[UID_STATE_TOTAL_CURR],
+					&uid_entry->io[UID_STATE_TOTAL_LAST],
+					&uid_entry->io[UID_STATE_DEAD_TASKS]);
+		compute_io_uid_tasks(uid_entry);
+	}
+}
+
+static void update_io_stats_uid_locked(struct uid_entry *uid_entry)
+{
+	struct task_struct *task, *temp;
+	struct user_namespace *user_ns = current_user_ns();
+
+	memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0,
+		sizeof(struct io_stats));
+	set_io_uid_tasks_zero(uid_entry);
+
+	rcu_read_lock();
+	do_each_thread(temp, task) {
+		if (from_kuid_munged(user_ns, task_uid(task)) != uid_entry->uid)
+			continue;
+		add_uid_io_stats(uid_entry, task, UID_STATE_TOTAL_CURR);
+	} while_each_thread(temp, task);
+	rcu_read_unlock();
+
+	compute_io_bucket_stats(&uid_entry->io[uid_entry->state],
+				&uid_entry->io[UID_STATE_TOTAL_CURR],
+				&uid_entry->io[UID_STATE_TOTAL_LAST],
+				&uid_entry->io[UID_STATE_DEAD_TASKS]);
+	compute_io_uid_tasks(uid_entry);
+}
+
+
+static int uid_io_show(struct seq_file *m, void *v)
+{
+	struct uid_entry *uid_entry;
+	unsigned long bkt;
+
+	rt_mutex_lock(&uid_lock);
+
+	update_io_stats_all_locked();
+
+	hash_for_each(hash_table, bkt, uid_entry, hash) {
+		seq_printf(m, "%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+				uid_entry->uid,
+				uid_entry->io[UID_STATE_FOREGROUND].rchar,
+				uid_entry->io[UID_STATE_FOREGROUND].wchar,
+				uid_entry->io[UID_STATE_FOREGROUND].read_bytes,
+				uid_entry->io[UID_STATE_FOREGROUND].write_bytes,
+				uid_entry->io[UID_STATE_BACKGROUND].rchar,
+				uid_entry->io[UID_STATE_BACKGROUND].wchar,
+				uid_entry->io[UID_STATE_BACKGROUND].read_bytes,
+				uid_entry->io[UID_STATE_BACKGROUND].write_bytes,
+				uid_entry->io[UID_STATE_FOREGROUND].fsync,
+				uid_entry->io[UID_STATE_BACKGROUND].fsync);
+
+		show_io_uid_tasks(m, uid_entry);
+	}
+
+	rt_mutex_unlock(&uid_lock);
+	return 0;
+}
+
+static int uid_io_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, uid_io_show, PDE_DATA(inode));
+}
+
+static const struct file_operations uid_io_fops = {
+	.open		= uid_io_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int uid_procstat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, NULL, NULL);
+}
+
+static ssize_t uid_procstat_write(struct file *file,
+			const char __user *buffer, size_t count, loff_t *ppos)
+{
+	struct uid_entry *uid_entry;
+	uid_t uid;
+	int argc, state;
+	char input[128];
+
+	if (count >= sizeof(input))
+		return -EINVAL;
+
+	if (copy_from_user(input, buffer, count))
+		return -EFAULT;
+
+	input[count] = '\0';
+
+	argc = sscanf(input, "%u %d", &uid, &state);
+	if (argc != 2)
+		return -EINVAL;
+
+	if (state != UID_STATE_BACKGROUND && state != UID_STATE_FOREGROUND)
+		return -EINVAL;
+
+	rt_mutex_lock(&uid_lock);
+
+	uid_entry = find_or_register_uid(uid);
+	if (!uid_entry) {
+		rt_mutex_unlock(&uid_lock);
+		return -EINVAL;
+	}
+
+	if (uid_entry->state == state) {
+		rt_mutex_unlock(&uid_lock);
+		return count;
+	}
+
+	update_io_stats_uid_locked(uid_entry);
+
+	uid_entry->state = state;
+
+	rt_mutex_unlock(&uid_lock);
+
+	return count;
+}
+
+static const struct file_operations uid_procstat_fops = {
+	.open		= uid_procstat_open,
+	.release	= single_release,
+	.write		= uid_procstat_write,
+};
+
+static int process_notifier(struct notifier_block *self,
+			unsigned long cmd, void *v)
+{
+	struct task_struct *task = v;
+	struct uid_entry *uid_entry;
+	cputime_t utime, stime;
+	uid_t uid;
+
+	if (!task)
+		return NOTIFY_OK;
+
+	rt_mutex_lock(&uid_lock);
+	uid = from_kuid_munged(current_user_ns(), task_uid(task));
+	uid_entry = find_or_register_uid(uid);
+	if (!uid_entry) {
+		pr_err("%s: failed to find uid %d\n", __func__, uid);
+		goto exit;
+	}
+
+	task_cputime_adjusted(task, &utime, &stime);
+	uid_entry->utime += utime;
+	uid_entry->stime += stime;
+
+	add_uid_io_stats(uid_entry, task, UID_STATE_DEAD_TASKS);
+
+exit:
+	rt_mutex_unlock(&uid_lock);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block process_notifier_block = {
+	.notifier_call	= process_notifier,
+};
+
+static int __init proc_uid_sys_stats_init(void)
+{
+	hash_init(hash_table);
+
+	cpu_parent = proc_mkdir("uid_cputime", NULL);
+	if (!cpu_parent) {
+		pr_err("%s: failed to create uid_cputime proc entry\n",
+			__func__);
+		goto err;
+	}
+
+	proc_create_data("remove_uid_range", 0222, cpu_parent,
+		&uid_remove_fops, NULL);
+	proc_create_data("show_uid_stat", 0444, cpu_parent,
+		&uid_cputime_fops, NULL);
+
+	io_parent = proc_mkdir("uid_io", NULL);
+	if (!io_parent) {
+		pr_err("%s: failed to create uid_io proc entry\n",
+			__func__);
+		goto err;
+	}
+
+	proc_create_data("stats", 0444, io_parent,
+		&uid_io_fops, NULL);
+
+	proc_parent = proc_mkdir("uid_procstat", NULL);
+	if (!proc_parent) {
+		pr_err("%s: failed to create uid_procstat proc entry\n",
+			__func__);
+		goto err;
+	}
+
+	proc_create_data("set", 0222, proc_parent,
+		&uid_procstat_fops, NULL);
+
+	profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block);
+
+	return 0;
+
+err:
+	remove_proc_subtree("uid_cputime", NULL);
+	remove_proc_subtree("uid_io", NULL);
+	remove_proc_subtree("uid_procstat", NULL);
+	return -ENOMEM;
+}
+
+early_initcall(proc_uid_sys_stats_init);
diff --git a/drivers/mmc/card/Kconfig b/drivers/mmc/card/Kconfig
index 5562308..6142ec1 100644
--- a/drivers/mmc/card/Kconfig
+++ b/drivers/mmc/card/Kconfig
@@ -68,3 +68,15 @@
 
 	  This driver is only of interest to those developing or
 	  testing a host driver. Most people should say N here.
+
+config MMC_SIMULATE_MAX_SPEED
+	bool "Turn on maximum speed control per block device"
+	depends on MMC_BLOCK
+	help
+	  Say Y here to enable MMC device speed limiting. Used to test and
+	  simulate the behavior of the system when confronted with a slow MMC.
+
+	  Enables max_read_speed, max_write_speed and cache_size attributes to
+	  control the write or read maximum KB/second speed behaviors.
+
+	  If unsure, say N here.
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 709a872..817fcf8 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -287,6 +287,250 @@
 	return ret;
 }
 
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+
+static int max_read_speed, max_write_speed, cache_size = 4;
+
+module_param(max_read_speed, int, S_IRUSR | S_IRGRP);
+MODULE_PARM_DESC(max_read_speed, "maximum KB/s read speed 0=off");
+module_param(max_write_speed, int, S_IRUSR | S_IRGRP);
+MODULE_PARM_DESC(max_write_speed, "maximum KB/s write speed 0=off");
+module_param(cache_size, int, S_IRUSR | S_IRGRP);
+MODULE_PARM_DESC(cache_size, "MB high speed memory or SLC cache");
+
+/*
+ * helper macros and expectations:
+ *  size    - unsigned long number of bytes
+ *  jiffies - unsigned long HZ timestamp difference
+ *  speed   - unsigned KB/s transfer rate
+ */
+#define size_and_speed_to_jiffies(size, speed) \
+		((size) * HZ / (speed) / 1024UL)
+#define jiffies_and_speed_to_size(jiffies, speed) \
+		(((speed) * (jiffies) * 1024UL) / HZ)
+#define jiffies_and_size_to_speed(jiffies, size) \
+		((size) * HZ / (jiffies) / 1024UL)
+
+/* Limits to report warning */
+/* jiffies_and_size_to_speed(10*HZ, queue_max_hw_sectors(q) * 512UL) ~ 25 */
+#define MIN_SPEED(q) 250 /* 10 times faster than a floppy disk */
+#define MAX_SPEED(q) jiffies_and_size_to_speed(1, queue_max_sectors(q) * 512UL)
+
+#define speed_valid(speed) ((speed) > 0)
+
+static const char off[] = "off\n";
+
+static int max_speed_show(int speed, char *buf)
+{
+	if (speed)
+		return scnprintf(buf, PAGE_SIZE, "%uKB/s\n", speed);
+	else
+		return scnprintf(buf, PAGE_SIZE, off);
+}
+
+static int max_speed_store(const char *buf, struct request_queue *q)
+{
+	unsigned int limit, set = 0;
+
+	if (!strncasecmp(off, buf, sizeof(off) - 2))
+		return set;
+	if (kstrtouint(buf, 0, &set) || (set > INT_MAX))
+		return -EINVAL;
+	if (set == 0)
+		return set;
+	limit = MAX_SPEED(q);
+	if (set > limit)
+		pr_warn("max speed %u ineffective above %u\n", set, limit);
+	limit = MIN_SPEED(q);
+	if (set < limit)
+		pr_warn("max speed %u painful below %u\n", set, limit);
+	return set;
+}
+
+static ssize_t max_write_speed_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	int ret = max_speed_show(atomic_read(&md->queue.max_write_speed), buf);
+
+	mmc_blk_put(md);
+	return ret;
+}
+
+static ssize_t max_write_speed_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	int set = max_speed_store(buf, md->queue.queue);
+
+	if (set < 0) {
+		mmc_blk_put(md);
+		return set;
+	}
+
+	atomic_set(&md->queue.max_write_speed, set);
+	mmc_blk_put(md);
+	return count;
+}
+
+static const DEVICE_ATTR(max_write_speed, S_IRUGO | S_IWUSR,
+	max_write_speed_show, max_write_speed_store);
+
+static ssize_t max_read_speed_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	int ret = max_speed_show(atomic_read(&md->queue.max_read_speed), buf);
+
+	mmc_blk_put(md);
+	return ret;
+}
+
+static ssize_t max_read_speed_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	int set = max_speed_store(buf, md->queue.queue);
+
+	if (set < 0) {
+		mmc_blk_put(md);
+		return set;
+	}
+
+	atomic_set(&md->queue.max_read_speed, set);
+	mmc_blk_put(md);
+	return count;
+}
+
+static const DEVICE_ATTR(max_read_speed, S_IRUGO | S_IWUSR,
+	max_read_speed_show, max_read_speed_store);
+
+static ssize_t cache_size_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	struct mmc_queue *mq = &md->queue;
+	int cache_size = atomic_read(&mq->cache_size);
+	int ret;
+
+	if (!cache_size)
+		ret = scnprintf(buf, PAGE_SIZE, off);
+	else {
+		int speed = atomic_read(&mq->max_write_speed);
+
+		if (!speed_valid(speed))
+			ret = scnprintf(buf, PAGE_SIZE, "%uMB\n", cache_size);
+		else { /* We accept race between cache_jiffies and cache_used */
+			unsigned long size = jiffies_and_speed_to_size(
+				jiffies - mq->cache_jiffies, speed);
+			long used = atomic_long_read(&mq->cache_used);
+
+			if (size >= used)
+				size = 0;
+			else
+				size = (used - size) * 100 / cache_size
+					/ 1024UL / 1024UL;
+
+			ret = scnprintf(buf, PAGE_SIZE, "%uMB %lu%% used\n",
+				cache_size, size);
+		}
+	}
+
+	mmc_blk_put(md);
+	return ret;
+}
+
+static ssize_t cache_size_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct mmc_blk_data *md;
+	unsigned int set = 0;
+
+	if (strncasecmp(off, buf, sizeof(off) - 2)
+	 && (kstrtouint(buf, 0, &set) || (set > INT_MAX)))
+		return -EINVAL;
+
+	md = mmc_blk_get(dev_to_disk(dev));
+	atomic_set(&md->queue.cache_size, set);
+	mmc_blk_put(md);
+	return count;
+}
+
+static const DEVICE_ATTR(cache_size, S_IRUGO | S_IWUSR,
+	cache_size_show, cache_size_store);
+
+/* correct for write-back */
+static long mmc_blk_cache_used(struct mmc_queue *mq, unsigned long waitfor)
+{
+	long used = 0;
+	int speed = atomic_read(&mq->max_write_speed);
+
+	if (speed_valid(speed)) {
+		unsigned long size = jiffies_and_speed_to_size(
+					waitfor - mq->cache_jiffies, speed);
+		used = atomic_long_read(&mq->cache_used);
+
+		if (size >= used)
+			used = 0;
+		else
+			used -= size;
+	}
+
+	atomic_long_set(&mq->cache_used, used);
+	mq->cache_jiffies = waitfor;
+
+	return used;
+}
+
+static void mmc_blk_simulate_delay(
+	struct mmc_queue *mq,
+	struct request *req,
+	unsigned long waitfor)
+{
+	int max_speed;
+
+	if (!req)
+		return;
+
+	max_speed = (rq_data_dir(req) == READ)
+		? atomic_read(&mq->max_read_speed)
+		: atomic_read(&mq->max_write_speed);
+	if (speed_valid(max_speed)) {
+		unsigned long bytes = blk_rq_bytes(req);
+
+		if (rq_data_dir(req) != READ) {
+			int cache_size = atomic_read(&mq->cache_size);
+
+			if (cache_size) {
+				unsigned long size = cache_size * 1024L * 1024L;
+				long used = mmc_blk_cache_used(mq, waitfor);
+
+				used += bytes;
+				atomic_long_set(&mq->cache_used, used);
+				bytes = 0;
+				if (used > size)
+					bytes = used - size;
+			}
+		}
+		waitfor += size_and_speed_to_jiffies(bytes, max_speed);
+		if (time_is_after_jiffies(waitfor)) {
+			long msecs = jiffies_to_msecs(waitfor - jiffies);
+
+			if (likely(msecs > 0))
+				msleep(msecs);
+		}
+	}
+}
+
+#else
+
+#define mmc_blk_simulate_delay(mq, req, waitfor)
+
+#endif
+
 static int mmc_blk_open(struct block_device *bdev, fmode_t mode)
 {
 	struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk);
@@ -1284,6 +1528,23 @@
 	if (ret)
 		ret = -EIO;
 
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+	else if (atomic_read(&mq->cache_size)) {
+		long used = mmc_blk_cache_used(mq, jiffies);
+
+		if (used) {
+			int speed = atomic_read(&mq->max_write_speed);
+
+			if (speed_valid(speed)) {
+				unsigned long msecs = jiffies_to_msecs(
+					size_and_speed_to_jiffies(
+						used, speed));
+				if (msecs)
+					msleep(msecs);
+			}
+		}
+	}
+#endif
 	blk_end_request_all(req, ret);
 
 	return ret ? 0 : 1;
@@ -1965,6 +2226,9 @@
 	struct mmc_async_req *areq;
 	const u8 packed_nr = 2;
 	u8 reqs = 0;
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+	unsigned long waitfor = jiffies;
+#endif
 
 	if (!rqc && !mq->mqrq_prev->req)
 		return 0;
@@ -2015,6 +2279,8 @@
 			 */
 			mmc_blk_reset_success(md, type);
 
+			mmc_blk_simulate_delay(mq, rqc, waitfor);
+
 			if (mmc_packed_cmd(mq_rq->cmd_type)) {
 				ret = mmc_blk_end_packed_req(mq_rq);
 				break;
@@ -2437,6 +2703,14 @@
 					card->ext_csd.boot_ro_lockable)
 				device_remove_file(disk_to_dev(md->disk),
 					&md->power_ro_lock);
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+			device_remove_file(disk_to_dev(md->disk),
+						&dev_attr_max_write_speed);
+			device_remove_file(disk_to_dev(md->disk),
+						&dev_attr_max_read_speed);
+			device_remove_file(disk_to_dev(md->disk),
+						&dev_attr_cache_size);
+#endif
 
 			del_gendisk(md->disk);
 		}
@@ -2471,6 +2745,24 @@
 	ret = device_create_file(disk_to_dev(md->disk), &md->force_ro);
 	if (ret)
 		goto force_ro_fail;
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+	atomic_set(&md->queue.max_write_speed, max_write_speed);
+	ret = device_create_file(disk_to_dev(md->disk),
+			&dev_attr_max_write_speed);
+	if (ret)
+		goto max_write_speed_fail;
+	atomic_set(&md->queue.max_read_speed, max_read_speed);
+	ret = device_create_file(disk_to_dev(md->disk),
+			&dev_attr_max_read_speed);
+	if (ret)
+		goto max_read_speed_fail;
+	atomic_set(&md->queue.cache_size, cache_size);
+	atomic_long_set(&md->queue.cache_used, 0);
+	md->queue.cache_jiffies = jiffies;
+	ret = device_create_file(disk_to_dev(md->disk), &dev_attr_cache_size);
+	if (ret)
+		goto cache_size_fail;
+#endif
 
 	if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
 	     card->ext_csd.boot_ro_lockable) {
@@ -2495,6 +2787,14 @@
 	return ret;
 
 power_ro_lock_fail:
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+	device_remove_file(disk_to_dev(md->disk), &dev_attr_cache_size);
+cache_size_fail:
+	device_remove_file(disk_to_dev(md->disk), &dev_attr_max_read_speed);
+max_read_speed_fail:
+	device_remove_file(disk_to_dev(md->disk), &dev_attr_max_write_speed);
+max_write_speed_fail:
+#endif
 	device_remove_file(disk_to_dev(md->disk), &md->force_ro);
 force_ro_fail:
 	del_gendisk(md->disk);
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 8037f73..1810f76 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -19,6 +19,7 @@
 
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
+#include <linux/sched/rt.h>
 
 #include "queue.h"
 #include "block.h"
@@ -53,6 +54,11 @@
 {
 	struct mmc_queue *mq = d;
 	struct request_queue *q = mq->queue;
+	struct sched_param scheduler_params = {0};
+
+	scheduler_params.sched_priority = 1;
+
+	sched_setscheduler(current, SCHED_FIFO, &scheduler_params);
 
 	current->flags |= PF_MEMALLOC;
 
diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
index 342f1e3..fe58d31 100644
--- a/drivers/mmc/card/queue.h
+++ b/drivers/mmc/card/queue.h
@@ -62,6 +62,14 @@
 	struct mmc_queue_req	mqrq[2];
 	struct mmc_queue_req	*mqrq_cur;
 	struct mmc_queue_req	*mqrq_prev;
+#ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
+	atomic_t max_write_speed;
+	atomic_t max_read_speed;
+	atomic_t cache_size;
+	/* i/o tracking */
+	atomic_long_t cache_used;
+	unsigned long cache_jiffies;
+#endif
 };
 
 extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig
index 250f223..daad32f 100644
--- a/drivers/mmc/core/Kconfig
+++ b/drivers/mmc/core/Kconfig
@@ -22,3 +22,18 @@
 
 	  This driver can also be built as a module. If so, the module
 	  will be called pwrseq_simple.
+
+config MMC_EMBEDDED_SDIO
+	boolean "MMC embedded SDIO device support (EXPERIMENTAL)"
+	help
+	  If you say Y here, support will be added for embedded SDIO
+	  devices which do not contain the necessary enumeration
+	  support in hardware to be properly detected.
+
+config MMC_PARANOID_SD_INIT
+	bool "Enable paranoid SD card initialization (EXPERIMENTAL)"
+	help
+	  If you say Y here, the MMC layer will be extra paranoid
+	  about re-trying SD init requests. This can be a useful
+	  work-around for buggy controllers and hardware. Enable
+	  if you are experiencing issues with SD detection.
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index cff58297..c82c203 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -201,6 +201,19 @@
 			pr_debug("%s:     %d bytes transferred: %d\n",
 				mmc_hostname(host),
 				mrq->data->bytes_xfered, mrq->data->error);
+#ifdef CONFIG_BLOCK
+			if (mrq->lat_hist_enabled) {
+				ktime_t completion;
+				u_int64_t delta_us;
+
+				completion = ktime_get();
+				delta_us = ktime_us_delta(completion,
+							  mrq->io_start);
+				blk_update_latency_hist(&host->io_lat_s,
+					(mrq->data->flags & MMC_DATA_READ),
+					delta_us);
+			}
+#endif
 		}
 
 		if (mrq->stop) {
@@ -699,8 +712,16 @@
 		}
 	}
 
-	if (!err && areq)
+	if (!err && areq) {
+#ifdef CONFIG_BLOCK
+		if (host->latency_hist_enabled) {
+			areq->mrq->io_start = ktime_get();
+			areq->mrq->lat_hist_enabled = 1;
+		} else
+			areq->mrq->lat_hist_enabled = 0;
+#endif
 		start_err = __mmc_start_data_req(host, areq->mrq);
+	}
 
 	if (host->areq)
 		mmc_post_req(host, host->areq->mrq, 0);
@@ -2051,7 +2072,7 @@
 }
 
 static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card,
-				          unsigned int arg, unsigned int qty)
+					  unsigned int arg, unsigned int qty)
 {
 	unsigned int erase_timeout;
 
@@ -3034,6 +3055,22 @@
 	init_waitqueue_head(&host->context_info.wait);
 }
 
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+void mmc_set_embedded_sdio_data(struct mmc_host *host,
+				struct sdio_cis *cis,
+				struct sdio_cccr *cccr,
+				struct sdio_embedded_func *funcs,
+				int num_funcs)
+{
+	host->embedded_sdio_data.cis = cis;
+	host->embedded_sdio_data.cccr = cccr;
+	host->embedded_sdio_data.funcs = funcs;
+	host->embedded_sdio_data.num_funcs = num_funcs;
+}
+
+EXPORT_SYMBOL(mmc_set_embedded_sdio_data);
+#endif
+
 static int __init mmc_init(void)
 {
 	int ret;
@@ -3066,6 +3103,56 @@
 	mmc_unregister_bus();
 }
 
+#ifdef CONFIG_BLOCK
+static ssize_t
+latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+
+	return blk_latency_hist_show(&host->io_lat_s, buf);
+}
+
+/*
+ * Values permitted 0, 1, 2.
+ * 0 -> Disable IO latency histograms (default)
+ * 1 -> Enable IO latency histograms
+ * 2 -> Zero out IO latency histograms
+ */
+static ssize_t
+latency_hist_store(struct device *dev, struct device_attribute *attr,
+		   const char *buf, size_t count)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	long value;
+
+	if (kstrtol(buf, 0, &value))
+		return -EINVAL;
+	if (value == BLK_IO_LAT_HIST_ZERO)
+		blk_zero_latency_hist(&host->io_lat_s);
+	else if (value == BLK_IO_LAT_HIST_ENABLE ||
+		 value == BLK_IO_LAT_HIST_DISABLE)
+		host->latency_hist_enabled = value;
+	return count;
+}
+
+static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR,
+		   latency_hist_show, latency_hist_store);
+
+void
+mmc_latency_hist_sysfs_init(struct mmc_host *host)
+{
+	if (device_create_file(&host->class_dev, &dev_attr_latency_hist))
+		dev_err(&host->class_dev,
+			"Failed to create latency_hist sysfs entry\n");
+}
+
+void
+mmc_latency_hist_sysfs_exit(struct mmc_host *host)
+{
+	device_remove_file(&host->class_dev, &dev_attr_latency_hist);
+}
+#endif
+
 subsys_initcall(mmc_init);
 module_exit(mmc_exit);
 
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 848b345..07f8891 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -31,8 +31,6 @@
 #include "slot-gpio.h"
 #include "pwrseq.h"
 
-#define cls_dev_to_mmc_host(d)	container_of(d, struct mmc_host, class_dev)
-
 static DEFINE_IDA(mmc_host_ida);
 static DEFINE_SPINLOCK(mmc_host_lock);
 
@@ -428,8 +426,13 @@
 	mmc_add_host_debugfs(host);
 #endif
 
+#ifdef CONFIG_BLOCK
+	mmc_latency_hist_sysfs_init(host);
+#endif
+
 	mmc_start_host(host);
-	mmc_register_pm_notifier(host);
+	if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY))
+		mmc_register_pm_notifier(host);
 
 	return 0;
 }
@@ -446,13 +449,18 @@
  */
 void mmc_remove_host(struct mmc_host *host)
 {
-	mmc_unregister_pm_notifier(host);
+	if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY))
+		mmc_unregister_pm_notifier(host);
 	mmc_stop_host(host);
 
 #ifdef CONFIG_DEBUG_FS
 	mmc_remove_host_debugfs(host);
 #endif
 
+#ifdef CONFIG_BLOCK
+	mmc_latency_hist_sysfs_exit(host);
+#endif
+
 	device_del(&host->class_dev);
 
 	led_trigger_unregister_simple(host->led);
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
index 992bf53..bf38533 100644
--- a/drivers/mmc/core/host.h
+++ b/drivers/mmc/core/host.h
@@ -12,6 +12,8 @@
 #define _MMC_CORE_HOST_H
 #include <linux/mmc/host.h>
 
+#define cls_dev_to_mmc_host(d)	container_of(d, struct mmc_host, class_dev)
+
 int mmc_register_host_class(void);
 void mmc_unregister_host_class(void);
 
@@ -21,5 +23,8 @@
 void mmc_retune_release(struct mmc_host *host);
 int mmc_retune(struct mmc_host *host);
 
+void mmc_latency_hist_sysfs_init(struct mmc_host *host);
+void mmc_latency_hist_sysfs_exit(struct mmc_host *host);
+
 #endif
 
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 97e5130..675c6f2 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -617,6 +617,12 @@
 		card->ext_csd.ffu_capable =
 			(ext_csd[EXT_CSD_SUPPORTED_MODE] & 0x1) &&
 			!(ext_csd[EXT_CSD_FW_CONFIG] & 0x1);
+
+		card->ext_csd.pre_eol_info = ext_csd[EXT_CSD_PRE_EOL_INFO];
+		card->ext_csd.device_life_time_est_typ_a =
+			ext_csd[EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_A];
+		card->ext_csd.device_life_time_est_typ_b =
+			ext_csd[EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_B];
 	}
 out:
 	return err;
@@ -746,6 +752,11 @@
 MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name);
 MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid);
 MMC_DEV_ATTR(prv, "0x%x\n", card->cid.prv);
+MMC_DEV_ATTR(rev, "0x%x\n", card->ext_csd.rev);
+MMC_DEV_ATTR(pre_eol_info, "%02x\n", card->ext_csd.pre_eol_info);
+MMC_DEV_ATTR(life_time, "0x%02x 0x%02x\n",
+	card->ext_csd.device_life_time_est_typ_a,
+	card->ext_csd.device_life_time_est_typ_b);
 MMC_DEV_ATTR(serial, "0x%08x\n", card->cid.serial);
 MMC_DEV_ATTR(enhanced_area_offset, "%llu\n",
 		card->ext_csd.enhanced_area_offset);
@@ -799,6 +810,9 @@
 	&dev_attr_name.attr,
 	&dev_attr_oemid.attr,
 	&dev_attr_prv.attr,
+	&dev_attr_rev.attr,
+	&dev_attr_pre_eol_info.attr,
+	&dev_attr_life_time.attr,
 	&dev_attr_serial.attr,
 	&dev_attr_enhanced_area_offset.attr,
 	&dev_attr_enhanced_area_size.attr,
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index f09148a..ad70934 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -847,6 +847,9 @@
 	bool reinit)
 {
 	int err;
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+	int retries;
+#endif
 
 	if (!reinit) {
 		/*
@@ -873,7 +876,26 @@
 		/*
 		 * Fetch switch information from card.
 		 */
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+		for (retries = 1; retries <= 3; retries++) {
+			err = mmc_read_switch(card);
+			if (!err) {
+				if (retries > 1) {
+					printk(KERN_WARNING
+					       "%s: recovered\n",
+					       mmc_hostname(host));
+				}
+				break;
+			} else {
+				printk(KERN_WARNING
+				       "%s: read switch failed (attempt %d)\n",
+				       mmc_hostname(host), retries);
+			}
+		}
+#else
 		err = mmc_read_switch(card);
+#endif
+
 		if (err)
 			return err;
 	}
@@ -1071,7 +1093,10 @@
  */
 static void mmc_sd_detect(struct mmc_host *host)
 {
-	int err;
+	int err = 0;
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+	int retries = 5;
+#endif
 
 	BUG_ON(!host);
 	BUG_ON(!host->card);
@@ -1081,7 +1106,23 @@
 	/*
 	 * Just check if our card has been removed.
 	 */
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+	while(retries) {
+		err = mmc_send_status(host->card, NULL);
+		if (err) {
+			retries--;
+			udelay(5);
+			continue;
+		}
+		break;
+	}
+	if (!retries) {
+		printk(KERN_ERR "%s(%s): Unable to re-detect card (%d)\n",
+		       __func__, mmc_hostname(host), err);
+	}
+#else
 	err = _mmc_detect_card_removed(host);
+#endif
 
 	mmc_put_card(host->card);
 
@@ -1143,6 +1184,9 @@
 static int _mmc_sd_resume(struct mmc_host *host)
 {
 	int err = 0;
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+	int retries;
+#endif
 
 	BUG_ON(!host);
 	BUG_ON(!host->card);
@@ -1153,7 +1197,23 @@
 		goto out;
 
 	mmc_power_up(host, host->card->ocr);
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+	retries = 5;
+	while (retries) {
+		err = mmc_sd_init_card(host, host->card->ocr, host->card);
+
+		if (err) {
+			printk(KERN_ERR "%s: Re-init card rc = %d (retries = %d)\n",
+			       mmc_hostname(host), err, retries);
+			mdelay(5);
+			retries--;
+			continue;
+		}
+		break;
+	}
+#else
 	err = mmc_sd_init_card(host, host->card->ocr, host->card);
+#endif
 	mmc_card_clr_suspended(host->card);
 
 out:
@@ -1228,6 +1288,9 @@
 {
 	int err;
 	u32 ocr, rocr;
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+	int retries;
+#endif
 
 	BUG_ON(!host);
 	WARN_ON(!host->claimed);
@@ -1264,9 +1327,27 @@
 	/*
 	 * Detect and init the card.
 	 */
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+	retries = 5;
+	while (retries) {
+		err = mmc_sd_init_card(host, rocr, NULL);
+		if (err) {
+			retries--;
+			continue;
+		}
+		break;
+	}
+
+	if (!retries) {
+		printk(KERN_ERR "%s: mmc_sd_init_card() failure (err = %d)\n",
+		       mmc_hostname(host), err);
+		goto err;
+	}
+#else
 	err = mmc_sd_init_card(host, rocr, NULL);
 	if (err)
 		goto err;
+#endif
 
 	mmc_release_host(host);
 	err = mmc_add_card(host->card);
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index bd44ba8..b5ec3c8 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/pm_runtime.h>
 
 #include <linux/mmc/host.h>
@@ -21,6 +22,7 @@
 
 #include "core.h"
 #include "bus.h"
+#include "host.h"
 #include "sd.h"
 #include "sdio_bus.h"
 #include "mmc_ops.h"
@@ -28,6 +30,10 @@
 #include "sdio_ops.h"
 #include "sdio_cis.h"
 
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+#include <linux/mmc/sdio_ids.h>
+#endif
+
 static int sdio_read_fbr(struct sdio_func *func)
 {
 	int ret;
@@ -697,19 +703,35 @@
 		goto finish;
 	}
 
-	/*
-	 * Read the common registers.
-	 */
-	err = sdio_read_cccr(card, ocr);
-	if (err)
-		goto remove;
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+	if (host->embedded_sdio_data.cccr)
+		memcpy(&card->cccr, host->embedded_sdio_data.cccr, sizeof(struct sdio_cccr));
+	else {
+#endif
+		/*
+		 * Read the common registers.
+		 */
+		err = sdio_read_cccr(card,  ocr);
+		if (err)
+			goto remove;
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+	}
+#endif
 
-	/*
-	 * Read the common CIS tuples.
-	 */
-	err = sdio_read_common_cis(card);
-	if (err)
-		goto remove;
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+	if (host->embedded_sdio_data.cis)
+		memcpy(&card->cis, host->embedded_sdio_data.cis, sizeof(struct sdio_cis));
+	else {
+#endif
+		/*
+		 * Read the common CIS tuples.
+		 */
+		err = sdio_read_common_cis(card);
+		if (err)
+			goto remove;
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+	}
+#endif
 
 	if (oldcard) {
 		int same = (card->cis.vendor == oldcard->cis.vendor &&
@@ -1118,14 +1140,36 @@
 	funcs = (ocr & 0x70000000) >> 28;
 	card->sdio_funcs = 0;
 
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+	if (host->embedded_sdio_data.funcs)
+		card->sdio_funcs = funcs = host->embedded_sdio_data.num_funcs;
+#endif
+
 	/*
 	 * Initialize (but don't add) all present functions.
 	 */
 	for (i = 0; i < funcs; i++, card->sdio_funcs++) {
-		err = sdio_init_func(host->card, i + 1);
-		if (err)
-			goto remove;
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+		if (host->embedded_sdio_data.funcs) {
+			struct sdio_func *tmp;
 
+			tmp = sdio_alloc_func(host->card);
+			if (IS_ERR(tmp))
+				goto remove;
+			tmp->num = (i + 1);
+			card->sdio_func[i] = tmp;
+			tmp->class = host->embedded_sdio_data.funcs[i].f_class;
+			tmp->max_blksize = host->embedded_sdio_data.funcs[i].f_maxblksize;
+			tmp->vendor = card->cis.vendor;
+			tmp->device = card->cis.device;
+		} else {
+#endif
+			err = sdio_init_func(host->card, i + 1);
+			if (err)
+				goto remove;
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+		}
+#endif
 		/*
 		 * Enable Runtime PM for this func (if supported)
 		 */
@@ -1173,3 +1217,42 @@
 	return err;
 }
 
+int sdio_reset_comm(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	u32 ocr;
+	u32 rocr;
+	int err;
+
+	printk("%s():\n", __func__);
+	mmc_claim_host(host);
+
+	mmc_retune_disable(host);
+
+	mmc_go_idle(host);
+
+	mmc_set_clock(host, host->f_min);
+
+	err = mmc_send_io_op_cond(host, 0, &ocr);
+	if (err)
+		goto err;
+
+	rocr = mmc_select_voltage(host, ocr);
+	if (!rocr) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	err = mmc_sdio_init_card(host, rocr, card, 0);
+	if (err)
+		goto err;
+
+	mmc_release_host(host);
+	return 0;
+err:
+	printk("%s: Error resetting SDIO communications (%d)\n",
+	       mmc_hostname(host), err);
+	mmc_release_host(host);
+	return err;
+}
+EXPORT_SYMBOL(sdio_reset_comm);
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index d56a3b6..528524a2 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -28,6 +28,10 @@
 #include "sdio_cis.h"
 #include "sdio_bus.h"
 
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+#include <linux/mmc/host.h>
+#endif
+
 #define to_sdio_driver(d)	container_of(d, struct sdio_driver, drv)
 
 /* show configuration fields */
@@ -263,7 +267,14 @@
 {
 	struct sdio_func *func = dev_to_sdio_func(dev);
 
-	sdio_free_func_cis(func);
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+	/*
+	 * If this device is embedded then we never allocated
+	 * cis tables for this func
+	 */
+	if (!func->card->host->embedded_sdio_data.funcs)
+#endif
+		sdio_free_func_cis(func);
 
 	kfree(func->info);
 	kfree(func->tmpbuf);
diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c
index 406e5f0..3734cba 100644
--- a/drivers/mmc/core/sdio_io.c
+++ b/drivers/mmc/core/sdio_io.c
@@ -390,6 +390,39 @@
 EXPORT_SYMBOL_GPL(sdio_readb);
 
 /**
+ *	sdio_readb_ext - read a single byte from a SDIO function
+ *	@func: SDIO function to access
+ *	@addr: address to read
+ *	@err_ret: optional status value from transfer
+ *	@in: value to add to argument
+ *
+ *	Reads a single byte from the address space of a given SDIO
+ *	function. If there is a problem reading the address, 0xff
+ *	is returned and @err_ret will contain the error code.
+ */
+unsigned char sdio_readb_ext(struct sdio_func *func, unsigned int addr,
+	int *err_ret, unsigned in)
+{
+	int ret;
+	unsigned char val;
+
+	BUG_ON(!func);
+
+	if (err_ret)
+		*err_ret = 0;
+
+	ret = mmc_io_rw_direct(func->card, 0, func->num, addr, (u8)in, &val);
+	if (ret) {
+		if (err_ret)
+			*err_ret = ret;
+		return 0xFF;
+	}
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(sdio_readb_ext);
+
+/**
  *	sdio_writeb - write a single byte to a SDIO function
  *	@func: SDIO function to access
  *	@b: byte to write
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index b254090..50ee1ba 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -1,3 +1,10 @@
+config MTD_NAND_IDS
+	tristate "Include chip ids for known NAND devices."
+	depends on MTD
+	help
+	  Useful for NAND drivers that do not use the NAND subsystem but
+	  still like to take advantage of the known chip information.
+
 config MTD_NAND_ECC
 	tristate
 
@@ -109,9 +116,6 @@
 config MTD_NAND_OMAP_BCH_BUILD
 	def_tristate MTD_NAND_OMAP2 && MTD_NAND_OMAP_BCH
 
-config MTD_NAND_IDS
-	tristate
-
 config MTD_NAND_RICOH
 	tristate "Ricoh xD card reader"
 	default n
diff --git a/drivers/net/ppp/Kconfig b/drivers/net/ppp/Kconfig
index 1373c6d..282aec4 100644
--- a/drivers/net/ppp/Kconfig
+++ b/drivers/net/ppp/Kconfig
@@ -149,6 +149,23 @@
 	  tunnels. L2TP is replacing PPTP for VPN uses.
 if TTY
 
+config PPPOLAC
+	tristate "PPP on L2TP Access Concentrator"
+	depends on PPP && INET
+	help
+	  L2TP (RFC 2661) is a tunneling protocol widely used in virtual private
+	  networks. This driver handles L2TP data packets between a UDP socket
+	  and a PPP channel, but only permits one session per socket. Thus it is
+	  fairly simple and suited for clients.
+
+config PPPOPNS
+	tristate "PPP on PPTP Network Server"
+	depends on PPP && INET
+	help
+	  PPTP (RFC 2637) is a tunneling protocol widely used in virtual private
+	  networks. This driver handles PPTP data packets between a RAW socket
+	  and a PPP channel. It is fairly simple and easy to use.
+
 config PPP_ASYNC
 	tristate "PPP support for async serial ports"
 	depends on PPP
diff --git a/drivers/net/ppp/Makefile b/drivers/net/ppp/Makefile
index a6b6297..d283d03c 100644
--- a/drivers/net/ppp/Makefile
+++ b/drivers/net/ppp/Makefile
@@ -11,3 +11,5 @@
 obj-$(CONFIG_PPPOE) += pppox.o pppoe.o
 obj-$(CONFIG_PPPOL2TP) += pppox.o
 obj-$(CONFIG_PPTP) += pppox.o pptp.o
+obj-$(CONFIG_PPPOLAC) += pppox.o pppolac.o
+obj-$(CONFIG_PPPOPNS) += pppox.o pppopns.o
diff --git a/drivers/net/ppp/pppolac.c b/drivers/net/ppp/pppolac.c
new file mode 100644
index 0000000..3a45cf80
--- /dev/null
+++ b/drivers/net/ppp/pppolac.c
@@ -0,0 +1,450 @@
+/* drivers/net/pppolac.c
+ *
+ * Driver for PPP on L2TP Access Concentrator / PPPoLAC Socket (RFC 2661)
+ *
+ * Copyright (C) 2009 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* This driver handles L2TP data packets between a UDP socket and a PPP channel.
+ * The socket must keep connected, and only one session per socket is permitted.
+ * Sequencing of outgoing packets is controlled by LNS. Incoming packets with
+ * sequences are reordered within a sliding window of one second. Currently
+ * reordering only happens when a packet is received. It is done for simplicity
+ * since no additional locks or threads are required. This driver only works on
+ * IPv4 due to the lack of UDP encapsulation support in IPv6. */
+
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <linux/netdevice.h>
+#include <linux/net.h>
+#include <linux/udp.h>
+#include <linux/ppp_defs.h>
+#include <linux/if_ppp.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_channel.h>
+#include <net/tcp_states.h>
+#include <asm/uaccess.h>
+
+#define L2TP_CONTROL_BIT	0x80
+#define L2TP_LENGTH_BIT		0x40
+#define L2TP_SEQUENCE_BIT	0x08
+#define L2TP_OFFSET_BIT		0x02
+#define L2TP_VERSION		0x02
+#define L2TP_VERSION_MASK	0x0F
+
+#define PPP_ADDR	0xFF
+#define PPP_CTRL	0x03
+
+union unaligned {
+	__u32 u32;
+} __attribute__((packed));
+
+static inline union unaligned *unaligned(void *ptr)
+{
+	return (union unaligned *)ptr;
+}
+
+struct meta {
+	__u32 sequence;
+	__u32 timestamp;
+};
+
+static inline struct meta *skb_meta(struct sk_buff *skb)
+{
+	return (struct meta *)skb->cb;
+}
+
+/******************************************************************************/
+
+static int pppolac_recv_core(struct sock *sk_udp, struct sk_buff *skb)
+{
+	struct sock *sk = (struct sock *)sk_udp->sk_user_data;
+	struct pppolac_opt *opt = &pppox_sk(sk)->proto.lac;
+	struct meta *meta = skb_meta(skb);
+	__u32 now = jiffies;
+	__u8 bits;
+	__u8 *ptr;
+
+	/* Drop the packet if L2TP header is missing. */
+	if (skb->len < sizeof(struct udphdr) + 6)
+		goto drop;
+
+	/* Put it back if it is a control packet. */
+	if (skb->data[sizeof(struct udphdr)] & L2TP_CONTROL_BIT)
+		return opt->backlog_rcv(sk_udp, skb);
+
+	/* Skip UDP header. */
+	skb_pull(skb, sizeof(struct udphdr));
+
+	/* Check the version. */
+	if ((skb->data[1] & L2TP_VERSION_MASK) != L2TP_VERSION)
+		goto drop;
+	bits = skb->data[0];
+	ptr = &skb->data[2];
+
+	/* Check the length if it is present. */
+	if (bits & L2TP_LENGTH_BIT) {
+		if ((ptr[0] << 8 | ptr[1]) != skb->len)
+			goto drop;
+		ptr += 2;
+	}
+
+	/* Skip all fields including optional ones. */
+	if (!skb_pull(skb, 6 + (bits & L2TP_SEQUENCE_BIT ? 4 : 0) +
+			(bits & L2TP_LENGTH_BIT ? 2 : 0) +
+			(bits & L2TP_OFFSET_BIT ? 2 : 0)))
+		goto drop;
+
+	/* Skip the offset padding if it is present. */
+	if (bits & L2TP_OFFSET_BIT &&
+			!skb_pull(skb, skb->data[-2] << 8 | skb->data[-1]))
+		goto drop;
+
+	/* Check the tunnel and the session. */
+	if (unaligned(ptr)->u32 != opt->local)
+		goto drop;
+
+	/* Check the sequence if it is present. */
+	if (bits & L2TP_SEQUENCE_BIT) {
+		meta->sequence = ptr[4] << 8 | ptr[5];
+		if ((__s16)(meta->sequence - opt->recv_sequence) < 0)
+			goto drop;
+	}
+
+	/* Skip PPP address and control if they are present. */
+	if (skb->len >= 2 && skb->data[0] == PPP_ADDR &&
+			skb->data[1] == PPP_CTRL)
+		skb_pull(skb, 2);
+
+	/* Fix PPP protocol if it is compressed. */
+	if (skb->len >= 1 && skb->data[0] & 1)
+		skb_push(skb, 1)[0] = 0;
+
+	/* Drop the packet if PPP protocol is missing. */
+	if (skb->len < 2)
+		goto drop;
+
+	/* Perform reordering if sequencing is enabled. */
+	atomic_set(&opt->sequencing, bits & L2TP_SEQUENCE_BIT);
+	if (bits & L2TP_SEQUENCE_BIT) {
+		struct sk_buff *skb1;
+
+		/* Insert the packet into receive queue in order. */
+		skb_set_owner_r(skb, sk);
+		skb_queue_walk(&sk->sk_receive_queue, skb1) {
+			struct meta *meta1 = skb_meta(skb1);
+			__s16 order = meta->sequence - meta1->sequence;
+			if (order == 0)
+				goto drop;
+			if (order < 0) {
+				meta->timestamp = meta1->timestamp;
+				skb_insert(skb1, skb, &sk->sk_receive_queue);
+				skb = NULL;
+				break;
+			}
+		}
+		if (skb) {
+			meta->timestamp = now;
+			skb_queue_tail(&sk->sk_receive_queue, skb);
+		}
+
+		/* Remove packets from receive queue as long as
+		 * 1. the receive buffer is full,
+		 * 2. they are queued longer than one second, or
+		 * 3. there are no missing packets before them. */
+		skb_queue_walk_safe(&sk->sk_receive_queue, skb, skb1) {
+			meta = skb_meta(skb);
+			if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
+					now - meta->timestamp < HZ &&
+					meta->sequence != opt->recv_sequence)
+				break;
+			skb_unlink(skb, &sk->sk_receive_queue);
+			opt->recv_sequence = (__u16)(meta->sequence + 1);
+			skb_orphan(skb);
+			ppp_input(&pppox_sk(sk)->chan, skb);
+		}
+		return NET_RX_SUCCESS;
+	}
+
+	/* Flush receive queue if sequencing is disabled. */
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_orphan(skb);
+	ppp_input(&pppox_sk(sk)->chan, skb);
+	return NET_RX_SUCCESS;
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+static int pppolac_recv(struct sock *sk_udp, struct sk_buff *skb)
+{
+	sock_hold(sk_udp);
+	sk_receive_skb(sk_udp, skb, 0);
+	return 0;
+}
+
+static struct sk_buff_head delivery_queue;
+
+static void pppolac_xmit_core(struct work_struct *delivery_work)
+{
+	mm_segment_t old_fs = get_fs();
+	struct sk_buff *skb;
+
+	set_fs(KERNEL_DS);
+	while ((skb = skb_dequeue(&delivery_queue))) {
+		struct sock *sk_udp = skb->sk;
+		struct kvec iov = {.iov_base = skb->data, .iov_len = skb->len};
+		struct msghdr msg = {
+			.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT,
+		};
+
+		iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1,
+			      skb->len);
+		sk_udp->sk_prot->sendmsg(sk_udp, &msg, skb->len);
+		kfree_skb(skb);
+	}
+	set_fs(old_fs);
+}
+
+static DECLARE_WORK(delivery_work, pppolac_xmit_core);
+
+static int pppolac_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+{
+	struct sock *sk_udp = (struct sock *)chan->private;
+	struct pppolac_opt *opt = &pppox_sk(sk_udp->sk_user_data)->proto.lac;
+
+	/* Install PPP address and control. */
+	skb_push(skb, 2);
+	skb->data[0] = PPP_ADDR;
+	skb->data[1] = PPP_CTRL;
+
+	/* Install L2TP header. */
+	if (atomic_read(&opt->sequencing)) {
+		skb_push(skb, 10);
+		skb->data[0] = L2TP_SEQUENCE_BIT;
+		skb->data[6] = opt->xmit_sequence >> 8;
+		skb->data[7] = opt->xmit_sequence;
+		skb->data[8] = 0;
+		skb->data[9] = 0;
+		opt->xmit_sequence++;
+	} else {
+		skb_push(skb, 6);
+		skb->data[0] = 0;
+	}
+	skb->data[1] = L2TP_VERSION;
+	unaligned(&skb->data[2])->u32 = opt->remote;
+
+	/* Now send the packet via the delivery queue. */
+	skb_set_owner_w(skb, sk_udp);
+	skb_queue_tail(&delivery_queue, skb);
+	schedule_work(&delivery_work);
+	return 1;
+}
+
+/******************************************************************************/
+
+static struct ppp_channel_ops pppolac_channel_ops = {
+	.start_xmit = pppolac_xmit,
+};
+
+static int pppolac_connect(struct socket *sock, struct sockaddr *useraddr,
+	int addrlen, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct pppox_sock *po = pppox_sk(sk);
+	struct sockaddr_pppolac *addr = (struct sockaddr_pppolac *)useraddr;
+	struct socket *sock_udp = NULL;
+	struct sock *sk_udp;
+	int error;
+
+	if (addrlen != sizeof(struct sockaddr_pppolac) ||
+			!addr->local.tunnel || !addr->local.session ||
+			!addr->remote.tunnel || !addr->remote.session) {
+		return -EINVAL;
+	}
+
+	lock_sock(sk);
+	error = -EALREADY;
+	if (sk->sk_state != PPPOX_NONE)
+		goto out;
+
+	sock_udp = sockfd_lookup(addr->udp_socket, &error);
+	if (!sock_udp)
+		goto out;
+	sk_udp = sock_udp->sk;
+	lock_sock(sk_udp);
+
+	/* Remove this check when IPv6 supports UDP encapsulation. */
+	error = -EAFNOSUPPORT;
+	if (sk_udp->sk_family != AF_INET)
+		goto out;
+	error = -EPROTONOSUPPORT;
+	if (sk_udp->sk_protocol != IPPROTO_UDP)
+		goto out;
+	error = -EDESTADDRREQ;
+	if (sk_udp->sk_state != TCP_ESTABLISHED)
+		goto out;
+	error = -EBUSY;
+	if (udp_sk(sk_udp)->encap_type || sk_udp->sk_user_data)
+		goto out;
+	if (!sk_udp->sk_bound_dev_if) {
+		struct dst_entry *dst = sk_dst_get(sk_udp);
+		error = -ENODEV;
+		if (!dst)
+			goto out;
+		sk_udp->sk_bound_dev_if = dst->dev->ifindex;
+		dst_release(dst);
+	}
+
+	po->chan.hdrlen = 12;
+	po->chan.private = sk_udp;
+	po->chan.ops = &pppolac_channel_ops;
+	po->chan.mtu = PPP_MRU - 80;
+	po->proto.lac.local = unaligned(&addr->local)->u32;
+	po->proto.lac.remote = unaligned(&addr->remote)->u32;
+	atomic_set(&po->proto.lac.sequencing, 1);
+	po->proto.lac.backlog_rcv = sk_udp->sk_backlog_rcv;
+
+	error = ppp_register_channel(&po->chan);
+	if (error)
+		goto out;
+
+	sk->sk_state = PPPOX_CONNECTED;
+	udp_sk(sk_udp)->encap_type = UDP_ENCAP_L2TPINUDP;
+	udp_sk(sk_udp)->encap_rcv = pppolac_recv;
+	sk_udp->sk_backlog_rcv = pppolac_recv_core;
+	sk_udp->sk_user_data = sk;
+out:
+	if (sock_udp) {
+		release_sock(sk_udp);
+		if (error)
+			sockfd_put(sock_udp);
+	}
+	release_sock(sk);
+	return error;
+}
+
+static int pppolac_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (!sk)
+		return 0;
+
+	lock_sock(sk);
+	if (sock_flag(sk, SOCK_DEAD)) {
+		release_sock(sk);
+		return -EBADF;
+	}
+
+	if (sk->sk_state != PPPOX_NONE) {
+		struct sock *sk_udp = (struct sock *)pppox_sk(sk)->chan.private;
+		lock_sock(sk_udp);
+		skb_queue_purge(&sk->sk_receive_queue);
+		pppox_unbind_sock(sk);
+		udp_sk(sk_udp)->encap_type = 0;
+		udp_sk(sk_udp)->encap_rcv = NULL;
+		sk_udp->sk_backlog_rcv = pppox_sk(sk)->proto.lac.backlog_rcv;
+		sk_udp->sk_user_data = NULL;
+		release_sock(sk_udp);
+		sockfd_put(sk_udp->sk_socket);
+	}
+
+	sock_orphan(sk);
+	sock->sk = NULL;
+	release_sock(sk);
+	sock_put(sk);
+	return 0;
+}
+
+/******************************************************************************/
+
+static struct proto pppolac_proto = {
+	.name = "PPPOLAC",
+	.owner = THIS_MODULE,
+	.obj_size = sizeof(struct pppox_sock),
+};
+
+static struct proto_ops pppolac_proto_ops = {
+	.family = PF_PPPOX,
+	.owner = THIS_MODULE,
+	.release = pppolac_release,
+	.bind = sock_no_bind,
+	.connect = pppolac_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = sock_no_accept,
+	.getname = sock_no_getname,
+	.poll = sock_no_poll,
+	.ioctl = pppox_ioctl,
+	.listen = sock_no_listen,
+	.shutdown = sock_no_shutdown,
+	.setsockopt = sock_no_setsockopt,
+	.getsockopt = sock_no_getsockopt,
+	.sendmsg = sock_no_sendmsg,
+	.recvmsg = sock_no_recvmsg,
+	.mmap = sock_no_mmap,
+};
+
+static int pppolac_create(struct net *net, struct socket *sock, int kern)
+{
+	struct sock *sk;
+
+	sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppolac_proto, kern);
+	if (!sk)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+	sock->state = SS_UNCONNECTED;
+	sock->ops = &pppolac_proto_ops;
+	sk->sk_protocol = PX_PROTO_OLAC;
+	sk->sk_state = PPPOX_NONE;
+	return 0;
+}
+
+/******************************************************************************/
+
+static struct pppox_proto pppolac_pppox_proto = {
+	.create = pppolac_create,
+	.owner = THIS_MODULE,
+};
+
+static int __init pppolac_init(void)
+{
+	int error;
+
+	error = proto_register(&pppolac_proto, 0);
+	if (error)
+		return error;
+
+	error = register_pppox_proto(PX_PROTO_OLAC, &pppolac_pppox_proto);
+	if (error)
+		proto_unregister(&pppolac_proto);
+	else
+		skb_queue_head_init(&delivery_queue);
+	return error;
+}
+
+static void __exit pppolac_exit(void)
+{
+	unregister_pppox_proto(PX_PROTO_OLAC);
+	proto_unregister(&pppolac_proto);
+}
+
+module_init(pppolac_init);
+module_exit(pppolac_exit);
+
+MODULE_DESCRIPTION("PPP on L2TP Access Concentrator (PPPoLAC)");
+MODULE_AUTHOR("Chia-chi Yeh <chiachi@android.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ppp/pppopns.c b/drivers/net/ppp/pppopns.c
new file mode 100644
index 0000000..cdb4fa1
--- /dev/null
+++ b/drivers/net/ppp/pppopns.c
@@ -0,0 +1,429 @@
+/* drivers/net/pppopns.c
+ *
+ * Driver for PPP on PPTP Network Server / PPPoPNS Socket (RFC 2637)
+ *
+ * Copyright (C) 2009 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* This driver handles PPTP data packets between a RAW socket and a PPP channel.
+ * The socket is created in the kernel space and connected to the same address
+ * of the control socket. Outgoing packets are always sent with sequences but
+ * without acknowledgements. Incoming packets with sequences are reordered
+ * within a sliding window of one second. Currently reordering only happens when
+ * a packet is received. It is done for simplicity since no additional locks or
+ * threads are required. This driver should work on both IPv4 and IPv6. */
+
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <linux/netdevice.h>
+#include <linux/net.h>
+#include <linux/ppp_defs.h>
+#include <linux/if.h>
+#include <linux/if_ppp.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_channel.h>
+#include <asm/uaccess.h>
+
+#define GRE_HEADER_SIZE		8
+
+#define PPTP_GRE_BITS		htons(0x2001)
+#define PPTP_GRE_BITS_MASK	htons(0xEF7F)
+#define PPTP_GRE_SEQ_BIT	htons(0x1000)
+#define PPTP_GRE_ACK_BIT	htons(0x0080)
+#define PPTP_GRE_TYPE		htons(0x880B)
+
+#define PPP_ADDR	0xFF
+#define PPP_CTRL	0x03
+
+struct header {
+	__u16	bits;
+	__u16	type;
+	__u16	length;
+	__u16	call;
+	__u32	sequence;
+} __attribute__((packed));
+
+struct meta {
+	__u32 sequence;
+	__u32 timestamp;
+};
+
+static inline struct meta *skb_meta(struct sk_buff *skb)
+{
+	return (struct meta *)skb->cb;
+}
+
+/******************************************************************************/
+
+static int pppopns_recv_core(struct sock *sk_raw, struct sk_buff *skb)
+{
+	struct sock *sk = (struct sock *)sk_raw->sk_user_data;
+	struct pppopns_opt *opt = &pppox_sk(sk)->proto.pns;
+	struct meta *meta = skb_meta(skb);
+	__u32 now = jiffies;
+	struct header *hdr;
+
+	/* Skip transport header */
+	skb_pull(skb, skb_transport_header(skb) - skb->data);
+
+	/* Drop the packet if GRE header is missing. */
+	if (skb->len < GRE_HEADER_SIZE)
+		goto drop;
+	hdr = (struct header *)skb->data;
+
+	/* Check the header. */
+	if (hdr->type != PPTP_GRE_TYPE || hdr->call != opt->local ||
+			(hdr->bits & PPTP_GRE_BITS_MASK) != PPTP_GRE_BITS)
+		goto drop;
+
+	/* Skip all fields including optional ones. */
+	if (!skb_pull(skb, GRE_HEADER_SIZE +
+			(hdr->bits & PPTP_GRE_SEQ_BIT ? 4 : 0) +
+			(hdr->bits & PPTP_GRE_ACK_BIT ? 4 : 0)))
+		goto drop;
+
+	/* Check the length. */
+	if (skb->len != ntohs(hdr->length))
+		goto drop;
+
+	/* Check the sequence if it is present. */
+	if (hdr->bits & PPTP_GRE_SEQ_BIT) {
+		meta->sequence = ntohl(hdr->sequence);
+		if ((__s32)(meta->sequence - opt->recv_sequence) < 0)
+			goto drop;
+	}
+
+	/* Skip PPP address and control if they are present. */
+	if (skb->len >= 2 && skb->data[0] == PPP_ADDR &&
+			skb->data[1] == PPP_CTRL)
+		skb_pull(skb, 2);
+
+	/* Fix PPP protocol if it is compressed. */
+	if (skb->len >= 1 && skb->data[0] & 1)
+		skb_push(skb, 1)[0] = 0;
+
+	/* Drop the packet if PPP protocol is missing. */
+	if (skb->len < 2)
+		goto drop;
+
+	/* Perform reordering if sequencing is enabled. */
+	if (hdr->bits & PPTP_GRE_SEQ_BIT) {
+		struct sk_buff *skb1;
+
+		/* Insert the packet into receive queue in order. */
+		skb_set_owner_r(skb, sk);
+		skb_queue_walk(&sk->sk_receive_queue, skb1) {
+			struct meta *meta1 = skb_meta(skb1);
+			__s32 order = meta->sequence - meta1->sequence;
+			if (order == 0)
+				goto drop;
+			if (order < 0) {
+				meta->timestamp = meta1->timestamp;
+				skb_insert(skb1, skb, &sk->sk_receive_queue);
+				skb = NULL;
+				break;
+			}
+		}
+		if (skb) {
+			meta->timestamp = now;
+			skb_queue_tail(&sk->sk_receive_queue, skb);
+		}
+
+		/* Remove packets from receive queue as long as
+		 * 1. the receive buffer is full,
+		 * 2. they are queued longer than one second, or
+		 * 3. there are no missing packets before them. */
+		skb_queue_walk_safe(&sk->sk_receive_queue, skb, skb1) {
+			meta = skb_meta(skb);
+			if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
+					now - meta->timestamp < HZ &&
+					meta->sequence != opt->recv_sequence)
+				break;
+			skb_unlink(skb, &sk->sk_receive_queue);
+			opt->recv_sequence = meta->sequence + 1;
+			skb_orphan(skb);
+			ppp_input(&pppox_sk(sk)->chan, skb);
+		}
+		return NET_RX_SUCCESS;
+	}
+
+	/* Flush receive queue if sequencing is disabled. */
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_orphan(skb);
+	ppp_input(&pppox_sk(sk)->chan, skb);
+	return NET_RX_SUCCESS;
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+static void pppopns_recv(struct sock *sk_raw)
+{
+	struct sk_buff *skb;
+	while ((skb = skb_dequeue(&sk_raw->sk_receive_queue))) {
+		sock_hold(sk_raw);
+		sk_receive_skb(sk_raw, skb, 0);
+	}
+}
+
+static struct sk_buff_head delivery_queue;
+
+static void pppopns_xmit_core(struct work_struct *delivery_work)
+{
+	mm_segment_t old_fs = get_fs();
+	struct sk_buff *skb;
+
+	set_fs(KERNEL_DS);
+	while ((skb = skb_dequeue(&delivery_queue))) {
+		struct sock *sk_raw = skb->sk;
+		struct kvec iov = {.iov_base = skb->data, .iov_len = skb->len};
+		struct msghdr msg = {
+			.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT,
+		};
+
+		iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1,
+			      skb->len);
+		sk_raw->sk_prot->sendmsg(sk_raw, &msg, skb->len);
+		kfree_skb(skb);
+	}
+	set_fs(old_fs);
+}
+
+static DECLARE_WORK(delivery_work, pppopns_xmit_core);
+
+static int pppopns_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+{
+	struct sock *sk_raw = (struct sock *)chan->private;
+	struct pppopns_opt *opt = &pppox_sk(sk_raw->sk_user_data)->proto.pns;
+	struct header *hdr;
+	__u16 length;
+
+	/* Install PPP address and control. */
+	skb_push(skb, 2);
+	skb->data[0] = PPP_ADDR;
+	skb->data[1] = PPP_CTRL;
+	length = skb->len;
+
+	/* Install PPTP GRE header. */
+	hdr = (struct header *)skb_push(skb, 12);
+	hdr->bits = PPTP_GRE_BITS | PPTP_GRE_SEQ_BIT;
+	hdr->type = PPTP_GRE_TYPE;
+	hdr->length = htons(length);
+	hdr->call = opt->remote;
+	hdr->sequence = htonl(opt->xmit_sequence);
+	opt->xmit_sequence++;
+
+	/* Now send the packet via the delivery queue. */
+	skb_set_owner_w(skb, sk_raw);
+	skb_queue_tail(&delivery_queue, skb);
+	schedule_work(&delivery_work);
+	return 1;
+}
+
+/******************************************************************************/
+
+static struct ppp_channel_ops pppopns_channel_ops = {
+	.start_xmit = pppopns_xmit,
+};
+
+static int pppopns_connect(struct socket *sock, struct sockaddr *useraddr,
+	int addrlen, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct pppox_sock *po = pppox_sk(sk);
+	struct sockaddr_pppopns *addr = (struct sockaddr_pppopns *)useraddr;
+	struct sockaddr_storage ss;
+	struct socket *sock_tcp = NULL;
+	struct socket *sock_raw = NULL;
+	struct sock *sk_tcp;
+	struct sock *sk_raw;
+	int error;
+
+	if (addrlen != sizeof(struct sockaddr_pppopns))
+		return -EINVAL;
+
+	lock_sock(sk);
+	error = -EALREADY;
+	if (sk->sk_state != PPPOX_NONE)
+		goto out;
+
+	sock_tcp = sockfd_lookup(addr->tcp_socket, &error);
+	if (!sock_tcp)
+		goto out;
+	sk_tcp = sock_tcp->sk;
+	error = -EPROTONOSUPPORT;
+	if (sk_tcp->sk_protocol != IPPROTO_TCP)
+		goto out;
+	addrlen = sizeof(struct sockaddr_storage);
+	error = kernel_getpeername(sock_tcp, (struct sockaddr *)&ss, &addrlen);
+	if (error)
+		goto out;
+	if (!sk_tcp->sk_bound_dev_if) {
+		struct dst_entry *dst = sk_dst_get(sk_tcp);
+		error = -ENODEV;
+		if (!dst)
+			goto out;
+		sk_tcp->sk_bound_dev_if = dst->dev->ifindex;
+		dst_release(dst);
+	}
+
+	error = sock_create(ss.ss_family, SOCK_RAW, IPPROTO_GRE, &sock_raw);
+	if (error)
+		goto out;
+	sk_raw = sock_raw->sk;
+	sk_raw->sk_bound_dev_if = sk_tcp->sk_bound_dev_if;
+	error = kernel_connect(sock_raw, (struct sockaddr *)&ss, addrlen, 0);
+	if (error)
+		goto out;
+
+	po->chan.hdrlen = 14;
+	po->chan.private = sk_raw;
+	po->chan.ops = &pppopns_channel_ops;
+	po->chan.mtu = PPP_MRU - 80;
+	po->proto.pns.local = addr->local;
+	po->proto.pns.remote = addr->remote;
+	po->proto.pns.data_ready = sk_raw->sk_data_ready;
+	po->proto.pns.backlog_rcv = sk_raw->sk_backlog_rcv;
+
+	error = ppp_register_channel(&po->chan);
+	if (error)
+		goto out;
+
+	sk->sk_state = PPPOX_CONNECTED;
+	lock_sock(sk_raw);
+	sk_raw->sk_data_ready = pppopns_recv;
+	sk_raw->sk_backlog_rcv = pppopns_recv_core;
+	sk_raw->sk_user_data = sk;
+	release_sock(sk_raw);
+out:
+	if (sock_tcp)
+		sockfd_put(sock_tcp);
+	if (error && sock_raw)
+		sock_release(sock_raw);
+	release_sock(sk);
+	return error;
+}
+
+static int pppopns_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (!sk)
+		return 0;
+
+	lock_sock(sk);
+	if (sock_flag(sk, SOCK_DEAD)) {
+		release_sock(sk);
+		return -EBADF;
+	}
+
+	if (sk->sk_state != PPPOX_NONE) {
+		struct sock *sk_raw = (struct sock *)pppox_sk(sk)->chan.private;
+		lock_sock(sk_raw);
+		skb_queue_purge(&sk->sk_receive_queue);
+		pppox_unbind_sock(sk);
+		sk_raw->sk_data_ready = pppox_sk(sk)->proto.pns.data_ready;
+		sk_raw->sk_backlog_rcv = pppox_sk(sk)->proto.pns.backlog_rcv;
+		sk_raw->sk_user_data = NULL;
+		release_sock(sk_raw);
+		sock_release(sk_raw->sk_socket);
+	}
+
+	sock_orphan(sk);
+	sock->sk = NULL;
+	release_sock(sk);
+	sock_put(sk);
+	return 0;
+}
+
+/******************************************************************************/
+
+static struct proto pppopns_proto = {
+	.name = "PPPOPNS",
+	.owner = THIS_MODULE,
+	.obj_size = sizeof(struct pppox_sock),
+};
+
+static struct proto_ops pppopns_proto_ops = {
+	.family = PF_PPPOX,
+	.owner = THIS_MODULE,
+	.release = pppopns_release,
+	.bind = sock_no_bind,
+	.connect = pppopns_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = sock_no_accept,
+	.getname = sock_no_getname,
+	.poll = sock_no_poll,
+	.ioctl = pppox_ioctl,
+	.listen = sock_no_listen,
+	.shutdown = sock_no_shutdown,
+	.setsockopt = sock_no_setsockopt,
+	.getsockopt = sock_no_getsockopt,
+	.sendmsg = sock_no_sendmsg,
+	.recvmsg = sock_no_recvmsg,
+	.mmap = sock_no_mmap,
+};
+
+static int pppopns_create(struct net *net, struct socket *sock, int kern)
+{
+	struct sock *sk;
+
+	sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppopns_proto, kern);
+	if (!sk)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+	sock->state = SS_UNCONNECTED;
+	sock->ops = &pppopns_proto_ops;
+	sk->sk_protocol = PX_PROTO_OPNS;
+	sk->sk_state = PPPOX_NONE;
+	return 0;
+}
+
+/******************************************************************************/
+
+static struct pppox_proto pppopns_pppox_proto = {
+	.create = pppopns_create,
+	.owner = THIS_MODULE,
+};
+
+static int __init pppopns_init(void)
+{
+	int error;
+
+	error = proto_register(&pppopns_proto, 0);
+	if (error)
+		return error;
+
+	error = register_pppox_proto(PX_PROTO_OPNS, &pppopns_pppox_proto);
+	if (error)
+		proto_unregister(&pppopns_proto);
+	else
+		skb_queue_head_init(&delivery_queue);
+	return error;
+}
+
+static void __exit pppopns_exit(void)
+{
+	unregister_pppox_proto(PX_PROTO_OPNS);
+	proto_unregister(&pppopns_proto);
+}
+
+module_init(pppopns_init);
+module_exit(pppopns_exit);
+
+MODULE_DESCRIPTION("PPP on PPTP Network Server (PPPoPNS)");
+MODULE_AUTHOR("Chia-chi Yeh <chiachi@android.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 88fe38d..d8f81b06 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2029,6 +2029,12 @@
 	int le;
 	int ret;
 
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+	if (cmd != TUNGETIFF && !capable(CAP_NET_ADMIN)) {
+		return -EPERM;
+	}
+#endif
+
 	if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) {
 		if (copy_from_user(&ifr, argp, ifreq_len))
 			return -EFAULT;
diff --git a/drivers/net/wireless/ti/wlcore/init.c b/drivers/net/wireless/ti/wlcore/init.c
index d0b7734..b7974b4 100644
--- a/drivers/net/wireless/ti/wlcore/init.c
+++ b/drivers/net/wireless/ti/wlcore/init.c
@@ -549,6 +549,11 @@
 {
 	int ret;
 
+	/* Disable filtering */
+	ret = wl1271_acx_group_address_tbl(wl, wlvif, false, NULL, 0);
+	if (ret < 0)
+		return ret;
+
 	ret = wl1271_acx_ap_max_tx_retry(wl, wlvif);
 	if (ret < 0)
 		return ret;
diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index 712936f..fbd26ec 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -177,6 +177,16 @@
 		/* Packet that contains a length */
 		if (tmp[0] == 0 && tmp[1] == 0) {
 			phy->next_read_size = (tmp[2] << 8) + tmp[3] + 3;
+			/*
+			 * Ensure next_read_size does not exceed sizeof(tmp)
+			 * for reading that many bytes during next iteration
+			 */
+			if (phy->next_read_size > FDP_NCI_I2C_MAX_PAYLOAD) {
+				dev_dbg(&client->dev, "%s: corrupted packet\n",
+					__func__);
+				phy->next_read_size = 5;
+				goto flush;
+			}
 		} else {
 			phy->next_read_size = FDP_NCI_I2C_MIN_PAYLOAD;
 
diff --git a/drivers/nfc/st21nfca/dep.c b/drivers/nfc/st21nfca/dep.c
index 798a32b..2062852 100644
--- a/drivers/nfc/st21nfca/dep.c
+++ b/drivers/nfc/st21nfca/dep.c
@@ -217,7 +217,8 @@
 
 	atr_req = (struct st21nfca_atr_req *)skb->data;
 
-	if (atr_req->length < sizeof(struct st21nfca_atr_req)) {
+	if (atr_req->length < sizeof(struct st21nfca_atr_req) ||
+	    atr_req->length > skb->len) {
 		r = -EPROTO;
 		goto exit;
 	}
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index e9360d5..e37a2a5 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1063,42 +1063,66 @@
 	return 0;
 }
 
+/*
+ * Convert configs to something easy to use in C code
+ */
+#if defined(CONFIG_CMDLINE_FORCE)
+static const int overwrite_incoming_cmdline = 1;
+static const int read_dt_cmdline;
+static const int concat_cmdline;
+#elif defined(CONFIG_CMDLINE_EXTEND)
+static const int overwrite_incoming_cmdline;
+static const int read_dt_cmdline = 1;
+static const int concat_cmdline = 1;
+#else /* CMDLINE_FROM_BOOTLOADER */
+static const int overwrite_incoming_cmdline;
+static const int read_dt_cmdline = 1;
+static const int concat_cmdline;
+#endif
+
+#ifdef CONFIG_CMDLINE
+static const char *config_cmdline = CONFIG_CMDLINE;
+#else
+static const char *config_cmdline = "";
+#endif
+
 int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data)
 {
-	int l;
-	const char *p;
+	int l = 0;
+	const char *p = NULL;
+	char *cmdline = data;
 
 	pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
 
-	if (depth != 1 || !data ||
+	if (depth != 1 || !cmdline ||
 	    (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
 		return 0;
 
 	early_init_dt_check_for_initrd(node);
 
-	/* Retrieve command line */
-	p = of_get_flat_dt_prop(node, "bootargs", &l);
-	if (p != NULL && l > 0)
-		strlcpy(data, p, min((int)l, COMMAND_LINE_SIZE));
+	/* Put CONFIG_CMDLINE in if forced or if data had nothing in it to start */
+	if (overwrite_incoming_cmdline || !cmdline[0])
+		strlcpy(cmdline, config_cmdline, COMMAND_LINE_SIZE);
 
-	/*
-	 * CONFIG_CMDLINE is meant to be a default in case nothing else
-	 * managed to set the command line, unless CONFIG_CMDLINE_FORCE
-	 * is set in which case we override whatever was found earlier.
-	 */
-#ifdef CONFIG_CMDLINE
-#if defined(CONFIG_CMDLINE_EXTEND)
-	strlcat(data, " ", COMMAND_LINE_SIZE);
-	strlcat(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#elif defined(CONFIG_CMDLINE_FORCE)
-	strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#else
-	/* No arguments from boot loader, use kernel's  cmdl*/
-	if (!((char *)data)[0])
-		strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#endif
-#endif /* CONFIG_CMDLINE */
+	/* Retrieve command line unless forcing */
+	if (read_dt_cmdline)
+		p = of_get_flat_dt_prop(node, "bootargs", &l);
+
+	if (p != NULL && l > 0) {
+		if (concat_cmdline) {
+			int cmdline_len;
+			int copy_len;
+			strlcat(cmdline, " ", COMMAND_LINE_SIZE);
+			cmdline_len = strlen(cmdline);
+			copy_len = COMMAND_LINE_SIZE - cmdline_len - 1;
+			copy_len = min((int)l, copy_len);
+			strncpy(cmdline + cmdline_len, p, copy_len);
+			cmdline[cmdline_len + copy_len] = '\0';
+		} else {
+			strlcpy(cmdline, p, min((int)l, COMMAND_LINE_SIZE));
+		}
+	}
 
 	pr_debug("Command line is: %s\n", (char*)data);
 
diff --git a/drivers/platform/goldfish/Makefile b/drivers/platform/goldfish/Makefile
index d348712..277a820 100644
--- a/drivers/platform/goldfish/Makefile
+++ b/drivers/platform/goldfish/Makefile
@@ -2,4 +2,5 @@
 # Makefile for Goldfish platform specific drivers
 #
 obj-$(CONFIG_GOLDFISH_BUS)	+= pdev_bus.o
-obj-$(CONFIG_GOLDFISH_PIPE)	+= goldfish_pipe.o
+obj-$(CONFIG_GOLDFISH_PIPE)	+= goldfish_pipe_all.o
+goldfish_pipe_all-objs := goldfish_pipe.o goldfish_pipe_v2.o
diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c
index 1aba2c7..91e0a56 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -15,52 +15,11 @@
  *
  */
 
-/* This source file contains the implementation of a special device driver
- * that intends to provide a *very* fast communication channel between the
- * guest system and the QEMU emulator.
- *
- * Usage from the guest is simply the following (error handling simplified):
- *
- *    int  fd = open("/dev/qemu_pipe",O_RDWR);
- *    .... write() or read() through the pipe.
- *
- * This driver doesn't deal with the exact protocol used during the session.
- * It is intended to be as simple as something like:
- *
- *    // do this _just_ after opening the fd to connect to a specific
- *    // emulator service.
- *    const char*  msg = "<pipename>";
- *    if (write(fd, msg, strlen(msg)+1) < 0) {
- *       ... could not connect to <pipename> service
- *       close(fd);
- *    }
- *
- *    // after this, simply read() and write() to communicate with the
- *    // service. Exact protocol details left as an exercise to the reader.
- *
- * This driver is very fast because it doesn't copy any data through
- * intermediate buffers, since the emulator is capable of translating
- * guest user addresses into host ones.
- *
- * Note that we must however ensure that each user page involved in the
- * exchange is properly mapped during a transfer.
+/* This source file contains the implementation of the legacy version of
+ * a goldfish pipe device driver. See goldfish_pipe_v2.c for the current
+ * version.
  */
-
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/miscdevice.h>
-#include <linux/platform_device.h>
-#include <linux/poll.h>
-#include <linux/sched.h>
-#include <linux/bitops.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include <linux/goldfish.h>
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <linux/acpi.h>
+#include "goldfish_pipe.h"
 
 /*
  * IMPORTANT: The following constants must match the ones used and defined
@@ -110,29 +69,15 @@
 #define PIPE_WAKE_READ         (1 << 1)  /* pipe can now be read from */
 #define PIPE_WAKE_WRITE        (1 << 2)  /* pipe can now be written to */
 
-struct access_params {
-	unsigned long channel;
-	u32 size;
-	unsigned long address;
-	u32 cmd;
-	u32 result;
-	/* reserved for future extension */
-	u32 flags;
-};
+#define MAX_PAGES_TO_GRAB 32
 
-/* The global driver data. Holds a reference to the i/o page used to
- * communicate with the emulator, and a wake queue for blocked tasks
- * waiting to be awoken.
- */
-struct goldfish_pipe_dev {
-	spinlock_t lock;
-	unsigned char __iomem *base;
-	struct access_params *aps;
-	int irq;
-	u32 version;
-};
+#define DEBUG 0
 
-static struct goldfish_pipe_dev   pipe_dev[1];
+#if DEBUG
+#define DPRINT(...) { printk(KERN_ERR __VA_ARGS__); }
+#else
+#define DPRINT(...)
+#endif
 
 /* This data type models a given pipe instance */
 struct goldfish_pipe {
@@ -142,6 +87,15 @@
 	wait_queue_head_t wake_queue;
 };
 
+struct access_params {
+	unsigned long channel;
+	u32 size;
+	unsigned long address;
+	u32 cmd;
+	u32 result;
+	/* reserved for future extension */
+	u32 flags;
+};
 
 /* Bit flags for the 'flags' field */
 enum {
@@ -231,8 +185,10 @@
 	if (valid_batchbuffer_addr(dev, aps)) {
 		dev->aps = aps;
 		return 0;
-	} else
+	} else {
+		devm_kfree(&pdev->dev, aps);
 		return -1;
+	}
 }
 
 /* A value that will not be set by qemu emulator */
@@ -269,6 +225,7 @@
 	struct goldfish_pipe *pipe = filp->private_data;
 	struct goldfish_pipe_dev *dev = pipe->dev;
 	unsigned long address, address_end;
+	struct page* pages[MAX_PAGES_TO_GRAB] = {};
 	int count = 0, ret = -EINVAL;
 
 	/* If the emulator already closed the pipe, no need to go further */
@@ -293,45 +250,61 @@
 
 	while (address < address_end) {
 		unsigned long page_end = (address & PAGE_MASK) + PAGE_SIZE;
-		unsigned long next     = page_end < address_end ? page_end
-								: address_end;
-		unsigned long avail    = next - address;
-		int status, wakeBit;
-		struct page *page;
-
-		/* Either vaddr or paddr depending on the device version */
-		unsigned long xaddr;
+		unsigned long next, avail;
+		int status, wakeBit, page_i, num_contiguous_pages;
+		long first_page, last_page, requested_pages;
+		unsigned long xaddr, xaddr_prev, xaddr_i;
 
 		/*
-		 * We grab the pages on a page-by-page basis in case user
-		 * space gives us a potentially huge buffer but the read only
-		 * returns a small amount, then there's no need to pin that
-		 * much memory to the process.
+		 * Attempt to grab multiple physically contiguous pages.
 		 */
-		down_read(&current->mm->mmap_sem);
-		ret = get_user_pages(address, 1, is_write ? 0 : FOLL_WRITE,
-				&page, NULL);
-		up_read(&current->mm->mmap_sem);
-		if (ret < 0)
-			break;
-
-		if (dev->version) {
-			/* Device version 1 or newer (qemu-android) expects the
-			 * physical address.
-			 */
-			xaddr = page_to_phys(page) | (address & ~PAGE_MASK);
-		} else {
-			/* Device version 0 (classic emulator) expects the
-			 * virtual address.
-			 */
-			xaddr = address;
+		first_page = address & PAGE_MASK;
+		last_page = (address_end - 1) & PAGE_MASK;
+		requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1;
+		if (requested_pages > MAX_PAGES_TO_GRAB) {
+			requested_pages = MAX_PAGES_TO_GRAB;
 		}
+		ret = get_user_pages_fast(first_page, requested_pages,
+				!is_write, pages);
+
+		DPRINT("%s: requested pages: %d %d %p\n", __FUNCTION__,
+			ret, requested_pages, first_page);
+		if (ret == 0) {
+			DPRINT("%s: error: (requested pages == 0) (wanted %d)\n",
+					__FUNCTION__, requested_pages);
+			mutex_unlock(&pipe->lock);
+			return ret;
+		}
+		if (ret < 0) {
+			DPRINT("%s: (requested pages < 0) %d \n",
+					__FUNCTION__, requested_pages);
+			mutex_unlock(&pipe->lock);
+			return ret;
+		}
+
+		xaddr = page_to_phys(pages[0]) | (address & ~PAGE_MASK);
+		xaddr_prev = xaddr;
+		num_contiguous_pages = ret == 0 ? 0 : 1;
+		for (page_i = 1; page_i < ret; page_i++) {
+			xaddr_i = page_to_phys(pages[page_i]) | (address & ~PAGE_MASK);
+			if (xaddr_i == xaddr_prev + PAGE_SIZE) {
+				page_end += PAGE_SIZE;
+				xaddr_prev = xaddr_i;
+				num_contiguous_pages++;
+			} else {
+				DPRINT("%s: discontinuous page boundary: %d pages instead\n",
+						__FUNCTION__, page_i);
+				break;
+			}
+		}
+		next = page_end < address_end ? page_end : address_end;
+		avail = next - address;
 
 		/* Now, try to transfer the bytes in the current page */
 		spin_lock_irqsave(&dev->lock, irq_flags);
 		if (access_with_param(dev,
-				is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER,
-				xaddr, avail, pipe, &status)) {
+					is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER,
+					xaddr, avail, pipe, &status)) {
 			gf_write_ptr(pipe, dev->base + PIPE_REG_CHANNEL,
 				     dev->base + PIPE_REG_CHANNEL_HIGH);
 			writel(avail, dev->base + PIPE_REG_SIZE);
@@ -344,9 +317,13 @@
 		}
 		spin_unlock_irqrestore(&dev->lock, irq_flags);
 
-		if (status > 0 && !is_write)
-			set_page_dirty(page);
-		put_page(page);
+		for (page_i = 0; page_i < ret; page_i++) {
+			if (status > 0 && !is_write &&
+				page_i < num_contiguous_pages) {
+				set_page_dirty(pages[page_i]);
+			}
+			put_page(pages[page_i]);
+		}
 
 		if (status > 0) { /* Correct transfer */
 			count += status;
@@ -368,7 +345,7 @@
 			 */
 			if (status != PIPE_ERROR_AGAIN)
 				pr_info_ratelimited("goldfish_pipe: backend returned error %d on %s\n",
-					status, is_write ? "write" : "read");
+						status, is_write ? "write" : "read");
 			ret = 0;
 			break;
 		}
@@ -378,7 +355,7 @@
 		 * non-blocking mode, just return the error code.
 		 */
 		if (status != PIPE_ERROR_AGAIN ||
-			(filp->f_flags & O_NONBLOCK) != 0) {
+				(filp->f_flags & O_NONBLOCK) != 0) {
 			ret = goldfish_pipe_error_convert(status);
 			break;
 		}
@@ -392,7 +369,7 @@
 
 		/* Tell the emulator we're going to wait for a wake event */
 		goldfish_cmd(pipe,
-			is_write ? CMD_WAKE_ON_WRITE : CMD_WAKE_ON_READ);
+				is_write ? CMD_WAKE_ON_WRITE : CMD_WAKE_ON_READ);
 
 		/* Unlock the pipe, then wait for the wake signal */
 		mutex_unlock(&pipe->lock);
@@ -538,6 +515,8 @@
 
 	pipe->dev = dev;
 	mutex_init(&pipe->lock);
+	DPRINT("%s: call. pipe_dev pipe_dev=0x%lx new_pipe_addr=0x%lx file=0x%lx\n", __FUNCTION__, pipe_dev, pipe, file);
+	// spin lock init, write head of list, i guess
 	init_waitqueue_head(&pipe->wake_queue);
 
 	/*
@@ -560,6 +539,7 @@
 {
 	struct goldfish_pipe *pipe = filp->private_data;
 
+	DPRINT("%s: call. pipe=0x%lx file=0x%lx\n", __FUNCTION__, pipe, filp);
 	/* The guest is closing the channel, so tell the emulator right now */
 	goldfish_cmd(pipe, CMD_CLOSE);
 	kfree(pipe);
@@ -576,98 +556,33 @@
 	.release = goldfish_pipe_release,
 };
 
-static struct miscdevice goldfish_pipe_device = {
+static struct miscdevice goldfish_pipe_dev = {
 	.minor = MISC_DYNAMIC_MINOR,
 	.name = "goldfish_pipe",
 	.fops = &goldfish_pipe_fops,
 };
 
-static int goldfish_pipe_probe(struct platform_device *pdev)
+int goldfish_pipe_device_init_v1(struct platform_device *pdev)
 {
-	int err;
-	struct resource *r;
 	struct goldfish_pipe_dev *dev = pipe_dev;
-
-	/* not thread safe, but this should not happen */
-	WARN_ON(dev->base != NULL);
-
-	spin_lock_init(&dev->lock);
-
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (r == NULL || resource_size(r) < PAGE_SIZE) {
-		dev_err(&pdev->dev, "can't allocate i/o page\n");
-		return -EINVAL;
-	}
-	dev->base = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE);
-	if (dev->base == NULL) {
-		dev_err(&pdev->dev, "ioremap failed\n");
-		return -EINVAL;
-	}
-
-	r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (r == NULL) {
-		err = -EINVAL;
-		goto error;
-	}
-	dev->irq = r->start;
-
-	err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt,
+	int err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt,
 				IRQF_SHARED, "goldfish_pipe", dev);
 	if (err) {
-		dev_err(&pdev->dev, "unable to allocate IRQ\n");
-		goto error;
+		dev_err(&pdev->dev, "unable to allocate IRQ for v1\n");
+		return err;
 	}
 
-	err = misc_register(&goldfish_pipe_device);
+	err = misc_register(&goldfish_pipe_dev);
 	if (err) {
-		dev_err(&pdev->dev, "unable to register device\n");
-		goto error;
+		dev_err(&pdev->dev, "unable to register v1 device\n");
+		return err;
 	}
+
 	setup_access_params_addr(pdev, dev);
-
-	/* Although the pipe device in the classic Android emulator does not
-	 * recognize the 'version' register, it won't treat this as an error
-	 * either and will simply return 0, which is fine.
-	 */
-	dev->version = readl(dev->base + PIPE_REG_VERSION);
 	return 0;
-
-error:
-	dev->base = NULL;
-	return err;
 }
 
-static int goldfish_pipe_remove(struct platform_device *pdev)
+void goldfish_pipe_device_deinit_v1(struct platform_device *pdev)
 {
-	struct goldfish_pipe_dev *dev = pipe_dev;
-	misc_deregister(&goldfish_pipe_device);
-	dev->base = NULL;
-	return 0;
+    misc_deregister(&goldfish_pipe_dev);
 }
-
-static const struct acpi_device_id goldfish_pipe_acpi_match[] = {
-	{ "GFSH0003", 0 },
-	{ },
-};
-MODULE_DEVICE_TABLE(acpi, goldfish_pipe_acpi_match);
-
-static const struct of_device_id goldfish_pipe_of_match[] = {
-	{ .compatible = "google,android-pipe", },
-	{},
-};
-MODULE_DEVICE_TABLE(of, goldfish_pipe_of_match);
-
-static struct platform_driver goldfish_pipe = {
-	.probe = goldfish_pipe_probe,
-	.remove = goldfish_pipe_remove,
-	.driver = {
-		.name = "goldfish_pipe",
-		.owner = THIS_MODULE,
-		.of_match_table = goldfish_pipe_of_match,
-		.acpi_match_table = ACPI_PTR(goldfish_pipe_acpi_match),
-	}
-};
-
-module_platform_driver(goldfish_pipe);
-MODULE_AUTHOR("David Turner <digit@google.com>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/platform/goldfish/goldfish_pipe.h b/drivers/platform/goldfish/goldfish_pipe.h
new file mode 100644
index 0000000..6cd1b63
--- /dev/null
+++ b/drivers/platform/goldfish/goldfish_pipe.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef GOLDFISH_PIPE_H
+#define GOLDFISH_PIPE_H
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/miscdevice.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/goldfish.h>
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/acpi.h>
+
+
+/* Initialize the legacy version of the pipe device driver */
+int goldfish_pipe_device_init_v1(struct platform_device *pdev);
+
+/* Deinitialize the legacy version of the pipe device driver */
+void goldfish_pipe_device_deinit_v1(struct platform_device *pdev);
+
+/* Forward declarations for the device struct */
+struct goldfish_pipe;
+struct goldfish_pipe_device_buffers;
+
+/* The global driver data. Holds a reference to the i/o page used to
+ * communicate with the emulator, and a wake queue for blocked tasks
+ * waiting to be awoken.
+ */
+struct goldfish_pipe_dev {
+	/*
+	 * Global device spinlock. Protects the following members:
+	 *  - pipes, pipes_capacity
+	 *  - [*pipes, *pipes + pipes_capacity) - array data
+	 *  - first_signalled_pipe,
+	 *      goldfish_pipe::prev_signalled,
+	 *      goldfish_pipe::next_signalled,
+	 *      goldfish_pipe::signalled_flags - all singnalled-related fields,
+	 *                                       in all allocated pipes
+	 *  - open_command_params - PIPE_CMD_OPEN-related buffers
+	 *
+	 * It looks like a lot of different fields, but the trick is that the only
+	 * operation that happens often is the signalled pipes array manipulation.
+	 * That's why it's OK for now to keep the rest of the fields under the same
+	 * lock. If we notice too much contention because of PIPE_CMD_OPEN,
+	 * then we should add a separate lock there.
+	 */
+	spinlock_t lock;
+
+	/*
+	 * Array of the pipes of |pipes_capacity| elements,
+	 * indexed by goldfish_pipe::id
+	 */
+	struct goldfish_pipe **pipes;
+	u32 pipes_capacity;
+
+	/* Pointers to the buffers host uses for interaction with this driver */
+	struct goldfish_pipe_dev_buffers *buffers;
+
+	/* Head of a doubly linked list of signalled pipes */
+	struct goldfish_pipe *first_signalled_pipe;
+
+	/* Some device-specific data */
+	int irq;
+	int version;
+	unsigned char __iomem *base;
+
+	/* v1-specific access parameters */
+	struct access_params *aps;
+};
+
+extern struct goldfish_pipe_dev pipe_dev[1];
+
+#endif /* GOLDFISH_PIPE_H */
diff --git a/drivers/platform/goldfish/goldfish_pipe_v2.c b/drivers/platform/goldfish/goldfish_pipe_v2.c
new file mode 100644
index 0000000..ad373ed
--- /dev/null
+++ b/drivers/platform/goldfish/goldfish_pipe_v2.c
@@ -0,0 +1,889 @@
+/*
+ * Copyright (C) 2012 Intel, Inc.
+ * Copyright (C) 2013 Intel, Inc.
+ * Copyright (C) 2014 Linaro Limited
+ * Copyright (C) 2011-2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/* This source file contains the implementation of a special device driver
+ * that intends to provide a *very* fast communication channel between the
+ * guest system and the QEMU emulator.
+ *
+ * Usage from the guest is simply the following (error handling simplified):
+ *
+ *    int  fd = open("/dev/qemu_pipe",O_RDWR);
+ *    .... write() or read() through the pipe.
+ *
+ * This driver doesn't deal with the exact protocol used during the session.
+ * It is intended to be as simple as something like:
+ *
+ *    // do this _just_ after opening the fd to connect to a specific
+ *    // emulator service.
+ *    const char*  msg = "<pipename>";
+ *    if (write(fd, msg, strlen(msg)+1) < 0) {
+ *       ... could not connect to <pipename> service
+ *       close(fd);
+ *    }
+ *
+ *    // after this, simply read() and write() to communicate with the
+ *    // service. Exact protocol details left as an exercise to the reader.
+ *
+ * This driver is very fast because it doesn't copy any data through
+ * intermediate buffers, since the emulator is capable of translating
+ * guest user addresses into host ones.
+ *
+ * Note that we must however ensure that each user page involved in the
+ * exchange is properly mapped during a transfer.
+ */
+
+#include "goldfish_pipe.h"
+
+
+/*
+ * Update this when something changes in the driver's behavior so the host
+ * can benefit from knowing it
+ */
+enum {
+	PIPE_DRIVER_VERSION = 2,
+	PIPE_CURRENT_DEVICE_VERSION = 2
+};
+
+/*
+ * IMPORTANT: The following constants must match the ones used and defined
+ * in external/qemu/hw/goldfish_pipe.c in the Android source tree.
+ */
+
+/* List of bitflags returned in status of CMD_POLL command */
+enum PipePollFlags {
+	PIPE_POLL_IN	= 1 << 0,
+	PIPE_POLL_OUT	= 1 << 1,
+	PIPE_POLL_HUP	= 1 << 2
+};
+
+/* Possible status values used to signal errors - see goldfish_pipe_error_convert */
+enum PipeErrors {
+	PIPE_ERROR_INVAL  = -1,
+	PIPE_ERROR_AGAIN  = -2,
+	PIPE_ERROR_NOMEM  = -3,
+	PIPE_ERROR_IO     = -4
+};
+
+/* Bit-flags used to signal events from the emulator */
+enum PipeWakeFlags {
+	PIPE_WAKE_CLOSED = 1 << 0,  /* emulator closed pipe */
+	PIPE_WAKE_READ   = 1 << 1,  /* pipe can now be read from */
+	PIPE_WAKE_WRITE  = 1 << 2  /* pipe can now be written to */
+};
+
+/* Bit flags for the 'flags' field */
+enum PipeFlagsBits {
+	BIT_CLOSED_ON_HOST = 0,  /* pipe closed by host */
+	BIT_WAKE_ON_WRITE  = 1,  /* want to be woken on writes */
+	BIT_WAKE_ON_READ   = 2,  /* want to be woken on reads */
+};
+
+enum PipeRegs {
+	PIPE_REG_CMD = 0,
+
+	PIPE_REG_SIGNAL_BUFFER_HIGH = 4,
+	PIPE_REG_SIGNAL_BUFFER = 8,
+	PIPE_REG_SIGNAL_BUFFER_COUNT = 12,
+
+	PIPE_REG_OPEN_BUFFER_HIGH = 20,
+	PIPE_REG_OPEN_BUFFER = 24,
+
+	PIPE_REG_VERSION = 36,
+
+	PIPE_REG_GET_SIGNALLED = 48,
+};
+
+enum PipeCmdCode {
+	PIPE_CMD_OPEN = 1,	/* to be used by the pipe device itself */
+	PIPE_CMD_CLOSE,
+	PIPE_CMD_POLL,
+	PIPE_CMD_WRITE,
+	PIPE_CMD_WAKE_ON_WRITE,
+	PIPE_CMD_READ,
+	PIPE_CMD_WAKE_ON_READ,
+
+	/*
+	 * TODO(zyy): implement a deferred read/write execution to allow parallel
+	 *  processing of pipe operations on the host.
+	*/
+	PIPE_CMD_WAKE_ON_DONE_IO,
+};
+
+enum {
+	MAX_BUFFERS_PER_COMMAND = 336,
+	MAX_SIGNALLED_PIPES = 64,
+	INITIAL_PIPES_CAPACITY = 64
+};
+
+struct goldfish_pipe_dev;
+struct goldfish_pipe;
+struct goldfish_pipe_command;
+
+/* A per-pipe command structure, shared with the host */
+struct goldfish_pipe_command {
+	s32 cmd;		/* PipeCmdCode, guest -> host */
+	s32 id;			/* pipe id, guest -> host */
+	s32 status;		/* command execution status, host -> guest */
+	s32 reserved;	/* to pad to 64-bit boundary */
+	union {
+		/* Parameters for PIPE_CMD_{READ,WRITE} */
+		struct {
+			u32 buffers_count;					/* number of buffers, guest -> host */
+			s32 consumed_size;					/* number of consumed bytes, host -> guest */
+			u64 ptrs[MAX_BUFFERS_PER_COMMAND]; 	/* buffer pointers, guest -> host */
+			u32 sizes[MAX_BUFFERS_PER_COMMAND];	/* buffer sizes, guest -> host */
+		} rw_params;
+	};
+};
+
+/* A single signalled pipe information */
+struct signalled_pipe_buffer {
+	u32 id;
+	u32 flags;
+};
+
+/* Parameters for the PIPE_CMD_OPEN command */
+struct open_command_param {
+	u64 command_buffer_ptr;
+	u32 rw_params_max_count;
+};
+
+/* Device-level set of buffers shared with the host */
+struct goldfish_pipe_dev_buffers {
+	struct open_command_param open_command_params;
+	struct signalled_pipe_buffer signalled_pipe_buffers[MAX_SIGNALLED_PIPES];
+};
+
+/* This data type models a given pipe instance */
+struct goldfish_pipe {
+	u32 id;							/* pipe ID - index into goldfish_pipe_dev::pipes array */
+	unsigned long flags;			/* The wake flags pipe is waiting for
+									 * Note: not protected with any lock, uses atomic operations
+									 *  and barriers to make it thread-safe.
+									 */
+	unsigned long signalled_flags;	/* wake flags host have signalled,
+									 *  - protected by goldfish_pipe_dev::lock */
+
+	struct goldfish_pipe_command *command_buffer;	/* A pointer to command buffer */
+
+	/* doubly linked list of signalled pipes, protected by goldfish_pipe_dev::lock */
+	struct goldfish_pipe *prev_signalled;
+	struct goldfish_pipe *next_signalled;
+
+	/*
+	 * A pipe's own lock. Protects the following:
+	 *  - *command_buffer - makes sure a command can safely write its parameters
+	 *    to the host and read the results back.
+	 */
+	struct mutex lock;
+
+	wait_queue_head_t wake_queue;	/* A wake queue for sleeping until host signals an event */
+	struct goldfish_pipe_dev *dev;	/* Pointer to the parent goldfish_pipe_dev instance */
+};
+
+struct goldfish_pipe_dev pipe_dev[1] = {};
+
+static int goldfish_cmd_locked(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
+{
+	pipe->command_buffer->cmd = cmd;
+	pipe->command_buffer->status = PIPE_ERROR_INVAL;	/* failure by default */
+	writel(pipe->id, pipe->dev->base + PIPE_REG_CMD);
+	return pipe->command_buffer->status;
+}
+
+static int goldfish_cmd(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
+{
+	int status;
+	if (mutex_lock_interruptible(&pipe->lock))
+		return PIPE_ERROR_IO;
+	status = goldfish_cmd_locked(pipe, cmd);
+	mutex_unlock(&pipe->lock);
+	return status;
+}
+
+/*
+ * This function converts an error code returned by the emulator through
+ * the PIPE_REG_STATUS i/o register into a valid negative errno value.
+ */
+static int goldfish_pipe_error_convert(int status)
+{
+	switch (status) {
+	case PIPE_ERROR_AGAIN:
+		return -EAGAIN;
+	case PIPE_ERROR_NOMEM:
+		return -ENOMEM;
+	case PIPE_ERROR_IO:
+		return -EIO;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int pin_user_pages(unsigned long first_page, unsigned long last_page,
+	unsigned last_page_size, int is_write,
+	struct page *pages[MAX_BUFFERS_PER_COMMAND], unsigned *iter_last_page_size)
+{
+	int ret;
+	int requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1;
+	if (requested_pages > MAX_BUFFERS_PER_COMMAND) {
+		requested_pages = MAX_BUFFERS_PER_COMMAND;
+		*iter_last_page_size = PAGE_SIZE;
+	} else {
+		*iter_last_page_size = last_page_size;
+	}
+
+	ret = get_user_pages_fast(
+			first_page, requested_pages, !is_write, pages);
+	if (ret <= 0)
+		return -EFAULT;
+	if (ret < requested_pages)
+		*iter_last_page_size = PAGE_SIZE;
+	return ret;
+
+}
+
+static void release_user_pages(struct page **pages, int pages_count,
+	int is_write, s32 consumed_size)
+{
+	int i;
+	for (i = 0; i < pages_count; i++) {
+		if (!is_write && consumed_size > 0) {
+			set_page_dirty(pages[i]);
+		}
+		put_page(pages[i]);
+	}
+}
+
+/* Populate the call parameters, merging adjacent pages together */
+static void populate_rw_params(
+	struct page **pages, int pages_count,
+	unsigned long address, unsigned long address_end,
+	unsigned long first_page, unsigned long last_page,
+	unsigned iter_last_page_size, int is_write,
+	struct goldfish_pipe_command *command)
+{
+	/*
+	 * Process the first page separately - it's the only page that
+	 * needs special handling for its start address.
+	 */
+	unsigned long xaddr = page_to_phys(pages[0]);
+	unsigned long xaddr_prev = xaddr;
+	int buffer_idx = 0;
+	int i = 1;
+	int size_on_page = first_page == last_page
+			? (int)(address_end - address)
+			: (PAGE_SIZE - (address & ~PAGE_MASK));
+	command->rw_params.ptrs[0] = (u64)(xaddr | (address & ~PAGE_MASK));
+	command->rw_params.sizes[0] = size_on_page;
+	for (; i < pages_count; ++i) {
+		xaddr = page_to_phys(pages[i]);
+		size_on_page = (i == pages_count - 1) ? iter_last_page_size : PAGE_SIZE;
+		if (xaddr == xaddr_prev + PAGE_SIZE) {
+			command->rw_params.sizes[buffer_idx] += size_on_page;
+		} else {
+			++buffer_idx;
+			command->rw_params.ptrs[buffer_idx] = (u64)xaddr;
+			command->rw_params.sizes[buffer_idx] = size_on_page;
+		}
+		xaddr_prev = xaddr;
+	}
+	command->rw_params.buffers_count = buffer_idx + 1;
+}
+
+static int transfer_max_buffers(struct goldfish_pipe* pipe,
+	unsigned long address, unsigned long address_end, int is_write,
+	unsigned long last_page, unsigned int last_page_size,
+	s32* consumed_size, int* status)
+{
+	struct page *pages[MAX_BUFFERS_PER_COMMAND];
+	unsigned long first_page = address & PAGE_MASK;
+	unsigned int iter_last_page_size;
+	int pages_count = pin_user_pages(first_page, last_page,
+			last_page_size, is_write,
+			pages, &iter_last_page_size);
+	if (pages_count < 0)
+		return pages_count;
+
+	/* Serialize access to the pipe command buffers */
+	if (mutex_lock_interruptible(&pipe->lock))
+		return -ERESTARTSYS;
+
+	populate_rw_params(pages, pages_count, address, address_end,
+		first_page, last_page, iter_last_page_size, is_write,
+		pipe->command_buffer);
+
+	/* Transfer the data */
+	*status = goldfish_cmd_locked(pipe,
+						is_write ? PIPE_CMD_WRITE : PIPE_CMD_READ);
+
+	*consumed_size = pipe->command_buffer->rw_params.consumed_size;
+
+	mutex_unlock(&pipe->lock);
+
+	release_user_pages(pages, pages_count, is_write, *consumed_size);
+
+	return 0;
+}
+
+static int wait_for_host_signal(struct goldfish_pipe *pipe, int is_write)
+{
+	u32 wakeBit = is_write ? BIT_WAKE_ON_WRITE : BIT_WAKE_ON_READ;
+	set_bit(wakeBit, &pipe->flags);
+
+	/* Tell the emulator we're going to wait for a wake event */
+	(void)goldfish_cmd(pipe,
+			is_write ? PIPE_CMD_WAKE_ON_WRITE : PIPE_CMD_WAKE_ON_READ);
+
+	while (test_bit(wakeBit, &pipe->flags)) {
+		if (wait_event_interruptible(
+				pipe->wake_queue,
+				!test_bit(wakeBit, &pipe->flags)))
+			return -ERESTARTSYS;
+
+		if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
+			return -EIO;
+	}
+
+	return 0;
+}
+
+static ssize_t goldfish_pipe_read_write(struct file *filp,
+	char __user *buffer, size_t bufflen, int is_write)
+{
+	struct goldfish_pipe *pipe = filp->private_data;
+	int count = 0, ret = -EINVAL;
+	unsigned long address, address_end, last_page;
+	unsigned int last_page_size;
+
+	/* If the emulator already closed the pipe, no need to go further */
+	if (unlikely(test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)))
+		return -EIO;
+	/* Null reads or writes succeeds */
+	if (unlikely(bufflen == 0))
+		return 0;
+	/* Check the buffer range for access */
+	if (unlikely(!access_ok(is_write ? VERIFY_WRITE : VERIFY_READ,
+			buffer, bufflen)))
+		return -EFAULT;
+
+	address = (unsigned long)buffer;
+	address_end = address + bufflen;
+	last_page = (address_end - 1) & PAGE_MASK;
+	last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1;
+
+	while (address < address_end) {
+		s32 consumed_size;
+		int status;
+		ret = transfer_max_buffers(pipe, address, address_end, is_write,
+				last_page, last_page_size, &consumed_size, &status);
+		if (ret < 0)
+			break;
+
+		if (consumed_size > 0) {
+			/* No matter what's the status, we've transfered something */
+			count += consumed_size;
+			address += consumed_size;
+		}
+		if (status > 0)
+			continue;
+		if (status == 0) {
+			/* EOF */
+			ret = 0;
+			break;
+		}
+		if (count > 0) {
+			/*
+			 * An error occured, but we already transfered
+			 * something on one of the previous iterations.
+			 * Just return what we already copied and log this
+			 * err.
+			 */
+			if (status != PIPE_ERROR_AGAIN)
+				pr_info_ratelimited("goldfish_pipe: backend error %d on %s\n",
+									status, is_write ? "write" : "read");
+			break;
+		}
+
+		/*
+		 * If the error is not PIPE_ERROR_AGAIN, or if we are in
+		 * non-blocking mode, just return the error code.
+		 */
+		if (status != PIPE_ERROR_AGAIN || (filp->f_flags & O_NONBLOCK) != 0) {
+			ret = goldfish_pipe_error_convert(status);
+			break;
+		}
+
+		status = wait_for_host_signal(pipe, is_write);
+		if (status < 0)
+			return status;
+	}
+
+	if (count > 0)
+		return count;
+	return ret;
+}
+
+static ssize_t goldfish_pipe_read(struct file *filp, char __user *buffer,
+				size_t bufflen, loff_t *ppos)
+{
+	return goldfish_pipe_read_write(filp, buffer, bufflen, /* is_write */ 0);
+}
+
+static ssize_t goldfish_pipe_write(struct file *filp,
+				const char __user *buffer, size_t bufflen,
+				loff_t *ppos)
+{
+	return goldfish_pipe_read_write(filp,
+			/* cast away the const */(char __user *)buffer, bufflen,
+			/* is_write */ 1);
+}
+
+static unsigned int goldfish_pipe_poll(struct file *filp, poll_table *wait)
+{
+	struct goldfish_pipe *pipe = filp->private_data;
+	unsigned int mask = 0;
+	int status;
+
+	poll_wait(filp, &pipe->wake_queue, wait);
+
+	status = goldfish_cmd(pipe, PIPE_CMD_POLL);
+	if (status < 0) {
+		return -ERESTARTSYS;
+	}
+
+	if (status & PIPE_POLL_IN)
+		mask |= POLLIN | POLLRDNORM;
+	if (status & PIPE_POLL_OUT)
+		mask |= POLLOUT | POLLWRNORM;
+	if (status & PIPE_POLL_HUP)
+		mask |= POLLHUP;
+	if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
+		mask |= POLLERR;
+
+	return mask;
+}
+
+static void signalled_pipes_add_locked(struct goldfish_pipe_dev *dev,
+	u32 id, u32 flags)
+{
+	struct goldfish_pipe *pipe;
+
+	BUG_ON(id >= dev->pipes_capacity);
+
+	pipe = dev->pipes[id];
+	if (!pipe)
+		return;
+	pipe->signalled_flags |= flags;
+
+	if (pipe->prev_signalled || pipe->next_signalled
+		|| dev->first_signalled_pipe == pipe)
+		return;	/* already in the list */
+	pipe->next_signalled = dev->first_signalled_pipe;
+	if (dev->first_signalled_pipe) {
+		dev->first_signalled_pipe->prev_signalled = pipe;
+	}
+	dev->first_signalled_pipe = pipe;
+}
+
+static void signalled_pipes_remove_locked(struct goldfish_pipe_dev *dev,
+	struct goldfish_pipe *pipe) {
+	if (pipe->prev_signalled)
+		pipe->prev_signalled->next_signalled = pipe->next_signalled;
+	if (pipe->next_signalled)
+		pipe->next_signalled->prev_signalled = pipe->prev_signalled;
+	if (pipe == dev->first_signalled_pipe)
+		dev->first_signalled_pipe = pipe->next_signalled;
+	pipe->prev_signalled = NULL;
+	pipe->next_signalled = NULL;
+}
+
+static struct goldfish_pipe *signalled_pipes_pop_front(struct goldfish_pipe_dev *dev,
+		int *wakes)
+{
+	struct goldfish_pipe *pipe;
+	unsigned long flags;
+	spin_lock_irqsave(&dev->lock, flags);
+
+	pipe = dev->first_signalled_pipe;
+	if (pipe) {
+		*wakes = pipe->signalled_flags;
+		pipe->signalled_flags = 0;
+		/*
+		 * This is an optimized version of signalled_pipes_remove_locked() -
+		 * we want to make it as fast as possible to wake the sleeping pipe
+		 * operations faster
+		 */
+		dev->first_signalled_pipe = pipe->next_signalled;
+		if (dev->first_signalled_pipe)
+			dev->first_signalled_pipe->prev_signalled = NULL;
+		pipe->next_signalled = NULL;
+	}
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return pipe;
+}
+
+static void goldfish_interrupt_task(unsigned long unused)
+{
+	struct goldfish_pipe_dev *dev = pipe_dev;
+	/* Iterate over the signalled pipes and wake them one by one */
+	struct goldfish_pipe *pipe;
+	int wakes;
+	while ((pipe = signalled_pipes_pop_front(dev, &wakes)) != NULL) {
+		if (wakes & PIPE_WAKE_CLOSED) {
+			pipe->flags = 1 << BIT_CLOSED_ON_HOST;
+		} else {
+			if (wakes & PIPE_WAKE_READ)
+				clear_bit(BIT_WAKE_ON_READ, &pipe->flags);
+			if (wakes & PIPE_WAKE_WRITE)
+				clear_bit(BIT_WAKE_ON_WRITE, &pipe->flags);
+		}
+		/*
+		 * wake_up_interruptible() implies a write barrier, so don't explicitly
+		 * add another one here.
+		 */
+		wake_up_interruptible(&pipe->wake_queue);
+	}
+}
+DECLARE_TASKLET(goldfish_interrupt_tasklet, goldfish_interrupt_task, 0);
+
+/*
+ * The general idea of the interrupt handling:
+ *
+ *  1. device raises an interrupt if there's at least one signalled pipe
+ *  2. IRQ handler reads the signalled pipes and their count from the device
+ *  3. device writes them into a shared buffer and returns the count
+ *      it only resets the IRQ if it has returned all signalled pipes,
+ *      otherwise it leaves it raised, so IRQ handler will be called
+ *      again for the next chunk
+ *  4. IRQ handler adds all returned pipes to the device's signalled pipes list
+ *  5. IRQ handler launches a tasklet to process the signalled pipes from the
+ *      list in a separate context
+ */
+static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id)
+{
+	u32 count;
+	u32 i;
+	unsigned long flags;
+	struct goldfish_pipe_dev *dev = dev_id;
+	if (dev != pipe_dev)
+		return IRQ_NONE;
+
+	/* Request the signalled pipes from the device */
+	spin_lock_irqsave(&dev->lock, flags);
+
+	count = readl(dev->base + PIPE_REG_GET_SIGNALLED);
+	if (count == 0) {
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return IRQ_NONE;
+	}
+	if (count > MAX_SIGNALLED_PIPES)
+		count = MAX_SIGNALLED_PIPES;
+
+	for (i = 0; i < count; ++i)
+		signalled_pipes_add_locked(dev,
+			dev->buffers->signalled_pipe_buffers[i].id,
+			dev->buffers->signalled_pipe_buffers[i].flags);
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	tasklet_schedule(&goldfish_interrupt_tasklet);
+	return IRQ_HANDLED;
+}
+
+static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev)
+{
+	int id;
+	for (id = 0; id < dev->pipes_capacity; ++id)
+		if (!dev->pipes[id])
+			return id;
+
+	{
+		/* Reallocate the array */
+		u32 new_capacity = 2 * dev->pipes_capacity;
+		struct goldfish_pipe **pipes =
+				kcalloc(new_capacity, sizeof(*pipes),
+					GFP_ATOMIC);
+		if (!pipes)
+			return -ENOMEM;
+		memcpy(pipes, dev->pipes, sizeof(*pipes) * dev->pipes_capacity);
+		kfree(dev->pipes);
+		dev->pipes = pipes;
+		id = dev->pipes_capacity;
+		dev->pipes_capacity = new_capacity;
+	}
+	return id;
+}
+
+/**
+ *	goldfish_pipe_open - open a channel to the AVD
+ *	@inode: inode of device
+ *	@file: file struct of opener
+ *
+ *	Create a new pipe link between the emulator and the use application.
+ *	Each new request produces a new pipe.
+ *
+ *	Note: we use the pipe ID as a mux. All goldfish emulations are 32bit
+ *	right now so this is fine. A move to 64bit will need this addressing
+ */
+static int goldfish_pipe_open(struct inode *inode, struct file *file)
+{
+	struct goldfish_pipe_dev *dev = pipe_dev;
+	unsigned long flags;
+	int id;
+	int status;
+
+	/* Allocate new pipe kernel object */
+	struct goldfish_pipe *pipe = kzalloc(sizeof(*pipe), GFP_KERNEL);
+	if (pipe == NULL)
+		return -ENOMEM;
+
+	pipe->dev = dev;
+	mutex_init(&pipe->lock);
+	init_waitqueue_head(&pipe->wake_queue);
+
+	/*
+	 * Command buffer needs to be allocated on its own page to make sure it is
+	 * physically contiguous in host's address space.
+	 */
+	pipe->command_buffer =
+			(struct goldfish_pipe_command*)__get_free_page(GFP_KERNEL);
+	if (!pipe->command_buffer) {
+		status = -ENOMEM;
+		goto err_pipe;
+	}
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	id = get_free_pipe_id_locked(dev);
+	if (id < 0) {
+		status = id;
+		goto err_id_locked;
+	}
+
+	dev->pipes[id] = pipe;
+	pipe->id = id;
+	pipe->command_buffer->id = id;
+
+	/* Now tell the emulator we're opening a new pipe. */
+	dev->buffers->open_command_params.rw_params_max_count =
+			MAX_BUFFERS_PER_COMMAND;
+	dev->buffers->open_command_params.command_buffer_ptr =
+			(u64)(unsigned long)__pa(pipe->command_buffer);
+	status = goldfish_cmd_locked(pipe, PIPE_CMD_OPEN);
+	spin_unlock_irqrestore(&dev->lock, flags);
+	if (status < 0)
+		goto err_cmd;
+	/* All is done, save the pipe into the file's private data field */
+	file->private_data = pipe;
+	return 0;
+
+err_cmd:
+	spin_lock_irqsave(&dev->lock, flags);
+	dev->pipes[id] = NULL;
+err_id_locked:
+	spin_unlock_irqrestore(&dev->lock, flags);
+	free_page((unsigned long)pipe->command_buffer);
+err_pipe:
+	kfree(pipe);
+	return status;
+}
+
+static int goldfish_pipe_release(struct inode *inode, struct file *filp)
+{
+	unsigned long flags;
+	struct goldfish_pipe *pipe = filp->private_data;
+	struct goldfish_pipe_dev *dev = pipe->dev;
+
+	/* The guest is closing the channel, so tell the emulator right now */
+	(void)goldfish_cmd(pipe, PIPE_CMD_CLOSE);
+
+	spin_lock_irqsave(&dev->lock, flags);
+	dev->pipes[pipe->id] = NULL;
+	signalled_pipes_remove_locked(dev, pipe);
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	filp->private_data = NULL;
+	free_page((unsigned long)pipe->command_buffer);
+	kfree(pipe);
+	return 0;
+}
+
+static const struct file_operations goldfish_pipe_fops = {
+	.owner = THIS_MODULE,
+	.read = goldfish_pipe_read,
+	.write = goldfish_pipe_write,
+	.poll = goldfish_pipe_poll,
+	.open = goldfish_pipe_open,
+	.release = goldfish_pipe_release,
+};
+
+static struct miscdevice goldfish_pipe_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "goldfish_pipe",
+	.fops = &goldfish_pipe_fops,
+};
+
+static int goldfish_pipe_device_init_v2(struct platform_device *pdev)
+{
+	char *page;
+	struct goldfish_pipe_dev *dev = pipe_dev;
+	int err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt,
+				IRQF_SHARED, "goldfish_pipe", dev);
+	if (err) {
+		dev_err(&pdev->dev, "unable to allocate IRQ for v2\n");
+		return err;
+	}
+
+	err = misc_register(&goldfish_pipe_dev);
+	if (err) {
+		dev_err(&pdev->dev, "unable to register v2 device\n");
+		return err;
+	}
+
+	dev->first_signalled_pipe = NULL;
+	dev->pipes_capacity = INITIAL_PIPES_CAPACITY;
+	dev->pipes = kcalloc(dev->pipes_capacity, sizeof(*dev->pipes), GFP_KERNEL);
+	if (!dev->pipes)
+		return -ENOMEM;
+
+	/*
+	 * We're going to pass two buffers, open_command_params and
+	 * signalled_pipe_buffers, to the host. This means each of those buffers
+	 * needs to be contained in a single physical page. The easiest choice is
+	 * to just allocate a page and place the buffers in it.
+	 */
+	BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE);
+	page = (char*)__get_free_page(GFP_KERNEL);
+	if (!page) {
+		kfree(dev->pipes);
+		return -ENOMEM;
+	}
+	dev->buffers = (struct goldfish_pipe_dev_buffers*)page;
+
+	/* Send the buffer addresses to the host */
+	{
+		u64 paddr = __pa(&dev->buffers->signalled_pipe_buffers);
+		writel((u32)(unsigned long)(paddr >> 32), dev->base + PIPE_REG_SIGNAL_BUFFER_HIGH);
+		writel((u32)(unsigned long)paddr, dev->base + PIPE_REG_SIGNAL_BUFFER);
+		writel((u32)MAX_SIGNALLED_PIPES, dev->base + PIPE_REG_SIGNAL_BUFFER_COUNT);
+
+		paddr = __pa(&dev->buffers->open_command_params);
+		writel((u32)(unsigned long)(paddr >> 32), dev->base + PIPE_REG_OPEN_BUFFER_HIGH);
+		writel((u32)(unsigned long)paddr, dev->base + PIPE_REG_OPEN_BUFFER);
+	}
+	return 0;
+}
+
+static void goldfish_pipe_device_deinit_v2(struct platform_device *pdev) {
+	struct goldfish_pipe_dev *dev = pipe_dev;
+	misc_deregister(&goldfish_pipe_dev);
+	kfree(dev->pipes);
+	free_page((unsigned long)dev->buffers);
+}
+
+static int goldfish_pipe_probe(struct platform_device *pdev)
+{
+	int err;
+	struct resource *r;
+	struct goldfish_pipe_dev *dev = pipe_dev;
+
+	BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE);
+
+	/* not thread safe, but this should not happen */
+	WARN_ON(dev->base != NULL);
+
+	spin_lock_init(&dev->lock);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (r == NULL || resource_size(r) < PAGE_SIZE) {
+		dev_err(&pdev->dev, "can't allocate i/o page\n");
+		return -EINVAL;
+	}
+	dev->base = devm_ioremap(&pdev->dev, r->start, PAGE_SIZE);
+	if (dev->base == NULL) {
+		dev_err(&pdev->dev, "ioremap failed\n");
+		return -EINVAL;
+	}
+
+	r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (r == NULL) {
+		err = -EINVAL;
+		goto error;
+	}
+	dev->irq = r->start;
+
+	/*
+	 * Exchange the versions with the host device
+	 *
+	 * Note: v1 driver used to not report its version, so we write it before
+	 *  reading device version back: this allows the host implementation to
+	 *  detect the old driver (if there was no version write before read).
+	 */
+	writel((u32)PIPE_DRIVER_VERSION, dev->base + PIPE_REG_VERSION);
+	dev->version = readl(dev->base + PIPE_REG_VERSION);
+	if (dev->version < PIPE_CURRENT_DEVICE_VERSION) {
+		/* initialize the old device version */
+		err = goldfish_pipe_device_init_v1(pdev);
+	} else {
+		/* Host device supports the new interface */
+		err = goldfish_pipe_device_init_v2(pdev);
+	}
+	if (!err)
+		return 0;
+
+error:
+	dev->base = NULL;
+	return err;
+}
+
+static int goldfish_pipe_remove(struct platform_device *pdev)
+{
+	struct goldfish_pipe_dev *dev = pipe_dev;
+	if (dev->version < PIPE_CURRENT_DEVICE_VERSION)
+		goldfish_pipe_device_deinit_v1(pdev);
+	else
+		goldfish_pipe_device_deinit_v2(pdev);
+	dev->base = NULL;
+	return 0;
+}
+
+static const struct acpi_device_id goldfish_pipe_acpi_match[] = {
+	{ "GFSH0003", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, goldfish_pipe_acpi_match);
+
+static const struct of_device_id goldfish_pipe_of_match[] = {
+	{ .compatible = "google,android-pipe", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, goldfish_pipe_of_match);
+
+static struct platform_driver goldfish_pipe_driver = {
+	.probe = goldfish_pipe_probe,
+	.remove = goldfish_pipe_remove,
+	.driver = {
+		.name = "goldfish_pipe",
+		.of_match_table = goldfish_pipe_of_match,
+		.acpi_match_table = ACPI_PTR(goldfish_pipe_acpi_match),
+	}
+};
+
+module_platform_driver(goldfish_pipe_driver);
+MODULE_AUTHOR("David Turner <digit@google.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c
index bcde8d1..fdb824f 100644
--- a/drivers/power/supply/power_supply_sysfs.c
+++ b/drivers/power/supply/power_supply_sysfs.c
@@ -107,7 +107,10 @@
 	else if (off >= POWER_SUPPLY_PROP_MODEL_NAME)
 		return sprintf(buf, "%s\n", value.strval);
 
-	return sprintf(buf, "%d\n", value.intval);
+	if (off == POWER_SUPPLY_PROP_CHARGE_COUNTER_EXT)
+		return sprintf(buf, "%lld\n", value.int64val);
+	else
+		return sprintf(buf, "%d\n", value.intval);
 }
 
 static ssize_t power_supply_store_property(struct device *dev,
@@ -198,6 +201,12 @@
 	POWER_SUPPLY_ATTR(scope),
 	POWER_SUPPLY_ATTR(charge_term_current),
 	POWER_SUPPLY_ATTR(calibrate),
+	/* Local extensions */
+	POWER_SUPPLY_ATTR(usb_hc),
+	POWER_SUPPLY_ATTR(usb_otg),
+	POWER_SUPPLY_ATTR(charge_enabled),
+	/* Local extensions of type int64_t */
+	POWER_SUPPLY_ATTR(charge_counter_ext),
 	/* Properties of type `const char *' */
 	POWER_SUPPLY_ATTR(model_name),
 	POWER_SUPPLY_ATTR(manufacturer),
diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c
index 4bcfb88..34aea38 100644
--- a/drivers/rtc/rtc-palmas.c
+++ b/drivers/rtc/rtc-palmas.c
@@ -45,6 +45,42 @@
 /* Total number of RTC registers needed to set time*/
 #define PALMAS_NUM_TIME_REGS	(PALMAS_YEARS_REG - PALMAS_SECONDS_REG + 1)
 
+/*
+ * Special bin2bcd mapping to deal with bcd storage of year.
+ *
+ *   0-69                -> 0xD0
+ *  70-99  (1970 - 1999) -> 0xD0 - 0xF9 (correctly rolls to 0x00)
+ * 100-199 (2000 - 2099) -> 0x00 - 0x99 (does not roll to 0xA0 :-( )
+ * 200-229 (2100 - 2129) -> 0xA0 - 0xC9 (really for completeness)
+ * 230-                  -> 0xC9
+ *
+ * Confirmed: the only transition that does not work correctly for this rtc
+ * clock is the transition from 2099 to 2100, it proceeds to 2000. We will
+ * accept this issue since the clock retains and transitions the year correctly
+ * in all other conditions.
+ */
+static unsigned char year_bin2bcd(int val)
+{
+	if (val < 70)
+		return 0xD0;
+	if (val < 100)
+		return bin2bcd(val - 20) | 0x80; /* KISS leverage of bin2bcd */
+	if (val >= 230)
+		return 0xC9;
+	if (val >= 200)
+		return bin2bcd(val - 180) | 0x80;
+	return bin2bcd(val - 100);
+}
+
+static int year_bcd2bin(unsigned char val)
+{
+	if (val >= 0xD0)
+		return bcd2bin(val & 0x7F) + 20;
+	if (val >= 0xA0)
+		return bcd2bin(val & 0x7F) + 180;
+	return bcd2bin(val) + 100;
+}
+
 static int palmas_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
 	unsigned char rtc_data[PALMAS_NUM_TIME_REGS];
@@ -71,7 +107,7 @@
 	tm->tm_hour = bcd2bin(rtc_data[2]);
 	tm->tm_mday = bcd2bin(rtc_data[3]);
 	tm->tm_mon = bcd2bin(rtc_data[4]) - 1;
-	tm->tm_year = bcd2bin(rtc_data[5]) + 100;
+	tm->tm_year = year_bcd2bin(rtc_data[5]);
 
 	return ret;
 }
@@ -87,7 +123,7 @@
 	rtc_data[2] = bin2bcd(tm->tm_hour);
 	rtc_data[3] = bin2bcd(tm->tm_mday);
 	rtc_data[4] = bin2bcd(tm->tm_mon + 1);
-	rtc_data[5] = bin2bcd(tm->tm_year - 100);
+	rtc_data[5] = year_bin2bcd(tm->tm_year);
 
 	/* Stop RTC while updating the RTC time registers */
 	ret = palmas_update_bits(palmas, PALMAS_RTC_BASE, PALMAS_RTC_CTRL_REG,
@@ -142,7 +178,7 @@
 	alm->time.tm_hour = bcd2bin(alarm_data[2]);
 	alm->time.tm_mday = bcd2bin(alarm_data[3]);
 	alm->time.tm_mon = bcd2bin(alarm_data[4]) - 1;
-	alm->time.tm_year = bcd2bin(alarm_data[5]) + 100;
+	alm->time.tm_year = year_bcd2bin(alarm_data[5]);
 
 	ret = palmas_read(palmas, PALMAS_RTC_BASE, PALMAS_RTC_INTERRUPTS_REG,
 			&int_val);
@@ -173,7 +209,7 @@
 	alarm_data[2] = bin2bcd(alm->time.tm_hour);
 	alarm_data[3] = bin2bcd(alm->time.tm_mday);
 	alarm_data[4] = bin2bcd(alm->time.tm_mon + 1);
-	alarm_data[5] = bin2bcd(alm->time.tm_year - 100);
+	alarm_data[5] = year_bin2bcd(alm->time.tm_year);
 
 	ret = palmas_bulk_write(palmas, PALMAS_RTC_BASE,
 		PALMAS_ALARM_SECONDS_REG, alarm_data, PALMAS_NUM_TIME_REGS);
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 0b85841..be5c5e0 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -41,6 +41,7 @@
 #include <linux/devfreq.h>
 #include <linux/nls.h>
 #include <linux/of.h>
+#include <linux/blkdev.h>
 #include "ufshcd.h"
 #include "ufs_quirks.h"
 #include "unipro.h"
@@ -1490,6 +1491,17 @@
 		clear_bit_unlock(tag, &hba->lrb_in_use);
 		goto out;
 	}
+
+	/* IO svc time latency histogram */
+	if (hba != NULL && cmd->request != NULL) {
+		if (hba->latency_hist_enabled &&
+		    (cmd->request->cmd_type == REQ_TYPE_FS)) {
+			cmd->request->lat_hist_io_start = ktime_get();
+			cmd->request->lat_hist_enabled = 1;
+		} else
+			cmd->request->lat_hist_enabled = 0;
+	}
+
 	WARN_ON(hba->clk_gating.state != CLKS_ON);
 
 	lrbp = &hba->lrb[tag];
@@ -3698,6 +3710,7 @@
 	struct scsi_cmnd *cmd;
 	int result;
 	int index;
+	struct request *req;
 
 	for_each_set_bit(index, &completed_reqs, hba->nutrs) {
 		lrbp = &hba->lrb[index];
@@ -3709,6 +3722,22 @@
 			/* Mark completed command as NULL in LRB */
 			lrbp->cmd = NULL;
 			clear_bit_unlock(index, &hba->lrb_in_use);
+			req = cmd->request;
+			if (req) {
+				/* Update IO svc time latency histogram */
+				if (req->lat_hist_enabled) {
+					ktime_t completion;
+					u_int64_t delta_us;
+
+					completion = ktime_get();
+					delta_us = ktime_us_delta(completion,
+						  req->lat_hist_io_start);
+					/* rq_data_dir() => true if WRITE */
+					blk_update_latency_hist(&hba->io_lat_s,
+						(rq_data_dir(req) == READ),
+						delta_us);
+				}
+			}
 			/* Do not touch lrbp after scsi done */
 			cmd->scsi_done(cmd);
 			__ufshcd_release(hba);
@@ -6497,6 +6526,54 @@
 }
 EXPORT_SYMBOL(ufshcd_shutdown);
 
+/*
+ * Values permitted 0, 1, 2.
+ * 0 -> Disable IO latency histograms (default)
+ * 1 -> Enable IO latency histograms
+ * 2 -> Zero out IO latency histograms
+ */
+static ssize_t
+latency_hist_store(struct device *dev, struct device_attribute *attr,
+		   const char *buf, size_t count)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	long value;
+
+	if (kstrtol(buf, 0, &value))
+		return -EINVAL;
+	if (value == BLK_IO_LAT_HIST_ZERO)
+		blk_zero_latency_hist(&hba->io_lat_s);
+	else if (value == BLK_IO_LAT_HIST_ENABLE ||
+		 value == BLK_IO_LAT_HIST_DISABLE)
+		hba->latency_hist_enabled = value;
+	return count;
+}
+
+ssize_t
+latency_hist_show(struct device *dev, struct device_attribute *attr,
+		  char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+
+	return blk_latency_hist_show(&hba->io_lat_s, buf);
+}
+
+static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR,
+		   latency_hist_show, latency_hist_store);
+
+static void
+ufshcd_init_latency_hist(struct ufs_hba *hba)
+{
+	if (device_create_file(hba->dev, &dev_attr_latency_hist))
+		dev_err(hba->dev, "Failed to create latency_hist sysfs entry\n");
+}
+
+static void
+ufshcd_exit_latency_hist(struct ufs_hba *hba)
+{
+	device_create_file(hba->dev, &dev_attr_latency_hist);
+}
+
 /**
  * ufshcd_remove - de-allocate SCSI host and host memory space
  *		data structure memory
@@ -6510,6 +6587,7 @@
 	ufshcd_hba_stop(hba, true);
 
 	ufshcd_exit_clk_gating(hba);
+	ufshcd_exit_latency_hist(hba);
 	if (ufshcd_is_clkscaling_enabled(hba))
 		devfreq_remove_device(hba->devfreq);
 	ufshcd_hba_exit(hba);
@@ -6858,6 +6936,8 @@
 	/* Hold auto suspend until async scan completes */
 	pm_runtime_get_sync(dev);
 
+	ufshcd_init_latency_hist(hba);
+
 	/*
 	 * We are assuming that device wasn't put in sleep/power-down
 	 * state exclusively during the boot stage before kernel.
@@ -6874,6 +6954,7 @@
 	scsi_remove_host(hba->host);
 exit_gating:
 	ufshcd_exit_clk_gating(hba);
+	ufshcd_exit_latency_hist(hba);
 out_disable:
 	hba->is_irq_enabled = false;
 	ufshcd_hba_exit(hba);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 6dbd2e1..845812d 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -575,6 +575,9 @@
 	bool is_urgent_bkops_lvl_checked;
 
 	struct ufs_desc_size desc_size;
+
+	int latency_hist_enabled;
+	struct io_latency_state io_lat_s;
 };
 
 /* Returns true if clocks can be gated. Otherwise false */
diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig
index 6c00d6f..a17c483 100644
--- a/drivers/staging/android/Kconfig
+++ b/drivers/staging/android/Kconfig
@@ -24,8 +24,28 @@
 	  scripts (/init.rc), and it defines priority values with minimum free memory size
 	  for each priority.
 
+config ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
+	bool "Android Low Memory Killer: detect oom_adj values"
+	depends on ANDROID_LOW_MEMORY_KILLER
+	default y
+	---help---
+	  Detect oom_adj values written to
+	  /sys/module/lowmemorykiller/parameters/adj and convert them
+	  to oom_score_adj values.
+
+config ANDROID_VSOC
+	tristate "Android Virtual SoC support"
+	default n
+	depends on PCI_MSI
+	---help---
+	  This option adds support for the Virtual SoC driver needed to boot
+	  a 'cuttlefish' Android image inside QEmu. The driver interacts with
+	  a QEmu ivshmem device. If built as a module, it will be called vsoc.
+
 source "drivers/staging/android/ion/Kconfig"
 
+source "drivers/staging/android/fiq_debugger/Kconfig"
+
 endif # if ANDROID
 
 endmenu
diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile
index 7ed1be7..93c5f5a 100644
--- a/drivers/staging/android/Makefile
+++ b/drivers/staging/android/Makefile
@@ -1,6 +1,8 @@
 ccflags-y += -I$(src)			# needed for trace events
 
 obj-y					+= ion/
+obj-$(CONFIG_FIQ_DEBUGGER)		+= fiq_debugger/
 
 obj-$(CONFIG_ASHMEM)			+= ashmem.o
 obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER)	+= lowmemorykiller.o
+obj-$(CONFIG_ANDROID_VSOC)		+= vsoc.o
diff --git a/drivers/staging/android/TODO b/drivers/staging/android/TODO
index 64d8c87..edfb680 100644
--- a/drivers/staging/android/TODO
+++ b/drivers/staging/android/TODO
@@ -33,5 +33,14 @@
  - clean up and ABI check for security issues
  - move it to drivers/base/dma-buf
 
+vsoc.c, uapi/vsoc_shm.h
+ - The current driver uses the same wait queue for all of the futexes in a
+   region. This will cause false wakeups in regions with a large number of
+   waiting threads. We should eventually use multiple queues and select the
+   queue based on the region.
+ - Add debugfs support for examining the permissions of regions.
+ - Remove VSOC_WAIT_FOR_INCOMING_INTERRUPT ioctl. This functionality has been
+   superseded by the futex and is there for legacy reasons.
+
 Please send patches to Greg Kroah-Hartman <greg@kroah.com> and Cc:
 Arve Hjønnevåg <arve@android.com> and Riley Andrews <riandrews@android.com>
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index c6314d1..5af176b 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -415,22 +415,14 @@
 	}
 	get_file(asma->file);
 
-	/*
-	 * XXX - Reworked to use shmem_zero_setup() instead of
-	 * shmem_set_file while we're in staging. -jstultz
-	 */
-	if (vma->vm_flags & VM_SHARED) {
-		ret = shmem_zero_setup(vma);
-		if (ret) {
-			fput(asma->file);
-			goto out;
-		}
+	if (vma->vm_flags & VM_SHARED)
+		shmem_set_file(vma, asma->file);
+	else {
+		if (vma->vm_file)
+			fput(vma->vm_file);
+		vma->vm_file = asma->file;
 	}
 
-	if (vma->vm_file)
-		fput(vma->vm_file);
-	vma->vm_file = asma->file;
-
 out:
 	mutex_unlock(&ashmem_mutex);
 	return ret;
@@ -467,9 +459,9 @@
 		loff_t start = range->pgstart * PAGE_SIZE;
 		loff_t end = (range->pgend + 1) * PAGE_SIZE;
 
-		vfs_fallocate(range->asma->file,
-			      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-			      start, end - start);
+		range->asma->file->f_op->fallocate(range->asma->file,
+				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				start, end - start);
 		range->purged = ASHMEM_WAS_PURGED;
 		lru_del(range);
 
diff --git a/drivers/staging/android/fiq_debugger/Kconfig b/drivers/staging/android/fiq_debugger/Kconfig
new file mode 100644
index 0000000..60fc224
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/Kconfig
@@ -0,0 +1,58 @@
+config FIQ_DEBUGGER
+	bool "FIQ Mode Serial Debugger"
+	default n
+	depends on ARM || ARM64
+	help
+	  The FIQ serial debugger can accept commands even when the
+	  kernel is unresponsive due to being stuck with interrupts
+	  disabled.
+
+config FIQ_DEBUGGER_NO_SLEEP
+	bool "Keep serial debugger active"
+	depends on FIQ_DEBUGGER
+	default n
+	help
+	  Enables the serial debugger at boot. Passing
+	  fiq_debugger.no_sleep on the kernel commandline will
+	  override this config option.
+
+config FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON
+	bool "Don't disable wakeup IRQ when debugger is active"
+	depends on FIQ_DEBUGGER
+	default n
+	help
+	  Don't disable the wakeup irq when enabling the uart clock.  This will
+	  cause extra interrupts, but it makes the serial debugger usable with
+	  on some MSM radio builds that ignore the uart clock request in power
+	  collapse.
+
+config FIQ_DEBUGGER_CONSOLE
+	bool "Console on FIQ Serial Debugger port"
+	depends on FIQ_DEBUGGER
+	default n
+	help
+	  Enables a console so that printk messages are displayed on
+	  the debugger serial port as the occur.
+
+config FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE
+	bool "Put the FIQ debugger into console mode by default"
+	depends on FIQ_DEBUGGER_CONSOLE
+	default n
+	help
+	  If enabled, this puts the fiq debugger into console mode by default.
+	  Otherwise, the fiq debugger will start out in debug mode.
+
+config FIQ_DEBUGGER_UART_OVERLAY
+	bool "Install uart DT overlay"
+	depends on FIQ_DEBUGGER
+	select OF_OVERLAY
+	default n
+	help
+	  If enabled, fiq debugger is calling fiq_debugger_uart_overlay()
+	  that will apply overlay uart_overlay@0 to disable proper uart.
+
+config FIQ_WATCHDOG
+	bool
+	select FIQ_DEBUGGER
+	select PSTORE_RAM
+	default n
diff --git a/drivers/staging/android/fiq_debugger/Makefile b/drivers/staging/android/fiq_debugger/Makefile
new file mode 100644
index 0000000..a7ca487
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/Makefile
@@ -0,0 +1,4 @@
+obj-y			+= fiq_debugger.o
+obj-$(CONFIG_ARM)	+= fiq_debugger_arm.o
+obj-$(CONFIG_ARM64)	+= fiq_debugger_arm64.o
+obj-$(CONFIG_FIQ_WATCHDOG)	+= fiq_watchdog.o
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c
new file mode 100644
index 0000000..675b974
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c
@@ -0,0 +1,1246 @@
+/*
+ * drivers/staging/android/fiq_debugger.c
+ *
+ * Serial Debugger Interface accessed through an FIQ interrupt.
+ *
+ * Copyright (C) 2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdarg.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/console.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/kernel_stat.h>
+#include <linux/kmsg_dump.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/timer.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+
+#ifdef CONFIG_FIQ_GLUE
+#include <asm/fiq_glue.h>
+#endif
+
+#ifdef CONFIG_FIQ_DEBUGGER_UART_OVERLAY
+#include <linux/of.h>
+#endif
+
+#include <linux/uaccess.h>
+
+#include "fiq_debugger.h"
+#include "fiq_debugger_priv.h"
+#include "fiq_debugger_ringbuf.h"
+
+#define DEBUG_MAX 64
+#define MAX_UNHANDLED_FIQ_COUNT 1000000
+
+#define MAX_FIQ_DEBUGGER_PORTS 4
+
+struct fiq_debugger_state {
+#ifdef CONFIG_FIQ_GLUE
+	struct fiq_glue_handler handler;
+#endif
+	struct fiq_debugger_output output;
+
+	int fiq;
+	int uart_irq;
+	int signal_irq;
+	int wakeup_irq;
+	bool wakeup_irq_no_set_wake;
+	struct clk *clk;
+	struct fiq_debugger_pdata *pdata;
+	struct platform_device *pdev;
+
+	char debug_cmd[DEBUG_MAX];
+	int debug_busy;
+	int debug_abort;
+
+	char debug_buf[DEBUG_MAX];
+	int debug_count;
+
+	bool no_sleep;
+	bool debug_enable;
+	bool ignore_next_wakeup_irq;
+	struct timer_list sleep_timer;
+	spinlock_t sleep_timer_lock;
+	bool uart_enabled;
+	struct wakeup_source debugger_wake_src;
+	bool console_enable;
+	int current_cpu;
+	atomic_t unhandled_fiq_count;
+	bool in_fiq;
+
+	struct work_struct work;
+	spinlock_t work_lock;
+	char work_cmd[DEBUG_MAX];
+
+#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE
+	spinlock_t console_lock;
+	struct console console;
+	struct tty_port tty_port;
+	struct fiq_debugger_ringbuf *tty_rbuf;
+	bool syslog_dumping;
+#endif
+
+	unsigned int last_irqs[NR_IRQS];
+	unsigned int last_local_timer_irqs[NR_CPUS];
+};
+
+#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE
+struct tty_driver *fiq_tty_driver;
+#endif
+
+#ifdef CONFIG_FIQ_DEBUGGER_NO_SLEEP
+static bool initial_no_sleep = true;
+#else
+static bool initial_no_sleep;
+#endif
+
+#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE
+static bool initial_debug_enable = true;
+static bool initial_console_enable = true;
+#else
+static bool initial_debug_enable;
+static bool initial_console_enable;
+#endif
+
+static bool fiq_kgdb_enable;
+static bool fiq_debugger_disable;
+
+module_param_named(no_sleep, initial_no_sleep, bool, 0644);
+module_param_named(debug_enable, initial_debug_enable, bool, 0644);
+module_param_named(console_enable, initial_console_enable, bool, 0644);
+module_param_named(kgdb_enable, fiq_kgdb_enable, bool, 0644);
+module_param_named(disable, fiq_debugger_disable, bool, 0644);
+
+#ifdef CONFIG_FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON
+static inline
+void fiq_debugger_enable_wakeup_irq(struct fiq_debugger_state *state) {}
+static inline
+void fiq_debugger_disable_wakeup_irq(struct fiq_debugger_state *state) {}
+#else
+static inline
+void fiq_debugger_enable_wakeup_irq(struct fiq_debugger_state *state)
+{
+	if (state->wakeup_irq < 0)
+		return;
+	enable_irq(state->wakeup_irq);
+	if (!state->wakeup_irq_no_set_wake)
+		enable_irq_wake(state->wakeup_irq);
+}
+static inline
+void fiq_debugger_disable_wakeup_irq(struct fiq_debugger_state *state)
+{
+	if (state->wakeup_irq < 0)
+		return;
+	disable_irq_nosync(state->wakeup_irq);
+	if (!state->wakeup_irq_no_set_wake)
+		disable_irq_wake(state->wakeup_irq);
+}
+#endif
+
+static inline bool fiq_debugger_have_fiq(struct fiq_debugger_state *state)
+{
+	return (state->fiq >= 0);
+}
+
+#ifdef CONFIG_FIQ_GLUE
+static void fiq_debugger_force_irq(struct fiq_debugger_state *state)
+{
+	unsigned int irq = state->signal_irq;
+
+	if (WARN_ON(!fiq_debugger_have_fiq(state)))
+		return;
+	if (state->pdata->force_irq) {
+		state->pdata->force_irq(state->pdev, irq);
+	} else {
+		struct irq_chip *chip = irq_get_chip(irq);
+		if (chip && chip->irq_retrigger)
+			chip->irq_retrigger(irq_get_irq_data(irq));
+	}
+}
+#endif
+
+static void fiq_debugger_uart_enable(struct fiq_debugger_state *state)
+{
+	if (state->clk)
+		clk_enable(state->clk);
+	if (state->pdata->uart_enable)
+		state->pdata->uart_enable(state->pdev);
+}
+
+static void fiq_debugger_uart_disable(struct fiq_debugger_state *state)
+{
+	if (state->pdata->uart_disable)
+		state->pdata->uart_disable(state->pdev);
+	if (state->clk)
+		clk_disable(state->clk);
+}
+
+static void fiq_debugger_uart_flush(struct fiq_debugger_state *state)
+{
+	if (state->pdata->uart_flush)
+		state->pdata->uart_flush(state->pdev);
+}
+
+static void fiq_debugger_putc(struct fiq_debugger_state *state, char c)
+{
+	state->pdata->uart_putc(state->pdev, c);
+}
+
+static void fiq_debugger_puts(struct fiq_debugger_state *state, char *s)
+{
+	unsigned c;
+	while ((c = *s++)) {
+		if (c == '\n')
+			fiq_debugger_putc(state, '\r');
+		fiq_debugger_putc(state, c);
+	}
+}
+
+static void fiq_debugger_prompt(struct fiq_debugger_state *state)
+{
+	fiq_debugger_puts(state, "debug> ");
+}
+
+static void fiq_debugger_dump_kernel_log(struct fiq_debugger_state *state)
+{
+	char buf[512];
+	size_t len;
+	struct kmsg_dumper dumper = { .active = true };
+
+
+	kmsg_dump_rewind_nolock(&dumper);
+	while (kmsg_dump_get_line_nolock(&dumper, true, buf,
+					 sizeof(buf) - 1, &len)) {
+		buf[len] = 0;
+		fiq_debugger_puts(state, buf);
+	}
+}
+
+static void fiq_debugger_printf(struct fiq_debugger_output *output,
+			       const char *fmt, ...)
+{
+	struct fiq_debugger_state *state;
+	char buf[256];
+	va_list ap;
+
+	state = container_of(output, struct fiq_debugger_state, output);
+	va_start(ap, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, ap);
+	va_end(ap);
+
+	fiq_debugger_puts(state, buf);
+}
+
+/* Safe outside fiq context */
+static int fiq_debugger_printf_nfiq(void *cookie, const char *fmt, ...)
+{
+	struct fiq_debugger_state *state = cookie;
+	char buf[256];
+	va_list ap;
+	unsigned long irq_flags;
+
+	va_start(ap, fmt);
+	vsnprintf(buf, 128, fmt, ap);
+	va_end(ap);
+
+	local_irq_save(irq_flags);
+	fiq_debugger_puts(state, buf);
+	fiq_debugger_uart_flush(state);
+	local_irq_restore(irq_flags);
+	return state->debug_abort;
+}
+
+static void fiq_debugger_dump_irqs(struct fiq_debugger_state *state)
+{
+	int n;
+	struct irq_desc *desc;
+
+	fiq_debugger_printf(&state->output,
+			"irqnr       total  since-last   status  name\n");
+	for_each_irq_desc(n, desc) {
+		struct irqaction *act = desc->action;
+		if (!act && !kstat_irqs(n))
+			continue;
+		fiq_debugger_printf(&state->output, "%5d: %10u %11u %8x  %s\n", n,
+			kstat_irqs(n),
+			kstat_irqs(n) - state->last_irqs[n],
+			desc->status_use_accessors,
+			(act && act->name) ? act->name : "???");
+		state->last_irqs[n] = kstat_irqs(n);
+	}
+}
+
+static void fiq_debugger_do_ps(struct fiq_debugger_state *state)
+{
+	struct task_struct *g;
+	struct task_struct *p;
+	unsigned task_state;
+	static const char stat_nam[] = "RSDTtZX";
+
+	fiq_debugger_printf(&state->output, "pid   ppid  prio task            pc\n");
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		task_state = p->state ? __ffs(p->state) + 1 : 0;
+		fiq_debugger_printf(&state->output,
+			     "%5d %5d %4d ", p->pid, p->parent->pid, p->prio);
+		fiq_debugger_printf(&state->output, "%-13.13s %c", p->comm,
+			     task_state >= sizeof(stat_nam) ? '?' : stat_nam[task_state]);
+		if (task_state == TASK_RUNNING)
+			fiq_debugger_printf(&state->output, " running\n");
+		else
+			fiq_debugger_printf(&state->output, " %08lx\n",
+					thread_saved_pc(p));
+	} while_each_thread(g, p);
+	read_unlock(&tasklist_lock);
+}
+
+#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE
+static void fiq_debugger_begin_syslog_dump(struct fiq_debugger_state *state)
+{
+	state->syslog_dumping = true;
+}
+
+static void fiq_debugger_end_syslog_dump(struct fiq_debugger_state *state)
+{
+	state->syslog_dumping = false;
+}
+#else
+extern int do_syslog(int type, char __user *bug, int count);
+static void fiq_debugger_begin_syslog_dump(struct fiq_debugger_state *state)
+{
+	do_syslog(5 /* clear */, NULL, 0);
+}
+
+static void fiq_debugger_end_syslog_dump(struct fiq_debugger_state *state)
+{
+	fiq_debugger_dump_kernel_log(state);
+}
+#endif
+
+static void fiq_debugger_do_sysrq(struct fiq_debugger_state *state, char rq)
+{
+	if ((rq == 'g' || rq == 'G') && !fiq_kgdb_enable) {
+		fiq_debugger_printf(&state->output, "sysrq-g blocked\n");
+		return;
+	}
+	fiq_debugger_begin_syslog_dump(state);
+	handle_sysrq(rq);
+	fiq_debugger_end_syslog_dump(state);
+}
+
+#ifdef CONFIG_KGDB
+static void fiq_debugger_do_kgdb(struct fiq_debugger_state *state)
+{
+	if (!fiq_kgdb_enable) {
+		fiq_debugger_printf(&state->output, "kgdb through fiq debugger not enabled\n");
+		return;
+	}
+
+	fiq_debugger_printf(&state->output, "enabling console and triggering kgdb\n");
+	state->console_enable = true;
+	handle_sysrq('g');
+}
+#endif
+
+static void fiq_debugger_schedule_work(struct fiq_debugger_state *state,
+		char *cmd)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&state->work_lock, flags);
+	if (state->work_cmd[0] != '\0') {
+		fiq_debugger_printf(&state->output, "work command processor busy\n");
+		spin_unlock_irqrestore(&state->work_lock, flags);
+		return;
+	}
+
+	strlcpy(state->work_cmd, cmd, sizeof(state->work_cmd));
+	spin_unlock_irqrestore(&state->work_lock, flags);
+
+	schedule_work(&state->work);
+}
+
+static void fiq_debugger_work(struct work_struct *work)
+{
+	struct fiq_debugger_state *state;
+	char work_cmd[DEBUG_MAX];
+	char *cmd;
+	unsigned long flags;
+
+	state = container_of(work, struct fiq_debugger_state, work);
+
+	spin_lock_irqsave(&state->work_lock, flags);
+
+	strlcpy(work_cmd, state->work_cmd, sizeof(work_cmd));
+	state->work_cmd[0] = '\0';
+
+	spin_unlock_irqrestore(&state->work_lock, flags);
+
+	cmd = work_cmd;
+	if (!strncmp(cmd, "reboot", 6)) {
+		cmd += 6;
+		while (*cmd == ' ')
+			cmd++;
+		if (cmd != '\0')
+			kernel_restart(cmd);
+		else
+			kernel_restart(NULL);
+	} else {
+		fiq_debugger_printf(&state->output, "unknown work command '%s'\n",
+				work_cmd);
+	}
+}
+
+/* This function CANNOT be called in FIQ context */
+static void fiq_debugger_irq_exec(struct fiq_debugger_state *state, char *cmd)
+{
+	if (!strcmp(cmd, "ps"))
+		fiq_debugger_do_ps(state);
+	if (!strcmp(cmd, "sysrq"))
+		fiq_debugger_do_sysrq(state, 'h');
+	if (!strncmp(cmd, "sysrq ", 6))
+		fiq_debugger_do_sysrq(state, cmd[6]);
+#ifdef CONFIG_KGDB
+	if (!strcmp(cmd, "kgdb"))
+		fiq_debugger_do_kgdb(state);
+#endif
+	if (!strncmp(cmd, "reboot", 6))
+		fiq_debugger_schedule_work(state, cmd);
+}
+
+static void fiq_debugger_help(struct fiq_debugger_state *state)
+{
+	fiq_debugger_printf(&state->output,
+				"FIQ Debugger commands:\n"
+				" pc            PC status\n"
+				" regs          Register dump\n"
+				" allregs       Extended Register dump\n"
+				" bt            Stack trace\n"
+				" reboot [<c>]  Reboot with command <c>\n"
+				" reset [<c>]   Hard reset with command <c>\n"
+				" irqs          Interupt status\n"
+				" kmsg          Kernel log\n"
+				" version       Kernel version\n");
+	fiq_debugger_printf(&state->output,
+				" sleep         Allow sleep while in FIQ\n"
+				" nosleep       Disable sleep while in FIQ\n"
+				" console       Switch terminal to console\n"
+				" cpu           Current CPU\n"
+				" cpu <number>  Switch to CPU<number>\n");
+	fiq_debugger_printf(&state->output,
+				" ps            Process list\n"
+				" sysrq         sysrq options\n"
+				" sysrq <param> Execute sysrq with <param>\n");
+#ifdef CONFIG_KGDB
+	fiq_debugger_printf(&state->output,
+				" kgdb          Enter kernel debugger\n");
+#endif
+}
+
+static void fiq_debugger_take_affinity(void *info)
+{
+	struct fiq_debugger_state *state = info;
+	struct cpumask cpumask;
+
+	cpumask_clear(&cpumask);
+	cpumask_set_cpu(get_cpu(), &cpumask);
+
+	irq_set_affinity(state->uart_irq, &cpumask);
+}
+
+static void fiq_debugger_switch_cpu(struct fiq_debugger_state *state, int cpu)
+{
+	if (!fiq_debugger_have_fiq(state))
+		smp_call_function_single(cpu, fiq_debugger_take_affinity, state,
+				false);
+	state->current_cpu = cpu;
+}
+
+static bool fiq_debugger_fiq_exec(struct fiq_debugger_state *state,
+			const char *cmd, const struct pt_regs *regs,
+			void *svc_sp)
+{
+	bool signal_helper = false;
+
+	if (!strcmp(cmd, "help") || !strcmp(cmd, "?")) {
+		fiq_debugger_help(state);
+	} else if (!strcmp(cmd, "pc")) {
+		fiq_debugger_dump_pc(&state->output, regs);
+	} else if (!strcmp(cmd, "regs")) {
+		fiq_debugger_dump_regs(&state->output, regs);
+	} else if (!strcmp(cmd, "allregs")) {
+		fiq_debugger_dump_allregs(&state->output, regs);
+	} else if (!strcmp(cmd, "bt")) {
+		fiq_debugger_dump_stacktrace(&state->output, regs, 100, svc_sp);
+	} else if (!strncmp(cmd, "reset", 5)) {
+		cmd += 5;
+		while (*cmd == ' ')
+			cmd++;
+		if (*cmd) {
+			char tmp_cmd[32];
+			strlcpy(tmp_cmd, cmd, sizeof(tmp_cmd));
+			machine_restart(tmp_cmd);
+		} else {
+			machine_restart(NULL);
+		}
+	} else if (!strcmp(cmd, "irqs")) {
+		fiq_debugger_dump_irqs(state);
+	} else if (!strcmp(cmd, "kmsg")) {
+		fiq_debugger_dump_kernel_log(state);
+	} else if (!strcmp(cmd, "version")) {
+		fiq_debugger_printf(&state->output, "%s\n", linux_banner);
+	} else if (!strcmp(cmd, "sleep")) {
+		state->no_sleep = false;
+		fiq_debugger_printf(&state->output, "enabling sleep\n");
+	} else if (!strcmp(cmd, "nosleep")) {
+		state->no_sleep = true;
+		fiq_debugger_printf(&state->output, "disabling sleep\n");
+	} else if (!strcmp(cmd, "console")) {
+		fiq_debugger_printf(&state->output, "console mode\n");
+		fiq_debugger_uart_flush(state);
+		state->console_enable = true;
+	} else if (!strcmp(cmd, "cpu")) {
+		fiq_debugger_printf(&state->output, "cpu %d\n", state->current_cpu);
+	} else if (!strncmp(cmd, "cpu ", 4)) {
+		unsigned long cpu = 0;
+		if (kstrtoul(cmd + 4, 10, &cpu) == 0)
+			fiq_debugger_switch_cpu(state, cpu);
+		else
+			fiq_debugger_printf(&state->output, "invalid cpu\n");
+		fiq_debugger_printf(&state->output, "cpu %d\n", state->current_cpu);
+	} else {
+		if (state->debug_busy) {
+			fiq_debugger_printf(&state->output,
+				"command processor busy. trying to abort.\n");
+			state->debug_abort = -1;
+		} else {
+			strcpy(state->debug_cmd, cmd);
+			state->debug_busy = 1;
+		}
+
+		return true;
+	}
+	if (!state->console_enable)
+		fiq_debugger_prompt(state);
+
+	return signal_helper;
+}
+
+static void fiq_debugger_sleep_timer_expired(unsigned long data)
+{
+	struct fiq_debugger_state *state = (struct fiq_debugger_state *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&state->sleep_timer_lock, flags);
+	if (state->uart_enabled && !state->no_sleep) {
+		if (state->debug_enable && !state->console_enable) {
+			state->debug_enable = false;
+			fiq_debugger_printf_nfiq(state,
+					"suspending fiq debugger\n");
+		}
+		state->ignore_next_wakeup_irq = true;
+		fiq_debugger_uart_disable(state);
+		state->uart_enabled = false;
+		fiq_debugger_enable_wakeup_irq(state);
+	}
+	__pm_relax(&state->debugger_wake_src);
+	spin_unlock_irqrestore(&state->sleep_timer_lock, flags);
+}
+
+static void fiq_debugger_handle_wakeup(struct fiq_debugger_state *state)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&state->sleep_timer_lock, flags);
+	if (state->wakeup_irq >= 0 && state->ignore_next_wakeup_irq) {
+		state->ignore_next_wakeup_irq = false;
+	} else if (!state->uart_enabled) {
+		__pm_stay_awake(&state->debugger_wake_src);
+		fiq_debugger_uart_enable(state);
+		state->uart_enabled = true;
+		fiq_debugger_disable_wakeup_irq(state);
+		mod_timer(&state->sleep_timer, jiffies + HZ / 2);
+	}
+	spin_unlock_irqrestore(&state->sleep_timer_lock, flags);
+}
+
+static irqreturn_t fiq_debugger_wakeup_irq_handler(int irq, void *dev)
+{
+	struct fiq_debugger_state *state = dev;
+
+	if (!state->no_sleep)
+		fiq_debugger_puts(state, "WAKEUP\n");
+	fiq_debugger_handle_wakeup(state);
+
+	return IRQ_HANDLED;
+}
+
+static
+void fiq_debugger_handle_console_irq_context(struct fiq_debugger_state *state)
+{
+#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE)
+	if (state->tty_port.ops) {
+		int i;
+		int count = fiq_debugger_ringbuf_level(state->tty_rbuf);
+		for (i = 0; i < count; i++) {
+			int c = fiq_debugger_ringbuf_peek(state->tty_rbuf, 0);
+			tty_insert_flip_char(&state->tty_port, c, TTY_NORMAL);
+			if (!fiq_debugger_ringbuf_consume(state->tty_rbuf, 1))
+				pr_warn("fiq tty failed to consume byte\n");
+		}
+		tty_flip_buffer_push(&state->tty_port);
+	}
+#endif
+}
+
+static void fiq_debugger_handle_irq_context(struct fiq_debugger_state *state)
+{
+	if (!state->no_sleep) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&state->sleep_timer_lock, flags);
+		__pm_stay_awake(&state->debugger_wake_src);
+		mod_timer(&state->sleep_timer, jiffies + HZ * 5);
+		spin_unlock_irqrestore(&state->sleep_timer_lock, flags);
+	}
+	fiq_debugger_handle_console_irq_context(state);
+	if (state->debug_busy) {
+		fiq_debugger_irq_exec(state, state->debug_cmd);
+		if (!state->console_enable)
+			fiq_debugger_prompt(state);
+		state->debug_busy = 0;
+	}
+}
+
+static int fiq_debugger_getc(struct fiq_debugger_state *state)
+{
+	return state->pdata->uart_getc(state->pdev);
+}
+
+static bool fiq_debugger_handle_uart_interrupt(struct fiq_debugger_state *state,
+			int this_cpu, const struct pt_regs *regs, void *svc_sp)
+{
+	int c;
+	static int last_c;
+	int count = 0;
+	bool signal_helper = false;
+
+	if (this_cpu != state->current_cpu) {
+		if (state->in_fiq)
+			return false;
+
+		if (atomic_inc_return(&state->unhandled_fiq_count) !=
+					MAX_UNHANDLED_FIQ_COUNT)
+			return false;
+
+		fiq_debugger_printf(&state->output,
+			"fiq_debugger: cpu %d not responding, "
+			"reverting to cpu %d\n", state->current_cpu,
+			this_cpu);
+
+		atomic_set(&state->unhandled_fiq_count, 0);
+		fiq_debugger_switch_cpu(state, this_cpu);
+		return false;
+	}
+
+	state->in_fiq = true;
+
+	while ((c = fiq_debugger_getc(state)) != FIQ_DEBUGGER_NO_CHAR) {
+		count++;
+		if (!state->debug_enable) {
+			if ((c == 13) || (c == 10)) {
+				state->debug_enable = true;
+				state->debug_count = 0;
+				fiq_debugger_prompt(state);
+			}
+		} else if (c == FIQ_DEBUGGER_BREAK) {
+			state->console_enable = false;
+			fiq_debugger_puts(state, "fiq debugger mode\n");
+			state->debug_count = 0;
+			fiq_debugger_prompt(state);
+#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE
+		} else if (state->console_enable && state->tty_rbuf) {
+			fiq_debugger_ringbuf_push(state->tty_rbuf, c);
+			signal_helper = true;
+#endif
+		} else if ((c >= ' ') && (c < 127)) {
+			if (state->debug_count < (DEBUG_MAX - 1)) {
+				state->debug_buf[state->debug_count++] = c;
+				fiq_debugger_putc(state, c);
+			}
+		} else if ((c == 8) || (c == 127)) {
+			if (state->debug_count > 0) {
+				state->debug_count--;
+				fiq_debugger_putc(state, 8);
+				fiq_debugger_putc(state, ' ');
+				fiq_debugger_putc(state, 8);
+			}
+		} else if ((c == 13) || (c == 10)) {
+			if (c == '\r' || (c == '\n' && last_c != '\r')) {
+				fiq_debugger_putc(state, '\r');
+				fiq_debugger_putc(state, '\n');
+			}
+			if (state->debug_count) {
+				state->debug_buf[state->debug_count] = 0;
+				state->debug_count = 0;
+				signal_helper |=
+					fiq_debugger_fiq_exec(state,
+							state->debug_buf,
+							regs, svc_sp);
+			} else {
+				fiq_debugger_prompt(state);
+			}
+		}
+		last_c = c;
+	}
+	if (!state->console_enable)
+		fiq_debugger_uart_flush(state);
+	if (state->pdata->fiq_ack)
+		state->pdata->fiq_ack(state->pdev, state->fiq);
+
+	/* poke sleep timer if necessary */
+	if (state->debug_enable && !state->no_sleep)
+		signal_helper = true;
+
+	atomic_set(&state->unhandled_fiq_count, 0);
+	state->in_fiq = false;
+
+	return signal_helper;
+}
+
+#ifdef CONFIG_FIQ_GLUE
+static void fiq_debugger_fiq(struct fiq_glue_handler *h,
+		const struct pt_regs *regs, void *svc_sp)
+{
+	struct fiq_debugger_state *state =
+		container_of(h, struct fiq_debugger_state, handler);
+	unsigned int this_cpu = THREAD_INFO(svc_sp)->cpu;
+	bool need_irq;
+
+	need_irq = fiq_debugger_handle_uart_interrupt(state, this_cpu, regs,
+			svc_sp);
+	if (need_irq)
+		fiq_debugger_force_irq(state);
+}
+#endif
+
+/*
+ * When not using FIQs, we only use this single interrupt as an entry point.
+ * This just effectively takes over the UART interrupt and does all the work
+ * in this context.
+ */
+static irqreturn_t fiq_debugger_uart_irq(int irq, void *dev)
+{
+	struct fiq_debugger_state *state = dev;
+	bool not_done;
+
+	fiq_debugger_handle_wakeup(state);
+
+	/* handle the debugger irq in regular context */
+	not_done = fiq_debugger_handle_uart_interrupt(state, smp_processor_id(),
+					      get_irq_regs(),
+					      current_thread_info());
+	if (not_done)
+		fiq_debugger_handle_irq_context(state);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * If FIQs are used, not everything can happen in fiq context.
+ * FIQ handler does what it can and then signals this interrupt to finish the
+ * job in irq context.
+ */
+static irqreturn_t fiq_debugger_signal_irq(int irq, void *dev)
+{
+	struct fiq_debugger_state *state = dev;
+
+	if (state->pdata->force_irq_ack)
+		state->pdata->force_irq_ack(state->pdev, state->signal_irq);
+
+	fiq_debugger_handle_irq_context(state);
+
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_FIQ_GLUE
+static void fiq_debugger_resume(struct fiq_glue_handler *h)
+{
+	struct fiq_debugger_state *state =
+		container_of(h, struct fiq_debugger_state, handler);
+	if (state->pdata->uart_resume)
+		state->pdata->uart_resume(state->pdev);
+}
+#endif
+
+#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE)
+struct tty_driver *fiq_debugger_console_device(struct console *co, int *index)
+{
+	*index = co->index;
+	return fiq_tty_driver;
+}
+
+static void fiq_debugger_console_write(struct console *co,
+				const char *s, unsigned int count)
+{
+	struct fiq_debugger_state *state;
+	unsigned long flags;
+
+	state = container_of(co, struct fiq_debugger_state, console);
+
+	if (!state->console_enable && !state->syslog_dumping)
+		return;
+
+	fiq_debugger_uart_enable(state);
+	spin_lock_irqsave(&state->console_lock, flags);
+	while (count--) {
+		if (*s == '\n')
+			fiq_debugger_putc(state, '\r');
+		fiq_debugger_putc(state, *s++);
+	}
+	fiq_debugger_uart_flush(state);
+	spin_unlock_irqrestore(&state->console_lock, flags);
+	fiq_debugger_uart_disable(state);
+}
+
+static struct console fiq_debugger_console = {
+	.name = "ttyFIQ",
+	.device = fiq_debugger_console_device,
+	.write = fiq_debugger_console_write,
+	.flags = CON_PRINTBUFFER | CON_ANYTIME | CON_ENABLED,
+};
+
+int fiq_tty_open(struct tty_struct *tty, struct file *filp)
+{
+	int line = tty->index;
+	struct fiq_debugger_state **states = tty->driver->driver_state;
+	struct fiq_debugger_state *state = states[line];
+
+	return tty_port_open(&state->tty_port, tty, filp);
+}
+
+void fiq_tty_close(struct tty_struct *tty, struct file *filp)
+{
+	tty_port_close(tty->port, tty, filp);
+}
+
+int  fiq_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
+{
+	int i;
+	int line = tty->index;
+	struct fiq_debugger_state **states = tty->driver->driver_state;
+	struct fiq_debugger_state *state = states[line];
+
+	if (!state->console_enable)
+		return count;
+
+	fiq_debugger_uart_enable(state);
+	spin_lock_irq(&state->console_lock);
+	for (i = 0; i < count; i++)
+		fiq_debugger_putc(state, *buf++);
+	spin_unlock_irq(&state->console_lock);
+	fiq_debugger_uart_disable(state);
+
+	return count;
+}
+
+int  fiq_tty_write_room(struct tty_struct *tty)
+{
+	return 16;
+}
+
+#ifdef CONFIG_CONSOLE_POLL
+static int fiq_tty_poll_init(struct tty_driver *driver, int line, char *options)
+{
+	return 0;
+}
+
+static int fiq_tty_poll_get_char(struct tty_driver *driver, int line)
+{
+	struct fiq_debugger_state **states = driver->driver_state;
+	struct fiq_debugger_state *state = states[line];
+	int c = NO_POLL_CHAR;
+
+	fiq_debugger_uart_enable(state);
+	if (fiq_debugger_have_fiq(state)) {
+		int count = fiq_debugger_ringbuf_level(state->tty_rbuf);
+		if (count > 0) {
+			c = fiq_debugger_ringbuf_peek(state->tty_rbuf, 0);
+			fiq_debugger_ringbuf_consume(state->tty_rbuf, 1);
+		}
+	} else {
+		c = fiq_debugger_getc(state);
+		if (c == FIQ_DEBUGGER_NO_CHAR)
+			c = NO_POLL_CHAR;
+	}
+	fiq_debugger_uart_disable(state);
+
+	return c;
+}
+
+static void fiq_tty_poll_put_char(struct tty_driver *driver, int line, char ch)
+{
+	struct fiq_debugger_state **states = driver->driver_state;
+	struct fiq_debugger_state *state = states[line];
+	fiq_debugger_uart_enable(state);
+	fiq_debugger_putc(state, ch);
+	fiq_debugger_uart_disable(state);
+}
+#endif
+
+static const struct tty_port_operations fiq_tty_port_ops;
+
+static const struct tty_operations fiq_tty_driver_ops = {
+	.write = fiq_tty_write,
+	.write_room = fiq_tty_write_room,
+	.open = fiq_tty_open,
+	.close = fiq_tty_close,
+#ifdef CONFIG_CONSOLE_POLL
+	.poll_init = fiq_tty_poll_init,
+	.poll_get_char = fiq_tty_poll_get_char,
+	.poll_put_char = fiq_tty_poll_put_char,
+#endif
+};
+
+static int fiq_debugger_tty_init(void)
+{
+	int ret;
+	struct fiq_debugger_state **states = NULL;
+
+	states = kzalloc(sizeof(*states) * MAX_FIQ_DEBUGGER_PORTS, GFP_KERNEL);
+	if (!states) {
+		pr_err("Failed to allocate fiq debugger state structres\n");
+		return -ENOMEM;
+	}
+
+	fiq_tty_driver = alloc_tty_driver(MAX_FIQ_DEBUGGER_PORTS);
+	if (!fiq_tty_driver) {
+		pr_err("Failed to allocate fiq debugger tty\n");
+		ret = -ENOMEM;
+		goto err_free_state;
+	}
+
+	fiq_tty_driver->owner		= THIS_MODULE;
+	fiq_tty_driver->driver_name	= "fiq-debugger";
+	fiq_tty_driver->name		= "ttyFIQ";
+	fiq_tty_driver->type		= TTY_DRIVER_TYPE_SERIAL;
+	fiq_tty_driver->subtype		= SERIAL_TYPE_NORMAL;
+	fiq_tty_driver->init_termios	= tty_std_termios;
+	fiq_tty_driver->flags		= TTY_DRIVER_REAL_RAW |
+					  TTY_DRIVER_DYNAMIC_DEV;
+	fiq_tty_driver->driver_state	= states;
+
+	fiq_tty_driver->init_termios.c_cflag =
+					B115200 | CS8 | CREAD | HUPCL | CLOCAL;
+	fiq_tty_driver->init_termios.c_ispeed = 115200;
+	fiq_tty_driver->init_termios.c_ospeed = 115200;
+
+	tty_set_operations(fiq_tty_driver, &fiq_tty_driver_ops);
+
+	ret = tty_register_driver(fiq_tty_driver);
+	if (ret) {
+		pr_err("Failed to register fiq tty: %d\n", ret);
+		goto err_free_tty;
+	}
+
+	pr_info("Registered FIQ tty driver\n");
+	return 0;
+
+err_free_tty:
+	put_tty_driver(fiq_tty_driver);
+	fiq_tty_driver = NULL;
+err_free_state:
+	kfree(states);
+	return ret;
+}
+
+static int fiq_debugger_tty_init_one(struct fiq_debugger_state *state)
+{
+	int ret;
+	struct device *tty_dev;
+	struct fiq_debugger_state **states = fiq_tty_driver->driver_state;
+
+	states[state->pdev->id] = state;
+
+	state->tty_rbuf = fiq_debugger_ringbuf_alloc(1024);
+	if (!state->tty_rbuf) {
+		pr_err("Failed to allocate fiq debugger ringbuf\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	tty_port_init(&state->tty_port);
+	state->tty_port.ops = &fiq_tty_port_ops;
+
+	tty_dev = tty_port_register_device(&state->tty_port, fiq_tty_driver,
+					   state->pdev->id, &state->pdev->dev);
+	if (IS_ERR(tty_dev)) {
+		pr_err("Failed to register fiq debugger tty device\n");
+		ret = PTR_ERR(tty_dev);
+		goto err;
+	}
+
+	device_set_wakeup_capable(tty_dev, 1);
+
+	pr_info("Registered fiq debugger ttyFIQ%d\n", state->pdev->id);
+
+	return 0;
+
+err:
+	fiq_debugger_ringbuf_free(state->tty_rbuf);
+	state->tty_rbuf = NULL;
+	return ret;
+}
+#endif
+
+static int fiq_debugger_dev_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct fiq_debugger_state *state = platform_get_drvdata(pdev);
+
+	if (state->pdata->uart_dev_suspend)
+		return state->pdata->uart_dev_suspend(pdev);
+	return 0;
+}
+
+static int fiq_debugger_dev_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct fiq_debugger_state *state = platform_get_drvdata(pdev);
+
+	if (state->pdata->uart_dev_resume)
+		return state->pdata->uart_dev_resume(pdev);
+	return 0;
+}
+
+static int fiq_debugger_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct fiq_debugger_pdata *pdata = dev_get_platdata(&pdev->dev);
+	struct fiq_debugger_state *state;
+	int fiq;
+	int uart_irq;
+
+	if (pdev->id >= MAX_FIQ_DEBUGGER_PORTS)
+		return -EINVAL;
+
+	if (!pdata->uart_getc || !pdata->uart_putc)
+		return -EINVAL;
+	if ((pdata->uart_enable && !pdata->uart_disable) ||
+	    (!pdata->uart_enable && pdata->uart_disable))
+		return -EINVAL;
+
+	fiq = platform_get_irq_byname(pdev, "fiq");
+	uart_irq = platform_get_irq_byname(pdev, "uart_irq");
+
+	/* uart_irq mode and fiq mode are mutually exclusive, but one of them
+	 * is required */
+	if ((uart_irq < 0 && fiq < 0) || (uart_irq >= 0 && fiq >= 0))
+		return -EINVAL;
+	if (fiq >= 0 && !pdata->fiq_enable)
+		return -EINVAL;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	state->output.printf = fiq_debugger_printf;
+	setup_timer(&state->sleep_timer, fiq_debugger_sleep_timer_expired,
+		    (unsigned long)state);
+	state->pdata = pdata;
+	state->pdev = pdev;
+	state->no_sleep = initial_no_sleep;
+	state->debug_enable = initial_debug_enable;
+	state->console_enable = initial_console_enable;
+
+	state->fiq = fiq;
+	state->uart_irq = uart_irq;
+	state->signal_irq = platform_get_irq_byname(pdev, "signal");
+	state->wakeup_irq = platform_get_irq_byname(pdev, "wakeup");
+
+	INIT_WORK(&state->work, fiq_debugger_work);
+	spin_lock_init(&state->work_lock);
+
+	platform_set_drvdata(pdev, state);
+
+	spin_lock_init(&state->sleep_timer_lock);
+
+	if (state->wakeup_irq < 0 && fiq_debugger_have_fiq(state))
+		state->no_sleep = true;
+	state->ignore_next_wakeup_irq = !state->no_sleep;
+
+	wakeup_source_init(&state->debugger_wake_src, "serial-debug");
+
+	state->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(state->clk))
+		state->clk = NULL;
+
+	/* do not call pdata->uart_enable here since uart_init may still
+	 * need to do some initialization before uart_enable can work.
+	 * So, only try to manage the clock during init.
+	 */
+	if (state->clk)
+		clk_enable(state->clk);
+
+	if (pdata->uart_init) {
+		ret = pdata->uart_init(pdev);
+		if (ret)
+			goto err_uart_init;
+	}
+
+	fiq_debugger_printf_nfiq(state,
+				"<hit enter %sto activate fiq debugger>\n",
+				state->no_sleep ? "" : "twice ");
+
+#ifdef CONFIG_FIQ_GLUE
+	if (fiq_debugger_have_fiq(state)) {
+		state->handler.fiq = fiq_debugger_fiq;
+		state->handler.resume = fiq_debugger_resume;
+		ret = fiq_glue_register_handler(&state->handler);
+		if (ret) {
+			pr_err("%s: could not install fiq handler\n", __func__);
+			goto err_register_irq;
+		}
+
+		pdata->fiq_enable(pdev, state->fiq, 1);
+	} else
+#endif
+	{
+		ret = request_irq(state->uart_irq, fiq_debugger_uart_irq,
+				  IRQF_NO_SUSPEND, "debug", state);
+		if (ret) {
+			pr_err("%s: could not install irq handler\n", __func__);
+			goto err_register_irq;
+		}
+
+		/* for irq-only mode, we want this irq to wake us up, if it
+		 * can.
+		 */
+		enable_irq_wake(state->uart_irq);
+	}
+
+	if (state->clk)
+		clk_disable(state->clk);
+
+	if (state->signal_irq >= 0) {
+		ret = request_irq(state->signal_irq, fiq_debugger_signal_irq,
+			  IRQF_TRIGGER_RISING, "debug-signal", state);
+		if (ret)
+			pr_err("serial_debugger: could not install signal_irq");
+	}
+
+	if (state->wakeup_irq >= 0) {
+		ret = request_irq(state->wakeup_irq,
+				  fiq_debugger_wakeup_irq_handler,
+				  IRQF_TRIGGER_FALLING,
+				  "debug-wakeup", state);
+		if (ret) {
+			pr_err("serial_debugger: "
+				"could not install wakeup irq\n");
+			state->wakeup_irq = -1;
+		} else {
+			ret = enable_irq_wake(state->wakeup_irq);
+			if (ret) {
+				pr_err("serial_debugger: "
+					"could not enable wakeup\n");
+				state->wakeup_irq_no_set_wake = true;
+			}
+		}
+	}
+	if (state->no_sleep)
+		fiq_debugger_handle_wakeup(state);
+
+#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE)
+	spin_lock_init(&state->console_lock);
+	state->console = fiq_debugger_console;
+	state->console.index = pdev->id;
+	if (!console_set_on_cmdline)
+		add_preferred_console(state->console.name,
+			state->console.index, NULL);
+	register_console(&state->console);
+	fiq_debugger_tty_init_one(state);
+#endif
+	return 0;
+
+err_register_irq:
+	if (pdata->uart_free)
+		pdata->uart_free(pdev);
+err_uart_init:
+	if (state->clk)
+		clk_disable(state->clk);
+	if (state->clk)
+		clk_put(state->clk);
+	wakeup_source_trash(&state->debugger_wake_src);
+	platform_set_drvdata(pdev, NULL);
+	kfree(state);
+	return ret;
+}
+
+static const struct dev_pm_ops fiq_debugger_dev_pm_ops = {
+	.suspend	= fiq_debugger_dev_suspend,
+	.resume		= fiq_debugger_dev_resume,
+};
+
+static struct platform_driver fiq_debugger_driver = {
+	.probe	= fiq_debugger_probe,
+	.driver	= {
+		.name	= "fiq_debugger",
+		.pm	= &fiq_debugger_dev_pm_ops,
+	},
+};
+
+#if defined(CONFIG_FIQ_DEBUGGER_UART_OVERLAY)
+int fiq_debugger_uart_overlay(void)
+{
+	struct device_node *onp = of_find_node_by_path("/uart_overlay@0");
+	int ret;
+
+	if (!onp) {
+		pr_err("serial_debugger: uart overlay not found\n");
+		return -ENODEV;
+	}
+
+	ret = of_overlay_create(onp);
+	if (ret < 0) {
+		pr_err("serial_debugger: fail to create overlay: %d\n", ret);
+		of_node_put(onp);
+		return ret;
+	}
+
+	pr_info("serial_debugger: uart overlay applied\n");
+	return 0;
+}
+#endif
+
+static int __init fiq_debugger_init(void)
+{
+	if (fiq_debugger_disable) {
+		pr_err("serial_debugger: disabled\n");
+		return -ENODEV;
+	}
+#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE)
+	fiq_debugger_tty_init();
+#endif
+#if defined(CONFIG_FIQ_DEBUGGER_UART_OVERLAY)
+	fiq_debugger_uart_overlay();
+#endif
+	return platform_driver_register(&fiq_debugger_driver);
+}
+
+postcore_initcall(fiq_debugger_init);
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.h b/drivers/staging/android/fiq_debugger/fiq_debugger.h
new file mode 100644
index 0000000..c9ec4f8
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_debugger.h
@@ -0,0 +1,64 @@
+/*
+ * drivers/staging/android/fiq_debugger/fiq_debugger.h
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Colin Cross <ccross@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _ARCH_ARM_MACH_TEGRA_FIQ_DEBUGGER_H_
+#define _ARCH_ARM_MACH_TEGRA_FIQ_DEBUGGER_H_
+
+#include <linux/serial_core.h>
+
+#define FIQ_DEBUGGER_NO_CHAR NO_POLL_CHAR
+#define FIQ_DEBUGGER_BREAK 0x00ff0100
+
+#define FIQ_DEBUGGER_FIQ_IRQ_NAME	"fiq"
+#define FIQ_DEBUGGER_SIGNAL_IRQ_NAME	"signal"
+#define FIQ_DEBUGGER_WAKEUP_IRQ_NAME	"wakeup"
+
+/**
+ * struct fiq_debugger_pdata - fiq debugger platform data
+ * @uart_resume:	used to restore uart state right before enabling
+ *			the fiq.
+ * @uart_enable:	Do the work necessary to communicate with the uart
+ *			hw (enable clocks, etc.). This must be ref-counted.
+ * @uart_disable:	Do the work necessary to disable the uart hw
+ *			(disable clocks, etc.). This must be ref-counted.
+ * @uart_dev_suspend:	called during PM suspend, generally not needed
+ *			for real fiq mode debugger.
+ * @uart_dev_resume:	called during PM resume, generally not needed
+ *			for real fiq mode debugger.
+ */
+struct fiq_debugger_pdata {
+	int (*uart_init)(struct platform_device *pdev);
+	void (*uart_free)(struct platform_device *pdev);
+	int (*uart_resume)(struct platform_device *pdev);
+	int (*uart_getc)(struct platform_device *pdev);
+	void (*uart_putc)(struct platform_device *pdev, unsigned int c);
+	void (*uart_flush)(struct platform_device *pdev);
+	void (*uart_enable)(struct platform_device *pdev);
+	void (*uart_disable)(struct platform_device *pdev);
+
+	int (*uart_dev_suspend)(struct platform_device *pdev);
+	int (*uart_dev_resume)(struct platform_device *pdev);
+
+	void (*fiq_enable)(struct platform_device *pdev, unsigned int fiq,
+								bool enable);
+	void (*fiq_ack)(struct platform_device *pdev, unsigned int fiq);
+
+	void (*force_irq)(struct platform_device *pdev, unsigned int irq);
+	void (*force_irq_ack)(struct platform_device *pdev, unsigned int irq);
+};
+
+#endif
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c b/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c
new file mode 100644
index 0000000..8b3e013
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2014 Google, Inc.
+ * Author: Colin Cross <ccross@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/ptrace.h>
+#include <linux/uaccess.h>
+
+#include <asm/stacktrace.h>
+
+#include "fiq_debugger_priv.h"
+
+static char *mode_name(unsigned cpsr)
+{
+	switch (cpsr & MODE_MASK) {
+	case USR_MODE: return "USR";
+	case FIQ_MODE: return "FIQ";
+	case IRQ_MODE: return "IRQ";
+	case SVC_MODE: return "SVC";
+	case ABT_MODE: return "ABT";
+	case UND_MODE: return "UND";
+	case SYSTEM_MODE: return "SYS";
+	default: return "???";
+	}
+}
+
+void fiq_debugger_dump_pc(struct fiq_debugger_output *output,
+		const struct pt_regs *regs)
+{
+	output->printf(output, " pc %08x cpsr %08x mode %s\n",
+		regs->ARM_pc, regs->ARM_cpsr, mode_name(regs->ARM_cpsr));
+}
+
+void fiq_debugger_dump_regs(struct fiq_debugger_output *output,
+		const struct pt_regs *regs)
+{
+	output->printf(output,
+			" r0 %08x  r1 %08x  r2 %08x  r3 %08x\n",
+			regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3);
+	output->printf(output,
+			" r4 %08x  r5 %08x  r6 %08x  r7 %08x\n",
+			regs->ARM_r4, regs->ARM_r5, regs->ARM_r6, regs->ARM_r7);
+	output->printf(output,
+			" r8 %08x  r9 %08x r10 %08x r11 %08x  mode %s\n",
+			regs->ARM_r8, regs->ARM_r9, regs->ARM_r10, regs->ARM_fp,
+			mode_name(regs->ARM_cpsr));
+	output->printf(output,
+			" ip %08x  sp %08x  lr %08x  pc %08x cpsr %08x\n",
+			regs->ARM_ip, regs->ARM_sp, regs->ARM_lr, regs->ARM_pc,
+			regs->ARM_cpsr);
+}
+
+struct mode_regs {
+	unsigned long sp_svc;
+	unsigned long lr_svc;
+	unsigned long spsr_svc;
+
+	unsigned long sp_abt;
+	unsigned long lr_abt;
+	unsigned long spsr_abt;
+
+	unsigned long sp_und;
+	unsigned long lr_und;
+	unsigned long spsr_und;
+
+	unsigned long sp_irq;
+	unsigned long lr_irq;
+	unsigned long spsr_irq;
+
+	unsigned long r8_fiq;
+	unsigned long r9_fiq;
+	unsigned long r10_fiq;
+	unsigned long r11_fiq;
+	unsigned long r12_fiq;
+	unsigned long sp_fiq;
+	unsigned long lr_fiq;
+	unsigned long spsr_fiq;
+};
+
+static void __naked get_mode_regs(struct mode_regs *regs)
+{
+	asm volatile (
+	"mrs	r1, cpsr\n"
+	"msr	cpsr_c, #0xd3 @(SVC_MODE | PSR_I_BIT | PSR_F_BIT)\n"
+	"stmia	r0!, {r13 - r14}\n"
+	"mrs	r2, spsr\n"
+	"msr	cpsr_c, #0xd7 @(ABT_MODE | PSR_I_BIT | PSR_F_BIT)\n"
+	"stmia	r0!, {r2, r13 - r14}\n"
+	"mrs	r2, spsr\n"
+	"msr	cpsr_c, #0xdb @(UND_MODE | PSR_I_BIT | PSR_F_BIT)\n"
+	"stmia	r0!, {r2, r13 - r14}\n"
+	"mrs	r2, spsr\n"
+	"msr	cpsr_c, #0xd2 @(IRQ_MODE | PSR_I_BIT | PSR_F_BIT)\n"
+	"stmia	r0!, {r2, r13 - r14}\n"
+	"mrs	r2, spsr\n"
+	"msr	cpsr_c, #0xd1 @(FIQ_MODE | PSR_I_BIT | PSR_F_BIT)\n"
+	"stmia	r0!, {r2, r8 - r14}\n"
+	"mrs	r2, spsr\n"
+	"stmia	r0!, {r2}\n"
+	"msr	cpsr_c, r1\n"
+	"bx	lr\n");
+}
+
+
+void fiq_debugger_dump_allregs(struct fiq_debugger_output *output,
+		const struct pt_regs *regs)
+{
+	struct mode_regs mode_regs;
+	unsigned long mode = regs->ARM_cpsr & MODE_MASK;
+
+	fiq_debugger_dump_regs(output, regs);
+	get_mode_regs(&mode_regs);
+
+	output->printf(output,
+			"%csvc: sp %08x  lr %08x  spsr %08x\n",
+			mode == SVC_MODE ? '*' : ' ',
+			mode_regs.sp_svc, mode_regs.lr_svc, mode_regs.spsr_svc);
+	output->printf(output,
+			"%cabt: sp %08x  lr %08x  spsr %08x\n",
+			mode == ABT_MODE ? '*' : ' ',
+			mode_regs.sp_abt, mode_regs.lr_abt, mode_regs.spsr_abt);
+	output->printf(output,
+			"%cund: sp %08x  lr %08x  spsr %08x\n",
+			mode == UND_MODE ? '*' : ' ',
+			mode_regs.sp_und, mode_regs.lr_und, mode_regs.spsr_und);
+	output->printf(output,
+			"%cirq: sp %08x  lr %08x  spsr %08x\n",
+			mode == IRQ_MODE ? '*' : ' ',
+			mode_regs.sp_irq, mode_regs.lr_irq, mode_regs.spsr_irq);
+	output->printf(output,
+			"%cfiq: r8 %08x  r9 %08x  r10 %08x  r11 %08x  r12 %08x\n",
+			mode == FIQ_MODE ? '*' : ' ',
+			mode_regs.r8_fiq, mode_regs.r9_fiq, mode_regs.r10_fiq,
+			mode_regs.r11_fiq, mode_regs.r12_fiq);
+	output->printf(output,
+			" fiq: sp %08x  lr %08x  spsr %08x\n",
+			mode_regs.sp_fiq, mode_regs.lr_fiq, mode_regs.spsr_fiq);
+}
+
+struct stacktrace_state {
+	struct fiq_debugger_output *output;
+	unsigned int depth;
+};
+
+static int report_trace(struct stackframe *frame, void *d)
+{
+	struct stacktrace_state *sts = d;
+
+	if (sts->depth) {
+		sts->output->printf(sts->output,
+			"  pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n",
+			frame->pc, frame->pc, frame->lr, frame->lr,
+			frame->sp, frame->fp);
+		sts->depth--;
+		return 0;
+	}
+	sts->output->printf(sts->output, "  ...\n");
+
+	return sts->depth == 0;
+}
+
+struct frame_tail {
+	struct frame_tail *fp;
+	unsigned long sp;
+	unsigned long lr;
+} __attribute__((packed));
+
+static struct frame_tail *user_backtrace(struct fiq_debugger_output *output,
+					struct frame_tail *tail)
+{
+	struct frame_tail buftail[2];
+
+	/* Also check accessibility of one struct frame_tail beyond */
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) {
+		output->printf(output, "  invalid frame pointer %p\n",
+				tail);
+		return NULL;
+	}
+	if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail))) {
+		output->printf(output,
+			"  failed to copy frame pointer %p\n", tail);
+		return NULL;
+	}
+
+	output->printf(output, "  %p\n", buftail[0].lr);
+
+	/* frame pointers should strictly progress back up the stack
+	 * (towards higher addresses) */
+	if (tail >= buftail[0].fp)
+		return NULL;
+
+	return buftail[0].fp-1;
+}
+
+void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output,
+		const struct pt_regs *regs, unsigned int depth, void *ssp)
+{
+	struct frame_tail *tail;
+	struct thread_info *real_thread_info = THREAD_INFO(ssp);
+	struct stacktrace_state sts;
+
+	sts.depth = depth;
+	sts.output = output;
+	*current_thread_info() = *real_thread_info;
+
+	if (!current)
+		output->printf(output, "current NULL\n");
+	else
+		output->printf(output, "pid: %d  comm: %s\n",
+			current->pid, current->comm);
+	fiq_debugger_dump_regs(output, regs);
+
+	if (!user_mode(regs)) {
+		struct stackframe frame;
+		frame.fp = regs->ARM_fp;
+		frame.sp = regs->ARM_sp;
+		frame.lr = regs->ARM_lr;
+		frame.pc = regs->ARM_pc;
+		output->printf(output,
+			"  pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n",
+			regs->ARM_pc, regs->ARM_pc, regs->ARM_lr, regs->ARM_lr,
+			regs->ARM_sp, regs->ARM_fp);
+		walk_stackframe(&frame, report_trace, &sts);
+		return;
+	}
+
+	tail = ((struct frame_tail *) regs->ARM_fp) - 1;
+	while (depth-- && tail && !((unsigned long) tail & 3))
+		tail = user_backtrace(output, tail);
+}
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c b/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c
new file mode 100644
index 0000000..97246bc
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2014 Google, Inc.
+ * Author: Colin Cross <ccross@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/ptrace.h>
+#include <asm/stacktrace.h>
+
+#include "fiq_debugger_priv.h"
+
+static char *mode_name(const struct pt_regs *regs)
+{
+	if (compat_user_mode(regs)) {
+		return "USR";
+	} else {
+		switch (processor_mode(regs)) {
+		case PSR_MODE_EL0t: return "EL0t";
+		case PSR_MODE_EL1t: return "EL1t";
+		case PSR_MODE_EL1h: return "EL1h";
+		case PSR_MODE_EL2t: return "EL2t";
+		case PSR_MODE_EL2h: return "EL2h";
+		default: return "???";
+		}
+	}
+}
+
+void fiq_debugger_dump_pc(struct fiq_debugger_output *output,
+		const struct pt_regs *regs)
+{
+	output->printf(output, " pc %016lx cpsr %08lx mode %s\n",
+		regs->pc, regs->pstate, mode_name(regs));
+}
+
+void fiq_debugger_dump_regs_aarch32(struct fiq_debugger_output *output,
+		const struct pt_regs *regs)
+{
+	output->printf(output, " r0 %08x  r1 %08x  r2 %08x  r3 %08x\n",
+			regs->compat_usr(0), regs->compat_usr(1),
+			regs->compat_usr(2), regs->compat_usr(3));
+	output->printf(output, " r4 %08x  r5 %08x  r6 %08x  r7 %08x\n",
+			regs->compat_usr(4), regs->compat_usr(5),
+			regs->compat_usr(6), regs->compat_usr(7));
+	output->printf(output, " r8 %08x  r9 %08x r10 %08x r11 %08x\n",
+			regs->compat_usr(8), regs->compat_usr(9),
+			regs->compat_usr(10), regs->compat_usr(11));
+	output->printf(output, " ip %08x  sp %08x  lr %08x  pc %08x\n",
+			regs->compat_usr(12), regs->compat_sp,
+			regs->compat_lr, regs->pc);
+	output->printf(output, " cpsr %08x (%s)\n",
+			regs->pstate, mode_name(regs));
+}
+
+void fiq_debugger_dump_regs_aarch64(struct fiq_debugger_output *output,
+		const struct pt_regs *regs)
+{
+
+	output->printf(output, "  x0 %016lx   x1 %016lx\n",
+			regs->regs[0], regs->regs[1]);
+	output->printf(output, "  x2 %016lx   x3 %016lx\n",
+			regs->regs[2], regs->regs[3]);
+	output->printf(output, "  x4 %016lx   x5 %016lx\n",
+			regs->regs[4], regs->regs[5]);
+	output->printf(output, "  x6 %016lx   x7 %016lx\n",
+			regs->regs[6], regs->regs[7]);
+	output->printf(output, "  x8 %016lx   x9 %016lx\n",
+			regs->regs[8], regs->regs[9]);
+	output->printf(output, " x10 %016lx  x11 %016lx\n",
+			regs->regs[10], regs->regs[11]);
+	output->printf(output, " x12 %016lx  x13 %016lx\n",
+			regs->regs[12], regs->regs[13]);
+	output->printf(output, " x14 %016lx  x15 %016lx\n",
+			regs->regs[14], regs->regs[15]);
+	output->printf(output, " x16 %016lx  x17 %016lx\n",
+			regs->regs[16], regs->regs[17]);
+	output->printf(output, " x18 %016lx  x19 %016lx\n",
+			regs->regs[18], regs->regs[19]);
+	output->printf(output, " x20 %016lx  x21 %016lx\n",
+			regs->regs[20], regs->regs[21]);
+	output->printf(output, " x22 %016lx  x23 %016lx\n",
+			regs->regs[22], regs->regs[23]);
+	output->printf(output, " x24 %016lx  x25 %016lx\n",
+			regs->regs[24], regs->regs[25]);
+	output->printf(output, " x26 %016lx  x27 %016lx\n",
+			regs->regs[26], regs->regs[27]);
+	output->printf(output, " x28 %016lx  x29 %016lx\n",
+			regs->regs[28], regs->regs[29]);
+	output->printf(output, " x30 %016lx   sp %016lx\n",
+			regs->regs[30], regs->sp);
+	output->printf(output, "  pc %016lx cpsr %08x (%s)\n",
+			regs->pc, regs->pstate, mode_name(regs));
+}
+
+void fiq_debugger_dump_regs(struct fiq_debugger_output *output,
+		const struct pt_regs *regs)
+{
+	if (compat_user_mode(regs))
+		fiq_debugger_dump_regs_aarch32(output, regs);
+	else
+		fiq_debugger_dump_regs_aarch64(output, regs);
+}
+
+#define READ_SPECIAL_REG(x) ({ \
+	u64 val; \
+	asm volatile ("mrs %0, " # x : "=r"(val)); \
+	val; \
+})
+
+void fiq_debugger_dump_allregs(struct fiq_debugger_output *output,
+		const struct pt_regs *regs)
+{
+	u32 pstate = READ_SPECIAL_REG(CurrentEl);
+	bool in_el2 = (pstate & PSR_MODE_MASK) >= PSR_MODE_EL2t;
+
+	fiq_debugger_dump_regs(output, regs);
+
+	output->printf(output, " sp_el0   %016lx\n",
+			READ_SPECIAL_REG(sp_el0));
+
+	if (in_el2)
+		output->printf(output, " sp_el1   %016lx\n",
+				READ_SPECIAL_REG(sp_el1));
+
+	output->printf(output, " elr_el1  %016lx\n",
+			READ_SPECIAL_REG(elr_el1));
+
+	output->printf(output, " spsr_el1 %08lx\n",
+			READ_SPECIAL_REG(spsr_el1));
+
+	if (in_el2) {
+		output->printf(output, " spsr_irq %08lx\n",
+				READ_SPECIAL_REG(spsr_irq));
+		output->printf(output, " spsr_abt %08lx\n",
+				READ_SPECIAL_REG(spsr_abt));
+		output->printf(output, " spsr_und %08lx\n",
+				READ_SPECIAL_REG(spsr_und));
+		output->printf(output, " spsr_fiq %08lx\n",
+				READ_SPECIAL_REG(spsr_fiq));
+		output->printf(output, " spsr_el2 %08lx\n",
+				READ_SPECIAL_REG(elr_el2));
+		output->printf(output, " spsr_el2 %08lx\n",
+				READ_SPECIAL_REG(spsr_el2));
+	}
+}
+
+struct stacktrace_state {
+	struct fiq_debugger_output *output;
+	unsigned int depth;
+};
+
+static int report_trace(struct stackframe *frame, void *d)
+{
+	struct stacktrace_state *sts = d;
+
+	if (sts->depth) {
+		sts->output->printf(sts->output, "%pF:\n", frame->pc);
+		sts->output->printf(sts->output,
+				"  pc %016lx   sp %016lx   fp %016lx\n",
+				frame->pc, frame->sp, frame->fp);
+		sts->depth--;
+		return 0;
+	}
+	sts->output->printf(sts->output, "  ...\n");
+
+	return sts->depth == 0;
+}
+
+void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output,
+		const struct pt_regs *regs, unsigned int depth, void *ssp)
+{
+	struct thread_info *real_thread_info = THREAD_INFO(ssp);
+	struct stacktrace_state sts;
+
+	sts.depth = depth;
+	sts.output = output;
+	*current_thread_info() = *real_thread_info;
+
+	if (!current)
+		output->printf(output, "current NULL\n");
+	else
+		output->printf(output, "pid: %d  comm: %s\n",
+			current->pid, current->comm);
+	fiq_debugger_dump_regs(output, regs);
+
+	if (!user_mode(regs)) {
+		struct stackframe frame;
+		frame.fp = regs->regs[29];
+		frame.sp = regs->sp;
+		frame.pc = regs->pc;
+		output->printf(output, "\n");
+		walk_stackframe(current, &frame, report_trace, &sts);
+	}
+}
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h b/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h
new file mode 100644
index 0000000..d5d051f
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2014 Google, Inc.
+ * Author: Colin Cross <ccross@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _FIQ_DEBUGGER_PRIV_H_
+#define _FIQ_DEBUGGER_PRIV_H_
+
+#define THREAD_INFO(sp) ((struct thread_info *) \
+		((unsigned long)(sp) & ~(THREAD_SIZE - 1)))
+
+struct fiq_debugger_output {
+	void (*printf)(struct fiq_debugger_output *output, const char *fmt, ...);
+};
+
+struct pt_regs;
+
+void fiq_debugger_dump_pc(struct fiq_debugger_output *output,
+		const struct pt_regs *regs);
+void fiq_debugger_dump_regs(struct fiq_debugger_output *output,
+		const struct pt_regs *regs);
+void fiq_debugger_dump_allregs(struct fiq_debugger_output *output,
+		const struct pt_regs *regs);
+void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output,
+		const struct pt_regs *regs, unsigned int depth, void *ssp);
+
+#endif
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h b/drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h
new file mode 100644
index 0000000..10c3c5d0
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h
@@ -0,0 +1,94 @@
+/*
+ * drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h
+ *
+ * simple lockless ringbuffer
+ *
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+struct fiq_debugger_ringbuf {
+	int len;
+	int head;
+	int tail;
+	u8 buf[];
+};
+
+
+static inline struct fiq_debugger_ringbuf *fiq_debugger_ringbuf_alloc(int len)
+{
+	struct fiq_debugger_ringbuf *rbuf;
+
+	rbuf = kzalloc(sizeof(*rbuf) + len, GFP_KERNEL);
+	if (rbuf == NULL)
+		return NULL;
+
+	rbuf->len = len;
+	rbuf->head = 0;
+	rbuf->tail = 0;
+	smp_mb();
+
+	return rbuf;
+}
+
+static inline void fiq_debugger_ringbuf_free(struct fiq_debugger_ringbuf *rbuf)
+{
+	kfree(rbuf);
+}
+
+static inline int fiq_debugger_ringbuf_level(struct fiq_debugger_ringbuf *rbuf)
+{
+	int level = rbuf->head - rbuf->tail;
+
+	if (level < 0)
+		level = rbuf->len + level;
+
+	return level;
+}
+
+static inline int fiq_debugger_ringbuf_room(struct fiq_debugger_ringbuf *rbuf)
+{
+	return rbuf->len - fiq_debugger_ringbuf_level(rbuf) - 1;
+}
+
+static inline u8
+fiq_debugger_ringbuf_peek(struct fiq_debugger_ringbuf *rbuf, int i)
+{
+	return rbuf->buf[(rbuf->tail + i) % rbuf->len];
+}
+
+static inline int
+fiq_debugger_ringbuf_consume(struct fiq_debugger_ringbuf *rbuf, int count)
+{
+	count = min(count, fiq_debugger_ringbuf_level(rbuf));
+
+	rbuf->tail = (rbuf->tail + count) % rbuf->len;
+	smp_mb();
+
+	return count;
+}
+
+static inline int
+fiq_debugger_ringbuf_push(struct fiq_debugger_ringbuf *rbuf, u8 datum)
+{
+	if (fiq_debugger_ringbuf_room(rbuf) == 0)
+		return 0;
+
+	rbuf->buf[rbuf->head] = datum;
+	smp_mb();
+	rbuf->head = (rbuf->head + 1) % rbuf->len;
+	smp_mb();
+
+	return 1;
+}
diff --git a/drivers/staging/android/fiq_debugger/fiq_watchdog.c b/drivers/staging/android/fiq_debugger/fiq_watchdog.c
new file mode 100644
index 0000000..194b541
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_watchdog.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/pstore_ram.h>
+
+#include "fiq_watchdog.h"
+#include "fiq_debugger_priv.h"
+
+static DEFINE_RAW_SPINLOCK(fiq_watchdog_lock);
+
+static void fiq_watchdog_printf(struct fiq_debugger_output *output,
+				const char *fmt, ...)
+{
+	char buf[256];
+	va_list ap;
+	int len;
+
+	va_start(ap, fmt);
+	len = vscnprintf(buf, sizeof(buf), fmt, ap);
+	va_end(ap);
+
+	ramoops_console_write_buf(buf, len);
+}
+
+struct fiq_debugger_output fiq_watchdog_output = {
+	.printf = fiq_watchdog_printf,
+};
+
+void fiq_watchdog_triggered(const struct pt_regs *regs, void *svc_sp)
+{
+	char msg[24];
+	int len;
+
+	raw_spin_lock(&fiq_watchdog_lock);
+
+	len = scnprintf(msg, sizeof(msg), "watchdog fiq cpu %d\n",
+			THREAD_INFO(svc_sp)->cpu);
+	ramoops_console_write_buf(msg, len);
+
+	fiq_debugger_dump_stacktrace(&fiq_watchdog_output, regs, 100, svc_sp);
+
+	raw_spin_unlock(&fiq_watchdog_lock);
+}
diff --git a/drivers/staging/android/fiq_debugger/fiq_watchdog.h b/drivers/staging/android/fiq_debugger/fiq_watchdog.h
new file mode 100644
index 0000000..c6b507f
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_watchdog.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _FIQ_WATCHDOG_H_
+#define _FIQ_WATCHDOG_H_
+
+void fiq_watchdog_triggered(const struct pt_regs *regs, void *svc_sp);
+
+#endif
diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c
index c2a7cb9..f9a6339 100644
--- a/drivers/staging/android/ion/ion_heap.c
+++ b/drivers/staging/android/ion/ion_heap.c
@@ -321,8 +321,9 @@
 
 	switch (heap_data->type) {
 	case ION_HEAP_TYPE_SYSTEM_CONTIG:
-		heap = ion_system_contig_heap_create(heap_data);
-		break;
+		pr_err("%s: Heap type is disabled: %d\n", __func__,
+		       heap_data->type);
+		return ERR_PTR(-EINVAL);
 	case ION_HEAP_TYPE_SYSTEM:
 		heap = ion_system_heap_create(heap_data);
 		break;
@@ -361,7 +362,8 @@
 
 	switch (heap->type) {
 	case ION_HEAP_TYPE_SYSTEM_CONTIG:
-		ion_system_contig_heap_destroy(heap);
+		pr_err("%s: Heap type is disabled: %d\n", __func__,
+		       heap->type);
 		break;
 	case ION_HEAP_TYPE_SYSTEM:
 		ion_system_heap_destroy(heap);
diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c
index ec3b665..687be36 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -43,6 +43,9 @@
 #include <linux/profile.h>
 #include <linux/notifier.h>
 
+#define CREATE_TRACE_POINTS
+#include "trace/lowmemorykiller.h"
+
 static u32 lowmem_debug_level = 1;
 static short lowmem_adj[6] = {
 	0,
@@ -93,6 +96,7 @@
 	int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages;
 	int other_file = global_node_page_state(NR_FILE_PAGES) -
 				global_node_page_state(NR_SHMEM) -
+				global_node_page_state(NR_UNEVICTABLE) -
 				total_swapcache_pages();
 
 	if (lowmem_adj_size < array_size)
@@ -160,23 +164,27 @@
 			     p->comm, p->pid, oom_score_adj, tasksize);
 	}
 	if (selected) {
+		long cache_size = other_file * (long)(PAGE_SIZE / 1024);
+		long cache_limit = minfree * (long)(PAGE_SIZE / 1024);
+		long free = other_free * (long)(PAGE_SIZE / 1024);
+
 		task_lock(selected);
 		send_sig(SIGKILL, selected, 0);
 		if (selected->mm)
 			task_set_lmk_waiting(selected);
 		task_unlock(selected);
-		lowmem_print(1, "Killing '%s' (%d), adj %hd,\n"
+		trace_lowmemory_kill(selected, cache_size, cache_limit, free);
+		lowmem_print(1, "Killing '%s' (%d) (tgid %d), adj %hd,\n"
 				 "   to free %ldkB on behalf of '%s' (%d) because\n"
 				 "   cache %ldkB is below limit %ldkB for oom_score_adj %hd\n"
 				 "   Free memory is %ldkB above reserved\n",
-			     selected->comm, selected->pid,
+			     selected->comm, selected->pid, selected->tgid,
 			     selected_oom_score_adj,
 			     selected_tasksize * (long)(PAGE_SIZE / 1024),
 			     current->comm, current->pid,
-			     other_file * (long)(PAGE_SIZE / 1024),
-			     minfree * (long)(PAGE_SIZE / 1024),
+			     cache_size, cache_limit,
 			     min_score_adj,
-			     other_free * (long)(PAGE_SIZE / 1024));
+			     free);
 		lowmem_deathpending_timeout = jiffies + HZ;
 		rem += selected_tasksize;
 	}
@@ -200,12 +208,96 @@
 }
 device_initcall(lowmem_init);
 
+#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
+static short lowmem_oom_adj_to_oom_score_adj(short oom_adj)
+{
+	if (oom_adj == OOM_ADJUST_MAX)
+		return OOM_SCORE_ADJ_MAX;
+	else
+		return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
+}
+
+static void lowmem_autodetect_oom_adj_values(void)
+{
+	int i;
+	short oom_adj;
+	short oom_score_adj;
+	int array_size = ARRAY_SIZE(lowmem_adj);
+
+	if (lowmem_adj_size < array_size)
+		array_size = lowmem_adj_size;
+
+	if (array_size <= 0)
+		return;
+
+	oom_adj = lowmem_adj[array_size - 1];
+	if (oom_adj > OOM_ADJUST_MAX)
+		return;
+
+	oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj);
+	if (oom_score_adj <= OOM_ADJUST_MAX)
+		return;
+
+	lowmem_print(1, "lowmem_shrink: convert oom_adj to oom_score_adj:\n");
+	for (i = 0; i < array_size; i++) {
+		oom_adj = lowmem_adj[i];
+		oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj);
+		lowmem_adj[i] = oom_score_adj;
+		lowmem_print(1, "oom_adj %d => oom_score_adj %d\n",
+			     oom_adj, oom_score_adj);
+	}
+}
+
+static int lowmem_adj_array_set(const char *val, const struct kernel_param *kp)
+{
+	int ret;
+
+	ret = param_array_ops.set(val, kp);
+
+	/* HACK: Autodetect oom_adj values in lowmem_adj array */
+	lowmem_autodetect_oom_adj_values();
+
+	return ret;
+}
+
+static int lowmem_adj_array_get(char *buffer, const struct kernel_param *kp)
+{
+	return param_array_ops.get(buffer, kp);
+}
+
+static void lowmem_adj_array_free(void *arg)
+{
+	param_array_ops.free(arg);
+}
+
+static struct kernel_param_ops lowmem_adj_array_ops = {
+	.set = lowmem_adj_array_set,
+	.get = lowmem_adj_array_get,
+	.free = lowmem_adj_array_free,
+};
+
+static const struct kparam_array __param_arr_adj = {
+	.max = ARRAY_SIZE(lowmem_adj),
+	.num = &lowmem_adj_size,
+	.ops = &param_ops_short,
+	.elemsize = sizeof(lowmem_adj[0]),
+	.elem = lowmem_adj,
+};
+#endif
+
 /*
  * not really modular, but the easiest way to keep compat with existing
  * bootargs behaviour is to continue using module_param here.
  */
 module_param_named(cost, lowmem_shrinker.seeks, int, 0644);
+#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
+module_param_cb(adj, &lowmem_adj_array_ops,
+		.arr = &__param_arr_adj,
+		0644);
+__MODULE_PARM_TYPE(adj, "array of short");
+#else
 module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size, 0644);
+#endif
 module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size,
 			 0644);
 module_param_named(debug_level, lowmem_debug_level, uint, 0644);
diff --git a/drivers/staging/android/trace/lowmemorykiller.h b/drivers/staging/android/trace/lowmemorykiller.h
new file mode 100644
index 0000000..f43d3fa
--- /dev/null
+++ b/drivers/staging/android/trace/lowmemorykiller.h
@@ -0,0 +1,41 @@
+#undef TRACE_SYSTEM
+#define TRACE_INCLUDE_PATH ../../drivers/staging/android/trace
+#define TRACE_SYSTEM lowmemorykiller
+
+#if !defined(_TRACE_LOWMEMORYKILLER_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LOWMEMORYKILLER_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(lowmemory_kill,
+	TP_PROTO(struct task_struct *killed_task, long cache_size, \
+		 long cache_limit, long free),
+
+	TP_ARGS(killed_task, cache_size, cache_limit, free),
+
+	TP_STRUCT__entry(
+			__array(char, comm, TASK_COMM_LEN)
+			__field(pid_t, pid)
+			__field(long, pagecache_size)
+			__field(long, pagecache_limit)
+			__field(long, free)
+	),
+
+	TP_fast_assign(
+			memcpy(__entry->comm, killed_task->comm, TASK_COMM_LEN);
+			__entry->pid = killed_task->pid;
+			__entry->pagecache_size = cache_size;
+			__entry->pagecache_limit = cache_limit;
+			__entry->free = free;
+	),
+
+	TP_printk("%s (%d), page cache %ldkB (limit %ldkB), free %ldKb",
+		__entry->comm, __entry->pid, __entry->pagecache_size,
+		__entry->pagecache_limit, __entry->free)
+);
+
+
+#endif /* if !defined(_TRACE_LOWMEMORYKILLER_H) || defined(TRACE_HEADER_MULTI_READ) */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/staging/android/uapi/vsoc_shm.h b/drivers/staging/android/uapi/vsoc_shm.h
new file mode 100644
index 0000000..741b138
--- /dev/null
+++ b/drivers/staging/android/uapi/vsoc_shm.h
@@ -0,0 +1,303 @@
+/*
+ * Copyright (C) 2017 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UAPI_LINUX_VSOC_SHM_H
+#define _UAPI_LINUX_VSOC_SHM_H
+
+#include <linux/types.h>
+
+/**
+ * A permission is a token that permits a receiver to read and/or write an area
+ * of memory within a Vsoc region.
+ *
+ * An fd_scoped permission grants both read and write access, and can be
+ * attached to a file description (see open(2)).
+ * Ownership of the area can then be shared by passing a file descriptor
+ * among processes.
+ *
+ * begin_offset and end_offset define the area of memory that is controlled by
+ * the permission. owner_offset points to a word, also in shared memory, that
+ * controls ownership of the area.
+ *
+ * ownership of the region expires when the associated file description is
+ * released.
+ *
+ * At most one permission can be attached to each file description.
+ *
+ * This is useful when implementing HALs like gralloc that scope and pass
+ * ownership of shared resources via file descriptors.
+ *
+ * The caller is responsibe for doing any fencing.
+ *
+ * The calling process will normally identify a currently free area of
+ * memory. It will construct a proposed fd_scoped_permission_arg structure:
+ *
+ *   begin_offset and end_offset describe the area being claimed
+ *
+ *   owner_offset points to the location in shared memory that indicates the
+ *   owner of the area.
+ *
+ *   owned_value is the value that will be stored in owner_offset iff the
+ *   permission can be granted. It must be different than VSOC_REGION_FREE.
+ *
+ * Two fd_scoped_permission structures are compatible if they vary only by
+ * their owned_value fields.
+ *
+ * The driver ensures that, for any group of simultaneous callers proposing
+ * compatible fd_scoped_permissions, it will accept exactly one of the
+ * propopsals. The other callers will get a failure with errno of EAGAIN.
+ *
+ * A process receiving a file descriptor can identify the region being
+ * granted using the VSOC_GET_FD_SCOPED_PERMISSION ioctl.
+ */
+struct fd_scoped_permission {
+	__u32 begin_offset;
+	__u32 end_offset;
+	__u32 owner_offset;
+	__u32 owned_value;
+};
+
+/*
+ * This value represents a free area of memory. The driver expects to see this
+ * value at owner_offset when creating a permission otherwise it will not do it,
+ * and will write this value back once the permission is no longer needed.
+ */
+#define VSOC_REGION_FREE ((__u32)0)
+
+/**
+ * ioctl argument for VSOC_CREATE_FD_SCOPE_PERMISSION
+ */
+struct fd_scoped_permission_arg {
+	struct fd_scoped_permission perm;
+	__s32 managed_region_fd;
+};
+
+#define VSOC_NODE_FREE ((__u32)0)
+
+/*
+ * Describes a signal table in shared memory. Each non-zero entry in the
+ * table indicates that the receiver should signal the futex at the given
+ * offset. Offsets are relative to the region, not the shared memory window.
+ *
+ * interrupt_signalled_offset is used to reliably signal interrupts across the
+ * vmm boundary. There are two roles: transmitter and receiver. For example,
+ * in the host_to_guest_signal_table the host is the transmitter and the
+ * guest is the receiver. The protocol is as follows:
+ *
+ * 1. The transmitter should convert the offset of the futex to an offset
+ *    in the signal table [0, (1 << num_nodes_lg2))
+ *    The transmitter can choose any appropriate hashing algorithm, including
+ *    hash = futex_offset & ((1 << num_nodes_lg2) - 1)
+ *
+ * 3. The transmitter should atomically compare and swap futex_offset with 0
+ *    at hash. There are 3 possible outcomes
+ *      a. The swap fails because the futex_offset is already in the table.
+ *         The transmitter should stop.
+ *      b. Some other offset is in the table. This is a hash collision. The
+ *         transmitter should move to another table slot and try again. One
+ *         possible algorithm:
+ *         hash = (hash + 1) & ((1 << num_nodes_lg2) - 1)
+ *      c. The swap worked. Continue below.
+ *
+ * 3. The transmitter atomically swaps 1 with the value at the
+ *    interrupt_signalled_offset. There are two outcomes:
+ *      a. The prior value was 1. In this case an interrupt has already been
+ *         posted. The transmitter is done.
+ *      b. The prior value was 0, indicating that the receiver may be sleeping.
+ *         The transmitter will issue an interrupt.
+ *
+ * 4. On waking the receiver immediately exchanges a 0 with the
+ *    interrupt_signalled_offset. If it receives a 0 then this a spurious
+ *    interrupt. That may occasionally happen in the current protocol, but
+ *    should be rare.
+ *
+ * 5. The receiver scans the signal table by atomicaly exchanging 0 at each
+ *    location. If a non-zero offset is returned from the exchange the
+ *    receiver wakes all sleepers at the given offset:
+ *      futex((int*)(region_base + old_value), FUTEX_WAKE, MAX_INT);
+ *
+ * 6. The receiver thread then does a conditional wait, waking immediately
+ *    if the value at interrupt_signalled_offset is non-zero. This catches cases
+ *    here additional  signals were posted while the table was being scanned.
+ *    On the guest the wait is handled via the VSOC_WAIT_FOR_INCOMING_INTERRUPT
+ *    ioctl.
+ */
+struct vsoc_signal_table_layout {
+	/* log_2(Number of signal table entries) */
+	__u32 num_nodes_lg2;
+	/*
+	 * Offset to the first signal table entry relative to the start of the
+	 * region
+	 */
+	__u32 futex_uaddr_table_offset;
+	/*
+	 * Offset to an atomic_t / atomic uint32_t. A non-zero value indicates
+	 * that one or more offsets are currently posted in the table.
+	 * semi-unique access to an entry in the table
+	 */
+	__u32 interrupt_signalled_offset;
+};
+
+#define VSOC_REGION_WHOLE ((__s32)0)
+#define VSOC_DEVICE_NAME_SZ 16
+
+/**
+ * Each HAL would (usually) talk to a single device region
+ * Mulitple entities care about these regions:
+ * - The ivshmem_server will populate the regions in shared memory
+ * - The guest kernel will read the region, create minor device nodes, and
+ *   allow interested parties to register for FUTEX_WAKE events in the region
+ * - HALs will access via the minor device nodes published by the guest kernel
+ * - Host side processes will access the region via the ivshmem_server:
+ *   1. Pass name to ivshmem_server at a UNIX socket
+ *   2. ivshmemserver will reply with 2 fds:
+ *     - host->guest doorbell fd
+ *     - guest->host doorbell fd
+ *     - fd for the shared memory region
+ *     - region offset
+ *   3. Start a futex receiver thread on the doorbell fd pointed at the
+ *      signal_nodes
+ */
+struct vsoc_device_region {
+	__u16 current_version;
+	__u16 min_compatible_version;
+	__u32 region_begin_offset;
+	__u32 region_end_offset;
+	__u32 offset_of_region_data;
+	struct vsoc_signal_table_layout guest_to_host_signal_table;
+	struct vsoc_signal_table_layout host_to_guest_signal_table;
+	/* Name of the device. Must always be terminated with a '\0', so
+	 * the longest supported device name is 15 characters.
+	 */
+	char device_name[VSOC_DEVICE_NAME_SZ];
+	/* There are two ways that permissions to access regions are handled:
+	 *   - When subdivided_by is VSOC_REGION_WHOLE, any process that can
+	 *     open the device node for the region gains complete access to it.
+	 *   - When subdivided is set processes that open the region cannot
+	 *     access it. Access to a sub-region must be established by invoking
+	 *     the VSOC_CREATE_FD_SCOPE_PERMISSION ioctl on the region
+	 *     referenced in subdivided_by, providing a fileinstance
+	 *     (represented by a fd) opened on this region.
+	 */
+	__u32 managed_by;
+};
+
+/*
+ * The vsoc layout descriptor.
+ * The first 4K should be reserved for the shm header and region descriptors.
+ * The regions should be page aligned.
+ */
+
+struct vsoc_shm_layout_descriptor {
+	__u16 major_version;
+	__u16 minor_version;
+
+	/* size of the shm. This may be redundant but nice to have */
+	__u32 size;
+
+	/* number of shared memory regions */
+	__u32 region_count;
+
+	/* The offset to the start of region descriptors */
+	__u32 vsoc_region_desc_offset;
+};
+
+/*
+ * This specifies the current version that should be stored in
+ * vsoc_shm_layout_descriptor.major_version and
+ * vsoc_shm_layout_descriptor.minor_version.
+ * It should be updated only if the vsoc_device_region and
+ * vsoc_shm_layout_descriptor structures have changed.
+ * Versioning within each region is transferred
+ * via the min_compatible_version and current_version fields in
+ * vsoc_device_region. The driver does not consult these fields: they are left
+ * for the HALs and host processes and will change independently of the layout
+ * version.
+ */
+#define CURRENT_VSOC_LAYOUT_MAJOR_VERSION 2
+#define CURRENT_VSOC_LAYOUT_MINOR_VERSION 0
+
+#define VSOC_CREATE_FD_SCOPED_PERMISSION \
+	_IOW(0xF5, 0, struct fd_scoped_permission)
+#define VSOC_GET_FD_SCOPED_PERMISSION _IOR(0xF5, 1, struct fd_scoped_permission)
+
+/*
+ * This is used to signal the host to scan the guest_to_host_signal_table
+ * for new futexes to wake. This sends an interrupt if one is not already
+ * in flight.
+ */
+#define VSOC_MAYBE_SEND_INTERRUPT_TO_HOST _IO(0xF5, 2)
+
+/*
+ * When this returns the guest will scan host_to_guest_signal_table to
+ * check for new futexes to wake.
+ */
+/* TODO(ghartman): Consider moving this to the bottom half */
+#define VSOC_WAIT_FOR_INCOMING_INTERRUPT _IO(0xF5, 3)
+
+/*
+ * Guest HALs will use this to retrieve the region description after
+ * opening their device node.
+ */
+#define VSOC_DESCRIBE_REGION _IOR(0xF5, 4, struct vsoc_device_region)
+
+/*
+ * Wake any threads that may be waiting for a host interrupt on this region.
+ * This is mostly used during shutdown.
+ */
+#define VSOC_SELF_INTERRUPT _IO(0xF5, 5)
+
+/*
+ * This is used to signal the host to scan the guest_to_host_signal_table
+ * for new futexes to wake. This sends an interrupt unconditionally.
+ */
+#define VSOC_SEND_INTERRUPT_TO_HOST _IO(0xF5, 6)
+
+enum wait_types {
+	VSOC_WAIT_UNDEFINED = 0,
+	VSOC_WAIT_IF_EQUAL = 1,
+	VSOC_WAIT_IF_EQUAL_TIMEOUT = 2
+};
+
+/*
+ * Wait for a condition to be true
+ *
+ * Note, this is sized and aligned so the 32 bit and 64 bit layouts are
+ * identical.
+ */
+struct vsoc_cond_wait {
+	/* Input: Offset of the 32 bit word to check */
+	__u32 offset;
+	/* Input: Value that will be compared with the offset */
+	__u32 value;
+	/* Monotonic time to wake at in seconds */
+	__u64 wake_time_sec;
+	/* Input: Monotonic time to wait in nanoseconds */
+	__u32 wake_time_nsec;
+	/* Input: Type of wait */
+	__u32 wait_type;
+	/* Output: Number of times the thread woke before returning. */
+	__u32 wakes;
+	/* Ensure that we're 8-byte aligned and 8 byte length for 32/64 bit
+	 * compatibility.
+	 */
+	__u32 reserved_1;
+};
+
+#define VSOC_COND_WAIT _IOWR(0xF5, 7, struct vsoc_cond_wait)
+
+/* Wake any local threads waiting at the offset given in arg */
+#define VSOC_COND_WAKE _IO(0xF5, 8)
+
+#endif /* _UAPI_LINUX_VSOC_SHM_H */
diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c
new file mode 100644
index 0000000..954ed2c
--- /dev/null
+++ b/drivers/staging/android/vsoc.c
@@ -0,0 +1,1165 @@
+/*
+ * drivers/android/staging/vsoc.c
+ *
+ * Android Virtual System on a Chip (VSoC) driver
+ *
+ * Copyright (C) 2017 Google, Inc.
+ *
+ * Author: ghartman@google.com
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ *
+ * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory
+ *         Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca>
+ *
+ * Based on cirrusfb.c and 8139cp.c:
+ *   Copyright 1999-2001 Jeff Garzik
+ *   Copyright 2001-2004 Jeff Garzik
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/freezer.h>
+#include <linux/futex.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/cdev.h>
+#include <linux/file.h>
+#include "uapi/vsoc_shm.h"
+
+#define VSOC_DEV_NAME "vsoc"
+
+/*
+ * Description of the ivshmem-doorbell PCI device used by QEmu. These
+ * constants follow docs/specs/ivshmem-spec.txt, which can be found in
+ * the QEmu repository. This was last reconciled with the version that
+ * came out with 2.8
+ */
+
+/*
+ * These constants are determined KVM Inter-VM shared memory device
+ * register offsets
+ */
+enum {
+	INTR_MASK = 0x00,	/* Interrupt Mask */
+	INTR_STATUS = 0x04,	/* Interrupt Status */
+	IV_POSITION = 0x08,	/* VM ID */
+	DOORBELL = 0x0c,	/* Doorbell */
+};
+
+static const int REGISTER_BAR;  /* Equal to 0 */
+static const int MAX_REGISTER_BAR_LEN = 0x100;
+/*
+ * The MSI-x BAR is not used directly.
+ *
+ * static const int MSI_X_BAR = 1;
+ */
+static const int SHARED_MEMORY_BAR = 2;
+
+struct vsoc_region_data {
+	char name[VSOC_DEVICE_NAME_SZ + 1];
+	wait_queue_head_t interrupt_wait_queue;
+	/* TODO(b/73664181): Use multiple futex wait queues */
+	wait_queue_head_t futex_wait_queue;
+	/* Flag indicating that an interrupt has been signalled by the host. */
+	atomic_t *incoming_signalled;
+	/* Flag indicating the guest has signalled the host. */
+	atomic_t *outgoing_signalled;
+	bool irq_requested;
+	bool device_created;
+};
+
+struct vsoc_device {
+	/* Kernel virtual address of REGISTER_BAR. */
+	void __iomem *regs;
+	/* Physical address of SHARED_MEMORY_BAR. */
+	phys_addr_t shm_phys_start;
+	/* Kernel virtual address of SHARED_MEMORY_BAR. */
+	void __iomem *kernel_mapped_shm;
+	/* Size of the entire shared memory window in bytes. */
+	size_t shm_size;
+	/*
+	 * Pointer to the virtual address of the shared memory layout structure.
+	 * This is probably identical to kernel_mapped_shm, but saving this
+	 * here saves a lot of annoying casts.
+	 */
+	struct vsoc_shm_layout_descriptor *layout;
+	/*
+	 * Points to a table of region descriptors in the kernel's virtual
+	 * address space. Calculated from
+	 * vsoc_shm_layout_descriptor.vsoc_region_desc_offset
+	 */
+	struct vsoc_device_region *regions;
+	/* Head of a list of permissions that have been granted. */
+	struct list_head permissions;
+	struct pci_dev *dev;
+	/* Per-region (and therefore per-interrupt) information. */
+	struct vsoc_region_data *regions_data;
+	/*
+	 * Table of msi-x entries. This has to be separated from struct
+	 * vsoc_region_data because the kernel deals with them as an array.
+	 */
+	struct msix_entry *msix_entries;
+	/* Mutex that protectes the permission list */
+	struct mutex mtx;
+	/* Major number assigned by the kernel */
+	int major;
+	/* Character device assigned by the kernel */
+	struct cdev cdev;
+	/* Device class assigned by the kernel */
+	struct class *class;
+	/*
+	 * Flags that indicate what we've initialized. These are used to do an
+	 * orderly cleanup of the device.
+	 */
+	bool enabled_device;
+	bool requested_regions;
+	bool cdev_added;
+	bool class_added;
+	bool msix_enabled;
+};
+
+static struct vsoc_device vsoc_dev;
+
+/*
+ * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions.
+ */
+
+struct fd_scoped_permission_node {
+	struct fd_scoped_permission permission;
+	struct list_head list;
+};
+
+struct vsoc_private_data {
+	struct fd_scoped_permission_node *fd_scoped_permission_node;
+};
+
+static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
+static int vsoc_mmap(struct file *, struct vm_area_struct *);
+static int vsoc_open(struct inode *, struct file *);
+static int vsoc_release(struct inode *, struct file *);
+static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
+static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
+static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);
+static int do_create_fd_scoped_permission(
+	struct vsoc_device_region *region_p,
+	struct fd_scoped_permission_node *np,
+	struct fd_scoped_permission_arg __user *arg);
+static void do_destroy_fd_scoped_permission(
+	struct vsoc_device_region *owner_region_p,
+	struct fd_scoped_permission *perm);
+static long do_vsoc_describe_region(struct file *,
+				    struct vsoc_device_region __user *);
+static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off);
+
+/**
+ * Validate arguments on entry points to the driver.
+ */
+inline int vsoc_validate_inode(struct inode *inode)
+{
+	if (iminor(inode) >= vsoc_dev.layout->region_count) {
+		dev_err(&vsoc_dev.dev->dev,
+			"describe_region: invalid region %d\n", iminor(inode));
+		return -ENODEV;
+	}
+	return 0;
+}
+
+inline int vsoc_validate_filep(struct file *filp)
+{
+	int ret = vsoc_validate_inode(file_inode(filp));
+
+	if (ret)
+		return ret;
+	if (!filp->private_data) {
+		dev_err(&vsoc_dev.dev->dev,
+			"No private data on fd, region %d\n",
+			iminor(file_inode(filp)));
+		return -EBADFD;
+	}
+	return 0;
+}
+
+/* Converts from shared memory offset to virtual address */
+static inline void *shm_off_to_virtual_addr(__u32 offset)
+{
+	return (void __force *)vsoc_dev.kernel_mapped_shm + offset;
+}
+
+/* Converts from shared memory offset to physical address */
+static inline phys_addr_t shm_off_to_phys_addr(__u32 offset)
+{
+	return vsoc_dev.shm_phys_start + offset;
+}
+
+/**
+ * Convenience functions to obtain the region from the inode or file.
+ * Dangerous to call before validating the inode/file.
+ */
+static inline struct vsoc_device_region *vsoc_region_from_inode(
+	struct inode *inode)
+{
+	return &vsoc_dev.regions[iminor(inode)];
+}
+
+static inline struct vsoc_device_region *vsoc_region_from_filep(
+	struct file *inode)
+{
+	return vsoc_region_from_inode(file_inode(inode));
+}
+
+static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r)
+{
+	return r->region_end_offset - r->region_begin_offset;
+}
+
+static const struct file_operations vsoc_ops = {
+	.owner = THIS_MODULE,
+	.open = vsoc_open,
+	.mmap = vsoc_mmap,
+	.read = vsoc_read,
+	.unlocked_ioctl = vsoc_ioctl,
+	.compat_ioctl = vsoc_ioctl,
+	.write = vsoc_write,
+	.llseek = vsoc_lseek,
+	.release = vsoc_release,
+};
+
+static struct pci_device_id vsoc_id_table[] = {
+	{0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{0},
+};
+
+MODULE_DEVICE_TABLE(pci, vsoc_id_table);
+
+static void vsoc_remove_device(struct pci_dev *pdev);
+static int vsoc_probe_device(struct pci_dev *pdev,
+			     const struct pci_device_id *ent);
+
+static struct pci_driver vsoc_pci_driver = {
+	.name = "vsoc",
+	.id_table = vsoc_id_table,
+	.probe = vsoc_probe_device,
+	.remove = vsoc_remove_device,
+};
+
+static int do_create_fd_scoped_permission(
+	struct vsoc_device_region *region_p,
+	struct fd_scoped_permission_node *np,
+	struct fd_scoped_permission_arg __user *arg)
+{
+	struct file *managed_filp;
+	s32 managed_fd;
+	atomic_t *owner_ptr = NULL;
+	struct vsoc_device_region *managed_region_p;
+
+	if (copy_from_user(&np->permission, &arg->perm, sizeof(*np)) ||
+	    copy_from_user(&managed_fd,
+			   &arg->managed_region_fd, sizeof(managed_fd))) {
+		return -EFAULT;
+	}
+	managed_filp = fdget(managed_fd).file;
+	/* Check that it's a valid fd, */
+	if (!managed_filp || vsoc_validate_filep(managed_filp))
+		return -EPERM;
+	/* EEXIST if the given fd already has a permission. */
+	if (((struct vsoc_private_data *)managed_filp->private_data)->
+	    fd_scoped_permission_node)
+		return -EEXIST;
+	managed_region_p = vsoc_region_from_filep(managed_filp);
+	/* Check that the provided region is managed by this one */
+	if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p)
+		return -EPERM;
+	/* The area must be well formed and have non-zero size */
+	if (np->permission.begin_offset >= np->permission.end_offset)
+		return -EINVAL;
+	/* The area must fit in the memory window */
+	if (np->permission.end_offset >
+	    vsoc_device_region_size(managed_region_p))
+		return -ERANGE;
+	/* The area must be in the region data section */
+	if (np->permission.begin_offset <
+	    managed_region_p->offset_of_region_data)
+		return -ERANGE;
+	/* The area must be page aligned */
+	if (!PAGE_ALIGNED(np->permission.begin_offset) ||
+	    !PAGE_ALIGNED(np->permission.end_offset))
+		return -EINVAL;
+	/* Owner offset must be naturally aligned in the window */
+	if (np->permission.owner_offset &
+	    (sizeof(np->permission.owner_offset) - 1))
+		return -EINVAL;
+	/* The owner flag must reside in the owner memory */
+	if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
+	    vsoc_device_region_size(region_p))
+		return -ERANGE;
+	/* The owner flag must reside in the data section */
+	if (np->permission.owner_offset < region_p->offset_of_region_data)
+		return -EINVAL;
+	/* The owner value must change to claim the memory */
+	if (np->permission.owned_value == VSOC_REGION_FREE)
+		return -EINVAL;
+	owner_ptr =
+	    (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset +
+						np->permission.owner_offset);
+	/* We've already verified that this is in the shared memory window, so
+	 * it should be safe to write to this address.
+	 */
+	if (atomic_cmpxchg(owner_ptr,
+			   VSOC_REGION_FREE,
+			   np->permission.owned_value) != VSOC_REGION_FREE) {
+		return -EBUSY;
+	}
+	((struct vsoc_private_data *)managed_filp->private_data)->
+	    fd_scoped_permission_node = np;
+	/* The file offset needs to be adjusted if the calling
+	 * process did any read/write operations on the fd
+	 * before creating the permission.
+	 */
+	if (managed_filp->f_pos) {
+		if (managed_filp->f_pos > np->permission.end_offset) {
+			/* If the offset is beyond the permission end, set it
+			 * to the end.
+			 */
+			managed_filp->f_pos = np->permission.end_offset;
+		} else {
+			/* If the offset is within the permission interval
+			 * keep it there otherwise reset it to zero.
+			 */
+			if (managed_filp->f_pos < np->permission.begin_offset) {
+				managed_filp->f_pos = 0;
+			} else {
+				managed_filp->f_pos -=
+				    np->permission.begin_offset;
+			}
+		}
+	}
+	return 0;
+}
+
+static void do_destroy_fd_scoped_permission_node(
+	struct vsoc_device_region *owner_region_p,
+	struct fd_scoped_permission_node *node)
+{
+	if (node) {
+		do_destroy_fd_scoped_permission(owner_region_p,
+						&node->permission);
+		mutex_lock(&vsoc_dev.mtx);
+		list_del(&node->list);
+		mutex_unlock(&vsoc_dev.mtx);
+		kfree(node);
+	}
+}
+
+static void do_destroy_fd_scoped_permission(
+		struct vsoc_device_region *owner_region_p,
+		struct fd_scoped_permission *perm)
+{
+	atomic_t *owner_ptr = NULL;
+	int prev = 0;
+
+	if (!perm)
+		return;
+	owner_ptr = (atomic_t *)shm_off_to_virtual_addr(
+		owner_region_p->region_begin_offset + perm->owner_offset);
+	prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE);
+	if (prev != perm->owned_value)
+		dev_err(&vsoc_dev.dev->dev,
+			"%x-%x: owner (%s) %x: expected to be %x was %x",
+			perm->begin_offset, perm->end_offset,
+			owner_region_p->device_name, perm->owner_offset,
+			perm->owned_value, prev);
+}
+
+static long do_vsoc_describe_region(struct file *filp,
+				    struct vsoc_device_region __user *dest)
+{
+	struct vsoc_device_region *region_p;
+	int retval = vsoc_validate_filep(filp);
+
+	if (retval)
+		return retval;
+	region_p = vsoc_region_from_filep(filp);
+	if (copy_to_user(dest, region_p, sizeof(*region_p)))
+		return -EFAULT;
+	return 0;
+}
+
+/**
+ * Implements the inner logic of cond_wait. Copies to and from userspace are
+ * done in the helper function below.
+ */
+static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
+{
+	DEFINE_WAIT(wait);
+	u32 region_number = iminor(file_inode(filp));
+	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
+	struct hrtimer_sleeper timeout, *to = NULL;
+	int ret = 0;
+	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
+	atomic_t *address = NULL;
+	struct timespec ts;
+
+	/* Ensure that the offset is aligned */
+	if (arg->offset & (sizeof(uint32_t) - 1))
+		return -EADDRNOTAVAIL;
+	/* Ensure that the offset is within shared memory */
+	if (((uint64_t)arg->offset) + region_p->region_begin_offset +
+	    sizeof(uint32_t) > region_p->region_end_offset)
+		return -E2BIG;
+	address = shm_off_to_virtual_addr(region_p->region_begin_offset +
+					  arg->offset);
+
+	/* Ensure that the type of wait is valid */
+	switch (arg->wait_type) {
+	case VSOC_WAIT_IF_EQUAL:
+		break;
+	case VSOC_WAIT_IF_EQUAL_TIMEOUT:
+		to = &timeout;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (to) {
+		/* Copy the user-supplied timesec into the kernel structure.
+		 * We do things this way to flatten differences between 32 bit
+		 * and 64 bit timespecs.
+		 */
+		ts.tv_sec = arg->wake_time_sec;
+		ts.tv_nsec = arg->wake_time_nsec;
+
+		if (!timespec_valid(&ts))
+			return -EINVAL;
+		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
+				      HRTIMER_MODE_ABS);
+		hrtimer_set_expires_range_ns(&to->timer, timespec_to_ktime(ts),
+					     current->timer_slack_ns);
+
+		hrtimer_init_sleeper(to, current);
+	}
+
+	while (1) {
+		prepare_to_wait(&data->futex_wait_queue, &wait,
+				TASK_INTERRUPTIBLE);
+		/*
+		 * Check the sentinel value after prepare_to_wait. If the value
+		 * changes after this check the writer will call signal,
+		 * changing the task state from INTERRUPTIBLE to RUNNING. That
+		 * will ensure that schedule() will eventually schedule this
+		 * task.
+		 */
+		if (atomic_read(address) != arg->value) {
+			ret = 0;
+			break;
+		}
+		if (to) {
+			hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
+			if (likely(to->task))
+				freezable_schedule();
+			hrtimer_cancel(&to->timer);
+			if (!to->task) {
+				ret = -ETIMEDOUT;
+				break;
+			}
+		} else {
+			freezable_schedule();
+		}
+		/* Count the number of times that we woke up. This is useful
+		 * for unit testing.
+		 */
+		++arg->wakes;
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+	}
+	finish_wait(&data->futex_wait_queue, &wait);
+	if (to)
+		destroy_hrtimer_on_stack(&to->timer);
+	return ret;
+}
+
+/**
+ * Handles the details of copying from/to userspace to ensure that the copies
+ * happen on all of the return paths of cond_wait.
+ */
+static int do_vsoc_cond_wait(struct file *filp,
+			     struct vsoc_cond_wait __user *untrusted_in)
+{
+	struct vsoc_cond_wait arg;
+	int rval = 0;
+
+	if (copy_from_user(&arg, untrusted_in, sizeof(arg)))
+		return -EFAULT;
+	/* wakes is an out parameter. Initialize it to something sensible. */
+	arg.wakes = 0;
+	rval = handle_vsoc_cond_wait(filp, &arg);
+	if (copy_to_user(untrusted_in, &arg, sizeof(arg)))
+		return -EFAULT;
+	return rval;
+}
+
+static int do_vsoc_cond_wake(struct file *filp, uint32_t offset)
+{
+	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
+	u32 region_number = iminor(file_inode(filp));
+	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
+	/* Ensure that the offset is aligned */
+	if (offset & (sizeof(uint32_t) - 1))
+		return -EADDRNOTAVAIL;
+	/* Ensure that the offset is within shared memory */
+	if (((uint64_t)offset) + region_p->region_begin_offset +
+	    sizeof(uint32_t) > region_p->region_end_offset)
+		return -E2BIG;
+	/*
+	 * TODO(b/73664181): Use multiple futex wait queues.
+	 * We need to wake every sleeper when the condition changes. Typically
+	 * only a single thread will be waiting on the condition, but there
+	 * are exceptions. The worst case is about 10 threads.
+	 */
+	wake_up_interruptible_all(&data->futex_wait_queue);
+	return 0;
+}
+
+static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	int rv = 0;
+	struct vsoc_device_region *region_p;
+	u32 reg_num;
+	struct vsoc_region_data *reg_data;
+	int retval = vsoc_validate_filep(filp);
+
+	if (retval)
+		return retval;
+	region_p = vsoc_region_from_filep(filp);
+	reg_num = iminor(file_inode(filp));
+	reg_data = vsoc_dev.regions_data + reg_num;
+	switch (cmd) {
+	case VSOC_CREATE_FD_SCOPED_PERMISSION:
+		{
+			struct fd_scoped_permission_node *node = NULL;
+
+			node = kzalloc(sizeof(*node), GFP_KERNEL);
+			/* We can't allocate memory for the permission */
+			if (!node)
+				return -ENOMEM;
+			INIT_LIST_HEAD(&node->list);
+			rv = do_create_fd_scoped_permission(
+				region_p,
+				node,
+				(struct fd_scoped_permission_arg __user *)arg);
+			if (!rv) {
+				mutex_lock(&vsoc_dev.mtx);
+				list_add(&node->list, &vsoc_dev.permissions);
+				mutex_unlock(&vsoc_dev.mtx);
+			} else {
+				kfree(node);
+				return rv;
+			}
+		}
+		break;
+
+	case VSOC_GET_FD_SCOPED_PERMISSION:
+		{
+			struct fd_scoped_permission_node *node =
+			    ((struct vsoc_private_data *)filp->private_data)->
+			    fd_scoped_permission_node;
+			if (!node)
+				return -ENOENT;
+			if (copy_to_user
+			    ((struct fd_scoped_permission __user *)arg,
+			     &node->permission, sizeof(node->permission)))
+				return -EFAULT;
+		}
+		break;
+
+	case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST:
+		if (!atomic_xchg(
+			    reg_data->outgoing_signalled,
+			    1)) {
+			writel(reg_num, vsoc_dev.regs + DOORBELL);
+			return 0;
+		} else {
+			return -EBUSY;
+		}
+		break;
+
+	case VSOC_SEND_INTERRUPT_TO_HOST:
+		writel(reg_num, vsoc_dev.regs + DOORBELL);
+		return 0;
+
+	case VSOC_WAIT_FOR_INCOMING_INTERRUPT:
+		wait_event_interruptible(
+			reg_data->interrupt_wait_queue,
+			(atomic_read(reg_data->incoming_signalled) != 0));
+		break;
+
+	case VSOC_DESCRIBE_REGION:
+		return do_vsoc_describe_region(
+			filp,
+			(struct vsoc_device_region __user *)arg);
+
+	case VSOC_SELF_INTERRUPT:
+		atomic_set(reg_data->incoming_signalled, 1);
+		wake_up_interruptible(&reg_data->interrupt_wait_queue);
+		break;
+
+	case VSOC_COND_WAIT:
+		return do_vsoc_cond_wait(filp,
+					 (struct vsoc_cond_wait __user *)arg);
+	case VSOC_COND_WAKE:
+		return do_vsoc_cond_wake(filp, arg);
+
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len,
+			 loff_t *poffset)
+{
+	__u32 area_off;
+	const void *area_p;
+	ssize_t area_len;
+	int retval = vsoc_validate_filep(filp);
+
+	if (retval)
+		return retval;
+	area_len = vsoc_get_area(filp, &area_off);
+	area_p = shm_off_to_virtual_addr(area_off);
+	area_p += *poffset;
+	area_len -= *poffset;
+	if (area_len <= 0)
+		return 0;
+	if (area_len < len)
+		len = area_len;
+	if (copy_to_user(buffer, area_p, len))
+		return -EFAULT;
+	*poffset += len;
+	return len;
+}
+
+static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin)
+{
+	ssize_t area_len = 0;
+	int retval = vsoc_validate_filep(filp);
+
+	if (retval)
+		return retval;
+	area_len = vsoc_get_area(filp, NULL);
+	switch (origin) {
+	case SEEK_SET:
+		break;
+
+	case SEEK_CUR:
+		if (offset > 0 && offset + filp->f_pos < 0)
+			return -EOVERFLOW;
+		offset += filp->f_pos;
+		break;
+
+	case SEEK_END:
+		if (offset > 0 && offset + area_len < 0)
+			return -EOVERFLOW;
+		offset += area_len;
+		break;
+
+	case SEEK_DATA:
+		if (offset >= area_len)
+			return -EINVAL;
+		if (offset < 0)
+			offset = 0;
+		break;
+
+	case SEEK_HOLE:
+		/* Next hole is always the end of the region, unless offset is
+		 * beyond that
+		 */
+		if (offset < area_len)
+			offset = area_len;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (offset < 0 || offset > area_len)
+		return -EINVAL;
+	filp->f_pos = offset;
+
+	return offset;
+}
+
+static ssize_t vsoc_write(struct file *filp, const char __user *buffer,
+			  size_t len, loff_t *poffset)
+{
+	__u32 area_off;
+	void *area_p;
+	ssize_t area_len;
+	int retval = vsoc_validate_filep(filp);
+
+	if (retval)
+		return retval;
+	area_len = vsoc_get_area(filp, &area_off);
+	area_p = shm_off_to_virtual_addr(area_off);
+	area_p += *poffset;
+	area_len -= *poffset;
+	if (area_len <= 0)
+		return 0;
+	if (area_len < len)
+		len = area_len;
+	if (copy_from_user(area_p, buffer, len))
+		return -EFAULT;
+	*poffset += len;
+	return len;
+}
+
+static irqreturn_t vsoc_interrupt(int irq, void *region_data_v)
+{
+	struct vsoc_region_data *region_data =
+	    (struct vsoc_region_data *)region_data_v;
+	int reg_num = region_data - vsoc_dev.regions_data;
+
+	if (unlikely(!region_data))
+		return IRQ_NONE;
+
+	if (unlikely(reg_num < 0 ||
+		     reg_num >= vsoc_dev.layout->region_count)) {
+		dev_err(&vsoc_dev.dev->dev,
+			"invalid irq @%p reg_num=0x%04x\n",
+			region_data, reg_num);
+		return IRQ_NONE;
+	}
+	if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) {
+		dev_err(&vsoc_dev.dev->dev,
+			"irq not aligned @%p reg_num=0x%04x\n",
+			region_data, reg_num);
+		return IRQ_NONE;
+	}
+	wake_up_interruptible(&region_data->interrupt_wait_queue);
+	return IRQ_HANDLED;
+}
+
+static int vsoc_probe_device(struct pci_dev *pdev,
+			     const struct pci_device_id *ent)
+{
+	int result;
+	int i;
+	resource_size_t reg_size;
+	dev_t devt;
+
+	vsoc_dev.dev = pdev;
+	result = pci_enable_device(pdev);
+	if (result) {
+		dev_err(&pdev->dev,
+			"pci_enable_device failed %s: error %d\n",
+			pci_name(pdev), result);
+		return result;
+	}
+	vsoc_dev.enabled_device = true;
+	result = pci_request_regions(pdev, "vsoc");
+	if (result < 0) {
+		dev_err(&pdev->dev, "pci_request_regions failed\n");
+		vsoc_remove_device(pdev);
+		return -EBUSY;
+	}
+	vsoc_dev.requested_regions = true;
+	/* Set up the control registers in BAR 0 */
+	reg_size = pci_resource_len(pdev, REGISTER_BAR);
+	if (reg_size > MAX_REGISTER_BAR_LEN)
+		vsoc_dev.regs =
+		    pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN);
+	else
+		vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size);
+
+	if (!vsoc_dev.regs) {
+		dev_err(&pdev->dev,
+			"cannot map registers of size %zu\n",
+		       (size_t)reg_size);
+		vsoc_remove_device(pdev);
+		return -EBUSY;
+	}
+
+	/* Map the shared memory in BAR 2 */
+	vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
+	vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);
+
+	dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n",
+		 &vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
+	vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0);
+	if (!vsoc_dev.kernel_mapped_shm) {
+		dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
+		vsoc_remove_device(pdev);
+		return -EBUSY;
+	}
+
+	vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *)
+				vsoc_dev.kernel_mapped_shm;
+	dev_info(&pdev->dev, "major_version: %d\n",
+		 vsoc_dev.layout->major_version);
+	dev_info(&pdev->dev, "minor_version: %d\n",
+		 vsoc_dev.layout->minor_version);
+	dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size);
+	dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count);
+	if (vsoc_dev.layout->major_version !=
+	    CURRENT_VSOC_LAYOUT_MAJOR_VERSION) {
+		dev_err(&vsoc_dev.dev->dev,
+			"driver supports only major_version %d\n",
+			CURRENT_VSOC_LAYOUT_MAJOR_VERSION);
+		vsoc_remove_device(pdev);
+		return -EBUSY;
+	}
+	result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count,
+				     VSOC_DEV_NAME);
+	if (result) {
+		dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n");
+		vsoc_remove_device(pdev);
+		return -EBUSY;
+	}
+	vsoc_dev.major = MAJOR(devt);
+	cdev_init(&vsoc_dev.cdev, &vsoc_ops);
+	vsoc_dev.cdev.owner = THIS_MODULE;
+	result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count);
+	if (result) {
+		dev_err(&vsoc_dev.dev->dev, "cdev_add error\n");
+		vsoc_remove_device(pdev);
+		return -EBUSY;
+	}
+	vsoc_dev.cdev_added = true;
+	vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
+	if (IS_ERR(vsoc_dev.class)) {
+		dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
+		vsoc_remove_device(pdev);
+		return PTR_ERR(vsoc_dev.class);
+	}
+	vsoc_dev.class_added = true;
+	vsoc_dev.regions = (struct vsoc_device_region __force *)
+		((void *)vsoc_dev.layout +
+		 vsoc_dev.layout->vsoc_region_desc_offset);
+	vsoc_dev.msix_entries = kcalloc(
+			vsoc_dev.layout->region_count,
+			sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL);
+	if (!vsoc_dev.msix_entries) {
+		dev_err(&vsoc_dev.dev->dev,
+			"unable to allocate msix_entries\n");
+		vsoc_remove_device(pdev);
+		return -ENOSPC;
+	}
+	vsoc_dev.regions_data = kcalloc(
+			vsoc_dev.layout->region_count,
+			sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL);
+	if (!vsoc_dev.regions_data) {
+		dev_err(&vsoc_dev.dev->dev,
+			"unable to allocate regions' data\n");
+		vsoc_remove_device(pdev);
+		return -ENOSPC;
+	}
+	for (i = 0; i < vsoc_dev.layout->region_count; ++i)
+		vsoc_dev.msix_entries[i].entry = i;
+
+	result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries,
+				       vsoc_dev.layout->region_count);
+	if (result) {
+		dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result);
+		vsoc_remove_device(pdev);
+		return -ENOSPC;
+	}
+	/* Check that all regions are well formed */
+	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
+		const struct vsoc_device_region *region = vsoc_dev.regions + i;
+
+		if (!PAGE_ALIGNED(region->region_begin_offset) ||
+		    !PAGE_ALIGNED(region->region_end_offset)) {
+			dev_err(&vsoc_dev.dev->dev,
+				"region %d not aligned (%x:%x)", i,
+				region->region_begin_offset,
+				region->region_end_offset);
+			vsoc_remove_device(pdev);
+			return -EFAULT;
+		}
+		if (region->region_begin_offset >= region->region_end_offset ||
+		    region->region_end_offset > vsoc_dev.shm_size) {
+			dev_err(&vsoc_dev.dev->dev,
+				"region %d offsets are wrong: %x %x %zx",
+				i, region->region_begin_offset,
+				region->region_end_offset, vsoc_dev.shm_size);
+			vsoc_remove_device(pdev);
+			return -EFAULT;
+		}
+		if (region->managed_by >= vsoc_dev.layout->region_count) {
+			dev_err(&vsoc_dev.dev->dev,
+				"region %d has invalid owner: %u",
+				i, region->managed_by);
+			vsoc_remove_device(pdev);
+			return -EFAULT;
+		}
+	}
+	vsoc_dev.msix_enabled = true;
+	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
+		const struct vsoc_device_region *region = vsoc_dev.regions + i;
+		size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
+		const struct vsoc_signal_table_layout *h_to_g_signal_table =
+			&region->host_to_guest_signal_table;
+		const struct vsoc_signal_table_layout *g_to_h_signal_table =
+			&region->guest_to_host_signal_table;
+
+		vsoc_dev.regions_data[i].name[name_sz] = '\0';
+		memcpy(vsoc_dev.regions_data[i].name, region->device_name,
+		       name_sz);
+		dev_info(&pdev->dev, "region %d name=%s\n",
+			 i, vsoc_dev.regions_data[i].name);
+		init_waitqueue_head(
+				&vsoc_dev.regions_data[i].interrupt_wait_queue);
+		init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
+		vsoc_dev.regions_data[i].incoming_signalled =
+			shm_off_to_virtual_addr(region->region_begin_offset) +
+			h_to_g_signal_table->interrupt_signalled_offset;
+		vsoc_dev.regions_data[i].outgoing_signalled =
+			shm_off_to_virtual_addr(region->region_begin_offset) +
+			g_to_h_signal_table->interrupt_signalled_offset;
+		result = request_irq(
+				vsoc_dev.msix_entries[i].vector,
+				vsoc_interrupt, 0,
+				vsoc_dev.regions_data[i].name,
+				vsoc_dev.regions_data + i);
+		if (result) {
+			dev_info(&pdev->dev,
+				 "request_irq failed irq=%d vector=%d\n",
+				i, vsoc_dev.msix_entries[i].vector);
+			vsoc_remove_device(pdev);
+			return -ENOSPC;
+		}
+		vsoc_dev.regions_data[i].irq_requested = true;
+		if (!device_create(vsoc_dev.class, NULL,
+				   MKDEV(vsoc_dev.major, i),
+				   NULL, vsoc_dev.regions_data[i].name)) {
+			dev_err(&vsoc_dev.dev->dev, "device_create failed\n");
+			vsoc_remove_device(pdev);
+			return -EBUSY;
+		}
+		vsoc_dev.regions_data[i].device_created = true;
+	}
+	return 0;
+}
+
+/*
+ * This should undo all of the allocations in the probe function in reverse
+ * order.
+ *
+ * Notes:
+ *
+ *   The device may have been partially initialized, so double check
+ *   that the allocations happened.
+ *
+ *   This function may be called multiple times, so mark resources as freed
+ *   as they are deallocated.
+ */
+static void vsoc_remove_device(struct pci_dev *pdev)
+{
+	int i;
+	/*
+	 * pdev is the first thing to be set on probe and the last thing
+	 * to be cleared here. If it's NULL then there is no cleanup.
+	 */
+	if (!pdev || !vsoc_dev.dev)
+		return;
+	dev_info(&pdev->dev, "remove_device\n");
+	if (vsoc_dev.regions_data) {
+		for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
+			if (vsoc_dev.regions_data[i].device_created) {
+				device_destroy(vsoc_dev.class,
+					       MKDEV(vsoc_dev.major, i));
+				vsoc_dev.regions_data[i].device_created = false;
+			}
+			if (vsoc_dev.regions_data[i].irq_requested)
+				free_irq(vsoc_dev.msix_entries[i].vector, NULL);
+			vsoc_dev.regions_data[i].irq_requested = false;
+		}
+		kfree(vsoc_dev.regions_data);
+		vsoc_dev.regions_data = NULL;
+	}
+	if (vsoc_dev.msix_enabled) {
+		pci_disable_msix(pdev);
+		vsoc_dev.msix_enabled = false;
+	}
+	kfree(vsoc_dev.msix_entries);
+	vsoc_dev.msix_entries = NULL;
+	vsoc_dev.regions = NULL;
+	if (vsoc_dev.class_added) {
+		class_destroy(vsoc_dev.class);
+		vsoc_dev.class_added = false;
+	}
+	if (vsoc_dev.cdev_added) {
+		cdev_del(&vsoc_dev.cdev);
+		vsoc_dev.cdev_added = false;
+	}
+	if (vsoc_dev.major && vsoc_dev.layout) {
+		unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
+					 vsoc_dev.layout->region_count);
+		vsoc_dev.major = 0;
+	}
+	vsoc_dev.layout = NULL;
+	if (vsoc_dev.kernel_mapped_shm) {
+		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
+		vsoc_dev.kernel_mapped_shm = NULL;
+	}
+	if (vsoc_dev.regs) {
+		pci_iounmap(pdev, vsoc_dev.regs);
+		vsoc_dev.regs = NULL;
+	}
+	if (vsoc_dev.requested_regions) {
+		pci_release_regions(pdev);
+		vsoc_dev.requested_regions = false;
+	}
+	if (vsoc_dev.enabled_device) {
+		pci_disable_device(pdev);
+		vsoc_dev.enabled_device = false;
+	}
+	/* Do this last: it indicates that the device is not initialized. */
+	vsoc_dev.dev = NULL;
+}
+
+static void __exit vsoc_cleanup_module(void)
+{
+	vsoc_remove_device(vsoc_dev.dev);
+	pci_unregister_driver(&vsoc_pci_driver);
+}
+
+static int __init vsoc_init_module(void)
+{
+	int err = -ENOMEM;
+
+	INIT_LIST_HEAD(&vsoc_dev.permissions);
+	mutex_init(&vsoc_dev.mtx);
+
+	err = pci_register_driver(&vsoc_pci_driver);
+	if (err < 0)
+		return err;
+	return 0;
+}
+
+static int vsoc_open(struct inode *inode, struct file *filp)
+{
+	/* Can't use vsoc_validate_filep because filp is still incomplete */
+	int ret = vsoc_validate_inode(inode);
+
+	if (ret)
+		return ret;
+	filp->private_data =
+		kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL);
+	if (!filp->private_data)
+		return -ENOMEM;
+	return 0;
+}
+
+static int vsoc_release(struct inode *inode, struct file *filp)
+{
+	struct vsoc_private_data *private_data = NULL;
+	struct fd_scoped_permission_node *node = NULL;
+	struct vsoc_device_region *owner_region_p = NULL;
+	int retval = vsoc_validate_filep(filp);
+
+	if (retval)
+		return retval;
+	private_data = (struct vsoc_private_data *)filp->private_data;
+	if (!private_data)
+		return 0;
+
+	node = private_data->fd_scoped_permission_node;
+	if (node) {
+		owner_region_p = vsoc_region_from_inode(inode);
+		if (owner_region_p->managed_by != VSOC_REGION_WHOLE) {
+			owner_region_p =
+			    &vsoc_dev.regions[owner_region_p->managed_by];
+		}
+		do_destroy_fd_scoped_permission_node(owner_region_p, node);
+		private_data->fd_scoped_permission_node = NULL;
+	}
+	kfree(private_data);
+	filp->private_data = NULL;
+
+	return 0;
+}
+
+/*
+ * Returns the device relative offset and length of the area specified by the
+ * fd scoped permission. If there is no fd scoped permission set, a default
+ * permission covering the entire region is assumed, unless the region is owned
+ * by another one, in which case the default is a permission with zero size.
+ */
+static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset)
+{
+	__u32 off = 0;
+	ssize_t length = 0;
+	struct vsoc_device_region *region_p;
+	struct fd_scoped_permission *perm;
+
+	region_p = vsoc_region_from_filep(filp);
+	off = region_p->region_begin_offset;
+	perm = &((struct vsoc_private_data *)filp->private_data)->
+		fd_scoped_permission_node->permission;
+	if (perm) {
+		off += perm->begin_offset;
+		length = perm->end_offset - perm->begin_offset;
+	} else if (region_p->managed_by == VSOC_REGION_WHOLE) {
+		/* No permission set and the regions is not owned by another,
+		 * default to full region access.
+		 */
+		length = vsoc_device_region_size(region_p);
+	} else {
+		/* return zero length, access is denied. */
+		length = 0;
+	}
+	if (area_offset)
+		*area_offset = off;
+	return length;
+}
+
+static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	unsigned long len = vma->vm_end - vma->vm_start;
+	__u32 area_off;
+	phys_addr_t mem_off;
+	ssize_t area_len;
+	int retval = vsoc_validate_filep(filp);
+
+	if (retval)
+		return retval;
+	area_len = vsoc_get_area(filp, &area_off);
+	/* Add the requested offset */
+	area_off += (vma->vm_pgoff << PAGE_SHIFT);
+	area_len -= (vma->vm_pgoff << PAGE_SHIFT);
+	if (area_len < len)
+		return -EINVAL;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	mem_off = shm_off_to_phys_addr(area_off);
+	if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT,
+			       len, vma->vm_page_prot))
+		return -EAGAIN;
+	return 0;
+}
+
+module_init(vsoc_init_module);
+module_exit(vsoc_cleanup_module);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Greg Hartman <ghartman@google.com>");
+MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device");
+MODULE_VERSION("1.0");
diff --git a/drivers/staging/goldfish/Kconfig b/drivers/staging/goldfish/Kconfig
index 4e09460..d293bbc 100644
--- a/drivers/staging/goldfish/Kconfig
+++ b/drivers/staging/goldfish/Kconfig
@@ -4,6 +4,14 @@
 	---help---
 	  Emulated audio channel for the Goldfish Android Virtual Device
 
+config GOLDFISH_SYNC
+    tristate "Goldfish AVD Sync Driver"
+    depends on GOLDFISH
+    depends on SW_SYNC
+    depends on SYNC_FILE
+	---help---
+	  Emulated sync fences for the Goldfish Android Virtual Device
+
 config MTD_GOLDFISH_NAND
 	tristate "Goldfish NAND device"
 	depends on GOLDFISH
diff --git a/drivers/staging/goldfish/Makefile b/drivers/staging/goldfish/Makefile
index dec34ad..3313fce 100644
--- a/drivers/staging/goldfish/Makefile
+++ b/drivers/staging/goldfish/Makefile
@@ -4,3 +4,9 @@
 
 obj-$(CONFIG_GOLDFISH_AUDIO) += goldfish_audio.o
 obj-$(CONFIG_MTD_GOLDFISH_NAND)	+= goldfish_nand.o
+
+# and sync
+
+ccflags-y := -Idrivers/staging/android
+goldfish_sync-objs := goldfish_sync_timeline_fence.o goldfish_sync_timeline.o
+obj-$(CONFIG_GOLDFISH_SYNC) += goldfish_sync.o
diff --git a/drivers/staging/goldfish/goldfish_audio.c b/drivers/staging/goldfish/goldfish_audio.c
index bd55995..0bb0ee2 100644
--- a/drivers/staging/goldfish/goldfish_audio.c
+++ b/drivers/staging/goldfish/goldfish_audio.c
@@ -28,6 +28,7 @@
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 #include <linux/goldfish.h>
+#include <linux/acpi.h>
 
 MODULE_AUTHOR("Google, Inc.");
 MODULE_DESCRIPTION("Android QEMU Audio Driver");
@@ -116,6 +117,7 @@
 				   size_t count, loff_t *pos)
 {
 	struct goldfish_audio *data = fp->private_data;
+	unsigned long irq_flags;
 	int length;
 	int result = 0;
 
@@ -129,6 +131,10 @@
 		wait_event_interruptible(data->wait, data->buffer_status &
 					 AUDIO_INT_READ_BUFFER_FULL);
 
+		spin_lock_irqsave(&data->lock, irq_flags);
+		data->buffer_status &= ~AUDIO_INT_READ_BUFFER_FULL;
+		spin_unlock_irqrestore(&data->lock, irq_flags);
+
 		length = AUDIO_READ(data, AUDIO_READ_BUFFER_AVAILABLE);
 
 		/* copy data to user space */
@@ -351,12 +357,19 @@
 };
 MODULE_DEVICE_TABLE(of, goldfish_audio_of_match);
 
+static const struct acpi_device_id goldfish_audio_acpi_match[] = {
+	{ "GFSH0005", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, goldfish_audio_acpi_match);
+
 static struct platform_driver goldfish_audio_driver = {
 	.probe		= goldfish_audio_probe,
 	.remove		= goldfish_audio_remove,
 	.driver = {
 		.name = "goldfish_audio",
 		.of_match_table = goldfish_audio_of_match,
+		.acpi_match_table = ACPI_PTR(goldfish_audio_acpi_match),
 	}
 };
 
diff --git a/drivers/staging/goldfish/goldfish_sync_timeline.c b/drivers/staging/goldfish/goldfish_sync_timeline.c
new file mode 100644
index 0000000..5bef4c6
--- /dev/null
+++ b/drivers/staging/goldfish/goldfish_sync_timeline.c
@@ -0,0 +1,962 @@
+/*
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/acpi.h>
+
+#include <linux/string.h>
+
+#include <linux/fs.h>
+#include <linux/syscalls.h>
+#include <linux/sync_file.h>
+#include <linux/fence.h>
+
+#include "goldfish_sync_timeline_fence.h"
+
+#define ERR(...) printk(KERN_ERR __VA_ARGS__);
+
+#define INFO(...) printk(KERN_INFO __VA_ARGS__);
+
+#define DPRINT(...) pr_debug(__VA_ARGS__);
+
+#define DTRACE() DPRINT("%s: enter", __func__)
+
+/* The Goldfish sync driver is designed to provide a interface
+ * between the underlying host's sync device and the kernel's
+ * fence sync framework..
+ * The purpose of the device/driver is to enable lightweight
+ * creation and signaling of timelines and fences
+ * in order to synchronize the guest with host-side graphics events.
+ *
+ * Each time the interrupt trips, the driver
+ * may perform a sync operation.
+ */
+
+/* The operations are: */
+
+/* Ready signal - used to mark when irq should lower */
+#define CMD_SYNC_READY            0
+
+/* Create a new timeline. writes timeline handle */
+#define CMD_CREATE_SYNC_TIMELINE  1
+
+/* Create a fence object. reads timeline handle and time argument.
+ * Writes fence fd to the SYNC_REG_HANDLE register. */
+#define CMD_CREATE_SYNC_FENCE     2
+
+/* Increments timeline. reads timeline handle and time argument */
+#define CMD_SYNC_TIMELINE_INC     3
+
+/* Destroys a timeline. reads timeline handle */
+#define CMD_DESTROY_SYNC_TIMELINE 4
+
+/* Starts a wait on the host with
+ * the given glsync object and sync thread handle. */
+#define CMD_TRIGGER_HOST_WAIT     5
+
+/* The register layout is: */
+
+#define SYNC_REG_BATCH_COMMAND                0x00 /* host->guest batch commands */
+#define SYNC_REG_BATCH_GUESTCOMMAND           0x04 /* guest->host batch commands */
+#define SYNC_REG_BATCH_COMMAND_ADDR           0x08 /* communicate physical address of host->guest batch commands */
+#define SYNC_REG_BATCH_COMMAND_ADDR_HIGH      0x0c /* 64-bit part */
+#define SYNC_REG_BATCH_GUESTCOMMAND_ADDR      0x10 /* communicate physical address of guest->host commands */
+#define SYNC_REG_BATCH_GUESTCOMMAND_ADDR_HIGH 0x14 /* 64-bit part */
+#define SYNC_REG_INIT                         0x18 /* signals that the device has been probed */
+
+/* There is an ioctl associated with goldfish sync driver.
+ * Make it conflict with ioctls that are not likely to be used
+ * in the emulator.
+ *
+ * '@'	00-0F	linux/radeonfb.h	conflict!
+ * '@'	00-0F	drivers/video/aty/aty128fb.c	conflict!
+ */
+#define GOLDFISH_SYNC_IOC_MAGIC	'@'
+
+#define GOLDFISH_SYNC_IOC_QUEUE_WORK	_IOWR(GOLDFISH_SYNC_IOC_MAGIC, 0, struct goldfish_sync_ioctl_info)
+
+/* The above definitions (command codes, register layout, ioctl definitions)
+ * need to be in sync with the following files:
+ *
+ * Host-side (emulator):
+ * external/qemu/android/emulation/goldfish_sync.h
+ * external/qemu-android/hw/misc/goldfish_sync.c
+ *
+ * Guest-side (system image):
+ * device/generic/goldfish-opengl/system/egl/goldfish_sync.h
+ * device/generic/goldfish/ueventd.ranchu.rc
+ * platform/build/target/board/generic/sepolicy/file_contexts
+ */
+struct goldfish_sync_hostcmd {
+	/* sorted for alignment */
+	uint64_t handle;
+	uint64_t hostcmd_handle;
+	uint32_t cmd;
+	uint32_t time_arg;
+};
+
+struct goldfish_sync_guestcmd {
+	uint64_t host_command; /* uint64_t for alignment */
+	uint64_t glsync_handle;
+	uint64_t thread_handle;
+	uint64_t guest_timeline_handle;
+};
+
+#define GOLDFISH_SYNC_MAX_CMDS 32
+
+struct goldfish_sync_state {
+	char __iomem *reg_base;
+	int irq;
+
+	/* Spinlock protects |to_do| / |to_do_end|. */
+	spinlock_t lock;
+	/* |mutex_lock| protects all concurrent access
+	 * to timelines for both kernel and user space. */
+	struct mutex mutex_lock;
+
+	/* Buffer holding commands issued from host. */
+	struct goldfish_sync_hostcmd to_do[GOLDFISH_SYNC_MAX_CMDS];
+	uint32_t to_do_end;
+
+	/* Addresses for the reading or writing
+	 * of individual commands. The host can directly write
+	 * to |batch_hostcmd| (and then this driver immediately
+	 * copies contents to |to_do|). This driver either replies
+	 * through |batch_hostcmd| or simply issues a
+	 * guest->host command through |batch_guestcmd|.
+	 */
+	struct goldfish_sync_hostcmd *batch_hostcmd;
+	struct goldfish_sync_guestcmd *batch_guestcmd;
+
+	/* Used to give this struct itself to a work queue
+	 * function for executing actual sync commands. */
+	struct work_struct work_item;
+};
+
+static struct goldfish_sync_state global_sync_state[1];
+
+struct goldfish_sync_timeline_obj {
+	struct goldfish_sync_timeline *sync_tl;
+	uint32_t current_time;
+	/* We need to be careful about when we deallocate
+	 * this |goldfish_sync_timeline_obj| struct.
+	 * In order to ensure proper cleanup, we need to
+	 * consider the triggered host-side wait that may
+	 * still be in flight when the guest close()'s a
+	 * goldfish_sync device's sync context fd (and
+	 * destroys the |sync_tl| field above).
+	 * The host-side wait may raise IRQ
+	 * and tell the kernel to increment the timeline _after_
+	 * the |sync_tl| has already been set to null.
+	 *
+	 * From observations on OpenGL apps and CTS tests, this
+	 * happens at some very low probability upon context
+	 * destruction or process close, but it does happen
+	 * and it needs to be handled properly. Otherwise,
+	 * if we clean up the surrounding |goldfish_sync_timeline_obj|
+	 * too early, any |handle| field of any host->guest command
+	 * might not even point to a null |sync_tl| field,
+	 * but to garbage memory or even a reclaimed |sync_tl|.
+	 * If we do not count such "pending waits" and kfree the object
+	 * immediately upon |goldfish_sync_timeline_destroy|,
+	 * we might get mysterous RCU stalls after running a long
+	 * time because the garbage memory that is being read
+	 * happens to be interpretable as a |spinlock_t| struct
+	 * that is currently in the locked state.
+	 *
+	 * To track when to free the |goldfish_sync_timeline_obj|
+	 * itself, we maintain a kref.
+	 * The kref essentially counts the timeline itself plus
+	 * the number of waits in flight. kref_init/kref_put
+	 * are issued on
+	 * |goldfish_sync_timeline_create|/|goldfish_sync_timeline_destroy|
+	 * and kref_get/kref_put are issued on
+	 * |goldfish_sync_fence_create|/|goldfish_sync_timeline_inc|.
+	 *
+	 * The timeline is destroyed after reference count
+	 * reaches zero, which would happen after
+	 * |goldfish_sync_timeline_destroy| and all pending
+	 * |goldfish_sync_timeline_inc|'s are fulfilled.
+	 *
+	 * NOTE (1): We assume that |fence_create| and
+	 * |timeline_inc| calls are 1:1, otherwise the kref scheme
+	 * will not work. This is a valid assumption as long
+	 * as the host-side virtual device implementation
+	 * does not insert any timeline increments
+	 * that we did not trigger from here.
+	 *
+	 * NOTE (2): The use of kref by itself requires no locks,
+	 * but this does not mean everything works without locks.
+	 * Related timeline operations do require a lock of some sort,
+	 * or at least are not proven to work without it.
+	 * In particualr, we assume that all the operations
+	 * done on the |kref| field above are done in contexts where
+	 * |global_sync_state->mutex_lock| is held. Do not
+	 * remove that lock until everything is proven to work
+	 * without it!!! */
+	struct kref kref;
+};
+
+/* We will call |delete_timeline_obj| when the last reference count
+ * of the kref is decremented. This deletes the sync
+ * timeline object along with the wrapper itself. */
+static void delete_timeline_obj(struct kref* kref) {
+	struct goldfish_sync_timeline_obj* obj =
+		container_of(kref, struct goldfish_sync_timeline_obj, kref);
+
+	goldfish_sync_timeline_put_internal(obj->sync_tl);
+	obj->sync_tl = NULL;
+	kfree(obj);
+}
+
+static uint64_t gensym_ctr;
+static void gensym(char *dst)
+{
+	sprintf(dst, "goldfish_sync:gensym:%llu", gensym_ctr);
+	gensym_ctr++;
+}
+
+/* |goldfish_sync_timeline_create| assumes that |global_sync_state->mutex_lock|
+ * is held. */
+static struct goldfish_sync_timeline_obj*
+goldfish_sync_timeline_create(void)
+{
+
+	char timeline_name[256];
+	struct goldfish_sync_timeline *res_sync_tl = NULL;
+	struct goldfish_sync_timeline_obj *res;
+
+	DTRACE();
+
+	gensym(timeline_name);
+
+	res_sync_tl = goldfish_sync_timeline_create_internal(timeline_name);
+	if (!res_sync_tl) {
+		ERR("Failed to create goldfish_sw_sync timeline.");
+		return NULL;
+	}
+
+	res = kzalloc(sizeof(struct goldfish_sync_timeline_obj), GFP_KERNEL);
+	res->sync_tl = res_sync_tl;
+	res->current_time = 0;
+	kref_init(&res->kref);
+
+	DPRINT("new timeline_obj=0x%p", res);
+	return res;
+}
+
+/* |goldfish_sync_fence_create| assumes that |global_sync_state->mutex_lock|
+ * is held. */
+static int
+goldfish_sync_fence_create(struct goldfish_sync_timeline_obj *obj,
+							uint32_t val)
+{
+
+	int fd;
+	char fence_name[256];
+	struct sync_pt *syncpt = NULL;
+	struct sync_file *sync_file_obj = NULL;
+	struct goldfish_sync_timeline *tl;
+
+	DTRACE();
+
+	if (!obj) return -1;
+
+	tl = obj->sync_tl;
+
+	syncpt = goldfish_sync_pt_create_internal(
+				tl, sizeof(struct sync_pt) + 4, val);
+	if (!syncpt) {
+		ERR("could not create sync point! "
+			"goldfish_sync_timeline=0x%p val=%d",
+			   tl, val);
+		return -1;
+	}
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		ERR("could not get unused fd for sync fence. "
+			"errno=%d", fd);
+		goto err_cleanup_pt;
+	}
+
+	gensym(fence_name);
+
+	sync_file_obj = sync_file_create(&syncpt->base);
+	if (!sync_file_obj) {
+		ERR("could not create sync fence! "
+			"goldfish_sync_timeline=0x%p val=%d sync_pt=0x%p",
+			   tl, val, syncpt);
+		goto err_cleanup_fd_pt;
+	}
+
+	DPRINT("installing sync fence into fd %d sync_file_obj=0x%p",
+			fd, sync_file_obj);
+	fd_install(fd, sync_file_obj->file);
+	kref_get(&obj->kref);
+
+	return fd;
+
+err_cleanup_fd_pt:
+	put_unused_fd(fd);
+err_cleanup_pt:
+	fence_put(&syncpt->base);
+	return -1;
+}
+
+/* |goldfish_sync_timeline_inc| assumes that |global_sync_state->mutex_lock|
+ * is held. */
+static void
+goldfish_sync_timeline_inc(struct goldfish_sync_timeline_obj *obj, uint32_t inc)
+{
+	DTRACE();
+	/* Just give up if someone else nuked the timeline.
+	 * Whoever it was won't care that it doesn't get signaled. */
+	if (!obj) return;
+
+	DPRINT("timeline_obj=0x%p", obj);
+	goldfish_sync_timeline_signal_internal(obj->sync_tl, inc);
+	DPRINT("incremented timeline. increment max_time");
+	obj->current_time += inc;
+
+	/* Here, we will end up deleting the timeline object if it
+	 * turns out that this call was a pending increment after
+	 * |goldfish_sync_timeline_destroy| was called. */
+	kref_put(&obj->kref, delete_timeline_obj);
+	DPRINT("done");
+}
+
+/* |goldfish_sync_timeline_destroy| assumes
+ * that |global_sync_state->mutex_lock| is held. */
+static void
+goldfish_sync_timeline_destroy(struct goldfish_sync_timeline_obj *obj)
+{
+	DTRACE();
+	/* See description of |goldfish_sync_timeline_obj| for why we
+	 * should not immediately destroy |obj| */
+	kref_put(&obj->kref, delete_timeline_obj);
+}
+
+static inline void
+goldfish_sync_cmd_queue(struct goldfish_sync_state *sync_state,
+						uint32_t cmd,
+						uint64_t handle,
+						uint32_t time_arg,
+						uint64_t hostcmd_handle)
+{
+	struct goldfish_sync_hostcmd *to_add;
+
+	DTRACE();
+
+	BUG_ON(sync_state->to_do_end == GOLDFISH_SYNC_MAX_CMDS);
+
+	to_add = &sync_state->to_do[sync_state->to_do_end];
+
+	to_add->cmd = cmd;
+	to_add->handle = handle;
+	to_add->time_arg = time_arg;
+	to_add->hostcmd_handle = hostcmd_handle;
+
+	sync_state->to_do_end += 1;
+}
+
+static inline void
+goldfish_sync_hostcmd_reply(struct goldfish_sync_state *sync_state,
+							uint32_t cmd,
+							uint64_t handle,
+							uint32_t time_arg,
+							uint64_t hostcmd_handle)
+{
+	unsigned long irq_flags;
+	struct goldfish_sync_hostcmd *batch_hostcmd =
+		sync_state->batch_hostcmd;
+
+	DTRACE();
+
+	spin_lock_irqsave(&sync_state->lock, irq_flags);
+
+	batch_hostcmd->cmd = cmd;
+	batch_hostcmd->handle = handle;
+	batch_hostcmd->time_arg = time_arg;
+	batch_hostcmd->hostcmd_handle = hostcmd_handle;
+	writel(0, sync_state->reg_base + SYNC_REG_BATCH_COMMAND);
+
+	spin_unlock_irqrestore(&sync_state->lock, irq_flags);
+}
+
+static inline void
+goldfish_sync_send_guestcmd(struct goldfish_sync_state *sync_state,
+							uint32_t cmd,
+							uint64_t glsync_handle,
+							uint64_t thread_handle,
+							uint64_t timeline_handle)
+{
+	unsigned long irq_flags;
+	struct goldfish_sync_guestcmd *batch_guestcmd =
+		sync_state->batch_guestcmd;
+
+	DTRACE();
+
+	spin_lock_irqsave(&sync_state->lock, irq_flags);
+
+	batch_guestcmd->host_command = (uint64_t)cmd;
+	batch_guestcmd->glsync_handle = (uint64_t)glsync_handle;
+	batch_guestcmd->thread_handle = (uint64_t)thread_handle;
+	batch_guestcmd->guest_timeline_handle = (uint64_t)timeline_handle;
+	writel(0, sync_state->reg_base + SYNC_REG_BATCH_GUESTCOMMAND);
+
+	spin_unlock_irqrestore(&sync_state->lock, irq_flags);
+}
+
+/* |goldfish_sync_interrupt| handles IRQ raises from the virtual device.
+ * In the context of OpenGL, this interrupt will fire whenever we need
+ * to signal a fence fd in the guest, with the command
+ * |CMD_SYNC_TIMELINE_INC|.
+ * However, because this function will be called in an interrupt context,
+ * it is necessary to do the actual work of signaling off of interrupt context.
+ * The shared work queue is used for this purpose. At the end when
+ * all pending commands are intercepted by the interrupt handler,
+ * we call |schedule_work|, which will later run the actual
+ * desired sync command in |goldfish_sync_work_item_fn|.
+ */
+static irqreturn_t goldfish_sync_interrupt(int irq, void *dev_id)
+{
+
+	struct goldfish_sync_state *sync_state = dev_id;
+
+	uint32_t nextcmd;
+	uint32_t command_r;
+	uint64_t handle_rw;
+	uint32_t time_r;
+	uint64_t hostcmd_handle_rw;
+
+	int count = 0;
+
+	DTRACE();
+
+	sync_state = dev_id;
+
+	spin_lock(&sync_state->lock);
+
+	for (;;) {
+
+		readl(sync_state->reg_base + SYNC_REG_BATCH_COMMAND);
+		nextcmd = sync_state->batch_hostcmd->cmd;
+
+		if (nextcmd == 0)
+			break;
+
+		command_r = nextcmd;
+		handle_rw = sync_state->batch_hostcmd->handle;
+		time_r = sync_state->batch_hostcmd->time_arg;
+		hostcmd_handle_rw = sync_state->batch_hostcmd->hostcmd_handle;
+
+		goldfish_sync_cmd_queue(
+				sync_state,
+				command_r,
+				handle_rw,
+				time_r,
+				hostcmd_handle_rw);
+
+		count++;
+	}
+
+	spin_unlock(&sync_state->lock);
+
+	schedule_work(&sync_state->work_item);
+
+	return (count == 0) ? IRQ_NONE : IRQ_HANDLED;
+}
+
+/* |goldfish_sync_work_item_fn| does the actual work of servicing
+ * host->guest sync commands. This function is triggered whenever
+ * the IRQ for the goldfish sync device is raised. Once it starts
+ * running, it grabs the contents of the buffer containing the
+ * commands it needs to execute (there may be multiple, because
+ * our IRQ is active high and not edge triggered), and then
+ * runs all of them one after the other.
+ */
+static void goldfish_sync_work_item_fn(struct work_struct *input)
+{
+
+	struct goldfish_sync_state *sync_state;
+	int sync_fence_fd;
+
+	struct goldfish_sync_timeline_obj *timeline;
+	uint64_t timeline_ptr;
+
+	uint64_t hostcmd_handle;
+
+	uint32_t cmd;
+	uint64_t handle;
+	uint32_t time_arg;
+
+	struct goldfish_sync_hostcmd *todo;
+	uint32_t todo_end;
+
+	unsigned long irq_flags;
+
+	struct goldfish_sync_hostcmd to_run[GOLDFISH_SYNC_MAX_CMDS];
+	uint32_t i = 0;
+
+	sync_state = container_of(input, struct goldfish_sync_state, work_item);
+
+	mutex_lock(&sync_state->mutex_lock);
+
+	spin_lock_irqsave(&sync_state->lock, irq_flags); {
+
+		todo_end = sync_state->to_do_end;
+
+		DPRINT("num sync todos: %u", sync_state->to_do_end);
+
+		for (i = 0; i < todo_end; i++)
+			to_run[i] = sync_state->to_do[i];
+
+		/* We expect that commands will come in at a slow enough rate
+		 * so that incoming items will not be more than
+		 * GOLDFISH_SYNC_MAX_CMDS.
+		 *
+		 * This is because the way the sync device is used,
+		 * it's only for managing buffer data transfers per frame,
+		 * with a sequential dependency between putting things in
+		 * to_do and taking them out. Once a set of commands is
+		 * queued up in to_do, the user of the device waits for
+		 * them to be processed before queuing additional commands,
+		 * which limits the rate at which commands come in
+		 * to the rate at which we take them out here.
+		 *
+		 * We also don't expect more than MAX_CMDS to be issued
+		 * at once; there is a correspondence between
+		 * which buffers need swapping to the (display / buffer queue)
+		 * to particular commands, and we don't expect there to be
+		 * enough display or buffer queues in operation at once
+		 * to overrun GOLDFISH_SYNC_MAX_CMDS.
+		 */
+		sync_state->to_do_end = 0;
+
+	} spin_unlock_irqrestore(&sync_state->lock, irq_flags);
+
+	for (i = 0; i < todo_end; i++) {
+		DPRINT("todo index: %u", i);
+
+		todo = &to_run[i];
+
+		cmd = todo->cmd;
+
+		handle = (uint64_t)todo->handle;
+		time_arg = todo->time_arg;
+		hostcmd_handle = (uint64_t)todo->hostcmd_handle;
+
+		DTRACE();
+
+		timeline = (struct goldfish_sync_timeline_obj *)(uintptr_t)handle;
+
+		switch (cmd) {
+		case CMD_SYNC_READY:
+			break;
+		case CMD_CREATE_SYNC_TIMELINE:
+			DPRINT("exec CMD_CREATE_SYNC_TIMELINE: "
+					"handle=0x%llx time_arg=%d",
+					handle, time_arg);
+			timeline = goldfish_sync_timeline_create();
+			timeline_ptr = (uintptr_t)timeline;
+			goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_TIMELINE,
+										timeline_ptr,
+										0,
+										hostcmd_handle);
+			DPRINT("sync timeline created: %p", timeline);
+			break;
+		case CMD_CREATE_SYNC_FENCE:
+			DPRINT("exec CMD_CREATE_SYNC_FENCE: "
+					"handle=0x%llx time_arg=%d",
+					handle, time_arg);
+			sync_fence_fd = goldfish_sync_fence_create(timeline, time_arg);
+			goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_FENCE,
+										sync_fence_fd,
+										0,
+										hostcmd_handle);
+			break;
+		case CMD_SYNC_TIMELINE_INC:
+			DPRINT("exec CMD_SYNC_TIMELINE_INC: "
+					"handle=0x%llx time_arg=%d",
+					handle, time_arg);
+			goldfish_sync_timeline_inc(timeline, time_arg);
+			break;
+		case CMD_DESTROY_SYNC_TIMELINE:
+			DPRINT("exec CMD_DESTROY_SYNC_TIMELINE: "
+					"handle=0x%llx time_arg=%d",
+					handle, time_arg);
+			goldfish_sync_timeline_destroy(timeline);
+			break;
+		}
+		DPRINT("Done executing sync command");
+	}
+	mutex_unlock(&sync_state->mutex_lock);
+}
+
+/* Guest-side interface: file operations */
+
+/* Goldfish sync context and ioctl info.
+ *
+ * When a sync context is created by open()-ing the goldfish sync device, we
+ * create a sync context (|goldfish_sync_context|).
+ *
+ * Currently, the only data required to track is the sync timeline itself
+ * along with the current time, which are all packed up in the
+ * |goldfish_sync_timeline_obj| field. We use a |goldfish_sync_context|
+ * as the filp->private_data.
+ *
+ * Next, when a sync context user requests that work be queued and a fence
+ * fd provided, we use the |goldfish_sync_ioctl_info| struct, which holds
+ * information about which host handles to touch for this particular
+ * queue-work operation. We need to know about the host-side sync thread
+ * and the particular host-side GLsync object. We also possibly write out
+ * a file descriptor.
+ */
+struct goldfish_sync_context {
+	struct goldfish_sync_timeline_obj *timeline;
+};
+
+struct goldfish_sync_ioctl_info {
+	uint64_t host_glsync_handle_in;
+	uint64_t host_syncthread_handle_in;
+	int fence_fd_out;
+};
+
+static int goldfish_sync_open(struct inode *inode, struct file *file)
+{
+
+	struct goldfish_sync_context *sync_context;
+
+	DTRACE();
+
+	mutex_lock(&global_sync_state->mutex_lock);
+
+	sync_context = kzalloc(sizeof(struct goldfish_sync_context), GFP_KERNEL);
+
+	if (sync_context == NULL) {
+		ERR("Creation of goldfish sync context failed!");
+		mutex_unlock(&global_sync_state->mutex_lock);
+		return -ENOMEM;
+	}
+
+	sync_context->timeline = NULL;
+
+	file->private_data = sync_context;
+
+	DPRINT("successfully create a sync context @0x%p", sync_context);
+
+	mutex_unlock(&global_sync_state->mutex_lock);
+
+	return 0;
+}
+
+static int goldfish_sync_release(struct inode *inode, struct file *file)
+{
+
+	struct goldfish_sync_context *sync_context;
+
+	DTRACE();
+
+	mutex_lock(&global_sync_state->mutex_lock);
+
+	sync_context = file->private_data;
+
+	if (sync_context->timeline)
+		goldfish_sync_timeline_destroy(sync_context->timeline);
+
+	sync_context->timeline = NULL;
+
+	kfree(sync_context);
+
+	mutex_unlock(&global_sync_state->mutex_lock);
+
+	return 0;
+}
+
+/* |goldfish_sync_ioctl| is the guest-facing interface of goldfish sync
+ * and is used in conjunction with eglCreateSyncKHR to queue up the
+ * actual work of waiting for the EGL sync command to complete,
+ * possibly returning a fence fd to the guest.
+ */
+static long goldfish_sync_ioctl(struct file *file,
+								unsigned int cmd,
+								unsigned long arg)
+{
+	struct goldfish_sync_context *sync_context_data;
+	struct goldfish_sync_timeline_obj *timeline;
+	int fd_out;
+	struct goldfish_sync_ioctl_info ioctl_data;
+
+	DTRACE();
+
+	sync_context_data = file->private_data;
+	fd_out = -1;
+
+	switch (cmd) {
+	case GOLDFISH_SYNC_IOC_QUEUE_WORK:
+
+		DPRINT("exec GOLDFISH_SYNC_IOC_QUEUE_WORK");
+
+		mutex_lock(&global_sync_state->mutex_lock);
+
+		if (copy_from_user(&ioctl_data,
+						(void __user *)arg,
+						sizeof(ioctl_data))) {
+			ERR("Failed to copy memory for ioctl_data from user.");
+			mutex_unlock(&global_sync_state->mutex_lock);
+			return -EFAULT;
+		}
+
+		if (ioctl_data.host_syncthread_handle_in == 0) {
+			DPRINT("Error: zero host syncthread handle!!!");
+			mutex_unlock(&global_sync_state->mutex_lock);
+			return -EFAULT;
+		}
+
+		if (!sync_context_data->timeline) {
+			DPRINT("no timeline yet, create one.");
+			sync_context_data->timeline = goldfish_sync_timeline_create();
+			DPRINT("timeline: 0x%p", &sync_context_data->timeline);
+		}
+
+		timeline = sync_context_data->timeline;
+		fd_out = goldfish_sync_fence_create(timeline,
+											timeline->current_time + 1);
+		DPRINT("Created fence with fd %d and current time %u (timeline: 0x%p)",
+			   fd_out,
+			   sync_context_data->timeline->current_time + 1,
+			   sync_context_data->timeline);
+
+		ioctl_data.fence_fd_out = fd_out;
+
+		if (copy_to_user((void __user *)arg,
+						&ioctl_data,
+						sizeof(ioctl_data))) {
+			DPRINT("Error, could not copy to user!!!");
+
+			sys_close(fd_out);
+			/* We won't be doing an increment, kref_put immediately. */
+			kref_put(&timeline->kref, delete_timeline_obj);
+			mutex_unlock(&global_sync_state->mutex_lock);
+			return -EFAULT;
+		}
+
+		/* We are now about to trigger a host-side wait;
+		 * accumulate on |pending_waits|. */
+		goldfish_sync_send_guestcmd(global_sync_state,
+				CMD_TRIGGER_HOST_WAIT,
+				ioctl_data.host_glsync_handle_in,
+				ioctl_data.host_syncthread_handle_in,
+				(uint64_t)(uintptr_t)(sync_context_data->timeline));
+
+		mutex_unlock(&global_sync_state->mutex_lock);
+		return 0;
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations goldfish_sync_fops = {
+	.owner = THIS_MODULE,
+	.open = goldfish_sync_open,
+	.release = goldfish_sync_release,
+	.unlocked_ioctl = goldfish_sync_ioctl,
+	.compat_ioctl = goldfish_sync_ioctl,
+};
+
+static struct miscdevice goldfish_sync_device = {
+	.name = "goldfish_sync",
+	.fops = &goldfish_sync_fops,
+};
+
+
+static bool setup_verify_batch_cmd_addr(struct goldfish_sync_state *sync_state,
+										void *batch_addr,
+										uint32_t addr_offset,
+										uint32_t addr_offset_high)
+{
+	uint64_t batch_addr_phys;
+	uint32_t batch_addr_phys_test_lo;
+	uint32_t batch_addr_phys_test_hi;
+
+	if (!batch_addr) {
+		ERR("Could not use batch command address!");
+		return false;
+	}
+
+	batch_addr_phys = virt_to_phys(batch_addr);
+	writel((uint32_t)(batch_addr_phys),
+			sync_state->reg_base + addr_offset);
+	writel((uint32_t)(batch_addr_phys >> 32),
+			sync_state->reg_base + addr_offset_high);
+
+	batch_addr_phys_test_lo =
+		readl(sync_state->reg_base + addr_offset);
+	batch_addr_phys_test_hi =
+		readl(sync_state->reg_base + addr_offset_high);
+
+	if (virt_to_phys(batch_addr) !=
+			(((uint64_t)batch_addr_phys_test_hi << 32) |
+			 batch_addr_phys_test_lo)) {
+		ERR("Invalid batch command address!");
+		return false;
+	}
+
+	return true;
+}
+
+int goldfish_sync_probe(struct platform_device *pdev)
+{
+	struct resource *ioresource;
+	struct goldfish_sync_state *sync_state = global_sync_state;
+	int status;
+
+	DTRACE();
+
+	sync_state->to_do_end = 0;
+
+	spin_lock_init(&sync_state->lock);
+	mutex_init(&sync_state->mutex_lock);
+
+	platform_set_drvdata(pdev, sync_state);
+
+	ioresource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (ioresource == NULL) {
+		ERR("platform_get_resource failed");
+		return -ENODEV;
+	}
+
+	sync_state->reg_base =
+		devm_ioremap(&pdev->dev, ioresource->start, PAGE_SIZE);
+	if (sync_state->reg_base == NULL) {
+		ERR("Could not ioremap");
+		return -ENOMEM;
+	}
+
+	sync_state->irq = platform_get_irq(pdev, 0);
+	if (sync_state->irq < 0) {
+		ERR("Could not platform_get_irq");
+		return -ENODEV;
+	}
+
+	status = devm_request_irq(&pdev->dev,
+							sync_state->irq,
+							goldfish_sync_interrupt,
+							IRQF_SHARED,
+							pdev->name,
+							sync_state);
+	if (status) {
+		ERR("request_irq failed");
+		return -ENODEV;
+	}
+
+	INIT_WORK(&sync_state->work_item,
+			  goldfish_sync_work_item_fn);
+
+	misc_register(&goldfish_sync_device);
+
+	/* Obtain addresses for batch send/recv of commands. */
+	{
+		struct goldfish_sync_hostcmd *batch_addr_hostcmd;
+		struct goldfish_sync_guestcmd *batch_addr_guestcmd;
+
+		batch_addr_hostcmd =
+			devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_hostcmd),
+				GFP_KERNEL);
+		batch_addr_guestcmd =
+			devm_kzalloc(&pdev->dev, sizeof(struct goldfish_sync_guestcmd),
+				GFP_KERNEL);
+
+		if (!setup_verify_batch_cmd_addr(sync_state,
+					batch_addr_hostcmd,
+					SYNC_REG_BATCH_COMMAND_ADDR,
+					SYNC_REG_BATCH_COMMAND_ADDR_HIGH)) {
+			ERR("goldfish_sync: Could not setup batch command address");
+			return -ENODEV;
+		}
+
+		if (!setup_verify_batch_cmd_addr(sync_state,
+					batch_addr_guestcmd,
+					SYNC_REG_BATCH_GUESTCOMMAND_ADDR,
+					SYNC_REG_BATCH_GUESTCOMMAND_ADDR_HIGH)) {
+			ERR("goldfish_sync: Could not setup batch guest command address");
+			return -ENODEV;
+		}
+
+		sync_state->batch_hostcmd = batch_addr_hostcmd;
+		sync_state->batch_guestcmd = batch_addr_guestcmd;
+	}
+
+	INFO("goldfish_sync: Initialized goldfish sync device");
+
+	writel(0, sync_state->reg_base + SYNC_REG_INIT);
+
+	return 0;
+}
+
+static int goldfish_sync_remove(struct platform_device *pdev)
+{
+	struct goldfish_sync_state *sync_state = global_sync_state;
+
+	DTRACE();
+
+	misc_deregister(&goldfish_sync_device);
+	memset(sync_state, 0, sizeof(struct goldfish_sync_state));
+	return 0;
+}
+
+static const struct of_device_id goldfish_sync_of_match[] = {
+	{ .compatible = "google,goldfish-sync", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, goldfish_sync_of_match);
+
+static const struct acpi_device_id goldfish_sync_acpi_match[] = {
+	{ "GFSH0006", 0 },
+	{ },
+};
+
+MODULE_DEVICE_TABLE(acpi, goldfish_sync_acpi_match);
+
+static struct platform_driver goldfish_sync = {
+	.probe = goldfish_sync_probe,
+	.remove = goldfish_sync_remove,
+	.driver = {
+		.name = "goldfish_sync",
+		.of_match_table = goldfish_sync_of_match,
+		.acpi_match_table = ACPI_PTR(goldfish_sync_acpi_match),
+	}
+};
+
+module_platform_driver(goldfish_sync);
+
+MODULE_AUTHOR("Google, Inc.");
+MODULE_DESCRIPTION("Android QEMU Sync Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
diff --git a/drivers/staging/goldfish/goldfish_sync_timeline_fence.c b/drivers/staging/goldfish/goldfish_sync_timeline_fence.c
new file mode 100644
index 0000000..e671618
--- /dev/null
+++ b/drivers/staging/goldfish/goldfish_sync_timeline_fence.c
@@ -0,0 +1,254 @@
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/syscalls.h>
+#include <linux/sync_file.h>
+#include <linux/fence.h>
+
+#include "goldfish_sync_timeline_fence.h"
+
+/*
+ * Timeline-based sync for Goldfish Sync
+ * Based on "Sync File validation framework"
+ * (drivers/dma-buf/sw_sync.c)
+ *
+ * Copyright (C) 2017 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/**
+ * struct goldfish_sync_timeline - sync object
+ * @kref:		reference count on fence.
+ * @name:		name of the goldfish_sync_timeline. Useful for debugging
+ * @child_list_head:	list of children sync_pts for this goldfish_sync_timeline
+ * @child_list_lock:	lock protecting @child_list_head and fence.status
+ * @active_list_head:	list of active (unsignaled/errored) sync_pts
+ */
+struct goldfish_sync_timeline {
+	struct kref		kref;
+	char			name[32];
+
+	/* protected by child_list_lock */
+	u64			context;
+	int			value;
+
+	struct list_head	child_list_head;
+	spinlock_t		child_list_lock;
+
+	struct list_head	active_list_head;
+};
+
+static inline struct goldfish_sync_timeline *fence_parent(struct fence *fence)
+{
+	return container_of(fence->lock, struct goldfish_sync_timeline,
+				child_list_lock);
+}
+
+static const struct fence_ops goldfish_sync_timeline_fence_ops;
+
+static inline struct sync_pt *goldfish_sync_fence_to_sync_pt(struct fence *fence)
+{
+	if (fence->ops != &goldfish_sync_timeline_fence_ops)
+		return NULL;
+	return container_of(fence, struct sync_pt, base);
+}
+
+/**
+ * goldfish_sync_timeline_create_internal() - creates a sync object
+ * @name:	sync_timeline name
+ *
+ * Creates a new sync_timeline. Returns the sync_timeline object or NULL in
+ * case of error.
+ */
+struct goldfish_sync_timeline
+*goldfish_sync_timeline_create_internal(const char *name)
+{
+	struct goldfish_sync_timeline *obj;
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	if (!obj)
+		return NULL;
+
+	kref_init(&obj->kref);
+	obj->context = fence_context_alloc(1);
+	strlcpy(obj->name, name, sizeof(obj->name));
+
+	INIT_LIST_HEAD(&obj->child_list_head);
+	INIT_LIST_HEAD(&obj->active_list_head);
+	spin_lock_init(&obj->child_list_lock);
+
+	return obj;
+}
+
+static void goldfish_sync_timeline_free_internal(struct kref *kref)
+{
+	struct goldfish_sync_timeline *obj =
+		container_of(kref, struct goldfish_sync_timeline, kref);
+
+	kfree(obj);
+}
+
+static void goldfish_sync_timeline_get_internal(
+					struct goldfish_sync_timeline *obj)
+{
+	kref_get(&obj->kref);
+}
+
+void goldfish_sync_timeline_put_internal(struct goldfish_sync_timeline *obj)
+{
+	kref_put(&obj->kref, goldfish_sync_timeline_free_internal);
+}
+
+/**
+ * goldfish_sync_timeline_signal() -
+ * signal a status change on a goldfish_sync_timeline
+ * @obj:	sync_timeline to signal
+ * @inc:	num to increment on timeline->value
+ *
+ * A sync implementation should call this any time one of it's fences
+ * has signaled or has an error condition.
+ */
+void goldfish_sync_timeline_signal_internal(struct goldfish_sync_timeline *obj,
+											unsigned int inc)
+{
+	unsigned long flags;
+	struct sync_pt *pt, *next;
+
+	spin_lock_irqsave(&obj->child_list_lock, flags);
+
+	obj->value += inc;
+
+	list_for_each_entry_safe(pt, next, &obj->active_list_head,
+				 active_list) {
+		if (fence_is_signaled_locked(&pt->base))
+			list_del_init(&pt->active_list);
+	}
+
+	spin_unlock_irqrestore(&obj->child_list_lock, flags);
+}
+
+/**
+ * goldfish_sync_pt_create_internal() - creates a sync pt
+ * @parent:	fence's parent sync_timeline
+ * @size:	size to allocate for this pt
+ * @inc:	value of the fence
+ *
+ * Creates a new sync_pt as a child of @parent.  @size bytes will be
+ * allocated allowing for implementation specific data to be kept after
+ * the generic sync_timeline struct. Returns the sync_pt object or
+ * NULL in case of error.
+ */
+struct sync_pt *goldfish_sync_pt_create_internal(
+					struct goldfish_sync_timeline *obj, int size,
+				 	unsigned int value)
+{
+	unsigned long flags;
+	struct sync_pt *pt;
+
+	if (size < sizeof(*pt))
+		return NULL;
+
+	pt = kzalloc(size, GFP_KERNEL);
+	if (!pt)
+		return NULL;
+
+	spin_lock_irqsave(&obj->child_list_lock, flags);
+	goldfish_sync_timeline_get_internal(obj);
+	fence_init(&pt->base, &goldfish_sync_timeline_fence_ops, &obj->child_list_lock,
+		   obj->context, value);
+	list_add_tail(&pt->child_list, &obj->child_list_head);
+	INIT_LIST_HEAD(&pt->active_list);
+	spin_unlock_irqrestore(&obj->child_list_lock, flags);
+	return pt;
+}
+
+static const char *goldfish_sync_timeline_fence_get_driver_name(
+						struct fence *fence)
+{
+	return "sw_sync";
+}
+
+static const char *goldfish_sync_timeline_fence_get_timeline_name(
+						struct fence *fence)
+{
+	struct goldfish_sync_timeline *parent = fence_parent(fence);
+
+	return parent->name;
+}
+
+static void goldfish_sync_timeline_fence_release(struct fence *fence)
+{
+	struct sync_pt *pt = goldfish_sync_fence_to_sync_pt(fence);
+	struct goldfish_sync_timeline *parent = fence_parent(fence);
+	unsigned long flags;
+
+	spin_lock_irqsave(fence->lock, flags);
+	list_del(&pt->child_list);
+	if (!list_empty(&pt->active_list))
+		list_del(&pt->active_list);
+	spin_unlock_irqrestore(fence->lock, flags);
+
+	goldfish_sync_timeline_put_internal(parent);
+	fence_free(fence);
+}
+
+static bool goldfish_sync_timeline_fence_signaled(struct fence *fence)
+{
+	struct goldfish_sync_timeline *parent = fence_parent(fence);
+
+	return (fence->seqno > parent->value) ? false : true;
+}
+
+static bool goldfish_sync_timeline_fence_enable_signaling(struct fence *fence)
+{
+	struct sync_pt *pt = goldfish_sync_fence_to_sync_pt(fence);
+	struct goldfish_sync_timeline *parent = fence_parent(fence);
+
+	if (goldfish_sync_timeline_fence_signaled(fence))
+		return false;
+
+	list_add_tail(&pt->active_list, &parent->active_list_head);
+	return true;
+}
+
+static void goldfish_sync_timeline_fence_disable_signaling(struct fence *fence)
+{
+	struct sync_pt *pt = container_of(fence, struct sync_pt, base);
+
+	list_del_init(&pt->active_list);
+}
+
+static void goldfish_sync_timeline_fence_value_str(struct fence *fence,
+					char *str, int size)
+{
+	snprintf(str, size, "%d", fence->seqno);
+}
+
+static void goldfish_sync_timeline_fence_timeline_value_str(
+				struct fence *fence,
+				char *str, int size)
+{
+	struct goldfish_sync_timeline *parent = fence_parent(fence);
+
+	snprintf(str, size, "%d", parent->value);
+}
+
+static const struct fence_ops goldfish_sync_timeline_fence_ops = {
+	.get_driver_name = goldfish_sync_timeline_fence_get_driver_name,
+	.get_timeline_name = goldfish_sync_timeline_fence_get_timeline_name,
+	.enable_signaling = goldfish_sync_timeline_fence_enable_signaling,
+	.disable_signaling = goldfish_sync_timeline_fence_disable_signaling,
+	.signaled = goldfish_sync_timeline_fence_signaled,
+	.wait = fence_default_wait,
+	.release = goldfish_sync_timeline_fence_release,
+	.fence_value_str = goldfish_sync_timeline_fence_value_str,
+	.timeline_value_str = goldfish_sync_timeline_fence_timeline_value_str,
+};
diff --git a/drivers/staging/goldfish/goldfish_sync_timeline_fence.h b/drivers/staging/goldfish/goldfish_sync_timeline_fence.h
new file mode 100644
index 0000000..fc25924
--- /dev/null
+++ b/drivers/staging/goldfish/goldfish_sync_timeline_fence.h
@@ -0,0 +1,58 @@
+#include <linux/sync_file.h>
+#include <linux/fence.h>
+
+/**
+ * struct sync_pt - sync_pt object
+ * @base: base fence object
+ * @child_list: sync timeline child's list
+ * @active_list: sync timeline active child's list
+ */
+struct sync_pt {
+	struct fence base;
+	struct list_head child_list;
+	struct list_head active_list;
+};
+
+/**
+ * goldfish_sync_timeline_create_internal() - creates a sync object
+ * @name:	goldfish_sync_timeline name
+ *
+ * Creates a new goldfish_sync_timeline.
+ * Returns the goldfish_sync_timeline object or NULL in case of error.
+ */
+struct goldfish_sync_timeline
+*goldfish_sync_timeline_create_internal(const char *name);
+
+/**
+ * goldfish_sync_pt_create_internal() - creates a sync pt
+ * @parent:	fence's parent goldfish_sync_timeline
+ * @size:	size to allocate for this pt
+ * @inc:	value of the fence
+ *
+ * Creates a new sync_pt as a child of @parent.  @size bytes will be
+ * allocated allowing for implementation specific data to be kept after
+ * the generic sync_timeline struct. Returns the sync_pt object or
+ * NULL in case of error.
+ */
+struct sync_pt
+*goldfish_sync_pt_create_internal(struct goldfish_sync_timeline *obj,
+									int size, unsigned int value);
+
+/**
+ * goldfish_sync_timeline_signal_internal() -
+ * signal a status change on a sync_timeline
+ * @obj:	goldfish_sync_timeline to signal
+ * @inc:	num to increment on timeline->value
+ *
+ * A sync implementation should call this any time one of it's fences
+ * has signaled or has an error condition.
+ */
+void goldfish_sync_timeline_signal_internal(struct goldfish_sync_timeline *obj,
+											unsigned int inc);
+
+/**
+ * goldfish_sync_timeline_put_internal() - dec refcount of a sync_timeline
+ * and clean up memory if it was the last ref.
+ * @obj:	goldfish_sync_timeline to decref
+ */
+void goldfish_sync_timeline_put_internal(struct goldfish_sync_timeline *obj);
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 55ed4c6..2a5979f 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -131,6 +131,9 @@
 	struct uart_state *state = tty->driver_data;
 	struct uart_port *port = state->uart_port;
 
+	if (port && port->ops->wake_peer)
+		port->ops->wake_peer(port);
+
 	if (port && !uart_tx_stopped(port))
 		port->ops->start_tx(port);
 }
diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index f3ee80e..f6cce5a 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -209,6 +209,18 @@
 config USB_F_TCM
 	tristate
 
+config USB_F_MTP
+	tristate
+
+config USB_F_PTP
+        tristate
+
+config USB_F_AUDIO_SRC
+	tristate
+
+config USB_F_ACC
+	tristate
+
 # this first set of drivers all depend on bulk-capable hardware.
 
 config USB_CONFIGFS
@@ -362,6 +374,44 @@
 	  implemented in kernel space (for instance Ethernet, serial or
 	  mass storage) and other are implemented in user space.
 
+config USB_CONFIGFS_F_MTP
+	boolean "MTP gadget"
+	depends on USB_CONFIGFS
+	select USB_F_MTP
+	help
+	  USB gadget MTP support
+
+config USB_CONFIGFS_F_PTP
+	boolean "PTP gadget"
+	depends on USB_CONFIGFS && USB_CONFIGFS_F_MTP
+	select USB_F_PTP
+	help
+	  USB gadget PTP support
+
+config USB_CONFIGFS_F_ACC
+	boolean "Accessory gadget"
+	depends on USB_CONFIGFS
+	select USB_F_ACC
+	help
+	  USB gadget Accessory support
+
+config USB_CONFIGFS_F_AUDIO_SRC
+	boolean "Audio Source gadget"
+	depends on USB_CONFIGFS && USB_CONFIGFS_F_ACC
+	depends on SND
+	select SND_PCM
+	select USB_F_AUDIO_SRC
+	help
+	  USB gadget Audio Source support
+
+config USB_CONFIGFS_UEVENT
+	boolean "Uevent notification of Gadget state"
+	depends on USB_CONFIGFS
+	help
+	  Enable uevent notifications to userspace when the gadget
+	  state changes. The gadget can be in any of the following
+	  three states: "CONNECTED/DISCONNECTED/CONFIGURED"
+
 config USB_CONFIGFS_F_UAC1
 	bool "Audio Class 1.0"
 	depends on USB_CONFIGFS
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 2c022a0..a8b4ca0 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1996,6 +1996,12 @@
 	struct usb_composite_dev	*cdev = get_gadget_data(gadget);
 	unsigned long			flags;
 
+	if (cdev == NULL) {
+		WARN(1, "%s: Calling disconnect on a Gadget that is \
+			 not connected\n", __func__);
+		return;
+	}
+
 	/* REVISIT:  should we have config and device level
 	 * disconnect callbacks?
 	 */
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index a5ca409..b1d22d8 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -9,6 +9,31 @@
 #include "u_f.h"
 #include "u_os_desc.h"
 
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+#include <linux/platform_device.h>
+#include <linux/kdev_t.h>
+#include <linux/usb/ch9.h>
+
+#ifdef CONFIG_USB_CONFIGFS_F_ACC
+extern int acc_ctrlrequest(struct usb_composite_dev *cdev,
+				const struct usb_ctrlrequest *ctrl);
+void acc_disconnect(void);
+#endif
+static struct class *android_class;
+static struct device *android_device;
+static int index;
+
+struct device *create_function_device(char *name)
+{
+	if (android_device && !IS_ERR(android_device))
+		return device_create(android_class, android_device,
+			MKDEV(0, index++), NULL, name);
+	else
+		return ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL_GPL(create_function_device);
+#endif
+
 int check_user_usb_string(const char *name,
 		struct usb_gadget_strings *stringtab_dev)
 {
@@ -60,6 +85,12 @@
 	bool use_os_desc;
 	char b_vendor_code;
 	char qw_sign[OS_STRING_QW_SIGN_LEN];
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+	bool connected;
+	bool sw_connected;
+	struct work_struct work;
+	struct device *dev;
+#endif
 };
 
 static inline struct gadget_info *to_gadget_info(struct config_item *item)
@@ -265,7 +296,7 @@
 
 	mutex_lock(&gi->lock);
 
-	if (!strlen(name)) {
+	if (!strlen(name) || strcmp(name, "none") == 0) {
 		ret = unregister_gadget(gi);
 		if (ret)
 			goto err;
@@ -1369,6 +1400,60 @@
 	return ret;
 }
 
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+static void android_work(struct work_struct *data)
+{
+	struct gadget_info *gi = container_of(data, struct gadget_info, work);
+	struct usb_composite_dev *cdev = &gi->cdev;
+	char *disconnected[2] = { "USB_STATE=DISCONNECTED", NULL };
+	char *connected[2]    = { "USB_STATE=CONNECTED", NULL };
+	char *configured[2]   = { "USB_STATE=CONFIGURED", NULL };
+	/* 0-connected 1-configured 2-disconnected*/
+	bool status[3] = { false, false, false };
+	unsigned long flags;
+	bool uevent_sent = false;
+
+	spin_lock_irqsave(&cdev->lock, flags);
+	if (cdev->config)
+		status[1] = true;
+
+	if (gi->connected != gi->sw_connected) {
+		if (gi->connected)
+			status[0] = true;
+		else
+			status[2] = true;
+		gi->sw_connected = gi->connected;
+	}
+	spin_unlock_irqrestore(&cdev->lock, flags);
+
+	if (status[0]) {
+		kobject_uevent_env(&android_device->kobj,
+					KOBJ_CHANGE, connected);
+		pr_info("%s: sent uevent %s\n", __func__, connected[0]);
+		uevent_sent = true;
+	}
+
+	if (status[1]) {
+		kobject_uevent_env(&android_device->kobj,
+					KOBJ_CHANGE, configured);
+		pr_info("%s: sent uevent %s\n", __func__, configured[0]);
+		uevent_sent = true;
+	}
+
+	if (status[2]) {
+		kobject_uevent_env(&android_device->kobj,
+					KOBJ_CHANGE, disconnected);
+		pr_info("%s: sent uevent %s\n", __func__, disconnected[0]);
+		uevent_sent = true;
+	}
+
+	if (!uevent_sent) {
+		pr_info("%s: did not send uevent (%d %d %p)\n", __func__,
+			gi->connected, gi->sw_connected, cdev->config);
+	}
+}
+#endif
+
 static void configfs_composite_unbind(struct usb_gadget *gadget)
 {
 	struct usb_composite_dev	*cdev;
@@ -1388,14 +1473,91 @@
 	set_gadget_data(gadget, NULL);
 }
 
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+static int android_setup(struct usb_gadget *gadget,
+			const struct usb_ctrlrequest *c)
+{
+	struct usb_composite_dev *cdev = get_gadget_data(gadget);
+	unsigned long flags;
+	struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev);
+	int value = -EOPNOTSUPP;
+	struct usb_function_instance *fi;
+
+	spin_lock_irqsave(&cdev->lock, flags);
+	if (!gi->connected) {
+		gi->connected = 1;
+		schedule_work(&gi->work);
+	}
+	spin_unlock_irqrestore(&cdev->lock, flags);
+	list_for_each_entry(fi, &gi->available_func, cfs_list) {
+		if (fi != NULL && fi->f != NULL && fi->f->setup != NULL) {
+			value = fi->f->setup(fi->f, c);
+			if (value >= 0)
+				break;
+		}
+	}
+
+#ifdef CONFIG_USB_CONFIGFS_F_ACC
+	if (value < 0)
+		value = acc_ctrlrequest(cdev, c);
+#endif
+
+	if (value < 0)
+		value = composite_setup(gadget, c);
+
+	spin_lock_irqsave(&cdev->lock, flags);
+	if (c->bRequest == USB_REQ_SET_CONFIGURATION &&
+						cdev->config) {
+		schedule_work(&gi->work);
+	}
+	spin_unlock_irqrestore(&cdev->lock, flags);
+
+	return value;
+}
+
+static void android_disconnect(struct usb_gadget *gadget)
+{
+	struct usb_composite_dev        *cdev = get_gadget_data(gadget);
+	struct gadget_info *gi = container_of(cdev, struct gadget_info, cdev);
+
+	/* FIXME: There's a race between usb_gadget_udc_stop() which is likely
+	 * to set the gadget driver to NULL in the udc driver and this drivers
+	 * gadget disconnect fn which likely checks for the gadget driver to
+	 * be a null ptr. It happens that unbind (doing set_gadget_data(NULL))
+	 * is called before the gadget driver is set to NULL and the udc driver
+	 * calls disconnect fn which results in cdev being a null ptr.
+	 */
+	if (cdev == NULL) {
+		WARN(1, "%s: gadget driver already disconnected\n", __func__);
+		return;
+	}
+
+	/* accessory HID support can be active while the
+		accessory function is not actually enabled,
+		so we need to inform it when we are disconnected.
+	*/
+
+#ifdef CONFIG_USB_CONFIGFS_F_ACC
+	acc_disconnect();
+#endif
+	gi->connected = 0;
+	schedule_work(&gi->work);
+	composite_disconnect(gadget);
+}
+#endif
+
 static const struct usb_gadget_driver configfs_driver_template = {
 	.bind           = configfs_composite_bind,
 	.unbind         = configfs_composite_unbind,
-
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+	.setup          = android_setup,
+	.reset          = android_disconnect,
+	.disconnect     = android_disconnect,
+#else
 	.setup          = composite_setup,
 	.reset          = composite_disconnect,
 	.disconnect     = composite_disconnect,
-
+#endif
 	.suspend	= composite_suspend,
 	.resume		= composite_resume,
 
@@ -1407,6 +1569,89 @@
 	.match_existing_only = 1,
 };
 
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+static ssize_t state_show(struct device *pdev, struct device_attribute *attr,
+			char *buf)
+{
+	struct gadget_info *dev = dev_get_drvdata(pdev);
+	struct usb_composite_dev *cdev;
+	char *state = "DISCONNECTED";
+	unsigned long flags;
+
+	if (!dev)
+		goto out;
+
+	cdev = &dev->cdev;
+
+	if (!cdev)
+		goto out;
+
+	spin_lock_irqsave(&cdev->lock, flags);
+	if (cdev->config)
+		state = "CONFIGURED";
+	else if (dev->connected)
+		state = "CONNECTED";
+	spin_unlock_irqrestore(&cdev->lock, flags);
+out:
+	return sprintf(buf, "%s\n", state);
+}
+
+static DEVICE_ATTR(state, S_IRUGO, state_show, NULL);
+
+static struct device_attribute *android_usb_attributes[] = {
+	&dev_attr_state,
+	NULL
+};
+
+static int android_device_create(struct gadget_info *gi)
+{
+	struct device_attribute **attrs;
+	struct device_attribute *attr;
+
+	INIT_WORK(&gi->work, android_work);
+	android_device = device_create(android_class, NULL,
+				MKDEV(0, 0), NULL, "android0");
+	if (IS_ERR(android_device))
+		return PTR_ERR(android_device);
+
+	dev_set_drvdata(android_device, gi);
+
+	attrs = android_usb_attributes;
+	while ((attr = *attrs++)) {
+		int err;
+
+		err = device_create_file(android_device, attr);
+		if (err) {
+			device_destroy(android_device->class,
+				       android_device->devt);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static void android_device_destroy(void)
+{
+	struct device_attribute **attrs;
+	struct device_attribute *attr;
+
+	attrs = android_usb_attributes;
+	while ((attr = *attrs++))
+		device_remove_file(android_device, attr);
+	device_destroy(android_device->class, android_device->devt);
+}
+#else
+static inline int android_device_create(struct gadget_info *gi)
+{
+	return 0;
+}
+
+static inline void android_device_destroy(void)
+{
+}
+#endif
+
 static struct config_group *gadgets_make(
 		struct config_group *group,
 		const char *name)
@@ -1458,7 +1703,11 @@
 	if (!gi->composite.gadget_driver.function)
 		goto err;
 
+	if (android_device_create(gi) < 0)
+		goto err;
+
 	return &gi->group;
+
 err:
 	kfree(gi);
 	return ERR_PTR(-ENOMEM);
@@ -1467,6 +1716,7 @@
 static void gadgets_drop(struct config_group *group, struct config_item *item)
 {
 	config_item_put(item);
+	android_device_destroy();
 }
 
 static struct configfs_group_operations gadgets_ops = {
@@ -1506,6 +1756,13 @@
 	config_group_init(&gadget_subsys.su_group);
 
 	ret = configfs_register_subsystem(&gadget_subsys);
+
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+	android_class = class_create(THIS_MODULE, "android_usb");
+	if (IS_ERR(android_class))
+		return PTR_ERR(android_class);
+#endif
+
 	return ret;
 }
 module_init(gadget_cfs_init);
@@ -1513,5 +1770,10 @@
 static void __exit gadget_cfs_exit(void)
 {
 	configfs_unregister_subsystem(&gadget_subsys);
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+	if (!IS_ERR(android_class))
+		class_destroy(android_class);
+#endif
+
 }
 module_exit(gadget_cfs_exit);
diff --git a/drivers/usb/gadget/function/Makefile b/drivers/usb/gadget/function/Makefile
index cb8c225..78682d5 100644
--- a/drivers/usb/gadget/function/Makefile
+++ b/drivers/usb/gadget/function/Makefile
@@ -46,3 +46,11 @@
 obj-$(CONFIG_USB_F_PRINTER)	+= usb_f_printer.o
 usb_f_tcm-y			:= f_tcm.o
 obj-$(CONFIG_USB_F_TCM)		+= usb_f_tcm.o
+usb_f_mtp-y                     := f_mtp.o
+obj-$(CONFIG_USB_F_MTP)         += usb_f_mtp.o
+usb_f_ptp-y                     := f_ptp.o
+obj-$(CONFIG_USB_F_PTP)         += usb_f_ptp.o
+usb_f_audio_source-y            := f_audio_source.o
+obj-$(CONFIG_USB_F_AUDIO_SRC)   += usb_f_audio_source.o
+usb_f_accessory-y               := f_accessory.o
+obj-$(CONFIG_USB_F_ACC)         += usb_f_accessory.o
diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c
new file mode 100644
index 0000000..7aa2656
--- /dev/null
+++ b/drivers/usb/gadget/function/f_accessory.c
@@ -0,0 +1,1352 @@
+/*
+ * Gadget Function Driver for Android USB accessories
+ *
+ * Copyright (C) 2011 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/* #define DEBUG */
+/* #define VERBOSE_DEBUG */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+
+#include <linux/hid.h>
+#include <linux/hiddev.h>
+#include <linux/usb.h>
+#include <linux/usb/ch9.h>
+#include <linux/usb/f_accessory.h>
+
+#include <linux/configfs.h>
+#include <linux/usb/composite.h>
+
+#define MAX_INST_NAME_LEN        40
+#define BULK_BUFFER_SIZE    16384
+#define ACC_STRING_SIZE     256
+
+#define PROTOCOL_VERSION    2
+
+/* String IDs */
+#define INTERFACE_STRING_INDEX	0
+
+/* number of tx and rx requests to allocate */
+#define TX_REQ_MAX 4
+#define RX_REQ_MAX 2
+
+struct acc_hid_dev {
+	struct list_head	list;
+	struct hid_device *hid;
+	struct acc_dev *dev;
+	/* accessory defined ID */
+	int id;
+	/* HID report descriptor */
+	u8 *report_desc;
+	/* length of HID report descriptor */
+	int report_desc_len;
+	/* number of bytes of report_desc we have received so far */
+	int report_desc_offset;
+};
+
+struct acc_dev {
+	struct usb_function function;
+	struct usb_composite_dev *cdev;
+	spinlock_t lock;
+
+	struct usb_ep *ep_in;
+	struct usb_ep *ep_out;
+
+	/* online indicates state of function_set_alt & function_unbind
+	 * set to 1 when we connect
+	 */
+	int online:1;
+
+	/* disconnected indicates state of open & release
+	 * Set to 1 when we disconnect.
+	 * Not cleared until our file is closed.
+	 */
+	int disconnected:1;
+
+	/* strings sent by the host */
+	char manufacturer[ACC_STRING_SIZE];
+	char model[ACC_STRING_SIZE];
+	char description[ACC_STRING_SIZE];
+	char version[ACC_STRING_SIZE];
+	char uri[ACC_STRING_SIZE];
+	char serial[ACC_STRING_SIZE];
+
+	/* for acc_complete_set_string */
+	int string_index;
+
+	/* set to 1 if we have a pending start request */
+	int start_requested;
+
+	int audio_mode;
+
+	/* synchronize access to our device file */
+	atomic_t open_excl;
+
+	struct list_head tx_idle;
+
+	wait_queue_head_t read_wq;
+	wait_queue_head_t write_wq;
+	struct usb_request *rx_req[RX_REQ_MAX];
+	int rx_done;
+
+	/* delayed work for handling ACCESSORY_START */
+	struct delayed_work start_work;
+
+	/* worker for registering and unregistering hid devices */
+	struct work_struct hid_work;
+
+	/* list of active HID devices */
+	struct list_head	hid_list;
+
+	/* list of new HID devices to register */
+	struct list_head	new_hid_list;
+
+	/* list of dead HID devices to unregister */
+	struct list_head	dead_hid_list;
+};
+
+static struct usb_interface_descriptor acc_interface_desc = {
+	.bLength                = USB_DT_INTERFACE_SIZE,
+	.bDescriptorType        = USB_DT_INTERFACE,
+	.bInterfaceNumber       = 0,
+	.bNumEndpoints          = 2,
+	.bInterfaceClass        = USB_CLASS_VENDOR_SPEC,
+	.bInterfaceSubClass     = USB_SUBCLASS_VENDOR_SPEC,
+	.bInterfaceProtocol     = 0,
+};
+
+static struct usb_endpoint_descriptor acc_highspeed_in_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_IN,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize         = __constant_cpu_to_le16(512),
+};
+
+static struct usb_endpoint_descriptor acc_highspeed_out_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_OUT,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize         = __constant_cpu_to_le16(512),
+};
+
+static struct usb_endpoint_descriptor acc_fullspeed_in_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_IN,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_endpoint_descriptor acc_fullspeed_out_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_OUT,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_descriptor_header *fs_acc_descs[] = {
+	(struct usb_descriptor_header *) &acc_interface_desc,
+	(struct usb_descriptor_header *) &acc_fullspeed_in_desc,
+	(struct usb_descriptor_header *) &acc_fullspeed_out_desc,
+	NULL,
+};
+
+static struct usb_descriptor_header *hs_acc_descs[] = {
+	(struct usb_descriptor_header *) &acc_interface_desc,
+	(struct usb_descriptor_header *) &acc_highspeed_in_desc,
+	(struct usb_descriptor_header *) &acc_highspeed_out_desc,
+	NULL,
+};
+
+static struct usb_string acc_string_defs[] = {
+	[INTERFACE_STRING_INDEX].s	= "Android Accessory Interface",
+	{  },	/* end of list */
+};
+
+static struct usb_gadget_strings acc_string_table = {
+	.language		= 0x0409,	/* en-US */
+	.strings		= acc_string_defs,
+};
+
+static struct usb_gadget_strings *acc_strings[] = {
+	&acc_string_table,
+	NULL,
+};
+
+/* temporary variable used between acc_open() and acc_gadget_bind() */
+static struct acc_dev *_acc_dev;
+
+struct acc_instance {
+	struct usb_function_instance func_inst;
+	const char *name;
+};
+
+static inline struct acc_dev *func_to_dev(struct usb_function *f)
+{
+	return container_of(f, struct acc_dev, function);
+}
+
+static struct usb_request *acc_request_new(struct usb_ep *ep, int buffer_size)
+{
+	struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL);
+
+	if (!req)
+		return NULL;
+
+	/* now allocate buffers for the requests */
+	req->buf = kmalloc(buffer_size, GFP_KERNEL);
+	if (!req->buf) {
+		usb_ep_free_request(ep, req);
+		return NULL;
+	}
+
+	return req;
+}
+
+static void acc_request_free(struct usb_request *req, struct usb_ep *ep)
+{
+	if (req) {
+		kfree(req->buf);
+		usb_ep_free_request(ep, req);
+	}
+}
+
+/* add a request to the tail of a list */
+static void req_put(struct acc_dev *dev, struct list_head *head,
+		struct usb_request *req)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	list_add_tail(&req->list, head);
+	spin_unlock_irqrestore(&dev->lock, flags);
+}
+
+/* remove a request from the head of a list */
+static struct usb_request *req_get(struct acc_dev *dev, struct list_head *head)
+{
+	unsigned long flags;
+	struct usb_request *req;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	if (list_empty(head)) {
+		req = 0;
+	} else {
+		req = list_first_entry(head, struct usb_request, list);
+		list_del(&req->list);
+	}
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return req;
+}
+
+static void acc_set_disconnected(struct acc_dev *dev)
+{
+	dev->disconnected = 1;
+}
+
+static void acc_complete_in(struct usb_ep *ep, struct usb_request *req)
+{
+	struct acc_dev *dev = _acc_dev;
+
+	if (req->status == -ESHUTDOWN) {
+		pr_debug("acc_complete_in set disconnected");
+		acc_set_disconnected(dev);
+	}
+
+	req_put(dev, &dev->tx_idle, req);
+
+	wake_up(&dev->write_wq);
+}
+
+static void acc_complete_out(struct usb_ep *ep, struct usb_request *req)
+{
+	struct acc_dev *dev = _acc_dev;
+
+	dev->rx_done = 1;
+	if (req->status == -ESHUTDOWN) {
+		pr_debug("acc_complete_out set disconnected");
+		acc_set_disconnected(dev);
+	}
+
+	wake_up(&dev->read_wq);
+}
+
+static void acc_complete_set_string(struct usb_ep *ep, struct usb_request *req)
+{
+	struct acc_dev	*dev = ep->driver_data;
+	char *string_dest = NULL;
+	int length = req->actual;
+
+	if (req->status != 0) {
+		pr_err("acc_complete_set_string, err %d\n", req->status);
+		return;
+	}
+
+	switch (dev->string_index) {
+	case ACCESSORY_STRING_MANUFACTURER:
+		string_dest = dev->manufacturer;
+		break;
+	case ACCESSORY_STRING_MODEL:
+		string_dest = dev->model;
+		break;
+	case ACCESSORY_STRING_DESCRIPTION:
+		string_dest = dev->description;
+		break;
+	case ACCESSORY_STRING_VERSION:
+		string_dest = dev->version;
+		break;
+	case ACCESSORY_STRING_URI:
+		string_dest = dev->uri;
+		break;
+	case ACCESSORY_STRING_SERIAL:
+		string_dest = dev->serial;
+		break;
+	}
+	if (string_dest) {
+		unsigned long flags;
+
+		if (length >= ACC_STRING_SIZE)
+			length = ACC_STRING_SIZE - 1;
+
+		spin_lock_irqsave(&dev->lock, flags);
+		memcpy(string_dest, req->buf, length);
+		/* ensure zero termination */
+		string_dest[length] = 0;
+		spin_unlock_irqrestore(&dev->lock, flags);
+	} else {
+		pr_err("unknown accessory string index %d\n",
+			dev->string_index);
+	}
+}
+
+static void acc_complete_set_hid_report_desc(struct usb_ep *ep,
+		struct usb_request *req)
+{
+	struct acc_hid_dev *hid = req->context;
+	struct acc_dev *dev = hid->dev;
+	int length = req->actual;
+
+	if (req->status != 0) {
+		pr_err("acc_complete_set_hid_report_desc, err %d\n",
+			req->status);
+		return;
+	}
+
+	memcpy(hid->report_desc + hid->report_desc_offset, req->buf, length);
+	hid->report_desc_offset += length;
+	if (hid->report_desc_offset == hid->report_desc_len) {
+		/* After we have received the entire report descriptor
+		 * we schedule work to initialize the HID device
+		 */
+		schedule_work(&dev->hid_work);
+	}
+}
+
+static void acc_complete_send_hid_event(struct usb_ep *ep,
+		struct usb_request *req)
+{
+	struct acc_hid_dev *hid = req->context;
+	int length = req->actual;
+
+	if (req->status != 0) {
+		pr_err("acc_complete_send_hid_event, err %d\n", req->status);
+		return;
+	}
+
+	hid_report_raw_event(hid->hid, HID_INPUT_REPORT, req->buf, length, 1);
+}
+
+static int acc_hid_parse(struct hid_device *hid)
+{
+	struct acc_hid_dev *hdev = hid->driver_data;
+
+	hid_parse_report(hid, hdev->report_desc, hdev->report_desc_len);
+	return 0;
+}
+
+static int acc_hid_start(struct hid_device *hid)
+{
+	return 0;
+}
+
+static void acc_hid_stop(struct hid_device *hid)
+{
+}
+
+static int acc_hid_open(struct hid_device *hid)
+{
+	return 0;
+}
+
+static void acc_hid_close(struct hid_device *hid)
+{
+}
+
+static int acc_hid_raw_request(struct hid_device *hid, unsigned char reportnum,
+	__u8 *buf, size_t len, unsigned char rtype, int reqtype)
+{
+	return 0;
+}
+
+static struct hid_ll_driver acc_hid_ll_driver = {
+	.parse = acc_hid_parse,
+	.start = acc_hid_start,
+	.stop = acc_hid_stop,
+	.open = acc_hid_open,
+	.close = acc_hid_close,
+	.raw_request = acc_hid_raw_request,
+};
+
+static struct acc_hid_dev *acc_hid_new(struct acc_dev *dev,
+		int id, int desc_len)
+{
+	struct acc_hid_dev *hdev;
+
+	hdev = kzalloc(sizeof(*hdev), GFP_ATOMIC);
+	if (!hdev)
+		return NULL;
+	hdev->report_desc = kzalloc(desc_len, GFP_ATOMIC);
+	if (!hdev->report_desc) {
+		kfree(hdev);
+		return NULL;
+	}
+	hdev->dev = dev;
+	hdev->id = id;
+	hdev->report_desc_len = desc_len;
+
+	return hdev;
+}
+
+static struct acc_hid_dev *acc_hid_get(struct list_head *list, int id)
+{
+	struct acc_hid_dev *hid;
+
+	list_for_each_entry(hid, list, list) {
+		if (hid->id == id)
+			return hid;
+	}
+	return NULL;
+}
+
+static int acc_register_hid(struct acc_dev *dev, int id, int desc_length)
+{
+	struct acc_hid_dev *hid;
+	unsigned long flags;
+
+	/* report descriptor length must be > 0 */
+	if (desc_length <= 0)
+		return -EINVAL;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	/* replace HID if one already exists with this ID */
+	hid = acc_hid_get(&dev->hid_list, id);
+	if (!hid)
+		hid = acc_hid_get(&dev->new_hid_list, id);
+	if (hid)
+		list_move(&hid->list, &dev->dead_hid_list);
+
+	hid = acc_hid_new(dev, id, desc_length);
+	if (!hid) {
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return -ENOMEM;
+	}
+
+	list_add(&hid->list, &dev->new_hid_list);
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	/* schedule work to register the HID device */
+	schedule_work(&dev->hid_work);
+	return 0;
+}
+
+static int acc_unregister_hid(struct acc_dev *dev, int id)
+{
+	struct acc_hid_dev *hid;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	hid = acc_hid_get(&dev->hid_list, id);
+	if (!hid)
+		hid = acc_hid_get(&dev->new_hid_list, id);
+	if (!hid) {
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return -EINVAL;
+	}
+
+	list_move(&hid->list, &dev->dead_hid_list);
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	schedule_work(&dev->hid_work);
+	return 0;
+}
+
+static int create_bulk_endpoints(struct acc_dev *dev,
+				struct usb_endpoint_descriptor *in_desc,
+				struct usb_endpoint_descriptor *out_desc)
+{
+	struct usb_composite_dev *cdev = dev->cdev;
+	struct usb_request *req;
+	struct usb_ep *ep;
+	int i;
+
+	DBG(cdev, "create_bulk_endpoints dev: %p\n", dev);
+
+	ep = usb_ep_autoconfig(cdev->gadget, in_desc);
+	if (!ep) {
+		DBG(cdev, "usb_ep_autoconfig for ep_in failed\n");
+		return -ENODEV;
+	}
+	DBG(cdev, "usb_ep_autoconfig for ep_in got %s\n", ep->name);
+	ep->driver_data = dev;		/* claim the endpoint */
+	dev->ep_in = ep;
+
+	ep = usb_ep_autoconfig(cdev->gadget, out_desc);
+	if (!ep) {
+		DBG(cdev, "usb_ep_autoconfig for ep_out failed\n");
+		return -ENODEV;
+	}
+	DBG(cdev, "usb_ep_autoconfig for ep_out got %s\n", ep->name);
+	ep->driver_data = dev;		/* claim the endpoint */
+	dev->ep_out = ep;
+
+	/* now allocate requests for our endpoints */
+	for (i = 0; i < TX_REQ_MAX; i++) {
+		req = acc_request_new(dev->ep_in, BULK_BUFFER_SIZE);
+		if (!req)
+			goto fail;
+		req->complete = acc_complete_in;
+		req_put(dev, &dev->tx_idle, req);
+	}
+	for (i = 0; i < RX_REQ_MAX; i++) {
+		req = acc_request_new(dev->ep_out, BULK_BUFFER_SIZE);
+		if (!req)
+			goto fail;
+		req->complete = acc_complete_out;
+		dev->rx_req[i] = req;
+	}
+
+	return 0;
+
+fail:
+	pr_err("acc_bind() could not allocate requests\n");
+	while ((req = req_get(dev, &dev->tx_idle)))
+		acc_request_free(req, dev->ep_in);
+	for (i = 0; i < RX_REQ_MAX; i++)
+		acc_request_free(dev->rx_req[i], dev->ep_out);
+	return -1;
+}
+
+static ssize_t acc_read(struct file *fp, char __user *buf,
+	size_t count, loff_t *pos)
+{
+	struct acc_dev *dev = fp->private_data;
+	struct usb_request *req;
+	ssize_t r = count;
+	unsigned xfer;
+	int ret = 0;
+
+	pr_debug("acc_read(%zu)\n", count);
+
+	if (dev->disconnected) {
+		pr_debug("acc_read disconnected");
+		return -ENODEV;
+	}
+
+	if (count > BULK_BUFFER_SIZE)
+		count = BULK_BUFFER_SIZE;
+
+	/* we will block until we're online */
+	pr_debug("acc_read: waiting for online\n");
+	ret = wait_event_interruptible(dev->read_wq, dev->online);
+	if (ret < 0) {
+		r = ret;
+		goto done;
+	}
+
+	if (dev->rx_done) {
+		// last req cancelled. try to get it.
+		req = dev->rx_req[0];
+		goto copy_data;
+	}
+
+requeue_req:
+	/* queue a request */
+	req = dev->rx_req[0];
+	req->length = count;
+	dev->rx_done = 0;
+	ret = usb_ep_queue(dev->ep_out, req, GFP_KERNEL);
+	if (ret < 0) {
+		r = -EIO;
+		goto done;
+	} else {
+		pr_debug("rx %p queue\n", req);
+	}
+
+	/* wait for a request to complete */
+	ret = wait_event_interruptible(dev->read_wq, dev->rx_done);
+	if (ret < 0) {
+		r = ret;
+		ret = usb_ep_dequeue(dev->ep_out, req);
+		if (ret != 0) {
+			// cancel failed. There can be a data already received.
+			// it will be retrieved in the next read.
+			pr_debug("acc_read: cancelling failed %d", ret);
+		}
+		goto done;
+	}
+
+copy_data:
+	dev->rx_done = 0;
+	if (dev->online) {
+		/* If we got a 0-len packet, throw it back and try again. */
+		if (req->actual == 0)
+			goto requeue_req;
+
+		pr_debug("rx %p %u\n", req, req->actual);
+		xfer = (req->actual < count) ? req->actual : count;
+		r = xfer;
+		if (copy_to_user(buf, req->buf, xfer))
+			r = -EFAULT;
+	} else
+		r = -EIO;
+
+done:
+	pr_debug("acc_read returning %zd\n", r);
+	return r;
+}
+
+static ssize_t acc_write(struct file *fp, const char __user *buf,
+	size_t count, loff_t *pos)
+{
+	struct acc_dev *dev = fp->private_data;
+	struct usb_request *req = 0;
+	ssize_t r = count;
+	unsigned xfer;
+	int ret;
+
+	pr_debug("acc_write(%zu)\n", count);
+
+	if (!dev->online || dev->disconnected) {
+		pr_debug("acc_write disconnected or not online");
+		return -ENODEV;
+	}
+
+	while (count > 0) {
+		if (!dev->online) {
+			pr_debug("acc_write dev->error\n");
+			r = -EIO;
+			break;
+		}
+
+		/* get an idle tx request to use */
+		req = 0;
+		ret = wait_event_interruptible(dev->write_wq,
+			((req = req_get(dev, &dev->tx_idle)) || !dev->online));
+		if (!req) {
+			r = ret;
+			break;
+		}
+
+		if (count > BULK_BUFFER_SIZE) {
+			xfer = BULK_BUFFER_SIZE;
+			/* ZLP, They will be more TX requests so not yet. */
+			req->zero = 0;
+		} else {
+			xfer = count;
+			/* If the data length is a multple of the
+			 * maxpacket size then send a zero length packet(ZLP).
+			*/
+			req->zero = ((xfer % dev->ep_in->maxpacket) == 0);
+		}
+		if (copy_from_user(req->buf, buf, xfer)) {
+			r = -EFAULT;
+			break;
+		}
+
+		req->length = xfer;
+		ret = usb_ep_queue(dev->ep_in, req, GFP_KERNEL);
+		if (ret < 0) {
+			pr_debug("acc_write: xfer error %d\n", ret);
+			r = -EIO;
+			break;
+		}
+
+		buf += xfer;
+		count -= xfer;
+
+		/* zero this so we don't try to free it on error exit */
+		req = 0;
+	}
+
+	if (req)
+		req_put(dev, &dev->tx_idle, req);
+
+	pr_debug("acc_write returning %zd\n", r);
+	return r;
+}
+
+static long acc_ioctl(struct file *fp, unsigned code, unsigned long value)
+{
+	struct acc_dev *dev = fp->private_data;
+	char *src = NULL;
+	int ret;
+
+	switch (code) {
+	case ACCESSORY_GET_STRING_MANUFACTURER:
+		src = dev->manufacturer;
+		break;
+	case ACCESSORY_GET_STRING_MODEL:
+		src = dev->model;
+		break;
+	case ACCESSORY_GET_STRING_DESCRIPTION:
+		src = dev->description;
+		break;
+	case ACCESSORY_GET_STRING_VERSION:
+		src = dev->version;
+		break;
+	case ACCESSORY_GET_STRING_URI:
+		src = dev->uri;
+		break;
+	case ACCESSORY_GET_STRING_SERIAL:
+		src = dev->serial;
+		break;
+	case ACCESSORY_IS_START_REQUESTED:
+		return dev->start_requested;
+	case ACCESSORY_GET_AUDIO_MODE:
+		return dev->audio_mode;
+	}
+	if (!src)
+		return -EINVAL;
+
+	ret = strlen(src) + 1;
+	if (copy_to_user((void __user *)value, src, ret))
+		ret = -EFAULT;
+	return ret;
+}
+
+static int acc_open(struct inode *ip, struct file *fp)
+{
+	printk(KERN_INFO "acc_open\n");
+	if (atomic_xchg(&_acc_dev->open_excl, 1))
+		return -EBUSY;
+
+	_acc_dev->disconnected = 0;
+	fp->private_data = _acc_dev;
+	return 0;
+}
+
+static int acc_release(struct inode *ip, struct file *fp)
+{
+	printk(KERN_INFO "acc_release\n");
+
+	WARN_ON(!atomic_xchg(&_acc_dev->open_excl, 0));
+	/* indicate that we are disconnected
+	 * still could be online so don't touch online flag
+	 */
+	_acc_dev->disconnected = 1;
+	return 0;
+}
+
+/* file operations for /dev/usb_accessory */
+static const struct file_operations acc_fops = {
+	.owner = THIS_MODULE,
+	.read = acc_read,
+	.write = acc_write,
+	.unlocked_ioctl = acc_ioctl,
+	.open = acc_open,
+	.release = acc_release,
+};
+
+static int acc_hid_probe(struct hid_device *hdev,
+		const struct hid_device_id *id)
+{
+	int ret;
+
+	ret = hid_parse(hdev);
+	if (ret)
+		return ret;
+	return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+}
+
+static struct miscdevice acc_device = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "usb_accessory",
+	.fops = &acc_fops,
+};
+
+static const struct hid_device_id acc_hid_table[] = {
+	{ HID_USB_DEVICE(HID_ANY_ID, HID_ANY_ID) },
+	{ }
+};
+
+static struct hid_driver acc_hid_driver = {
+	.name = "USB accessory",
+	.id_table = acc_hid_table,
+	.probe = acc_hid_probe,
+};
+
+static void acc_complete_setup_noop(struct usb_ep *ep, struct usb_request *req)
+{
+	/*
+	 * Default no-op function when nothing needs to be done for the
+	 * setup request
+	 */
+}
+
+int acc_ctrlrequest(struct usb_composite_dev *cdev,
+				const struct usb_ctrlrequest *ctrl)
+{
+	struct acc_dev	*dev = _acc_dev;
+	int	value = -EOPNOTSUPP;
+	struct acc_hid_dev *hid;
+	int offset;
+	u8 b_requestType = ctrl->bRequestType;
+	u8 b_request = ctrl->bRequest;
+	u16	w_index = le16_to_cpu(ctrl->wIndex);
+	u16	w_value = le16_to_cpu(ctrl->wValue);
+	u16	w_length = le16_to_cpu(ctrl->wLength);
+	unsigned long flags;
+
+/*
+	printk(KERN_INFO "acc_ctrlrequest "
+			"%02x.%02x v%04x i%04x l%u\n",
+			b_requestType, b_request,
+			w_value, w_index, w_length);
+*/
+
+	if (b_requestType == (USB_DIR_OUT | USB_TYPE_VENDOR)) {
+		if (b_request == ACCESSORY_START) {
+			dev->start_requested = 1;
+			schedule_delayed_work(
+				&dev->start_work, msecs_to_jiffies(10));
+			value = 0;
+			cdev->req->complete = acc_complete_setup_noop;
+		} else if (b_request == ACCESSORY_SEND_STRING) {
+			dev->string_index = w_index;
+			cdev->gadget->ep0->driver_data = dev;
+			cdev->req->complete = acc_complete_set_string;
+			value = w_length;
+		} else if (b_request == ACCESSORY_SET_AUDIO_MODE &&
+				w_index == 0 && w_length == 0) {
+			dev->audio_mode = w_value;
+			cdev->req->complete = acc_complete_setup_noop;
+			value = 0;
+		} else if (b_request == ACCESSORY_REGISTER_HID) {
+			cdev->req->complete = acc_complete_setup_noop;
+			value = acc_register_hid(dev, w_value, w_index);
+		} else if (b_request == ACCESSORY_UNREGISTER_HID) {
+			cdev->req->complete = acc_complete_setup_noop;
+			value = acc_unregister_hid(dev, w_value);
+		} else if (b_request == ACCESSORY_SET_HID_REPORT_DESC) {
+			spin_lock_irqsave(&dev->lock, flags);
+			hid = acc_hid_get(&dev->new_hid_list, w_value);
+			spin_unlock_irqrestore(&dev->lock, flags);
+			if (!hid) {
+				value = -EINVAL;
+				goto err;
+			}
+			offset = w_index;
+			if (offset != hid->report_desc_offset
+				|| offset + w_length > hid->report_desc_len) {
+				value = -EINVAL;
+				goto err;
+			}
+			cdev->req->context = hid;
+			cdev->req->complete = acc_complete_set_hid_report_desc;
+			value = w_length;
+		} else if (b_request == ACCESSORY_SEND_HID_EVENT) {
+			spin_lock_irqsave(&dev->lock, flags);
+			hid = acc_hid_get(&dev->hid_list, w_value);
+			spin_unlock_irqrestore(&dev->lock, flags);
+			if (!hid) {
+				value = -EINVAL;
+				goto err;
+			}
+			cdev->req->context = hid;
+			cdev->req->complete = acc_complete_send_hid_event;
+			value = w_length;
+		}
+	} else if (b_requestType == (USB_DIR_IN | USB_TYPE_VENDOR)) {
+		if (b_request == ACCESSORY_GET_PROTOCOL) {
+			*((u16 *)cdev->req->buf) = PROTOCOL_VERSION;
+			value = sizeof(u16);
+			cdev->req->complete = acc_complete_setup_noop;
+			/* clear any string left over from a previous session */
+			memset(dev->manufacturer, 0, sizeof(dev->manufacturer));
+			memset(dev->model, 0, sizeof(dev->model));
+			memset(dev->description, 0, sizeof(dev->description));
+			memset(dev->version, 0, sizeof(dev->version));
+			memset(dev->uri, 0, sizeof(dev->uri));
+			memset(dev->serial, 0, sizeof(dev->serial));
+			dev->start_requested = 0;
+			dev->audio_mode = 0;
+		}
+	}
+
+	if (value >= 0) {
+		cdev->req->zero = 0;
+		cdev->req->length = value;
+		value = usb_ep_queue(cdev->gadget->ep0, cdev->req, GFP_ATOMIC);
+		if (value < 0)
+			ERROR(cdev, "%s setup response queue error\n",
+				__func__);
+	}
+
+err:
+	if (value == -EOPNOTSUPP)
+		VDBG(cdev,
+			"unknown class-specific control req "
+			"%02x.%02x v%04x i%04x l%u\n",
+			ctrl->bRequestType, ctrl->bRequest,
+			w_value, w_index, w_length);
+	return value;
+}
+EXPORT_SYMBOL_GPL(acc_ctrlrequest);
+
+static int
+__acc_function_bind(struct usb_configuration *c,
+			struct usb_function *f, bool configfs)
+{
+	struct usb_composite_dev *cdev = c->cdev;
+	struct acc_dev	*dev = func_to_dev(f);
+	int			id;
+	int			ret;
+
+	DBG(cdev, "acc_function_bind dev: %p\n", dev);
+
+	if (configfs) {
+		if (acc_string_defs[INTERFACE_STRING_INDEX].id == 0) {
+			ret = usb_string_id(c->cdev);
+			if (ret < 0)
+				return ret;
+			acc_string_defs[INTERFACE_STRING_INDEX].id = ret;
+			acc_interface_desc.iInterface = ret;
+		}
+		dev->cdev = c->cdev;
+	}
+	ret = hid_register_driver(&acc_hid_driver);
+	if (ret)
+		return ret;
+
+	dev->start_requested = 0;
+
+	/* allocate interface ID(s) */
+	id = usb_interface_id(c, f);
+	if (id < 0)
+		return id;
+	acc_interface_desc.bInterfaceNumber = id;
+
+	/* allocate endpoints */
+	ret = create_bulk_endpoints(dev, &acc_fullspeed_in_desc,
+			&acc_fullspeed_out_desc);
+	if (ret)
+		return ret;
+
+	/* support high speed hardware */
+	if (gadget_is_dualspeed(c->cdev->gadget)) {
+		acc_highspeed_in_desc.bEndpointAddress =
+			acc_fullspeed_in_desc.bEndpointAddress;
+		acc_highspeed_out_desc.bEndpointAddress =
+			acc_fullspeed_out_desc.bEndpointAddress;
+	}
+
+	DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n",
+			gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full",
+			f->name, dev->ep_in->name, dev->ep_out->name);
+	return 0;
+}
+
+static int
+acc_function_bind_configfs(struct usb_configuration *c,
+			struct usb_function *f) {
+	return __acc_function_bind(c, f, true);
+}
+
+static void
+kill_all_hid_devices(struct acc_dev *dev)
+{
+	struct acc_hid_dev *hid;
+	struct list_head *entry, *temp;
+	unsigned long flags;
+
+	/* do nothing if usb accessory device doesn't exist */
+	if (!dev)
+		return;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	list_for_each_safe(entry, temp, &dev->hid_list) {
+		hid = list_entry(entry, struct acc_hid_dev, list);
+		list_del(&hid->list);
+		list_add(&hid->list, &dev->dead_hid_list);
+	}
+	list_for_each_safe(entry, temp, &dev->new_hid_list) {
+		hid = list_entry(entry, struct acc_hid_dev, list);
+		list_del(&hid->list);
+		list_add(&hid->list, &dev->dead_hid_list);
+	}
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	schedule_work(&dev->hid_work);
+}
+
+static void
+acc_hid_unbind(struct acc_dev *dev)
+{
+	hid_unregister_driver(&acc_hid_driver);
+	kill_all_hid_devices(dev);
+}
+
+static void
+acc_function_unbind(struct usb_configuration *c, struct usb_function *f)
+{
+	struct acc_dev	*dev = func_to_dev(f);
+	struct usb_request *req;
+	int i;
+
+	dev->online = 0;		/* clear online flag */
+	wake_up(&dev->read_wq);		/* unblock reads on closure */
+	wake_up(&dev->write_wq);	/* likewise for writes */
+
+	while ((req = req_get(dev, &dev->tx_idle)))
+		acc_request_free(req, dev->ep_in);
+	for (i = 0; i < RX_REQ_MAX; i++)
+		acc_request_free(dev->rx_req[i], dev->ep_out);
+
+	acc_hid_unbind(dev);
+}
+
+static void acc_start_work(struct work_struct *data)
+{
+	char *envp[2] = { "ACCESSORY=START", NULL };
+
+	kobject_uevent_env(&acc_device.this_device->kobj, KOBJ_CHANGE, envp);
+}
+
+static int acc_hid_init(struct acc_hid_dev *hdev)
+{
+	struct hid_device *hid;
+	int ret;
+
+	hid = hid_allocate_device();
+	if (IS_ERR(hid))
+		return PTR_ERR(hid);
+
+	hid->ll_driver = &acc_hid_ll_driver;
+	hid->dev.parent = acc_device.this_device;
+
+	hid->bus = BUS_USB;
+	hid->vendor = HID_ANY_ID;
+	hid->product = HID_ANY_ID;
+	hid->driver_data = hdev;
+	ret = hid_add_device(hid);
+	if (ret) {
+		pr_err("can't add hid device: %d\n", ret);
+		hid_destroy_device(hid);
+		return ret;
+	}
+
+	hdev->hid = hid;
+	return 0;
+}
+
+static void acc_hid_delete(struct acc_hid_dev *hid)
+{
+	kfree(hid->report_desc);
+	kfree(hid);
+}
+
+static void acc_hid_work(struct work_struct *data)
+{
+	struct acc_dev *dev = _acc_dev;
+	struct list_head	*entry, *temp;
+	struct acc_hid_dev *hid;
+	struct list_head	new_list, dead_list;
+	unsigned long flags;
+
+	INIT_LIST_HEAD(&new_list);
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	/* copy hids that are ready for initialization to new_list */
+	list_for_each_safe(entry, temp, &dev->new_hid_list) {
+		hid = list_entry(entry, struct acc_hid_dev, list);
+		if (hid->report_desc_offset == hid->report_desc_len)
+			list_move(&hid->list, &new_list);
+	}
+
+	if (list_empty(&dev->dead_hid_list)) {
+		INIT_LIST_HEAD(&dead_list);
+	} else {
+		/* move all of dev->dead_hid_list to dead_list */
+		dead_list.prev = dev->dead_hid_list.prev;
+		dead_list.next = dev->dead_hid_list.next;
+		dead_list.next->prev = &dead_list;
+		dead_list.prev->next = &dead_list;
+		INIT_LIST_HEAD(&dev->dead_hid_list);
+	}
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	/* register new HID devices */
+	list_for_each_safe(entry, temp, &new_list) {
+		hid = list_entry(entry, struct acc_hid_dev, list);
+		if (acc_hid_init(hid)) {
+			pr_err("can't add HID device %p\n", hid);
+			acc_hid_delete(hid);
+		} else {
+			spin_lock_irqsave(&dev->lock, flags);
+			list_move(&hid->list, &dev->hid_list);
+			spin_unlock_irqrestore(&dev->lock, flags);
+		}
+	}
+
+	/* remove dead HID devices */
+	list_for_each_safe(entry, temp, &dead_list) {
+		hid = list_entry(entry, struct acc_hid_dev, list);
+		list_del(&hid->list);
+		if (hid->hid)
+			hid_destroy_device(hid->hid);
+		acc_hid_delete(hid);
+	}
+}
+
+static int acc_function_set_alt(struct usb_function *f,
+		unsigned intf, unsigned alt)
+{
+	struct acc_dev	*dev = func_to_dev(f);
+	struct usb_composite_dev *cdev = f->config->cdev;
+	int ret;
+
+	DBG(cdev, "acc_function_set_alt intf: %d alt: %d\n", intf, alt);
+
+	ret = config_ep_by_speed(cdev->gadget, f, dev->ep_in);
+	if (ret)
+		return ret;
+
+	ret = usb_ep_enable(dev->ep_in);
+	if (ret)
+		return ret;
+
+	ret = config_ep_by_speed(cdev->gadget, f, dev->ep_out);
+	if (ret)
+		return ret;
+
+	ret = usb_ep_enable(dev->ep_out);
+	if (ret) {
+		usb_ep_disable(dev->ep_in);
+		return ret;
+	}
+
+	dev->online = 1;
+	dev->disconnected = 0; /* if online then not disconnected */
+
+	/* readers may be blocked waiting for us to go online */
+	wake_up(&dev->read_wq);
+	return 0;
+}
+
+static void acc_function_disable(struct usb_function *f)
+{
+	struct acc_dev	*dev = func_to_dev(f);
+	struct usb_composite_dev	*cdev = dev->cdev;
+
+	DBG(cdev, "acc_function_disable\n");
+	acc_set_disconnected(dev); /* this now only sets disconnected */
+	dev->online = 0; /* so now need to clear online flag here too */
+	usb_ep_disable(dev->ep_in);
+	usb_ep_disable(dev->ep_out);
+
+	/* readers may be blocked waiting for us to go online */
+	wake_up(&dev->read_wq);
+
+	VDBG(cdev, "%s disabled\n", dev->function.name);
+}
+
+static int acc_setup(void)
+{
+	struct acc_dev *dev;
+	int ret;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	spin_lock_init(&dev->lock);
+	init_waitqueue_head(&dev->read_wq);
+	init_waitqueue_head(&dev->write_wq);
+	atomic_set(&dev->open_excl, 0);
+	INIT_LIST_HEAD(&dev->tx_idle);
+	INIT_LIST_HEAD(&dev->hid_list);
+	INIT_LIST_HEAD(&dev->new_hid_list);
+	INIT_LIST_HEAD(&dev->dead_hid_list);
+	INIT_DELAYED_WORK(&dev->start_work, acc_start_work);
+	INIT_WORK(&dev->hid_work, acc_hid_work);
+
+	/* _acc_dev must be set before calling usb_gadget_register_driver */
+	_acc_dev = dev;
+
+	ret = misc_register(&acc_device);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	kfree(dev);
+	pr_err("USB accessory gadget driver failed to initialize\n");
+	return ret;
+}
+
+void acc_disconnect(void)
+{
+	/* unregister all HID devices if USB is disconnected */
+	kill_all_hid_devices(_acc_dev);
+}
+EXPORT_SYMBOL_GPL(acc_disconnect);
+
+static void acc_cleanup(void)
+{
+	misc_deregister(&acc_device);
+	kfree(_acc_dev);
+	_acc_dev = NULL;
+}
+static struct acc_instance *to_acc_instance(struct config_item *item)
+{
+	return container_of(to_config_group(item), struct acc_instance,
+		func_inst.group);
+}
+
+static void acc_attr_release(struct config_item *item)
+{
+	struct acc_instance *fi_acc = to_acc_instance(item);
+
+	usb_put_function_instance(&fi_acc->func_inst);
+}
+
+static struct configfs_item_operations acc_item_ops = {
+	.release        = acc_attr_release,
+};
+
+static struct config_item_type acc_func_type = {
+	.ct_item_ops    = &acc_item_ops,
+	.ct_owner       = THIS_MODULE,
+};
+
+static struct acc_instance *to_fi_acc(struct usb_function_instance *fi)
+{
+	return container_of(fi, struct acc_instance, func_inst);
+}
+
+static int acc_set_inst_name(struct usb_function_instance *fi, const char *name)
+{
+	struct acc_instance *fi_acc;
+	char *ptr;
+	int name_len;
+
+	name_len = strlen(name) + 1;
+	if (name_len > MAX_INST_NAME_LEN)
+		return -ENAMETOOLONG;
+
+	ptr = kstrndup(name, name_len, GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	fi_acc = to_fi_acc(fi);
+	fi_acc->name = ptr;
+	return 0;
+}
+
+static void acc_free_inst(struct usb_function_instance *fi)
+{
+	struct acc_instance *fi_acc;
+
+	fi_acc = to_fi_acc(fi);
+	kfree(fi_acc->name);
+	acc_cleanup();
+}
+
+static struct usb_function_instance *acc_alloc_inst(void)
+{
+	struct acc_instance *fi_acc;
+	struct acc_dev *dev;
+	int err;
+
+	fi_acc = kzalloc(sizeof(*fi_acc), GFP_KERNEL);
+	if (!fi_acc)
+		return ERR_PTR(-ENOMEM);
+	fi_acc->func_inst.set_inst_name = acc_set_inst_name;
+	fi_acc->func_inst.free_func_inst = acc_free_inst;
+
+	err = acc_setup();
+	if (err) {
+		kfree(fi_acc);
+		pr_err("Error setting ACCESSORY\n");
+		return ERR_PTR(err);
+	}
+
+	config_group_init_type_name(&fi_acc->func_inst.group,
+					"", &acc_func_type);
+	dev = _acc_dev;
+	return  &fi_acc->func_inst;
+}
+
+static void acc_free(struct usb_function *f)
+{
+/*NO-OP: no function specific resource allocation in mtp_alloc*/
+}
+
+int acc_ctrlrequest_configfs(struct usb_function *f,
+			const struct usb_ctrlrequest *ctrl) {
+	if (f->config != NULL && f->config->cdev != NULL)
+		return acc_ctrlrequest(f->config->cdev, ctrl);
+	else
+		return -1;
+}
+
+static struct usb_function *acc_alloc(struct usb_function_instance *fi)
+{
+	struct acc_dev *dev = _acc_dev;
+
+	pr_info("acc_alloc\n");
+
+	dev->function.name = "accessory";
+	dev->function.strings = acc_strings,
+	dev->function.fs_descriptors = fs_acc_descs;
+	dev->function.hs_descriptors = hs_acc_descs;
+	dev->function.bind = acc_function_bind_configfs;
+	dev->function.unbind = acc_function_unbind;
+	dev->function.set_alt = acc_function_set_alt;
+	dev->function.disable = acc_function_disable;
+	dev->function.free_func = acc_free;
+	dev->function.setup = acc_ctrlrequest_configfs;
+
+	return &dev->function;
+}
+DECLARE_USB_FUNCTION_INIT(accessory, acc_alloc_inst, acc_alloc);
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/gadget/function/f_audio_source.c b/drivers/usb/gadget/function/f_audio_source.c
new file mode 100644
index 0000000..8124af3
--- /dev/null
+++ b/drivers/usb/gadget/function/f_audio_source.c
@@ -0,0 +1,1071 @@
+/*
+ * Gadget Function Driver for USB audio source device
+ *
+ * Copyright (C) 2012 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/usb/audio.h>
+#include <linux/wait.h>
+#include <linux/pm_qos.h>
+#include <sound/core.h>
+#include <sound/initval.h>
+#include <sound/pcm.h>
+
+#include <linux/usb.h>
+#include <linux/usb_usual.h>
+#include <linux/usb/ch9.h>
+#include <linux/configfs.h>
+#include <linux/usb/composite.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#define SAMPLE_RATE 44100
+#define FRAMES_PER_MSEC (SAMPLE_RATE / 1000)
+
+#define IN_EP_MAX_PACKET_SIZE 256
+
+/* Number of requests to allocate */
+#define IN_EP_REQ_COUNT 4
+
+#define AUDIO_AC_INTERFACE	0
+#define AUDIO_AS_INTERFACE	1
+#define AUDIO_NUM_INTERFACES	2
+#define MAX_INST_NAME_LEN     40
+
+/* B.3.1  Standard AC Interface Descriptor */
+static struct usb_interface_descriptor ac_interface_desc = {
+	.bLength =		USB_DT_INTERFACE_SIZE,
+	.bDescriptorType =	USB_DT_INTERFACE,
+	.bNumEndpoints =	0,
+	.bInterfaceClass =	USB_CLASS_AUDIO,
+	.bInterfaceSubClass =	USB_SUBCLASS_AUDIOCONTROL,
+};
+
+DECLARE_UAC_AC_HEADER_DESCRIPTOR(2);
+
+#define UAC_DT_AC_HEADER_LENGTH	UAC_DT_AC_HEADER_SIZE(AUDIO_NUM_INTERFACES)
+/* 1 input terminal, 1 output terminal and 1 feature unit */
+#define UAC_DT_TOTAL_LENGTH (UAC_DT_AC_HEADER_LENGTH \
+	+ UAC_DT_INPUT_TERMINAL_SIZE + UAC_DT_OUTPUT_TERMINAL_SIZE \
+	+ UAC_DT_FEATURE_UNIT_SIZE(0))
+/* B.3.2  Class-Specific AC Interface Descriptor */
+static struct uac1_ac_header_descriptor_2 ac_header_desc = {
+	.bLength =		UAC_DT_AC_HEADER_LENGTH,
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	UAC_HEADER,
+	.bcdADC =		__constant_cpu_to_le16(0x0100),
+	.wTotalLength =		__constant_cpu_to_le16(UAC_DT_TOTAL_LENGTH),
+	.bInCollection =	AUDIO_NUM_INTERFACES,
+	.baInterfaceNr = {
+		[0] =		AUDIO_AC_INTERFACE,
+		[1] =		AUDIO_AS_INTERFACE,
+	}
+};
+
+#define INPUT_TERMINAL_ID	1
+static struct uac_input_terminal_descriptor input_terminal_desc = {
+	.bLength =		UAC_DT_INPUT_TERMINAL_SIZE,
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	UAC_INPUT_TERMINAL,
+	.bTerminalID =		INPUT_TERMINAL_ID,
+	.wTerminalType =	UAC_INPUT_TERMINAL_MICROPHONE,
+	.bAssocTerminal =	0,
+	.wChannelConfig =	0x3,
+};
+
+DECLARE_UAC_FEATURE_UNIT_DESCRIPTOR(0);
+
+#define FEATURE_UNIT_ID		2
+static struct uac_feature_unit_descriptor_0 feature_unit_desc = {
+	.bLength		= UAC_DT_FEATURE_UNIT_SIZE(0),
+	.bDescriptorType	= USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype	= UAC_FEATURE_UNIT,
+	.bUnitID		= FEATURE_UNIT_ID,
+	.bSourceID		= INPUT_TERMINAL_ID,
+	.bControlSize		= 2,
+};
+
+#define OUTPUT_TERMINAL_ID	3
+static struct uac1_output_terminal_descriptor output_terminal_desc = {
+	.bLength		= UAC_DT_OUTPUT_TERMINAL_SIZE,
+	.bDescriptorType	= USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype	= UAC_OUTPUT_TERMINAL,
+	.bTerminalID		= OUTPUT_TERMINAL_ID,
+	.wTerminalType		= UAC_TERMINAL_STREAMING,
+	.bAssocTerminal		= FEATURE_UNIT_ID,
+	.bSourceID		= FEATURE_UNIT_ID,
+};
+
+/* B.4.1  Standard AS Interface Descriptor */
+static struct usb_interface_descriptor as_interface_alt_0_desc = {
+	.bLength =		USB_DT_INTERFACE_SIZE,
+	.bDescriptorType =	USB_DT_INTERFACE,
+	.bAlternateSetting =	0,
+	.bNumEndpoints =	0,
+	.bInterfaceClass =	USB_CLASS_AUDIO,
+	.bInterfaceSubClass =	USB_SUBCLASS_AUDIOSTREAMING,
+};
+
+static struct usb_interface_descriptor as_interface_alt_1_desc = {
+	.bLength =		USB_DT_INTERFACE_SIZE,
+	.bDescriptorType =	USB_DT_INTERFACE,
+	.bAlternateSetting =	1,
+	.bNumEndpoints =	1,
+	.bInterfaceClass =	USB_CLASS_AUDIO,
+	.bInterfaceSubClass =	USB_SUBCLASS_AUDIOSTREAMING,
+};
+
+/* B.4.2  Class-Specific AS Interface Descriptor */
+static struct uac1_as_header_descriptor as_header_desc = {
+	.bLength =		UAC_DT_AS_HEADER_SIZE,
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	UAC_AS_GENERAL,
+	.bTerminalLink =	INPUT_TERMINAL_ID,
+	.bDelay =		1,
+	.wFormatTag =		UAC_FORMAT_TYPE_I_PCM,
+};
+
+DECLARE_UAC_FORMAT_TYPE_I_DISCRETE_DESC(1);
+
+static struct uac_format_type_i_discrete_descriptor_1 as_type_i_desc = {
+	.bLength =		UAC_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(1),
+	.bDescriptorType =	USB_DT_CS_INTERFACE,
+	.bDescriptorSubtype =	UAC_FORMAT_TYPE,
+	.bFormatType =		UAC_FORMAT_TYPE_I,
+	.bSubframeSize =	2,
+	.bBitResolution =	16,
+	.bSamFreqType =		1,
+};
+
+/* Standard ISO IN Endpoint Descriptor for highspeed */
+static struct usb_endpoint_descriptor hs_as_in_ep_desc  = {
+	.bLength =		USB_DT_ENDPOINT_AUDIO_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+	.bEndpointAddress =	USB_DIR_IN,
+	.bmAttributes =		USB_ENDPOINT_SYNC_SYNC
+				| USB_ENDPOINT_XFER_ISOC,
+	.wMaxPacketSize =	__constant_cpu_to_le16(IN_EP_MAX_PACKET_SIZE),
+	.bInterval =		4, /* poll 1 per millisecond */
+};
+
+/* Standard ISO IN Endpoint Descriptor for highspeed */
+static struct usb_endpoint_descriptor fs_as_in_ep_desc  = {
+	.bLength =		USB_DT_ENDPOINT_AUDIO_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+	.bEndpointAddress =	USB_DIR_IN,
+	.bmAttributes =		USB_ENDPOINT_SYNC_SYNC
+				| USB_ENDPOINT_XFER_ISOC,
+	.wMaxPacketSize =	__constant_cpu_to_le16(IN_EP_MAX_PACKET_SIZE),
+	.bInterval =		1, /* poll 1 per millisecond */
+};
+
+/* Class-specific AS ISO OUT Endpoint Descriptor */
+static struct uac_iso_endpoint_descriptor as_iso_in_desc = {
+	.bLength =		UAC_ISO_ENDPOINT_DESC_SIZE,
+	.bDescriptorType =	USB_DT_CS_ENDPOINT,
+	.bDescriptorSubtype =	UAC_EP_GENERAL,
+	.bmAttributes =		1,
+	.bLockDelayUnits =	1,
+	.wLockDelay =		__constant_cpu_to_le16(1),
+};
+
+static struct usb_descriptor_header *hs_audio_desc[] = {
+	(struct usb_descriptor_header *)&ac_interface_desc,
+	(struct usb_descriptor_header *)&ac_header_desc,
+
+	(struct usb_descriptor_header *)&input_terminal_desc,
+	(struct usb_descriptor_header *)&output_terminal_desc,
+	(struct usb_descriptor_header *)&feature_unit_desc,
+
+	(struct usb_descriptor_header *)&as_interface_alt_0_desc,
+	(struct usb_descriptor_header *)&as_interface_alt_1_desc,
+	(struct usb_descriptor_header *)&as_header_desc,
+
+	(struct usb_descriptor_header *)&as_type_i_desc,
+
+	(struct usb_descriptor_header *)&hs_as_in_ep_desc,
+	(struct usb_descriptor_header *)&as_iso_in_desc,
+	NULL,
+};
+
+static struct usb_descriptor_header *fs_audio_desc[] = {
+	(struct usb_descriptor_header *)&ac_interface_desc,
+	(struct usb_descriptor_header *)&ac_header_desc,
+
+	(struct usb_descriptor_header *)&input_terminal_desc,
+	(struct usb_descriptor_header *)&output_terminal_desc,
+	(struct usb_descriptor_header *)&feature_unit_desc,
+
+	(struct usb_descriptor_header *)&as_interface_alt_0_desc,
+	(struct usb_descriptor_header *)&as_interface_alt_1_desc,
+	(struct usb_descriptor_header *)&as_header_desc,
+
+	(struct usb_descriptor_header *)&as_type_i_desc,
+
+	(struct usb_descriptor_header *)&fs_as_in_ep_desc,
+	(struct usb_descriptor_header *)&as_iso_in_desc,
+	NULL,
+};
+
+static struct snd_pcm_hardware audio_hw_info = {
+	.info =			SNDRV_PCM_INFO_MMAP |
+				SNDRV_PCM_INFO_MMAP_VALID |
+				SNDRV_PCM_INFO_BATCH |
+				SNDRV_PCM_INFO_INTERLEAVED |
+				SNDRV_PCM_INFO_BLOCK_TRANSFER,
+
+	.formats		= SNDRV_PCM_FMTBIT_S16_LE,
+	.channels_min		= 2,
+	.channels_max		= 2,
+	.rate_min		= SAMPLE_RATE,
+	.rate_max		= SAMPLE_RATE,
+
+	.buffer_bytes_max =	1024 * 1024,
+	.period_bytes_min =	64,
+	.period_bytes_max =	512 * 1024,
+	.periods_min =		2,
+	.periods_max =		1024,
+};
+
+/*-------------------------------------------------------------------------*/
+
+struct audio_source_config {
+	int	card;
+	int	device;
+};
+
+struct audio_dev {
+	struct usb_function		func;
+	struct snd_card			*card;
+	struct snd_pcm			*pcm;
+	struct snd_pcm_substream *substream;
+
+	struct list_head		idle_reqs;
+	struct usb_ep			*in_ep;
+
+	spinlock_t			lock;
+
+	/* beginning, end and current position in our buffer */
+	void				*buffer_start;
+	void				*buffer_end;
+	void				*buffer_pos;
+
+	/* byte size of a "period" */
+	unsigned int			period;
+	/* bytes sent since last call to snd_pcm_period_elapsed */
+	unsigned int			period_offset;
+	/* time we started playing */
+	ktime_t				start_time;
+	/* number of frames sent since start_time */
+	s64				frames_sent;
+	struct audio_source_config	*config;
+	/* for creating and issuing QoS requests */
+	struct pm_qos_request pm_qos;
+};
+
+static inline struct audio_dev *func_to_audio(struct usb_function *f)
+{
+	return container_of(f, struct audio_dev, func);
+}
+
+/*-------------------------------------------------------------------------*/
+
+struct audio_source_instance {
+	struct usb_function_instance func_inst;
+	const char *name;
+	struct audio_source_config *config;
+	struct device *audio_device;
+};
+
+static void audio_source_attr_release(struct config_item *item);
+
+static struct configfs_item_operations audio_source_item_ops = {
+	.release        = audio_source_attr_release,
+};
+
+static struct config_item_type audio_source_func_type = {
+	.ct_item_ops    = &audio_source_item_ops,
+	.ct_owner       = THIS_MODULE,
+};
+
+static ssize_t audio_source_pcm_show(struct device *dev,
+		struct device_attribute *attr, char *buf);
+
+static DEVICE_ATTR(pcm, S_IRUGO, audio_source_pcm_show, NULL);
+
+static struct device_attribute *audio_source_function_attributes[] = {
+	&dev_attr_pcm,
+	NULL
+};
+
+/*--------------------------------------------------------------------------*/
+
+static struct usb_request *audio_request_new(struct usb_ep *ep, int buffer_size)
+{
+	struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL);
+
+	if (!req)
+		return NULL;
+
+	req->buf = kmalloc(buffer_size, GFP_KERNEL);
+	if (!req->buf) {
+		usb_ep_free_request(ep, req);
+		return NULL;
+	}
+	req->length = buffer_size;
+	return req;
+}
+
+static void audio_request_free(struct usb_request *req, struct usb_ep *ep)
+{
+	if (req) {
+		kfree(req->buf);
+		usb_ep_free_request(ep, req);
+	}
+}
+
+static void audio_req_put(struct audio_dev *audio, struct usb_request *req)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&audio->lock, flags);
+	list_add_tail(&req->list, &audio->idle_reqs);
+	spin_unlock_irqrestore(&audio->lock, flags);
+}
+
+static struct usb_request *audio_req_get(struct audio_dev *audio)
+{
+	unsigned long flags;
+	struct usb_request *req;
+
+	spin_lock_irqsave(&audio->lock, flags);
+	if (list_empty(&audio->idle_reqs)) {
+		req = 0;
+	} else {
+		req = list_first_entry(&audio->idle_reqs, struct usb_request,
+				list);
+		list_del(&req->list);
+	}
+	spin_unlock_irqrestore(&audio->lock, flags);
+	return req;
+}
+
+/* send the appropriate number of packets to match our bitrate */
+static void audio_send(struct audio_dev *audio)
+{
+	struct snd_pcm_runtime *runtime;
+	struct usb_request *req;
+	int length, length1, length2, ret;
+	s64 msecs;
+	s64 frames;
+	ktime_t now;
+
+	/* audio->substream will be null if we have been closed */
+	if (!audio->substream)
+		return;
+	/* audio->buffer_pos will be null if we have been stopped */
+	if (!audio->buffer_pos)
+		return;
+
+	runtime = audio->substream->runtime;
+
+	/* compute number of frames to send */
+	now = ktime_get();
+	msecs = div_s64((ktime_to_ns(now) - ktime_to_ns(audio->start_time)),
+			1000000);
+	frames = div_s64((msecs * SAMPLE_RATE), 1000);
+
+	/* Readjust our frames_sent if we fall too far behind.
+	 * If we get too far behind it is better to drop some frames than
+	 * to keep sending data too fast in an attempt to catch up.
+	 */
+	if (frames - audio->frames_sent > 10 * FRAMES_PER_MSEC)
+		audio->frames_sent = frames - FRAMES_PER_MSEC;
+
+	frames -= audio->frames_sent;
+
+	/* We need to send something to keep the pipeline going */
+	if (frames <= 0)
+		frames = FRAMES_PER_MSEC;
+
+	while (frames > 0) {
+		req = audio_req_get(audio);
+		if (!req)
+			break;
+
+		length = frames_to_bytes(runtime, frames);
+		if (length > IN_EP_MAX_PACKET_SIZE)
+			length = IN_EP_MAX_PACKET_SIZE;
+
+		if (audio->buffer_pos + length > audio->buffer_end)
+			length1 = audio->buffer_end - audio->buffer_pos;
+		else
+			length1 = length;
+		memcpy(req->buf, audio->buffer_pos, length1);
+		if (length1 < length) {
+			/* Wrap around and copy remaining length
+			 * at beginning of buffer.
+			 */
+			length2 = length - length1;
+			memcpy(req->buf + length1, audio->buffer_start,
+					length2);
+			audio->buffer_pos = audio->buffer_start + length2;
+		} else {
+			audio->buffer_pos += length1;
+			if (audio->buffer_pos >= audio->buffer_end)
+				audio->buffer_pos = audio->buffer_start;
+		}
+
+		req->length = length;
+		ret = usb_ep_queue(audio->in_ep, req, GFP_ATOMIC);
+		if (ret < 0) {
+			pr_err("usb_ep_queue failed ret: %d\n", ret);
+			audio_req_put(audio, req);
+			break;
+		}
+
+		frames -= bytes_to_frames(runtime, length);
+		audio->frames_sent += bytes_to_frames(runtime, length);
+	}
+}
+
+static void audio_control_complete(struct usb_ep *ep, struct usb_request *req)
+{
+	/* nothing to do here */
+}
+
+static void audio_data_complete(struct usb_ep *ep, struct usb_request *req)
+{
+	struct audio_dev *audio = req->context;
+
+	pr_debug("audio_data_complete req->status %d req->actual %d\n",
+		req->status, req->actual);
+
+	audio_req_put(audio, req);
+
+	if (!audio->buffer_start || req->status)
+		return;
+
+	audio->period_offset += req->actual;
+	if (audio->period_offset >= audio->period) {
+		snd_pcm_period_elapsed(audio->substream);
+		audio->period_offset = 0;
+	}
+	audio_send(audio);
+}
+
+static int audio_set_endpoint_req(struct usb_function *f,
+		const struct usb_ctrlrequest *ctrl)
+{
+	int value = -EOPNOTSUPP;
+	u16 ep = le16_to_cpu(ctrl->wIndex);
+	u16 len = le16_to_cpu(ctrl->wLength);
+	u16 w_value = le16_to_cpu(ctrl->wValue);
+
+	pr_debug("bRequest 0x%x, w_value 0x%04x, len %d, endpoint %d\n",
+			ctrl->bRequest, w_value, len, ep);
+
+	switch (ctrl->bRequest) {
+	case UAC_SET_CUR:
+	case UAC_SET_MIN:
+	case UAC_SET_MAX:
+	case UAC_SET_RES:
+		value = len;
+		break;
+	default:
+		break;
+	}
+
+	return value;
+}
+
+static int audio_get_endpoint_req(struct usb_function *f,
+		const struct usb_ctrlrequest *ctrl)
+{
+	struct usb_composite_dev *cdev = f->config->cdev;
+	int value = -EOPNOTSUPP;
+	u8 ep = ((le16_to_cpu(ctrl->wIndex) >> 8) & 0xFF);
+	u16 len = le16_to_cpu(ctrl->wLength);
+	u16 w_value = le16_to_cpu(ctrl->wValue);
+	u8 *buf = cdev->req->buf;
+
+	pr_debug("bRequest 0x%x, w_value 0x%04x, len %d, endpoint %d\n",
+			ctrl->bRequest, w_value, len, ep);
+
+	if (w_value == UAC_EP_CS_ATTR_SAMPLE_RATE << 8) {
+		switch (ctrl->bRequest) {
+		case UAC_GET_CUR:
+		case UAC_GET_MIN:
+		case UAC_GET_MAX:
+		case UAC_GET_RES:
+			/* return our sample rate */
+			buf[0] = (u8)SAMPLE_RATE;
+			buf[1] = (u8)(SAMPLE_RATE >> 8);
+			buf[2] = (u8)(SAMPLE_RATE >> 16);
+			value = 3;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return value;
+}
+
+static int
+audio_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl)
+{
+	struct usb_composite_dev *cdev = f->config->cdev;
+	struct usb_request *req = cdev->req;
+	int value = -EOPNOTSUPP;
+	u16 w_index = le16_to_cpu(ctrl->wIndex);
+	u16 w_value = le16_to_cpu(ctrl->wValue);
+	u16 w_length = le16_to_cpu(ctrl->wLength);
+
+	/* composite driver infrastructure handles everything; interface
+	 * activation uses set_alt().
+	 */
+	switch (ctrl->bRequestType) {
+	case USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_ENDPOINT:
+		value = audio_set_endpoint_req(f, ctrl);
+		break;
+
+	case USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT:
+		value = audio_get_endpoint_req(f, ctrl);
+		break;
+	}
+
+	/* respond with data transfer or status phase? */
+	if (value >= 0) {
+		pr_debug("audio req%02x.%02x v%04x i%04x l%d\n",
+			ctrl->bRequestType, ctrl->bRequest,
+			w_value, w_index, w_length);
+		req->zero = 0;
+		req->length = value;
+		req->complete = audio_control_complete;
+		value = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC);
+		if (value < 0)
+			pr_err("audio response on err %d\n", value);
+	}
+
+	/* device either stalls (value < 0) or reports success */
+	return value;
+}
+
+static int audio_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
+{
+	struct audio_dev *audio = func_to_audio(f);
+	struct usb_composite_dev *cdev = f->config->cdev;
+	int ret;
+
+	pr_debug("audio_set_alt intf %d, alt %d\n", intf, alt);
+
+	ret = config_ep_by_speed(cdev->gadget, f, audio->in_ep);
+	if (ret)
+		return ret;
+
+	usb_ep_enable(audio->in_ep);
+	return 0;
+}
+
+static void audio_disable(struct usb_function *f)
+{
+	struct audio_dev	*audio = func_to_audio(f);
+
+	pr_debug("audio_disable\n");
+	usb_ep_disable(audio->in_ep);
+}
+
+static void audio_free_func(struct usb_function *f)
+{
+	/* no-op */
+}
+
+/*-------------------------------------------------------------------------*/
+
+static void audio_build_desc(struct audio_dev *audio)
+{
+	u8 *sam_freq;
+	int rate;
+
+	/* Set channel numbers */
+	input_terminal_desc.bNrChannels = 2;
+	as_type_i_desc.bNrChannels = 2;
+
+	/* Set sample rates */
+	rate = SAMPLE_RATE;
+	sam_freq = as_type_i_desc.tSamFreq[0];
+	memcpy(sam_freq, &rate, 3);
+}
+
+
+static int snd_card_setup(struct usb_configuration *c,
+	struct audio_source_config *config);
+static struct audio_source_instance *to_fi_audio_source(
+	const struct usb_function_instance *fi);
+
+
+/* audio function driver setup/binding */
+static int
+audio_bind(struct usb_configuration *c, struct usb_function *f)
+{
+	struct usb_composite_dev *cdev = c->cdev;
+	struct audio_dev *audio = func_to_audio(f);
+	int status;
+	struct usb_ep *ep;
+	struct usb_request *req;
+	int i;
+	int err;
+
+	if (IS_ENABLED(CONFIG_USB_CONFIGFS)) {
+		struct audio_source_instance *fi_audio =
+				to_fi_audio_source(f->fi);
+		struct audio_source_config *config =
+				fi_audio->config;
+
+		err = snd_card_setup(c, config);
+		if (err)
+			return err;
+	}
+
+	audio_build_desc(audio);
+
+	/* allocate instance-specific interface IDs, and patch descriptors */
+	status = usb_interface_id(c, f);
+	if (status < 0)
+		goto fail;
+	ac_interface_desc.bInterfaceNumber = status;
+
+	/* AUDIO_AC_INTERFACE */
+	ac_header_desc.baInterfaceNr[0] = status;
+
+	status = usb_interface_id(c, f);
+	if (status < 0)
+		goto fail;
+	as_interface_alt_0_desc.bInterfaceNumber = status;
+	as_interface_alt_1_desc.bInterfaceNumber = status;
+
+	/* AUDIO_AS_INTERFACE */
+	ac_header_desc.baInterfaceNr[1] = status;
+
+	status = -ENODEV;
+
+	/* allocate our endpoint */
+	ep = usb_ep_autoconfig(cdev->gadget, &fs_as_in_ep_desc);
+	if (!ep)
+		goto fail;
+	audio->in_ep = ep;
+	ep->driver_data = audio; /* claim */
+
+	if (gadget_is_dualspeed(c->cdev->gadget))
+		hs_as_in_ep_desc.bEndpointAddress =
+			fs_as_in_ep_desc.bEndpointAddress;
+
+	f->fs_descriptors = fs_audio_desc;
+	f->hs_descriptors = hs_audio_desc;
+
+	for (i = 0, status = 0; i < IN_EP_REQ_COUNT && status == 0; i++) {
+		req = audio_request_new(ep, IN_EP_MAX_PACKET_SIZE);
+		if (req) {
+			req->context = audio;
+			req->complete = audio_data_complete;
+			audio_req_put(audio, req);
+		} else
+			status = -ENOMEM;
+	}
+
+fail:
+	return status;
+}
+
+static void
+audio_unbind(struct usb_configuration *c, struct usb_function *f)
+{
+	struct audio_dev *audio = func_to_audio(f);
+	struct usb_request *req;
+
+	while ((req = audio_req_get(audio)))
+		audio_request_free(req, audio->in_ep);
+
+	snd_card_free_when_closed(audio->card);
+	audio->card = NULL;
+	audio->pcm = NULL;
+	audio->substream = NULL;
+	audio->in_ep = NULL;
+
+	if (IS_ENABLED(CONFIG_USB_CONFIGFS)) {
+		struct audio_source_instance *fi_audio =
+				to_fi_audio_source(f->fi);
+		struct audio_source_config *config =
+				fi_audio->config;
+
+		config->card = -1;
+		config->device = -1;
+	}
+}
+
+static void audio_pcm_playback_start(struct audio_dev *audio)
+{
+	audio->start_time = ktime_get();
+	audio->frames_sent = 0;
+	audio_send(audio);
+}
+
+static void audio_pcm_playback_stop(struct audio_dev *audio)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&audio->lock, flags);
+	audio->buffer_start = 0;
+	audio->buffer_end = 0;
+	audio->buffer_pos = 0;
+	spin_unlock_irqrestore(&audio->lock, flags);
+}
+
+static int audio_pcm_open(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct audio_dev *audio = substream->private_data;
+
+	runtime->private_data = audio;
+	runtime->hw = audio_hw_info;
+	snd_pcm_limit_hw_rates(runtime);
+	runtime->hw.channels_max = 2;
+
+	audio->substream = substream;
+
+	/* Add the QoS request and set the latency to 0 */
+	pm_qos_add_request(&audio->pm_qos, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	return 0;
+}
+
+static int audio_pcm_close(struct snd_pcm_substream *substream)
+{
+	struct audio_dev *audio = substream->private_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&audio->lock, flags);
+
+	/* Remove the QoS request */
+	pm_qos_remove_request(&audio->pm_qos);
+
+	audio->substream = NULL;
+	spin_unlock_irqrestore(&audio->lock, flags);
+
+	return 0;
+}
+
+static int audio_pcm_hw_params(struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params)
+{
+	unsigned int channels = params_channels(params);
+	unsigned int rate = params_rate(params);
+
+	if (rate != SAMPLE_RATE)
+		return -EINVAL;
+	if (channels != 2)
+		return -EINVAL;
+
+	return snd_pcm_lib_alloc_vmalloc_buffer(substream,
+		params_buffer_bytes(params));
+}
+
+static int audio_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	return snd_pcm_lib_free_vmalloc_buffer(substream);
+}
+
+static int audio_pcm_prepare(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct audio_dev *audio = runtime->private_data;
+
+	audio->period = snd_pcm_lib_period_bytes(substream);
+	audio->period_offset = 0;
+	audio->buffer_start = runtime->dma_area;
+	audio->buffer_end = audio->buffer_start
+		+ snd_pcm_lib_buffer_bytes(substream);
+	audio->buffer_pos = audio->buffer_start;
+
+	return 0;
+}
+
+static snd_pcm_uframes_t audio_pcm_pointer(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct audio_dev *audio = runtime->private_data;
+	ssize_t bytes = audio->buffer_pos - audio->buffer_start;
+
+	/* return offset of next frame to fill in our buffer */
+	return bytes_to_frames(runtime, bytes);
+}
+
+static int audio_pcm_playback_trigger(struct snd_pcm_substream *substream,
+					int cmd)
+{
+	struct audio_dev *audio = substream->runtime->private_data;
+	int ret = 0;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+	case SNDRV_PCM_TRIGGER_RESUME:
+		audio_pcm_playback_start(audio);
+		break;
+
+	case SNDRV_PCM_TRIGGER_STOP:
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+		audio_pcm_playback_stop(audio);
+		break;
+
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static struct audio_dev _audio_dev = {
+	.func = {
+		.name = "audio_source",
+		.bind = audio_bind,
+		.unbind = audio_unbind,
+		.set_alt = audio_set_alt,
+		.setup = audio_setup,
+		.disable = audio_disable,
+		.free_func = audio_free_func,
+	},
+	.lock = __SPIN_LOCK_UNLOCKED(_audio_dev.lock),
+	.idle_reqs = LIST_HEAD_INIT(_audio_dev.idle_reqs),
+};
+
+static struct snd_pcm_ops audio_playback_ops = {
+	.open		= audio_pcm_open,
+	.close		= audio_pcm_close,
+	.ioctl		= snd_pcm_lib_ioctl,
+	.hw_params	= audio_pcm_hw_params,
+	.hw_free	= audio_pcm_hw_free,
+	.prepare	= audio_pcm_prepare,
+	.trigger	= audio_pcm_playback_trigger,
+	.pointer	= audio_pcm_pointer,
+};
+
+int audio_source_bind_config(struct usb_configuration *c,
+		struct audio_source_config *config)
+{
+	struct audio_dev *audio;
+	int err;
+
+	config->card = -1;
+	config->device = -1;
+
+	audio = &_audio_dev;
+
+	err = snd_card_setup(c, config);
+	if (err)
+		return err;
+
+	err = usb_add_function(c, &audio->func);
+	if (err)
+		goto add_fail;
+
+	return 0;
+
+add_fail:
+	snd_card_free(audio->card);
+	return err;
+}
+
+static int snd_card_setup(struct usb_configuration *c,
+		struct audio_source_config *config)
+{
+	struct audio_dev *audio;
+	struct snd_card *card;
+	struct snd_pcm *pcm;
+	int err;
+
+	audio = &_audio_dev;
+
+	err = snd_card_new(&c->cdev->gadget->dev,
+			SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
+			THIS_MODULE, 0, &card);
+	if (err)
+		return err;
+
+	err = snd_pcm_new(card, "USB audio source", 0, 1, 0, &pcm);
+	if (err)
+		goto pcm_fail;
+
+	pcm->private_data = audio;
+	pcm->info_flags = 0;
+	audio->pcm = pcm;
+
+	strlcpy(pcm->name, "USB gadget audio", sizeof(pcm->name));
+
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &audio_playback_ops);
+	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+				NULL, 0, 64 * 1024);
+
+	strlcpy(card->driver, "audio_source", sizeof(card->driver));
+	strlcpy(card->shortname, card->driver, sizeof(card->shortname));
+	strlcpy(card->longname, "USB accessory audio source",
+		sizeof(card->longname));
+
+	err = snd_card_register(card);
+	if (err)
+		goto register_fail;
+
+	config->card = pcm->card->number;
+	config->device = pcm->device;
+	audio->card = card;
+	return 0;
+
+register_fail:
+pcm_fail:
+	snd_card_free(audio->card);
+	return err;
+}
+
+static struct audio_source_instance *to_audio_source_instance(
+					struct config_item *item)
+{
+	return container_of(to_config_group(item), struct audio_source_instance,
+		func_inst.group);
+}
+
+static struct audio_source_instance *to_fi_audio_source(
+					const struct usb_function_instance *fi)
+{
+	return container_of(fi, struct audio_source_instance, func_inst);
+}
+
+static void audio_source_attr_release(struct config_item *item)
+{
+	struct audio_source_instance *fi_audio = to_audio_source_instance(item);
+
+	usb_put_function_instance(&fi_audio->func_inst);
+}
+
+static int audio_source_set_inst_name(struct usb_function_instance *fi,
+					const char *name)
+{
+	struct audio_source_instance *fi_audio;
+	char *ptr;
+	int name_len;
+
+	name_len = strlen(name) + 1;
+	if (name_len > MAX_INST_NAME_LEN)
+		return -ENAMETOOLONG;
+
+	ptr = kstrndup(name, name_len, GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	fi_audio = to_fi_audio_source(fi);
+	fi_audio->name = ptr;
+
+	return 0;
+}
+
+static void audio_source_free_inst(struct usb_function_instance *fi)
+{
+	struct audio_source_instance *fi_audio;
+
+	fi_audio = to_fi_audio_source(fi);
+	device_destroy(fi_audio->audio_device->class,
+			fi_audio->audio_device->devt);
+	kfree(fi_audio->name);
+	kfree(fi_audio->config);
+}
+
+static ssize_t audio_source_pcm_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct audio_source_instance *fi_audio = dev_get_drvdata(dev);
+	struct audio_source_config *config = fi_audio->config;
+
+	/* print PCM card and device numbers */
+	return sprintf(buf, "%d %d\n", config->card, config->device);
+}
+
+struct device *create_function_device(char *name);
+
+static struct usb_function_instance *audio_source_alloc_inst(void)
+{
+	struct audio_source_instance *fi_audio;
+	struct device_attribute **attrs;
+	struct device_attribute *attr;
+	struct device *dev;
+	void *err_ptr;
+	int err = 0;
+
+	fi_audio = kzalloc(sizeof(*fi_audio), GFP_KERNEL);
+	if (!fi_audio)
+		return ERR_PTR(-ENOMEM);
+
+	fi_audio->func_inst.set_inst_name = audio_source_set_inst_name;
+	fi_audio->func_inst.free_func_inst = audio_source_free_inst;
+
+	fi_audio->config = kzalloc(sizeof(struct audio_source_config),
+							GFP_KERNEL);
+	if (!fi_audio->config) {
+		err_ptr = ERR_PTR(-ENOMEM);
+		goto fail_audio;
+	}
+
+	config_group_init_type_name(&fi_audio->func_inst.group, "",
+						&audio_source_func_type);
+	dev = create_function_device("f_audio_source");
+
+	if (IS_ERR(dev)) {
+		err_ptr = dev;
+		goto fail_audio_config;
+	}
+
+	fi_audio->config->card = -1;
+	fi_audio->config->device = -1;
+	fi_audio->audio_device = dev;
+
+	attrs = audio_source_function_attributes;
+	if (attrs) {
+		while ((attr = *attrs++) && !err)
+			err = device_create_file(dev, attr);
+		if (err) {
+			err_ptr = ERR_PTR(-EINVAL);
+			goto fail_device;
+		}
+	}
+
+	dev_set_drvdata(dev, fi_audio);
+	_audio_dev.config = fi_audio->config;
+
+	return  &fi_audio->func_inst;
+
+fail_device:
+	device_destroy(dev->class, dev->devt);
+fail_audio_config:
+	kfree(fi_audio->config);
+fail_audio:
+	kfree(fi_audio);
+	return err_ptr;
+
+}
+
+static struct usb_function *audio_source_alloc(struct usb_function_instance *fi)
+{
+	return &_audio_dev.func;
+}
+
+DECLARE_USB_FUNCTION_INIT(audio_source, audio_source_alloc_inst,
+			audio_source_alloc);
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 70ac196..7d6a48c5 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -1168,6 +1168,65 @@
 	kfree(opts);
 }
 
+#ifdef CONFIG_USB_CONFIGFS_UEVENT
+extern struct device *create_function_device(char *name);
+static ssize_t alsa_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct usb_function_instance *fi_midi = dev_get_drvdata(dev);
+	struct f_midi *midi;
+
+	if (!fi_midi->f)
+		dev_warn(dev, "f_midi: function not set\n");
+
+	if (fi_midi && fi_midi->f) {
+		midi = func_to_midi(fi_midi->f);
+		if (midi->rmidi && midi->rmidi->card)
+			return sprintf(buf, "%d %d\n",
+			midi->rmidi->card->number, midi->rmidi->device);
+	}
+
+	/* print PCM card and device numbers */
+	return sprintf(buf, "%d %d\n", -1, -1);
+}
+
+static DEVICE_ATTR(alsa, S_IRUGO, alsa_show, NULL);
+
+static struct device_attribute *alsa_function_attributes[] = {
+	&dev_attr_alsa,
+	NULL
+};
+
+static int create_alsa_device(struct usb_function_instance *fi)
+{
+	struct device *dev;
+	struct device_attribute **attrs;
+	struct device_attribute *attr;
+	int err = 0;
+
+	dev = create_function_device("f_midi");
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+
+	attrs = alsa_function_attributes;
+	if (attrs) {
+		while ((attr = *attrs++) && !err)
+			err = device_create_file(dev, attr);
+		if (err) {
+			device_destroy(dev->class, dev->devt);
+			return -EINVAL;
+		}
+	}
+	dev_set_drvdata(dev, fi);
+	return 0;
+}
+#else
+static int create_alsa_device(struct usb_function_instance *fi)
+{
+	return 0;
+}
+#endif
+
 static struct usb_function_instance *f_midi_alloc_inst(void)
 {
 	struct f_midi_opts *opts;
@@ -1185,6 +1244,11 @@
 	opts->in_ports = 1;
 	opts->out_ports = 1;
 
+	if (create_alsa_device(&opts->func_inst)) {
+		kfree(opts);
+		return ERR_PTR(-ENODEV);
+	}
+
 	config_group_init_type_name(&opts->func_inst.group, "",
 				    &midi_func_type);
 
@@ -1202,6 +1266,7 @@
 	mutex_lock(&opts->lock);
 	kfifo_free(&midi->in_req_fifo);
 	kfree(midi);
+	opts->func_inst.f = NULL;
 	--opts->refcnt;
 	mutex_unlock(&opts->lock);
 }
@@ -1281,6 +1346,7 @@
 	midi->func.disable	= f_midi_disable;
 	midi->func.free_func	= f_midi_free;
 
+	fi->f = &midi->func;
 	return &midi->func;
 
 setup_fail:
diff --git a/drivers/usb/gadget/function/f_mtp.c b/drivers/usb/gadget/function/f_mtp.c
new file mode 100644
index 0000000..9515b2a
--- /dev/null
+++ b/drivers/usb/gadget/function/f_mtp.c
@@ -0,0 +1,1552 @@
+/*
+ * Gadget Function Driver for MTP
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/* #define DEBUG */
+/* #define VERBOSE_DEBUG */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+
+#include <linux/usb.h>
+#include <linux/usb_usual.h>
+#include <linux/usb/ch9.h>
+#include <linux/usb/f_mtp.h>
+#include <linux/configfs.h>
+#include <linux/usb/composite.h>
+
+#include "configfs.h"
+
+#define MTP_BULK_BUFFER_SIZE       16384
+#define INTR_BUFFER_SIZE           28
+#define MAX_INST_NAME_LEN          40
+#define MTP_MAX_FILE_SIZE          0xFFFFFFFFL
+
+/* String IDs */
+#define INTERFACE_STRING_INDEX	0
+
+/* values for mtp_dev.state */
+#define STATE_OFFLINE               0   /* initial state, disconnected */
+#define STATE_READY                 1   /* ready for userspace calls */
+#define STATE_BUSY                  2   /* processing userspace calls */
+#define STATE_CANCELED              3   /* transaction canceled by host */
+#define STATE_ERROR                 4   /* error from completion routine */
+
+/* number of tx and rx requests to allocate */
+#define TX_REQ_MAX 4
+#define RX_REQ_MAX 2
+#define INTR_REQ_MAX 5
+
+/* ID for Microsoft MTP OS String */
+#define MTP_OS_STRING_ID   0xEE
+
+/* MTP class reqeusts */
+#define MTP_REQ_CANCEL              0x64
+#define MTP_REQ_GET_EXT_EVENT_DATA  0x65
+#define MTP_REQ_RESET               0x66
+#define MTP_REQ_GET_DEVICE_STATUS   0x67
+
+/* constants for device status */
+#define MTP_RESPONSE_OK             0x2001
+#define MTP_RESPONSE_DEVICE_BUSY    0x2019
+#define DRIVER_NAME "mtp"
+
+static const char mtp_shortname[] = DRIVER_NAME "_usb";
+
+struct mtp_dev {
+	struct usb_function function;
+	struct usb_composite_dev *cdev;
+	spinlock_t lock;
+
+	struct usb_ep *ep_in;
+	struct usb_ep *ep_out;
+	struct usb_ep *ep_intr;
+
+	int state;
+
+	/* synchronize access to our device file */
+	atomic_t open_excl;
+	/* to enforce only one ioctl at a time */
+	atomic_t ioctl_excl;
+
+	struct list_head tx_idle;
+	struct list_head intr_idle;
+
+	wait_queue_head_t read_wq;
+	wait_queue_head_t write_wq;
+	wait_queue_head_t intr_wq;
+	struct usb_request *rx_req[RX_REQ_MAX];
+	int rx_done;
+
+	/* for processing MTP_SEND_FILE, MTP_RECEIVE_FILE and
+	 * MTP_SEND_FILE_WITH_HEADER ioctls on a work queue
+	 */
+	struct workqueue_struct *wq;
+	struct work_struct send_file_work;
+	struct work_struct receive_file_work;
+	struct file *xfer_file;
+	loff_t xfer_file_offset;
+	int64_t xfer_file_length;
+	unsigned xfer_send_header;
+	uint16_t xfer_command;
+	uint32_t xfer_transaction_id;
+	int xfer_result;
+};
+
+static struct usb_interface_descriptor mtp_interface_desc = {
+	.bLength                = USB_DT_INTERFACE_SIZE,
+	.bDescriptorType        = USB_DT_INTERFACE,
+	.bInterfaceNumber       = 0,
+	.bNumEndpoints          = 3,
+	.bInterfaceClass        = USB_CLASS_VENDOR_SPEC,
+	.bInterfaceSubClass     = USB_SUBCLASS_VENDOR_SPEC,
+	.bInterfaceProtocol     = 0,
+};
+
+static struct usb_interface_descriptor ptp_interface_desc = {
+	.bLength                = USB_DT_INTERFACE_SIZE,
+	.bDescriptorType        = USB_DT_INTERFACE,
+	.bInterfaceNumber       = 0,
+	.bNumEndpoints          = 3,
+	.bInterfaceClass        = USB_CLASS_STILL_IMAGE,
+	.bInterfaceSubClass     = 1,
+	.bInterfaceProtocol     = 1,
+};
+
+static struct usb_endpoint_descriptor mtp_ss_in_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_IN,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize         = __constant_cpu_to_le16(1024),
+};
+
+static struct usb_ss_ep_comp_descriptor mtp_ss_in_comp_desc = {
+	.bLength                = sizeof(mtp_ss_in_comp_desc),
+	.bDescriptorType        = USB_DT_SS_ENDPOINT_COMP,
+	/* .bMaxBurst           = DYNAMIC, */
+};
+
+static struct usb_endpoint_descriptor mtp_ss_out_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_OUT,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize         = __constant_cpu_to_le16(1024),
+};
+
+static struct usb_ss_ep_comp_descriptor mtp_ss_out_comp_desc = {
+	.bLength                = sizeof(mtp_ss_out_comp_desc),
+	.bDescriptorType        = USB_DT_SS_ENDPOINT_COMP,
+	/* .bMaxBurst           = DYNAMIC, */
+};
+
+static struct usb_endpoint_descriptor mtp_highspeed_in_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_IN,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize         = __constant_cpu_to_le16(512),
+};
+
+static struct usb_endpoint_descriptor mtp_highspeed_out_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_OUT,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize         = __constant_cpu_to_le16(512),
+};
+
+static struct usb_endpoint_descriptor mtp_fullspeed_in_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_IN,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_endpoint_descriptor mtp_fullspeed_out_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_OUT,
+	.bmAttributes           = USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_endpoint_descriptor mtp_intr_desc = {
+	.bLength                = USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType        = USB_DT_ENDPOINT,
+	.bEndpointAddress       = USB_DIR_IN,
+	.bmAttributes           = USB_ENDPOINT_XFER_INT,
+	.wMaxPacketSize         = __constant_cpu_to_le16(INTR_BUFFER_SIZE),
+	.bInterval              = 6,
+};
+
+static struct usb_ss_ep_comp_descriptor mtp_intr_ss_comp_desc = {
+	.bLength                = sizeof(mtp_intr_ss_comp_desc),
+	.bDescriptorType        = USB_DT_SS_ENDPOINT_COMP,
+	.wBytesPerInterval      = cpu_to_le16(INTR_BUFFER_SIZE),
+};
+
+static struct usb_descriptor_header *fs_mtp_descs[] = {
+	(struct usb_descriptor_header *) &mtp_interface_desc,
+	(struct usb_descriptor_header *) &mtp_fullspeed_in_desc,
+	(struct usb_descriptor_header *) &mtp_fullspeed_out_desc,
+	(struct usb_descriptor_header *) &mtp_intr_desc,
+	NULL,
+};
+
+static struct usb_descriptor_header *hs_mtp_descs[] = {
+	(struct usb_descriptor_header *) &mtp_interface_desc,
+	(struct usb_descriptor_header *) &mtp_highspeed_in_desc,
+	(struct usb_descriptor_header *) &mtp_highspeed_out_desc,
+	(struct usb_descriptor_header *) &mtp_intr_desc,
+	NULL,
+};
+
+static struct usb_descriptor_header *ss_mtp_descs[] = {
+	(struct usb_descriptor_header *) &mtp_interface_desc,
+	(struct usb_descriptor_header *) &mtp_ss_in_desc,
+	(struct usb_descriptor_header *) &mtp_ss_in_comp_desc,
+	(struct usb_descriptor_header *) &mtp_ss_out_desc,
+	(struct usb_descriptor_header *) &mtp_ss_out_comp_desc,
+	(struct usb_descriptor_header *) &mtp_intr_desc,
+	(struct usb_descriptor_header *) &mtp_intr_ss_comp_desc,
+	NULL,
+};
+
+static struct usb_descriptor_header *fs_ptp_descs[] = {
+	(struct usb_descriptor_header *) &ptp_interface_desc,
+	(struct usb_descriptor_header *) &mtp_fullspeed_in_desc,
+	(struct usb_descriptor_header *) &mtp_fullspeed_out_desc,
+	(struct usb_descriptor_header *) &mtp_intr_desc,
+	NULL,
+};
+
+static struct usb_descriptor_header *hs_ptp_descs[] = {
+	(struct usb_descriptor_header *) &ptp_interface_desc,
+	(struct usb_descriptor_header *) &mtp_highspeed_in_desc,
+	(struct usb_descriptor_header *) &mtp_highspeed_out_desc,
+	(struct usb_descriptor_header *) &mtp_intr_desc,
+	NULL,
+};
+
+static struct usb_descriptor_header *ss_ptp_descs[] = {
+	(struct usb_descriptor_header *) &ptp_interface_desc,
+	(struct usb_descriptor_header *) &mtp_ss_in_desc,
+	(struct usb_descriptor_header *) &mtp_ss_in_comp_desc,
+	(struct usb_descriptor_header *) &mtp_ss_out_desc,
+	(struct usb_descriptor_header *) &mtp_ss_out_comp_desc,
+	(struct usb_descriptor_header *) &mtp_intr_desc,
+	(struct usb_descriptor_header *) &mtp_intr_ss_comp_desc,
+	NULL,
+};
+
+static struct usb_string mtp_string_defs[] = {
+	/* Naming interface "MTP" so libmtp will recognize us */
+	[INTERFACE_STRING_INDEX].s	= "MTP",
+	{  },	/* end of list */
+};
+
+static struct usb_gadget_strings mtp_string_table = {
+	.language		= 0x0409,	/* en-US */
+	.strings		= mtp_string_defs,
+};
+
+static struct usb_gadget_strings *mtp_strings[] = {
+	&mtp_string_table,
+	NULL,
+};
+
+/* Microsoft MTP OS String */
+static u8 mtp_os_string[] = {
+	18, /* sizeof(mtp_os_string) */
+	USB_DT_STRING,
+	/* Signature field: "MSFT100" */
+	'M', 0, 'S', 0, 'F', 0, 'T', 0, '1', 0, '0', 0, '0', 0,
+	/* vendor code */
+	1,
+	/* padding */
+	0
+};
+
+/* Microsoft Extended Configuration Descriptor Header Section */
+struct mtp_ext_config_desc_header {
+	__le32	dwLength;
+	__u16	bcdVersion;
+	__le16	wIndex;
+	__u8	bCount;
+	__u8	reserved[7];
+};
+
+/* Microsoft Extended Configuration Descriptor Function Section */
+struct mtp_ext_config_desc_function {
+	__u8	bFirstInterfaceNumber;
+	__u8	bInterfaceCount;
+	__u8	compatibleID[8];
+	__u8	subCompatibleID[8];
+	__u8	reserved[6];
+};
+
+/* MTP Extended Configuration Descriptor */
+struct {
+	struct mtp_ext_config_desc_header	header;
+	struct mtp_ext_config_desc_function    function;
+} mtp_ext_config_desc = {
+	.header = {
+		.dwLength = __constant_cpu_to_le32(sizeof(mtp_ext_config_desc)),
+		.bcdVersion = __constant_cpu_to_le16(0x0100),
+		.wIndex = __constant_cpu_to_le16(4),
+		.bCount = 1,
+	},
+	.function = {
+		.bFirstInterfaceNumber = 0,
+		.bInterfaceCount = 1,
+		.compatibleID = { 'M', 'T', 'P' },
+	},
+};
+
+struct mtp_device_status {
+	__le16	wLength;
+	__le16	wCode;
+};
+
+struct mtp_data_header {
+	/* length of packet, including this header */
+	__le32	length;
+	/* container type (2 for data packet) */
+	__le16	type;
+	/* MTP command code */
+	__le16	command;
+	/* MTP transaction ID */
+	__le32	transaction_id;
+};
+
+struct mtp_instance {
+	struct usb_function_instance func_inst;
+	const char *name;
+	struct mtp_dev *dev;
+	char mtp_ext_compat_id[16];
+	struct usb_os_desc mtp_os_desc;
+};
+
+/* temporary variable used between mtp_open() and mtp_gadget_bind() */
+static struct mtp_dev *_mtp_dev;
+
+static inline struct mtp_dev *func_to_mtp(struct usb_function *f)
+{
+	return container_of(f, struct mtp_dev, function);
+}
+
+static struct usb_request *mtp_request_new(struct usb_ep *ep, int buffer_size)
+{
+	struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL);
+
+	if (!req)
+		return NULL;
+
+	/* now allocate buffers for the requests */
+	req->buf = kmalloc(buffer_size, GFP_KERNEL);
+	if (!req->buf) {
+		usb_ep_free_request(ep, req);
+		return NULL;
+	}
+
+	return req;
+}
+
+static void mtp_request_free(struct usb_request *req, struct usb_ep *ep)
+{
+	if (req) {
+		kfree(req->buf);
+		usb_ep_free_request(ep, req);
+	}
+}
+
+static inline int mtp_lock(atomic_t *excl)
+{
+	if (atomic_inc_return(excl) == 1) {
+		return 0;
+	} else {
+		atomic_dec(excl);
+		return -1;
+	}
+}
+
+static inline void mtp_unlock(atomic_t *excl)
+{
+	atomic_dec(excl);
+}
+
+/* add a request to the tail of a list */
+static void mtp_req_put(struct mtp_dev *dev, struct list_head *head,
+		struct usb_request *req)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	list_add_tail(&req->list, head);
+	spin_unlock_irqrestore(&dev->lock, flags);
+}
+
+/* remove a request from the head of a list */
+static struct usb_request
+*mtp_req_get(struct mtp_dev *dev, struct list_head *head)
+{
+	unsigned long flags;
+	struct usb_request *req;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	if (list_empty(head)) {
+		req = 0;
+	} else {
+		req = list_first_entry(head, struct usb_request, list);
+		list_del(&req->list);
+	}
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return req;
+}
+
+static void mtp_complete_in(struct usb_ep *ep, struct usb_request *req)
+{
+	struct mtp_dev *dev = _mtp_dev;
+
+	if (req->status != 0)
+		dev->state = STATE_ERROR;
+
+	mtp_req_put(dev, &dev->tx_idle, req);
+
+	wake_up(&dev->write_wq);
+}
+
+static void mtp_complete_out(struct usb_ep *ep, struct usb_request *req)
+{
+	struct mtp_dev *dev = _mtp_dev;
+
+	dev->rx_done = 1;
+	if (req->status != 0)
+		dev->state = STATE_ERROR;
+
+	wake_up(&dev->read_wq);
+}
+
+static void mtp_complete_intr(struct usb_ep *ep, struct usb_request *req)
+{
+	struct mtp_dev *dev = _mtp_dev;
+
+	if (req->status != 0)
+		dev->state = STATE_ERROR;
+
+	mtp_req_put(dev, &dev->intr_idle, req);
+
+	wake_up(&dev->intr_wq);
+}
+
+static int mtp_create_bulk_endpoints(struct mtp_dev *dev,
+				struct usb_endpoint_descriptor *in_desc,
+				struct usb_endpoint_descriptor *out_desc,
+				struct usb_endpoint_descriptor *intr_desc)
+{
+	struct usb_composite_dev *cdev = dev->cdev;
+	struct usb_request *req;
+	struct usb_ep *ep;
+	int i;
+
+	DBG(cdev, "create_bulk_endpoints dev: %p\n", dev);
+
+	ep = usb_ep_autoconfig(cdev->gadget, in_desc);
+	if (!ep) {
+		DBG(cdev, "usb_ep_autoconfig for ep_in failed\n");
+		return -ENODEV;
+	}
+	DBG(cdev, "usb_ep_autoconfig for ep_in got %s\n", ep->name);
+	ep->driver_data = dev;		/* claim the endpoint */
+	dev->ep_in = ep;
+
+	ep = usb_ep_autoconfig(cdev->gadget, out_desc);
+	if (!ep) {
+		DBG(cdev, "usb_ep_autoconfig for ep_out failed\n");
+		return -ENODEV;
+	}
+	DBG(cdev, "usb_ep_autoconfig for mtp ep_out got %s\n", ep->name);
+	ep->driver_data = dev;		/* claim the endpoint */
+	dev->ep_out = ep;
+
+	ep = usb_ep_autoconfig(cdev->gadget, intr_desc);
+	if (!ep) {
+		DBG(cdev, "usb_ep_autoconfig for ep_intr failed\n");
+		return -ENODEV;
+	}
+	DBG(cdev, "usb_ep_autoconfig for mtp ep_intr got %s\n", ep->name);
+	ep->driver_data = dev;		/* claim the endpoint */
+	dev->ep_intr = ep;
+
+	/* now allocate requests for our endpoints */
+	for (i = 0; i < TX_REQ_MAX; i++) {
+		req = mtp_request_new(dev->ep_in, MTP_BULK_BUFFER_SIZE);
+		if (!req)
+			goto fail;
+		req->complete = mtp_complete_in;
+		mtp_req_put(dev, &dev->tx_idle, req);
+	}
+	for (i = 0; i < RX_REQ_MAX; i++) {
+		req = mtp_request_new(dev->ep_out, MTP_BULK_BUFFER_SIZE);
+		if (!req)
+			goto fail;
+		req->complete = mtp_complete_out;
+		dev->rx_req[i] = req;
+	}
+	for (i = 0; i < INTR_REQ_MAX; i++) {
+		req = mtp_request_new(dev->ep_intr, INTR_BUFFER_SIZE);
+		if (!req)
+			goto fail;
+		req->complete = mtp_complete_intr;
+		mtp_req_put(dev, &dev->intr_idle, req);
+	}
+
+	return 0;
+
+fail:
+	pr_err("mtp_bind() could not allocate requests\n");
+	return -1;
+}
+
+static ssize_t mtp_read(struct file *fp, char __user *buf,
+	size_t count, loff_t *pos)
+{
+	struct mtp_dev *dev = fp->private_data;
+	struct usb_composite_dev *cdev = dev->cdev;
+	struct usb_request *req;
+	ssize_t r = count;
+	unsigned xfer;
+	int ret = 0;
+	size_t len = 0;
+
+	DBG(cdev, "mtp_read(%zu)\n", count);
+
+	/* we will block until we're online */
+	DBG(cdev, "mtp_read: waiting for online state\n");
+	ret = wait_event_interruptible(dev->read_wq,
+		dev->state != STATE_OFFLINE);
+	if (ret < 0) {
+		r = ret;
+		goto done;
+	}
+	spin_lock_irq(&dev->lock);
+	if (dev->ep_out->desc) {
+		len = usb_ep_align_maybe(cdev->gadget, dev->ep_out, count);
+		if (len > MTP_BULK_BUFFER_SIZE) {
+			spin_unlock_irq(&dev->lock);
+			return -EINVAL;
+		}
+	}
+
+	if (dev->state == STATE_CANCELED) {
+		/* report cancelation to userspace */
+		dev->state = STATE_READY;
+		spin_unlock_irq(&dev->lock);
+		return -ECANCELED;
+	}
+	dev->state = STATE_BUSY;
+	spin_unlock_irq(&dev->lock);
+
+requeue_req:
+	/* queue a request */
+	req = dev->rx_req[0];
+	req->length = len;
+	dev->rx_done = 0;
+	ret = usb_ep_queue(dev->ep_out, req, GFP_KERNEL);
+	if (ret < 0) {
+		r = -EIO;
+		goto done;
+	} else {
+		DBG(cdev, "rx %p queue\n", req);
+	}
+
+	/* wait for a request to complete */
+	ret = wait_event_interruptible(dev->read_wq, dev->rx_done);
+	if (ret < 0) {
+		r = ret;
+		usb_ep_dequeue(dev->ep_out, req);
+		goto done;
+	}
+	if (dev->state == STATE_BUSY) {
+		/* If we got a 0-len packet, throw it back and try again. */
+		if (req->actual == 0)
+			goto requeue_req;
+
+		DBG(cdev, "rx %p %d\n", req, req->actual);
+		xfer = (req->actual < count) ? req->actual : count;
+		r = xfer;
+		if (copy_to_user(buf, req->buf, xfer))
+			r = -EFAULT;
+	} else
+		r = -EIO;
+
+done:
+	spin_lock_irq(&dev->lock);
+	if (dev->state == STATE_CANCELED)
+		r = -ECANCELED;
+	else if (dev->state != STATE_OFFLINE)
+		dev->state = STATE_READY;
+	spin_unlock_irq(&dev->lock);
+
+	DBG(cdev, "mtp_read returning %zd\n", r);
+	return r;
+}
+
+static ssize_t mtp_write(struct file *fp, const char __user *buf,
+	size_t count, loff_t *pos)
+{
+	struct mtp_dev *dev = fp->private_data;
+	struct usb_composite_dev *cdev = dev->cdev;
+	struct usb_request *req = 0;
+	ssize_t r = count;
+	unsigned xfer;
+	int sendZLP = 0;
+	int ret;
+
+	DBG(cdev, "mtp_write(%zu)\n", count);
+
+	spin_lock_irq(&dev->lock);
+	if (dev->state == STATE_CANCELED) {
+		/* report cancelation to userspace */
+		dev->state = STATE_READY;
+		spin_unlock_irq(&dev->lock);
+		return -ECANCELED;
+	}
+	if (dev->state == STATE_OFFLINE) {
+		spin_unlock_irq(&dev->lock);
+		return -ENODEV;
+	}
+	dev->state = STATE_BUSY;
+	spin_unlock_irq(&dev->lock);
+
+	/* we need to send a zero length packet to signal the end of transfer
+	 * if the transfer size is aligned to a packet boundary.
+	 */
+	if ((count & (dev->ep_in->maxpacket - 1)) == 0)
+		sendZLP = 1;
+
+	while (count > 0 || sendZLP) {
+		/* so we exit after sending ZLP */
+		if (count == 0)
+			sendZLP = 0;
+
+		if (dev->state != STATE_BUSY) {
+			DBG(cdev, "mtp_write dev->error\n");
+			r = -EIO;
+			break;
+		}
+
+		/* get an idle tx request to use */
+		req = 0;
+		ret = wait_event_interruptible(dev->write_wq,
+			((req = mtp_req_get(dev, &dev->tx_idle))
+				|| dev->state != STATE_BUSY));
+		if (!req) {
+			r = ret;
+			break;
+		}
+
+		if (count > MTP_BULK_BUFFER_SIZE)
+			xfer = MTP_BULK_BUFFER_SIZE;
+		else
+			xfer = count;
+		if (xfer && copy_from_user(req->buf, buf, xfer)) {
+			r = -EFAULT;
+			break;
+		}
+
+		req->length = xfer;
+		ret = usb_ep_queue(dev->ep_in, req, GFP_KERNEL);
+		if (ret < 0) {
+			DBG(cdev, "mtp_write: xfer error %d\n", ret);
+			r = -EIO;
+			break;
+		}
+
+		buf += xfer;
+		count -= xfer;
+
+		/* zero this so we don't try to free it on error exit */
+		req = 0;
+	}
+
+	if (req)
+		mtp_req_put(dev, &dev->tx_idle, req);
+
+	spin_lock_irq(&dev->lock);
+	if (dev->state == STATE_CANCELED)
+		r = -ECANCELED;
+	else if (dev->state != STATE_OFFLINE)
+		dev->state = STATE_READY;
+	spin_unlock_irq(&dev->lock);
+
+	DBG(cdev, "mtp_write returning %zd\n", r);
+	return r;
+}
+
+/* read from a local file and write to USB */
+static void send_file_work(struct work_struct *data)
+{
+	struct mtp_dev *dev = container_of(data, struct mtp_dev,
+						send_file_work);
+	struct usb_composite_dev *cdev = dev->cdev;
+	struct usb_request *req = 0;
+	struct mtp_data_header *header;
+	struct file *filp;
+	loff_t offset;
+	int64_t count;
+	int xfer, ret, hdr_size;
+	int r = 0;
+	int sendZLP = 0;
+
+	/* read our parameters */
+	smp_rmb();
+	filp = dev->xfer_file;
+	offset = dev->xfer_file_offset;
+	count = dev->xfer_file_length;
+
+	DBG(cdev, "send_file_work(%lld %lld)\n", offset, count);
+
+	if (dev->xfer_send_header) {
+		hdr_size = sizeof(struct mtp_data_header);
+		count += hdr_size;
+	} else {
+		hdr_size = 0;
+	}
+
+	/* we need to send a zero length packet to signal the end of transfer
+	 * if the transfer size is aligned to a packet boundary.
+	 */
+	if ((count & (dev->ep_in->maxpacket - 1)) == 0)
+		sendZLP = 1;
+
+	while (count > 0 || sendZLP) {
+		/* so we exit after sending ZLP */
+		if (count == 0)
+			sendZLP = 0;
+
+		/* get an idle tx request to use */
+		req = 0;
+		ret = wait_event_interruptible(dev->write_wq,
+			(req = mtp_req_get(dev, &dev->tx_idle))
+			|| dev->state != STATE_BUSY);
+		if (dev->state == STATE_CANCELED) {
+			r = -ECANCELED;
+			break;
+		}
+		if (!req) {
+			r = ret;
+			break;
+		}
+
+		if (count > MTP_BULK_BUFFER_SIZE)
+			xfer = MTP_BULK_BUFFER_SIZE;
+		else
+			xfer = count;
+
+		if (hdr_size) {
+			/* prepend MTP data header */
+			header = (struct mtp_data_header *)req->buf;
+			/*
+                         * set file size with header according to
+                         * MTP Specification v1.0
+                         */
+			header->length = (count > MTP_MAX_FILE_SIZE) ?
+				MTP_MAX_FILE_SIZE : __cpu_to_le32(count);
+			header->type = __cpu_to_le16(2); /* data packet */
+			header->command = __cpu_to_le16(dev->xfer_command);
+			header->transaction_id =
+					__cpu_to_le32(dev->xfer_transaction_id);
+		}
+
+		ret = vfs_read(filp, req->buf + hdr_size, xfer - hdr_size,
+								&offset);
+		if (ret < 0) {
+			r = ret;
+			break;
+		}
+		xfer = ret + hdr_size;
+		hdr_size = 0;
+
+		req->length = xfer;
+		ret = usb_ep_queue(dev->ep_in, req, GFP_KERNEL);
+		if (ret < 0) {
+			DBG(cdev, "send_file_work: xfer error %d\n", ret);
+			dev->state = STATE_ERROR;
+			r = -EIO;
+			break;
+		}
+
+		count -= xfer;
+
+		/* zero this so we don't try to free it on error exit */
+		req = 0;
+	}
+
+	if (req)
+		mtp_req_put(dev, &dev->tx_idle, req);
+
+	DBG(cdev, "send_file_work returning %d\n", r);
+	/* write the result */
+	dev->xfer_result = r;
+	smp_wmb();
+}
+
+/* read from USB and write to a local file */
+static void receive_file_work(struct work_struct *data)
+{
+	struct mtp_dev *dev = container_of(data, struct mtp_dev,
+						receive_file_work);
+	struct usb_composite_dev *cdev = dev->cdev;
+	struct usb_request *read_req = NULL, *write_req = NULL;
+	struct file *filp;
+	loff_t offset;
+	int64_t count;
+	int ret, cur_buf = 0;
+	int r = 0;
+
+	/* read our parameters */
+	smp_rmb();
+	filp = dev->xfer_file;
+	offset = dev->xfer_file_offset;
+	count = dev->xfer_file_length;
+
+	DBG(cdev, "receive_file_work(%lld)\n", count);
+
+	while (count > 0 || write_req) {
+		if (count > 0) {
+			/* queue a request */
+			read_req = dev->rx_req[cur_buf];
+			cur_buf = (cur_buf + 1) % RX_REQ_MAX;
+
+			read_req->length = (count > MTP_BULK_BUFFER_SIZE
+					? MTP_BULK_BUFFER_SIZE : count);
+			dev->rx_done = 0;
+			ret = usb_ep_queue(dev->ep_out, read_req, GFP_KERNEL);
+			if (ret < 0) {
+				r = -EIO;
+				dev->state = STATE_ERROR;
+				break;
+			}
+		}
+
+		if (write_req) {
+			DBG(cdev, "rx %p %d\n", write_req, write_req->actual);
+			ret = vfs_write(filp, write_req->buf, write_req->actual,
+				&offset);
+			DBG(cdev, "vfs_write %d\n", ret);
+			if (ret != write_req->actual) {
+				r = -EIO;
+				dev->state = STATE_ERROR;
+				break;
+			}
+			write_req = NULL;
+		}
+
+		if (read_req) {
+			/* wait for our last read to complete */
+			ret = wait_event_interruptible(dev->read_wq,
+				dev->rx_done || dev->state != STATE_BUSY);
+			if (dev->state == STATE_CANCELED) {
+				r = -ECANCELED;
+				if (!dev->rx_done)
+					usb_ep_dequeue(dev->ep_out, read_req);
+				break;
+			}
+			if (read_req->status) {
+				r = read_req->status;
+				break;
+			}
+			/* if xfer_file_length is 0xFFFFFFFF, then we read until
+			 * we get a zero length packet
+			 */
+			if (count != 0xFFFFFFFF)
+				count -= read_req->actual;
+			if (read_req->actual < read_req->length) {
+				/*
+				 * short packet is used to signal EOF for
+				 * sizes > 4 gig
+				 */
+				DBG(cdev, "got short packet\n");
+				count = 0;
+			}
+
+			write_req = read_req;
+			read_req = NULL;
+		}
+	}
+
+	DBG(cdev, "receive_file_work returning %d\n", r);
+	/* write the result */
+	dev->xfer_result = r;
+	smp_wmb();
+}
+
+static int mtp_send_event(struct mtp_dev *dev, struct mtp_event *event)
+{
+	struct usb_request *req = NULL;
+	int ret;
+	int length = event->length;
+
+	DBG(dev->cdev, "mtp_send_event(%zu)\n", event->length);
+
+	if (length < 0 || length > INTR_BUFFER_SIZE)
+		return -EINVAL;
+	if (dev->state == STATE_OFFLINE)
+		return -ENODEV;
+
+	ret = wait_event_interruptible_timeout(dev->intr_wq,
+			(req = mtp_req_get(dev, &dev->intr_idle)),
+			msecs_to_jiffies(1000));
+	if (!req)
+		return -ETIME;
+
+	if (copy_from_user(req->buf, (void __user *)event->data, length)) {
+		mtp_req_put(dev, &dev->intr_idle, req);
+		return -EFAULT;
+	}
+	req->length = length;
+	ret = usb_ep_queue(dev->ep_intr, req, GFP_KERNEL);
+	if (ret)
+		mtp_req_put(dev, &dev->intr_idle, req);
+
+	return ret;
+}
+
+static long mtp_ioctl(struct file *fp, unsigned code, unsigned long value)
+{
+	struct mtp_dev *dev = fp->private_data;
+	struct file *filp = NULL;
+	int ret = -EINVAL;
+
+	if (mtp_lock(&dev->ioctl_excl))
+		return -EBUSY;
+
+	switch (code) {
+	case MTP_SEND_FILE:
+	case MTP_RECEIVE_FILE:
+	case MTP_SEND_FILE_WITH_HEADER:
+	{
+		struct mtp_file_range	mfr;
+		struct work_struct *work;
+
+		spin_lock_irq(&dev->lock);
+		if (dev->state == STATE_CANCELED) {
+			/* report cancelation to userspace */
+			dev->state = STATE_READY;
+			spin_unlock_irq(&dev->lock);
+			ret = -ECANCELED;
+			goto out;
+		}
+		if (dev->state == STATE_OFFLINE) {
+			spin_unlock_irq(&dev->lock);
+			ret = -ENODEV;
+			goto out;
+		}
+		dev->state = STATE_BUSY;
+		spin_unlock_irq(&dev->lock);
+
+		if (copy_from_user(&mfr, (void __user *)value, sizeof(mfr))) {
+			ret = -EFAULT;
+			goto fail;
+		}
+		/* hold a reference to the file while we are working with it */
+		filp = fget(mfr.fd);
+		if (!filp) {
+			ret = -EBADF;
+			goto fail;
+		}
+
+		/* write the parameters */
+		dev->xfer_file = filp;
+		dev->xfer_file_offset = mfr.offset;
+		dev->xfer_file_length = mfr.length;
+		smp_wmb();
+
+		if (code == MTP_SEND_FILE_WITH_HEADER) {
+			work = &dev->send_file_work;
+			dev->xfer_send_header = 1;
+			dev->xfer_command = mfr.command;
+			dev->xfer_transaction_id = mfr.transaction_id;
+		} else if (code == MTP_SEND_FILE) {
+			work = &dev->send_file_work;
+			dev->xfer_send_header = 0;
+		} else {
+			work = &dev->receive_file_work;
+		}
+
+		/* We do the file transfer on a work queue so it will run
+		 * in kernel context, which is necessary for vfs_read and
+		 * vfs_write to use our buffers in the kernel address space.
+		 */
+		queue_work(dev->wq, work);
+		/* wait for operation to complete */
+		flush_workqueue(dev->wq);
+		fput(filp);
+
+		/* read the result */
+		smp_rmb();
+		ret = dev->xfer_result;
+		break;
+	}
+	case MTP_SEND_EVENT:
+	{
+		struct mtp_event	event;
+		/* return here so we don't change dev->state below,
+		 * which would interfere with bulk transfer state.
+		 */
+		if (copy_from_user(&event, (void __user *)value, sizeof(event)))
+			ret = -EFAULT;
+		else
+			ret = mtp_send_event(dev, &event);
+		goto out;
+	}
+	}
+
+fail:
+	spin_lock_irq(&dev->lock);
+	if (dev->state == STATE_CANCELED)
+		ret = -ECANCELED;
+	else if (dev->state != STATE_OFFLINE)
+		dev->state = STATE_READY;
+	spin_unlock_irq(&dev->lock);
+out:
+	mtp_unlock(&dev->ioctl_excl);
+	DBG(dev->cdev, "ioctl returning %d\n", ret);
+	return ret;
+}
+
+static int mtp_open(struct inode *ip, struct file *fp)
+{
+	printk(KERN_INFO "mtp_open\n");
+	if (mtp_lock(&_mtp_dev->open_excl))
+		return -EBUSY;
+
+	/* clear any error condition */
+	if (_mtp_dev->state != STATE_OFFLINE)
+		_mtp_dev->state = STATE_READY;
+
+	fp->private_data = _mtp_dev;
+	return 0;
+}
+
+static int mtp_release(struct inode *ip, struct file *fp)
+{
+	printk(KERN_INFO "mtp_release\n");
+
+	mtp_unlock(&_mtp_dev->open_excl);
+	return 0;
+}
+
+/* file operations for /dev/mtp_usb */
+static const struct file_operations mtp_fops = {
+	.owner = THIS_MODULE,
+	.read = mtp_read,
+	.write = mtp_write,
+	.unlocked_ioctl = mtp_ioctl,
+	.open = mtp_open,
+	.release = mtp_release,
+};
+
+static struct miscdevice mtp_device = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = mtp_shortname,
+	.fops = &mtp_fops,
+};
+
+static int mtp_ctrlrequest(struct usb_composite_dev *cdev,
+				const struct usb_ctrlrequest *ctrl)
+{
+	struct mtp_dev *dev = _mtp_dev;
+	int	value = -EOPNOTSUPP;
+	u16	w_index = le16_to_cpu(ctrl->wIndex);
+	u16	w_value = le16_to_cpu(ctrl->wValue);
+	u16	w_length = le16_to_cpu(ctrl->wLength);
+	unsigned long	flags;
+
+	VDBG(cdev, "mtp_ctrlrequest "
+			"%02x.%02x v%04x i%04x l%u\n",
+			ctrl->bRequestType, ctrl->bRequest,
+			w_value, w_index, w_length);
+
+	/* Handle MTP OS string */
+	if (ctrl->bRequestType ==
+			(USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE)
+			&& ctrl->bRequest == USB_REQ_GET_DESCRIPTOR
+			&& (w_value >> 8) == USB_DT_STRING
+			&& (w_value & 0xFF) == MTP_OS_STRING_ID) {
+		value = (w_length < sizeof(mtp_os_string)
+				? w_length : sizeof(mtp_os_string));
+		memcpy(cdev->req->buf, mtp_os_string, value);
+	} else if ((ctrl->bRequestType & USB_TYPE_MASK) == USB_TYPE_VENDOR) {
+		/* Handle MTP OS descriptor */
+		DBG(cdev, "vendor request: %d index: %d value: %d length: %d\n",
+			ctrl->bRequest, w_index, w_value, w_length);
+
+		if (ctrl->bRequest == 1
+				&& (ctrl->bRequestType & USB_DIR_IN)
+				&& (w_index == 4 || w_index == 5)) {
+			value = (w_length < sizeof(mtp_ext_config_desc) ?
+					w_length : sizeof(mtp_ext_config_desc));
+			memcpy(cdev->req->buf, &mtp_ext_config_desc, value);
+		}
+	} else if ((ctrl->bRequestType & USB_TYPE_MASK) == USB_TYPE_CLASS) {
+		DBG(cdev, "class request: %d index: %d value: %d length: %d\n",
+			ctrl->bRequest, w_index, w_value, w_length);
+
+		if (ctrl->bRequest == MTP_REQ_CANCEL && w_index == 0
+				&& w_value == 0) {
+			DBG(cdev, "MTP_REQ_CANCEL\n");
+
+			spin_lock_irqsave(&dev->lock, flags);
+			if (dev->state == STATE_BUSY) {
+				dev->state = STATE_CANCELED;
+				wake_up(&dev->read_wq);
+				wake_up(&dev->write_wq);
+			}
+			spin_unlock_irqrestore(&dev->lock, flags);
+
+			/* We need to queue a request to read the remaining
+			 *  bytes, but we don't actually need to look at
+			 * the contents.
+			 */
+			value = w_length;
+		} else if (ctrl->bRequest == MTP_REQ_GET_DEVICE_STATUS
+				&& w_index == 0 && w_value == 0) {
+			struct mtp_device_status *status = cdev->req->buf;
+
+			status->wLength =
+				__constant_cpu_to_le16(sizeof(*status));
+
+			DBG(cdev, "MTP_REQ_GET_DEVICE_STATUS\n");
+			spin_lock_irqsave(&dev->lock, flags);
+			/* device status is "busy" until we report
+			 * the cancelation to userspace
+			 */
+			if (dev->state == STATE_CANCELED)
+				status->wCode =
+					__cpu_to_le16(MTP_RESPONSE_DEVICE_BUSY);
+			else
+				status->wCode =
+					__cpu_to_le16(MTP_RESPONSE_OK);
+			spin_unlock_irqrestore(&dev->lock, flags);
+			value = sizeof(*status);
+		}
+	}
+
+	/* respond with data transfer or status phase? */
+	if (value >= 0) {
+		int rc;
+
+		cdev->req->zero = value < w_length;
+		cdev->req->length = value;
+		rc = usb_ep_queue(cdev->gadget->ep0, cdev->req, GFP_ATOMIC);
+		if (rc < 0)
+			ERROR(cdev, "%s: response queue error\n", __func__);
+	}
+	return value;
+}
+
+static int
+mtp_function_bind(struct usb_configuration *c, struct usb_function *f)
+{
+	struct usb_composite_dev *cdev = c->cdev;
+	struct mtp_dev	*dev = func_to_mtp(f);
+	int			id;
+	int			ret;
+	struct mtp_instance *fi_mtp;
+
+	dev->cdev = cdev;
+	DBG(cdev, "mtp_function_bind dev: %p\n", dev);
+
+	/* allocate interface ID(s) */
+	id = usb_interface_id(c, f);
+	if (id < 0)
+		return id;
+	mtp_interface_desc.bInterfaceNumber = id;
+
+	if (mtp_string_defs[INTERFACE_STRING_INDEX].id == 0) {
+		ret = usb_string_id(c->cdev);
+		if (ret < 0)
+			return ret;
+		mtp_string_defs[INTERFACE_STRING_INDEX].id = ret;
+		mtp_interface_desc.iInterface = ret;
+	}
+
+	fi_mtp = container_of(f->fi, struct mtp_instance, func_inst);
+
+	if (cdev->use_os_string) {
+		f->os_desc_table = kzalloc(sizeof(*f->os_desc_table),
+					GFP_KERNEL);
+		if (!f->os_desc_table)
+			return -ENOMEM;
+		f->os_desc_n = 1;
+		f->os_desc_table[0].os_desc = &fi_mtp->mtp_os_desc;
+	}
+
+	/* allocate endpoints */
+	ret = mtp_create_bulk_endpoints(dev, &mtp_fullspeed_in_desc,
+			&mtp_fullspeed_out_desc, &mtp_intr_desc);
+	if (ret)
+		return ret;
+
+	/* support high speed hardware */
+	if (gadget_is_dualspeed(c->cdev->gadget)) {
+		mtp_highspeed_in_desc.bEndpointAddress =
+			mtp_fullspeed_in_desc.bEndpointAddress;
+		mtp_highspeed_out_desc.bEndpointAddress =
+			mtp_fullspeed_out_desc.bEndpointAddress;
+	}
+	/* support super speed hardware */
+	if (gadget_is_superspeed(c->cdev->gadget)) {
+		unsigned max_burst;
+
+		/* Calculate bMaxBurst, we know packet size is 1024 */
+		max_burst = min_t(unsigned, MTP_BULK_BUFFER_SIZE / 1024, 15);
+		mtp_ss_in_desc.bEndpointAddress =
+			mtp_fullspeed_in_desc.bEndpointAddress;
+		mtp_ss_in_comp_desc.bMaxBurst = max_burst;
+		mtp_ss_out_desc.bEndpointAddress =
+			mtp_fullspeed_out_desc.bEndpointAddress;
+		mtp_ss_out_comp_desc.bMaxBurst = max_burst;
+	}
+
+	DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n",
+		gadget_is_superspeed(c->cdev->gadget) ? "super" :
+		(gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full"),
+		f->name, dev->ep_in->name, dev->ep_out->name);
+	return 0;
+}
+
+static void
+mtp_function_unbind(struct usb_configuration *c, struct usb_function *f)
+{
+	struct mtp_dev	*dev = func_to_mtp(f);
+	struct usb_request *req;
+	int i;
+
+	mtp_string_defs[INTERFACE_STRING_INDEX].id = 0;
+	while ((req = mtp_req_get(dev, &dev->tx_idle)))
+		mtp_request_free(req, dev->ep_in);
+	for (i = 0; i < RX_REQ_MAX; i++)
+		mtp_request_free(dev->rx_req[i], dev->ep_out);
+	while ((req = mtp_req_get(dev, &dev->intr_idle)))
+		mtp_request_free(req, dev->ep_intr);
+	dev->state = STATE_OFFLINE;
+	kfree(f->os_desc_table);
+	f->os_desc_n = 0;
+}
+
+static int mtp_function_set_alt(struct usb_function *f,
+		unsigned intf, unsigned alt)
+{
+	struct mtp_dev	*dev = func_to_mtp(f);
+	struct usb_composite_dev *cdev = f->config->cdev;
+	int ret;
+
+	DBG(cdev, "mtp_function_set_alt intf: %d alt: %d\n", intf, alt);
+
+	ret = config_ep_by_speed(cdev->gadget, f, dev->ep_in);
+	if (ret)
+		return ret;
+
+	ret = usb_ep_enable(dev->ep_in);
+	if (ret)
+		return ret;
+
+	ret = config_ep_by_speed(cdev->gadget, f, dev->ep_out);
+	if (ret)
+		return ret;
+
+	ret = usb_ep_enable(dev->ep_out);
+	if (ret) {
+		usb_ep_disable(dev->ep_in);
+		return ret;
+	}
+
+	ret = config_ep_by_speed(cdev->gadget, f, dev->ep_intr);
+	if (ret)
+		return ret;
+
+	ret = usb_ep_enable(dev->ep_intr);
+	if (ret) {
+		usb_ep_disable(dev->ep_out);
+		usb_ep_disable(dev->ep_in);
+		return ret;
+	}
+	dev->state = STATE_READY;
+
+	/* readers may be blocked waiting for us to go online */
+	wake_up(&dev->read_wq);
+	return 0;
+}
+
+static void mtp_function_disable(struct usb_function *f)
+{
+	struct mtp_dev	*dev = func_to_mtp(f);
+	struct usb_composite_dev	*cdev = dev->cdev;
+
+	DBG(cdev, "mtp_function_disable\n");
+	dev->state = STATE_OFFLINE;
+	usb_ep_disable(dev->ep_in);
+	usb_ep_disable(dev->ep_out);
+	usb_ep_disable(dev->ep_intr);
+
+	/* readers may be blocked waiting for us to go online */
+	wake_up(&dev->read_wq);
+
+	VDBG(cdev, "%s disabled\n", dev->function.name);
+}
+
+static int __mtp_setup(struct mtp_instance *fi_mtp)
+{
+	struct mtp_dev *dev;
+	int ret;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+
+	if (fi_mtp != NULL)
+		fi_mtp->dev = dev;
+
+	if (!dev)
+		return -ENOMEM;
+
+	spin_lock_init(&dev->lock);
+	init_waitqueue_head(&dev->read_wq);
+	init_waitqueue_head(&dev->write_wq);
+	init_waitqueue_head(&dev->intr_wq);
+	atomic_set(&dev->open_excl, 0);
+	atomic_set(&dev->ioctl_excl, 0);
+	INIT_LIST_HEAD(&dev->tx_idle);
+	INIT_LIST_HEAD(&dev->intr_idle);
+
+	dev->wq = create_singlethread_workqueue("f_mtp");
+	if (!dev->wq) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+	INIT_WORK(&dev->send_file_work, send_file_work);
+	INIT_WORK(&dev->receive_file_work, receive_file_work);
+
+	_mtp_dev = dev;
+
+	ret = misc_register(&mtp_device);
+	if (ret)
+		goto err2;
+
+	return 0;
+
+err2:
+	destroy_workqueue(dev->wq);
+err1:
+	_mtp_dev = NULL;
+	kfree(dev);
+	printk(KERN_ERR "mtp gadget driver failed to initialize\n");
+	return ret;
+}
+
+static int mtp_setup_configfs(struct mtp_instance *fi_mtp)
+{
+	return __mtp_setup(fi_mtp);
+}
+
+
+static void mtp_cleanup(void)
+{
+	struct mtp_dev *dev = _mtp_dev;
+
+	if (!dev)
+		return;
+
+	misc_deregister(&mtp_device);
+	destroy_workqueue(dev->wq);
+	_mtp_dev = NULL;
+	kfree(dev);
+}
+
+static struct mtp_instance *to_mtp_instance(struct config_item *item)
+{
+	return container_of(to_config_group(item), struct mtp_instance,
+		func_inst.group);
+}
+
+static void mtp_attr_release(struct config_item *item)
+{
+	struct mtp_instance *fi_mtp = to_mtp_instance(item);
+
+	usb_put_function_instance(&fi_mtp->func_inst);
+}
+
+static struct configfs_item_operations mtp_item_ops = {
+	.release        = mtp_attr_release,
+};
+
+static struct config_item_type mtp_func_type = {
+	.ct_item_ops    = &mtp_item_ops,
+	.ct_owner       = THIS_MODULE,
+};
+
+
+static struct mtp_instance *to_fi_mtp(struct usb_function_instance *fi)
+{
+	return container_of(fi, struct mtp_instance, func_inst);
+}
+
+static int mtp_set_inst_name(struct usb_function_instance *fi, const char *name)
+{
+	struct mtp_instance *fi_mtp;
+	char *ptr;
+	int name_len;
+
+	name_len = strlen(name) + 1;
+	if (name_len > MAX_INST_NAME_LEN)
+		return -ENAMETOOLONG;
+
+	ptr = kstrndup(name, name_len, GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	fi_mtp = to_fi_mtp(fi);
+	fi_mtp->name = ptr;
+
+	return 0;
+}
+
+static void mtp_free_inst(struct usb_function_instance *fi)
+{
+	struct mtp_instance *fi_mtp;
+
+	fi_mtp = to_fi_mtp(fi);
+	kfree(fi_mtp->name);
+	mtp_cleanup();
+	kfree(fi_mtp);
+}
+
+struct usb_function_instance *alloc_inst_mtp_ptp(bool mtp_config)
+{
+	struct mtp_instance *fi_mtp;
+	int ret = 0;
+	struct usb_os_desc *descs[1];
+	char *names[1];
+
+	fi_mtp = kzalloc(sizeof(*fi_mtp), GFP_KERNEL);
+	if (!fi_mtp)
+		return ERR_PTR(-ENOMEM);
+	fi_mtp->func_inst.set_inst_name = mtp_set_inst_name;
+	fi_mtp->func_inst.free_func_inst = mtp_free_inst;
+
+	fi_mtp->mtp_os_desc.ext_compat_id = fi_mtp->mtp_ext_compat_id;
+	INIT_LIST_HEAD(&fi_mtp->mtp_os_desc.ext_prop);
+	descs[0] = &fi_mtp->mtp_os_desc;
+	names[0] = "MTP";
+
+	if (mtp_config) {
+		ret = mtp_setup_configfs(fi_mtp);
+		if (ret) {
+			kfree(fi_mtp);
+			pr_err("Error setting MTP\n");
+			return ERR_PTR(ret);
+		}
+	} else
+		fi_mtp->dev = _mtp_dev;
+
+	config_group_init_type_name(&fi_mtp->func_inst.group,
+					"", &mtp_func_type);
+	usb_os_desc_prepare_interf_dir(&fi_mtp->func_inst.group, 1,
+					descs, names, THIS_MODULE);
+
+	return  &fi_mtp->func_inst;
+}
+EXPORT_SYMBOL_GPL(alloc_inst_mtp_ptp);
+
+static struct usb_function_instance *mtp_alloc_inst(void)
+{
+		return alloc_inst_mtp_ptp(true);
+}
+
+static int mtp_ctrlreq_configfs(struct usb_function *f,
+				const struct usb_ctrlrequest *ctrl)
+{
+	return mtp_ctrlrequest(f->config->cdev, ctrl);
+}
+
+static void mtp_free(struct usb_function *f)
+{
+	/*NO-OP: no function specific resource allocation in mtp_alloc*/
+}
+
+struct usb_function *function_alloc_mtp_ptp(struct usb_function_instance *fi,
+					bool mtp_config)
+{
+	struct mtp_instance *fi_mtp = to_fi_mtp(fi);
+	struct mtp_dev *dev;
+
+	/*
+	 * PTP piggybacks on MTP function so make sure we have
+	 * created MTP function before we associate this PTP
+	 * function with a gadget configuration.
+	 */
+	if (fi_mtp->dev == NULL) {
+		pr_err("Error: Create MTP function before linking"
+				" PTP function with a gadget configuration\n");
+		pr_err("\t1: Delete existing PTP function if any\n");
+		pr_err("\t2: Create MTP function\n");
+		pr_err("\t3: Create and symlink PTP function"
+				" with a gadget configuration\n");
+		return ERR_PTR(-EINVAL); /* Invalid Configuration */
+	}
+
+	dev = fi_mtp->dev;
+	dev->function.name = DRIVER_NAME;
+	dev->function.strings = mtp_strings;
+	if (mtp_config) {
+		dev->function.fs_descriptors = fs_mtp_descs;
+		dev->function.hs_descriptors = hs_mtp_descs;
+		dev->function.ss_descriptors = ss_mtp_descs;
+	} else {
+		dev->function.fs_descriptors = fs_ptp_descs;
+		dev->function.hs_descriptors = hs_ptp_descs;
+		dev->function.ss_descriptors = ss_ptp_descs;
+	}
+	dev->function.bind = mtp_function_bind;
+	dev->function.unbind = mtp_function_unbind;
+	dev->function.set_alt = mtp_function_set_alt;
+	dev->function.disable = mtp_function_disable;
+	dev->function.setup = mtp_ctrlreq_configfs;
+	dev->function.free_func = mtp_free;
+
+	return &dev->function;
+}
+EXPORT_SYMBOL_GPL(function_alloc_mtp_ptp);
+
+static struct usb_function *mtp_alloc(struct usb_function_instance *fi)
+{
+	return function_alloc_mtp_ptp(fi, true);
+}
+
+DECLARE_USB_FUNCTION_INIT(mtp, mtp_alloc_inst, mtp_alloc);
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/gadget/function/f_mtp.h b/drivers/usb/gadget/function/f_mtp.h
new file mode 100644
index 0000000..7adb1ff
--- /dev/null
+++ b/drivers/usb/gadget/function/f_mtp.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2014 Google, Inc.
+ * Author: Badhri Jagan Sridharan <badhri@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+extern struct usb_function_instance *alloc_inst_mtp_ptp(bool mtp_config);
+extern struct usb_function *function_alloc_mtp_ptp(
+			struct usb_function_instance *fi, bool mtp_config);
diff --git a/drivers/usb/gadget/function/f_ptp.c b/drivers/usb/gadget/function/f_ptp.c
new file mode 100644
index 0000000..da3e4d5
--- /dev/null
+++ b/drivers/usb/gadget/function/f_ptp.c
@@ -0,0 +1,38 @@
+/*
+ * Gadget Function Driver for PTP
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * Author: Badhri Jagan Sridharan <badhri@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <linux/configfs.h>
+#include <linux/usb/composite.h>
+
+#include "f_mtp.h"
+
+static struct usb_function_instance *ptp_alloc_inst(void)
+{
+	return alloc_inst_mtp_ptp(false);
+}
+
+static struct usb_function *ptp_alloc(struct usb_function_instance *fi)
+{
+	return function_alloc_mtp_ptp(fi, false);
+}
+
+DECLARE_USB_FUNCTION_INIT(ptp, ptp_alloc_inst, ptp_alloc);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Badhri Jagan Sridharan");
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 19ce615..78c9658 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -6,6 +6,14 @@
 config USB_PHY
 	def_bool n
 
+config USB_OTG_WAKELOCK
+	bool "Hold a wakelock when USB connected"
+	depends on PM_WAKELOCKS
+	select USB_OTG_UTILS
+	help
+	  Select this to automatically hold a wakelock when USB is
+	  connected, preventing suspend.
+
 #
 # USB Transceiver Drivers
 #
@@ -209,4 +217,13 @@
 	  Provides read/write operations to the ULPI phy register set for
 	  controllers with a viewport register (e.g. Chipidea/ARC controllers).
 
+config DUAL_ROLE_USB_INTF
+	bool "Generic DUAL ROLE sysfs interface"
+	depends on SYSFS && USB_PHY
+	help
+	  A generic sysfs interface to track and change the state of
+	  dual role usb phys. The usb phy drivers can register to
+	  this interface to expose it capabilities to the userspace
+	  and thereby allowing userspace to change the port mode.
+
 endmenu
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index b433e5d..f65ac3e 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -3,6 +3,8 @@
 #
 obj-$(CONFIG_USB_PHY)			+= phy.o
 obj-$(CONFIG_OF)			+= of.o
+obj-$(CONFIG_USB_OTG_WAKELOCK)		+= otg-wakelock.o
+obj-$(CONFIG_DUAL_ROLE_USB_INTF)	+= class-dual-role.o
 
 # transceiver drivers, keep the list sorted
 
diff --git a/drivers/usb/phy/class-dual-role.c b/drivers/usb/phy/class-dual-role.c
new file mode 100644
index 0000000..51fcb54
--- /dev/null
+++ b/drivers/usb/phy/class-dual-role.c
@@ -0,0 +1,529 @@
+/*
+ * class-dual-role.c
+ *
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/usb/class-dual-role.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/types.h>
+
+#define DUAL_ROLE_NOTIFICATION_TIMEOUT 2000
+
+static ssize_t dual_role_store_property(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count);
+static ssize_t dual_role_show_property(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf);
+
+#define DUAL_ROLE_ATTR(_name)				\
+{							\
+	.attr = { .name = #_name },			\
+	.show = dual_role_show_property,		\
+	.store = dual_role_store_property,		\
+}
+
+static struct device_attribute dual_role_attrs[] = {
+	DUAL_ROLE_ATTR(supported_modes),
+	DUAL_ROLE_ATTR(mode),
+	DUAL_ROLE_ATTR(power_role),
+	DUAL_ROLE_ATTR(data_role),
+	DUAL_ROLE_ATTR(powers_vconn),
+};
+
+struct class *dual_role_class;
+EXPORT_SYMBOL_GPL(dual_role_class);
+
+static struct device_type dual_role_dev_type;
+
+static char *kstrdupcase(const char *str, gfp_t gfp, bool to_upper)
+{
+	char *ret, *ustr;
+
+	ustr = ret = kmalloc(strlen(str) + 1, gfp);
+
+	if (!ret)
+		return NULL;
+
+	while (*str)
+		*ustr++ = to_upper ? toupper(*str++) : tolower(*str++);
+
+	*ustr = 0;
+
+	return ret;
+}
+
+static void dual_role_changed_work(struct work_struct *work)
+{
+	struct dual_role_phy_instance *dual_role =
+	    container_of(work, struct dual_role_phy_instance,
+			 changed_work);
+
+	dev_dbg(&dual_role->dev, "%s\n", __func__);
+	kobject_uevent(&dual_role->dev.kobj, KOBJ_CHANGE);
+}
+
+void dual_role_instance_changed(struct dual_role_phy_instance *dual_role)
+{
+	dev_dbg(&dual_role->dev, "%s\n", __func__);
+	pm_wakeup_event(&dual_role->dev, DUAL_ROLE_NOTIFICATION_TIMEOUT);
+	schedule_work(&dual_role->changed_work);
+}
+EXPORT_SYMBOL_GPL(dual_role_instance_changed);
+
+int dual_role_get_property(struct dual_role_phy_instance *dual_role,
+			   enum dual_role_property prop,
+			   unsigned int *val)
+{
+	return dual_role->desc->get_property(dual_role, prop, val);
+}
+EXPORT_SYMBOL_GPL(dual_role_get_property);
+
+int dual_role_set_property(struct dual_role_phy_instance *dual_role,
+			   enum dual_role_property prop,
+			   const unsigned int *val)
+{
+	if (!dual_role->desc->set_property)
+		return -ENODEV;
+
+	return dual_role->desc->set_property(dual_role, prop, val);
+}
+EXPORT_SYMBOL_GPL(dual_role_set_property);
+
+int dual_role_property_is_writeable(struct dual_role_phy_instance *dual_role,
+				    enum dual_role_property prop)
+{
+	if (!dual_role->desc->property_is_writeable)
+		return -ENODEV;
+
+	return dual_role->desc->property_is_writeable(dual_role, prop);
+}
+EXPORT_SYMBOL_GPL(dual_role_property_is_writeable);
+
+static void dual_role_dev_release(struct device *dev)
+{
+	struct dual_role_phy_instance *dual_role =
+	    container_of(dev, struct dual_role_phy_instance, dev);
+	pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
+	kfree(dual_role);
+}
+
+static struct dual_role_phy_instance *__must_check
+__dual_role_register(struct device *parent,
+		     const struct dual_role_phy_desc *desc)
+{
+	struct device *dev;
+	struct dual_role_phy_instance *dual_role;
+	int rc;
+
+	dual_role = kzalloc(sizeof(*dual_role), GFP_KERNEL);
+	if (!dual_role)
+		return ERR_PTR(-ENOMEM);
+
+	dev = &dual_role->dev;
+
+	device_initialize(dev);
+
+	dev->class = dual_role_class;
+	dev->type = &dual_role_dev_type;
+	dev->parent = parent;
+	dev->release = dual_role_dev_release;
+	dev_set_drvdata(dev, dual_role);
+	dual_role->desc = desc;
+
+	rc = dev_set_name(dev, "%s", desc->name);
+	if (rc)
+		goto dev_set_name_failed;
+
+	INIT_WORK(&dual_role->changed_work, dual_role_changed_work);
+
+	rc = device_init_wakeup(dev, true);
+	if (rc)
+		goto wakeup_init_failed;
+
+	rc = device_add(dev);
+	if (rc)
+		goto device_add_failed;
+
+	dual_role_instance_changed(dual_role);
+
+	return dual_role;
+
+device_add_failed:
+	device_init_wakeup(dev, false);
+wakeup_init_failed:
+dev_set_name_failed:
+	put_device(dev);
+	kfree(dual_role);
+
+	return ERR_PTR(rc);
+}
+
+static void dual_role_instance_unregister(struct dual_role_phy_instance
+					  *dual_role)
+{
+	cancel_work_sync(&dual_role->changed_work);
+	device_init_wakeup(&dual_role->dev, false);
+	device_unregister(&dual_role->dev);
+}
+
+static void devm_dual_role_release(struct device *dev, void *res)
+{
+	struct dual_role_phy_instance **dual_role = res;
+
+	dual_role_instance_unregister(*dual_role);
+}
+
+struct dual_role_phy_instance *__must_check
+devm_dual_role_instance_register(struct device *parent,
+				 const struct dual_role_phy_desc *desc)
+{
+	struct dual_role_phy_instance **ptr, *dual_role;
+
+	ptr = devres_alloc(devm_dual_role_release, sizeof(*ptr), GFP_KERNEL);
+
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+	dual_role = __dual_role_register(parent, desc);
+	if (IS_ERR(dual_role)) {
+		devres_free(ptr);
+	} else {
+		*ptr = dual_role;
+		devres_add(parent, ptr);
+	}
+	return dual_role;
+}
+EXPORT_SYMBOL_GPL(devm_dual_role_instance_register);
+
+static int devm_dual_role_match(struct device *dev, void *res, void *data)
+{
+	struct dual_role_phy_instance **r = res;
+
+	if (WARN_ON(!r || !*r))
+		return 0;
+
+	return *r == data;
+}
+
+void devm_dual_role_instance_unregister(struct device *dev,
+					struct dual_role_phy_instance
+					*dual_role)
+{
+	int rc;
+
+	rc = devres_release(dev, devm_dual_role_release,
+			    devm_dual_role_match, dual_role);
+	WARN_ON(rc);
+}
+EXPORT_SYMBOL_GPL(devm_dual_role_instance_unregister);
+
+void *dual_role_get_drvdata(struct dual_role_phy_instance *dual_role)
+{
+	return dual_role->drv_data;
+}
+EXPORT_SYMBOL_GPL(dual_role_get_drvdata);
+
+/***************** Device attribute functions **************************/
+
+/* port type */
+static char *supported_modes_text[] = {
+	"ufp dfp", "dfp", "ufp"
+};
+
+/* current mode */
+static char *mode_text[] = {
+	"ufp", "dfp", "none"
+};
+
+/* Power role */
+static char *pr_text[] = {
+	"source", "sink", "none"
+};
+
+/* Data role */
+static char *dr_text[] = {
+	"host", "device", "none"
+};
+
+/* Vconn supply */
+static char *vconn_supply_text[] = {
+	"n", "y"
+};
+
+static ssize_t dual_role_show_property(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	ssize_t ret = 0;
+	struct dual_role_phy_instance *dual_role = dev_get_drvdata(dev);
+	const ptrdiff_t off = attr - dual_role_attrs;
+	unsigned int value;
+
+	if (off == DUAL_ROLE_PROP_SUPPORTED_MODES) {
+		value = dual_role->desc->supported_modes;
+	} else {
+		ret = dual_role_get_property(dual_role, off, &value);
+
+		if (ret < 0) {
+			if (ret == -ENODATA)
+				dev_dbg(dev,
+					"driver has no data for `%s' property\n",
+					attr->attr.name);
+			else if (ret != -ENODEV)
+				dev_err(dev,
+					"driver failed to report `%s' property: %zd\n",
+					attr->attr.name, ret);
+			return ret;
+		}
+	}
+
+	if (off == DUAL_ROLE_PROP_SUPPORTED_MODES) {
+		BUILD_BUG_ON(DUAL_ROLE_PROP_SUPPORTED_MODES_TOTAL !=
+			ARRAY_SIZE(supported_modes_text));
+		if (value < DUAL_ROLE_PROP_SUPPORTED_MODES_TOTAL)
+			return snprintf(buf, PAGE_SIZE, "%s\n",
+					supported_modes_text[value]);
+		else
+			return -EIO;
+	} else if (off == DUAL_ROLE_PROP_MODE) {
+		BUILD_BUG_ON(DUAL_ROLE_PROP_MODE_TOTAL !=
+			ARRAY_SIZE(mode_text));
+		if (value < DUAL_ROLE_PROP_MODE_TOTAL)
+			return snprintf(buf, PAGE_SIZE, "%s\n",
+					mode_text[value]);
+		else
+			return -EIO;
+	} else if (off == DUAL_ROLE_PROP_PR) {
+		BUILD_BUG_ON(DUAL_ROLE_PROP_PR_TOTAL != ARRAY_SIZE(pr_text));
+		if (value < DUAL_ROLE_PROP_PR_TOTAL)
+			return snprintf(buf, PAGE_SIZE, "%s\n",
+					pr_text[value]);
+		else
+			return -EIO;
+	} else if (off == DUAL_ROLE_PROP_DR) {
+		BUILD_BUG_ON(DUAL_ROLE_PROP_DR_TOTAL != ARRAY_SIZE(dr_text));
+		if (value < DUAL_ROLE_PROP_DR_TOTAL)
+			return snprintf(buf, PAGE_SIZE, "%s\n",
+					dr_text[value]);
+		else
+			return -EIO;
+	} else if (off == DUAL_ROLE_PROP_VCONN_SUPPLY) {
+		BUILD_BUG_ON(DUAL_ROLE_PROP_VCONN_SUPPLY_TOTAL !=
+				ARRAY_SIZE(vconn_supply_text));
+		if (value < DUAL_ROLE_PROP_VCONN_SUPPLY_TOTAL)
+			return snprintf(buf, PAGE_SIZE, "%s\n",
+					vconn_supply_text[value]);
+		else
+			return -EIO;
+	} else
+		return -EIO;
+}
+
+static ssize_t dual_role_store_property(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	ssize_t ret;
+	struct dual_role_phy_instance *dual_role = dev_get_drvdata(dev);
+	const ptrdiff_t off = attr - dual_role_attrs;
+	unsigned int value;
+	int total, i;
+	char *dup_buf, **text_array;
+	bool result = false;
+
+	dup_buf = kstrdupcase(buf, GFP_KERNEL, false);
+	switch (off) {
+	case DUAL_ROLE_PROP_MODE:
+		total = DUAL_ROLE_PROP_MODE_TOTAL;
+		text_array = mode_text;
+		break;
+	case DUAL_ROLE_PROP_PR:
+		total = DUAL_ROLE_PROP_PR_TOTAL;
+		text_array = pr_text;
+		break;
+	case DUAL_ROLE_PROP_DR:
+		total = DUAL_ROLE_PROP_DR_TOTAL;
+		text_array = dr_text;
+		break;
+	case DUAL_ROLE_PROP_VCONN_SUPPLY:
+		ret = strtobool(dup_buf, &result);
+		value = result;
+		if (!ret)
+			goto setprop;
+	default:
+		ret = -EINVAL;
+		goto error;
+	}
+
+	for (i = 0; i <= total; i++) {
+		if (i == total) {
+			ret = -ENOTSUPP;
+			goto error;
+		}
+		if (!strncmp(*(text_array + i), dup_buf,
+			     strlen(*(text_array + i)))) {
+			value = i;
+			break;
+		}
+	}
+
+setprop:
+	ret = dual_role->desc->set_property(dual_role, off, &value);
+
+error:
+	kfree(dup_buf);
+
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static umode_t dual_role_attr_is_visible(struct kobject *kobj,
+					 struct attribute *attr, int attrno)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct dual_role_phy_instance *dual_role = dev_get_drvdata(dev);
+	umode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+	int i;
+
+	if (attrno == DUAL_ROLE_PROP_SUPPORTED_MODES)
+		return mode;
+
+	for (i = 0; i < dual_role->desc->num_properties; i++) {
+		int property = dual_role->desc->properties[i];
+
+		if (property == attrno) {
+			if (dual_role->desc->property_is_writeable &&
+			    dual_role_property_is_writeable(dual_role, property)
+			    > 0)
+				mode |= S_IWUSR;
+
+			return mode;
+		}
+	}
+
+	return 0;
+}
+
+static struct attribute *__dual_role_attrs[ARRAY_SIZE(dual_role_attrs) + 1];
+
+static struct attribute_group dual_role_attr_group = {
+	.attrs = __dual_role_attrs,
+	.is_visible = dual_role_attr_is_visible,
+};
+
+static const struct attribute_group *dual_role_attr_groups[] = {
+	&dual_role_attr_group,
+	NULL,
+};
+
+void dual_role_init_attrs(struct device_type *dev_type)
+{
+	int i;
+
+	dev_type->groups = dual_role_attr_groups;
+
+	for (i = 0; i < ARRAY_SIZE(dual_role_attrs); i++)
+		__dual_role_attrs[i] = &dual_role_attrs[i].attr;
+}
+
+int dual_role_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct dual_role_phy_instance *dual_role = dev_get_drvdata(dev);
+	int ret = 0, j;
+	char *prop_buf;
+	char *attrname;
+
+	dev_dbg(dev, "uevent\n");
+
+	if (!dual_role || !dual_role->desc) {
+		dev_dbg(dev, "No dual_role phy yet\n");
+		return ret;
+	}
+
+	dev_dbg(dev, "DUAL_ROLE_NAME=%s\n", dual_role->desc->name);
+
+	ret = add_uevent_var(env, "DUAL_ROLE_NAME=%s", dual_role->desc->name);
+	if (ret)
+		return ret;
+
+	prop_buf = (char *)get_zeroed_page(GFP_KERNEL);
+	if (!prop_buf)
+		return -ENOMEM;
+
+	for (j = 0; j < dual_role->desc->num_properties; j++) {
+		struct device_attribute *attr;
+		char *line;
+
+		attr = &dual_role_attrs[dual_role->desc->properties[j]];
+
+		ret = dual_role_show_property(dev, attr, prop_buf);
+		if (ret == -ENODEV || ret == -ENODATA) {
+			ret = 0;
+			continue;
+		}
+
+		if (ret < 0)
+			goto out;
+		line = strnchr(prop_buf, PAGE_SIZE, '\n');
+		if (line)
+			*line = 0;
+
+		attrname = kstrdupcase(attr->attr.name, GFP_KERNEL, true);
+		if (!attrname)
+			ret = -ENOMEM;
+
+		dev_dbg(dev, "prop %s=%s\n", attrname, prop_buf);
+
+		ret = add_uevent_var(env, "DUAL_ROLE_%s=%s", attrname,
+				     prop_buf);
+		kfree(attrname);
+		if (ret)
+			goto out;
+	}
+
+out:
+	free_page((unsigned long)prop_buf);
+
+	return ret;
+}
+
+/******************* Module Init ***********************************/
+
+static int __init dual_role_class_init(void)
+{
+	dual_role_class = class_create(THIS_MODULE, "dual_role_usb");
+
+	if (IS_ERR(dual_role_class))
+		return PTR_ERR(dual_role_class);
+
+	dual_role_class->dev_uevent = dual_role_uevent;
+	dual_role_init_attrs(&dual_role_dev_type);
+
+	return 0;
+}
+
+static void __exit dual_role_class_exit(void)
+{
+	class_destroy(dual_role_class);
+}
+
+subsys_initcall(dual_role_class_init);
+module_exit(dual_role_class_exit);
diff --git a/drivers/usb/phy/otg-wakelock.c b/drivers/usb/phy/otg-wakelock.c
new file mode 100644
index 0000000..ecd7410
--- /dev/null
+++ b/drivers/usb/phy/otg-wakelock.c
@@ -0,0 +1,170 @@
+/*
+ * otg-wakelock.c
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/spinlock.h>
+#include <linux/usb/otg.h>
+
+#define TEMPORARY_HOLD_TIME	2000
+
+static bool enabled = true;
+static struct usb_phy *otgwl_xceiv;
+static struct notifier_block otgwl_nb;
+
+/*
+ * otgwl_spinlock is held while the VBUS lock is grabbed or dropped and the
+ * held field is updated to match.
+ */
+
+static DEFINE_SPINLOCK(otgwl_spinlock);
+
+/*
+ * Only one lock, but since these 3 fields are associated with each other...
+ */
+
+struct otgwl_lock {
+	char name[40];
+	struct wakeup_source wakesrc;
+	bool held;
+};
+
+/*
+ * VBUS present lock.  Also used as a timed lock on charger
+ * connect/disconnect and USB host disconnect, to allow the system
+ * to react to the change in power.
+ */
+
+static struct otgwl_lock vbus_lock;
+
+static void otgwl_hold(struct otgwl_lock *lock)
+{
+	if (!lock->held) {
+		__pm_stay_awake(&lock->wakesrc);
+		lock->held = true;
+	}
+}
+
+static void otgwl_temporary_hold(struct otgwl_lock *lock)
+{
+	__pm_wakeup_event(&lock->wakesrc, TEMPORARY_HOLD_TIME);
+	lock->held = false;
+}
+
+static void otgwl_drop(struct otgwl_lock *lock)
+{
+	if (lock->held) {
+		__pm_relax(&lock->wakesrc);
+		lock->held = false;
+	}
+}
+
+static void otgwl_handle_event(unsigned long event)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&otgwl_spinlock, irqflags);
+
+	if (!enabled) {
+		otgwl_drop(&vbus_lock);
+		spin_unlock_irqrestore(&otgwl_spinlock, irqflags);
+		return;
+	}
+
+	switch (event) {
+	case USB_EVENT_VBUS:
+	case USB_EVENT_ENUMERATED:
+		otgwl_hold(&vbus_lock);
+		break;
+
+	case USB_EVENT_NONE:
+	case USB_EVENT_ID:
+	case USB_EVENT_CHARGER:
+		otgwl_temporary_hold(&vbus_lock);
+		break;
+
+	default:
+		break;
+	}
+
+	spin_unlock_irqrestore(&otgwl_spinlock, irqflags);
+}
+
+static int otgwl_otg_notifications(struct notifier_block *nb,
+				   unsigned long event, void *unused)
+{
+	otgwl_handle_event(event);
+	return NOTIFY_OK;
+}
+
+static int set_enabled(const char *val, const struct kernel_param *kp)
+{
+	int rv = param_set_bool(val, kp);
+
+	if (rv)
+		return rv;
+
+	if (otgwl_xceiv)
+		otgwl_handle_event(otgwl_xceiv->last_event);
+
+	return 0;
+}
+
+static struct kernel_param_ops enabled_param_ops = {
+	.set = set_enabled,
+	.get = param_get_bool,
+};
+
+module_param_cb(enabled, &enabled_param_ops, &enabled, 0644);
+MODULE_PARM_DESC(enabled, "enable wakelock when VBUS present");
+
+static int __init otg_wakelock_init(void)
+{
+	int ret;
+	struct usb_phy *phy;
+
+	phy = usb_get_phy(USB_PHY_TYPE_USB2);
+
+	if (IS_ERR(phy)) {
+		pr_err("%s: No USB transceiver found\n", __func__);
+		return PTR_ERR(phy);
+	}
+	otgwl_xceiv = phy;
+
+	snprintf(vbus_lock.name, sizeof(vbus_lock.name), "vbus-%s",
+		 dev_name(otgwl_xceiv->dev));
+	wakeup_source_init(&vbus_lock.wakesrc, vbus_lock.name);
+
+	otgwl_nb.notifier_call = otgwl_otg_notifications;
+	ret = usb_register_notifier(otgwl_xceiv, &otgwl_nb);
+
+	if (ret) {
+		pr_err("%s: usb_register_notifier on transceiver %s"
+		       " failed\n", __func__,
+		       dev_name(otgwl_xceiv->dev));
+		otgwl_xceiv = NULL;
+		wakeup_source_trash(&vbus_lock.wakesrc);
+		return ret;
+	}
+
+	otgwl_handle_event(otgwl_xceiv->last_event);
+	return ret;
+}
+
+late_initcall(otg_wakelock_init);
diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c
index 66d58e9..8c93ad1d 100644
--- a/drivers/video/fbdev/goldfishfb.c
+++ b/drivers/video/fbdev/goldfishfb.c
@@ -26,6 +26,7 @@
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
+#include <linux/acpi.h>
 
 enum {
 	FB_GET_WIDTH        = 0x00,
@@ -305,12 +306,25 @@
 	return 0;
 }
 
+static const struct of_device_id goldfish_fb_of_match[] = {
+	{ .compatible = "google,goldfish-fb", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, goldfish_fb_of_match);
+
+static const struct acpi_device_id goldfish_fb_acpi_match[] = {
+	{ "GFSH0004", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, goldfish_fb_acpi_match);
 
 static struct platform_driver goldfish_fb_driver = {
 	.probe		= goldfish_fb_probe,
 	.remove		= goldfish_fb_remove,
 	.driver = {
-		.name = "goldfish_fb"
+		.name = "goldfish_fb",
+		.of_match_table = goldfish_fb_of_match,
+		.acpi_match_table = ACPI_PTR(goldfish_fb_acpi_match),
 	}
 };
 
diff --git a/drivers/w1/masters/ds2482.c b/drivers/w1/masters/ds2482.c
index 2e30db1..fa13fa8 100644
--- a/drivers/w1/masters/ds2482.c
+++ b/drivers/w1/masters/ds2482.c
@@ -18,6 +18,8 @@
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/platform_data/ds2482.h>
 #include <asm/delay.h>
 
 #include "../w1.h"
@@ -97,7 +99,8 @@
 static int ds2482_probe(struct i2c_client *client,
 			const struct i2c_device_id *id);
 static int ds2482_remove(struct i2c_client *client);
-
+static int ds2482_suspend(struct device *dev);
+static int ds2482_resume(struct device *dev);
 
 /**
  * Driver data (common to all clients)
@@ -108,9 +111,15 @@
 };
 MODULE_DEVICE_TABLE(i2c, ds2482_id);
 
+static const struct dev_pm_ops ds2482_pm_ops = {
+	.suspend = ds2482_suspend,
+	.resume = ds2482_resume,
+};
+
 static struct i2c_driver ds2482_driver = {
 	.driver = {
 		.name	= "ds2482",
+		.pm = &ds2482_pm_ops,
 	},
 	.probe		= ds2482_probe,
 	.remove		= ds2482_remove,
@@ -132,6 +141,7 @@
 struct ds2482_data {
 	struct i2c_client	*client;
 	struct mutex		access_lock;
+	int			slpz_gpio;
 
 	/* 1-wire interface(s) */
 	int			w1_count;	/* 1 or 8 */
@@ -460,11 +470,31 @@
 	return retval;
 }
 
+static int ds2482_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ds2482_data *data = i2c_get_clientdata(client);
+
+	if (data->slpz_gpio >= 0)
+		gpio_set_value(data->slpz_gpio, 0);
+	return 0;
+}
+
+static int ds2482_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ds2482_data *data = i2c_get_clientdata(client);
+
+	if (data->slpz_gpio >= 0)
+		gpio_set_value(data->slpz_gpio, 1);
+	return 0;
+}
 
 static int ds2482_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
 	struct ds2482_data *data;
+	struct ds2482_platform_data *pdata;
 	int err = -ENODEV;
 	int temp1;
 	int idx;
@@ -531,6 +561,16 @@
 		}
 	}
 
+	pdata = client->dev.platform_data;
+	data->slpz_gpio = pdata ? pdata->slpz_gpio : -1;
+
+	if (data->slpz_gpio >= 0) {
+		err = gpio_request_one(data->slpz_gpio, GPIOF_OUT_INIT_HIGH,
+				       "ds2482.slpz");
+		if (err < 0)
+			goto exit_w1_remove;
+	}
+
 	return 0;
 
 exit_w1_remove:
@@ -555,6 +595,11 @@
 			w1_remove_master_device(&data->w1_ch[idx].w1_bm);
 	}
 
+	if (data->slpz_gpio >= 0) {
+		gpio_set_value(data->slpz_gpio, 0);
+		gpio_free(data->slpz_gpio);
+	}
+
 	/* Free the memory */
 	kfree(data);
 	return 0;
diff --git a/fs/Kconfig b/fs/Kconfig
index 4bd03a2..20a8d95 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -227,6 +227,7 @@
 source "fs/adfs/Kconfig"
 source "fs/affs/Kconfig"
 source "fs/ecryptfs/Kconfig"
+source "fs/sdcardfs/Kconfig"
 source "fs/hfs/Kconfig"
 source "fs/hfsplus/Kconfig"
 source "fs/befs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index ed2b632..f207d43 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -3,7 +3,7 @@
 #
 # 14 Sep 2000, Christoph Hellwig <hch@infradead.org>
 # Rewritten to use lists instead of if-statements.
-# 
+#
 
 obj-y :=	open.o read_write.o file_table.o super.o \
 		char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
@@ -61,7 +61,7 @@
 
 obj-$(CONFIG_PROFILING)		+= dcookies.o
 obj-$(CONFIG_DLM)		+= dlm/
- 
+
 # Do not add any filesystems before this line
 obj-$(CONFIG_FSCACHE)		+= fscache/
 obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
@@ -83,6 +83,7 @@
 obj-$(CONFIG_HFSPLUS_FS)	+= hfsplus/ # Before hfs to find wrapped HFS+
 obj-$(CONFIG_HFS_FS)		+= hfs/
 obj-$(CONFIG_ECRYPT_FS)		+= ecryptfs/
+obj-$(CONFIG_SDCARD_FS)		+= sdcardfs/
 obj-$(CONFIG_VXFS_FS)		+= freevxfs/
 obj-$(CONFIG_NFS_FS)		+= nfs/
 obj-$(CONFIG_EXPORTFS)		+= exportfs/
diff --git a/fs/attr.c b/fs/attr.c
index c902b3d..c4093c5 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -200,7 +200,7 @@
  * the file open for write, as there can be no conflicting delegation in
  * that case.
  */
-int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
+int notify_change2(struct vfsmount *mnt, struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
 {
 	struct inode *inode = dentry->d_inode;
 	umode_t mode = inode->i_mode;
@@ -224,7 +224,7 @@
 			return -EPERM;
 
 		if (!inode_owner_or_capable(inode)) {
-			error = inode_permission(inode, MAY_WRITE);
+			error = inode_permission2(mnt, inode, MAY_WRITE);
 			if (error)
 				return error;
 		}
@@ -307,7 +307,9 @@
 	if (error)
 		return error;
 
-	if (inode->i_op->setattr)
+	if (mnt && inode->i_op->setattr2)
+		error = inode->i_op->setattr2(mnt, dentry, attr);
+	else if (inode->i_op->setattr)
 		error = inode->i_op->setattr(dentry, attr);
 	else
 		error = simple_setattr(dentry, attr);
@@ -320,4 +322,10 @@
 
 	return error;
 }
+EXPORT_SYMBOL(notify_change2);
+
+int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
+{
+	return notify_change2(NULL, dentry, attr, delegated_inode);
+}
 EXPORT_SYMBOL(notify_change);
diff --git a/fs/coredump.c b/fs/coredump.c
index 4407e27..00a900a 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -744,7 +744,7 @@
 			goto close_fail;
 		if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
 			goto close_fail;
-		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+		if (do_truncate2(cprm.file->f_path.mnt, cprm.file->f_path.dentry, 0, 0, cprm.file))
 			goto close_fail;
 	}
 
diff --git a/fs/dcache.c b/fs/dcache.c
index 05bad55..426fe9b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3251,6 +3251,7 @@
 		return ERR_PTR(error);
 	return res;
 }
+EXPORT_SYMBOL(d_absolute_path);
 
 /*
  * same as __d_path but appends "(deleted)" for unlinked files.
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a9c0bf8..e76d0c3 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -34,6 +34,7 @@
 #include <linux/mutex.h>
 #include <linux/anon_inodes.h>
 #include <linux/device.h>
+#include <linux/freezer.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/mman.h>
@@ -1673,7 +1674,8 @@
 			}
 
 			spin_unlock_irqrestore(&ep->lock, flags);
-			if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+			if (!freezable_schedule_hrtimeout_range(to, slack,
+								HRTIMER_MODE_ABS))
 				timed_out = 1;
 
 			spin_lock_irqsave(&ep->lock, flags);
diff --git a/fs/exec.c b/fs/exec.c
index 8147711..b9fdb83 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1303,7 +1303,7 @@
 void would_dump(struct linux_binprm *bprm, struct file *file)
 {
 	struct inode *inode = file_inode(file);
-	if (inode_permission(inode, MAY_READ) < 0) {
+	if (inode_permission2(file->f_path.mnt, inode, MAY_READ) < 0) {
 		struct user_namespace *old, *user_ns;
 		bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 567a6c7..0877f0b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2445,7 +2445,8 @@
 		ext4_group_t i, struct ext4_group_desc *desc);
 extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 				ext4_fsblk_t block, unsigned long count);
-extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+extern int ext4_trim_fs(struct super_block *, struct fstrim_range *,
+				unsigned long blkdev_flags);
 
 /* inode.c */
 int ext4_inode_is_fast_symlink(struct inode *inode);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 9a13f86..fa00af5 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -18,6 +18,7 @@
 #include "ext4.h"
 #include "xattr.h"
 #include "truncate.h"
+#include <trace/events/android_fs.h>
 
 #define EXT4_XATTR_SYSTEM_DATA	"data"
 #define EXT4_MIN_INLINE_DATA_SIZE	((sizeof(__le32) * EXT4_N_BLOCKS))
@@ -502,6 +503,17 @@
 		return -EAGAIN;
 	}
 
+	if (trace_android_fs_dataread_start_enabled()) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_dataread_start(inode, page_offset(page),
+						PAGE_SIZE, current->pid,
+						path, current->comm);
+	}
+
 	/*
 	 * Current inline data can only exist in the 1st page,
 	 * So for all the other pages, just set them uptodate.
@@ -513,6 +525,8 @@
 		SetPageUptodate(page);
 	}
 
+	trace_android_fs_dataread_end(inode, page_offset(page), PAGE_SIZE);
+
 	up_read(&EXT4_I(inode)->xattr_sem);
 
 	unlock_page(page);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4815be2..9b0876b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -44,6 +44,7 @@
 #include "truncate.h"
 
 #include <trace/events/ext4.h>
+#include <trace/events/android_fs.h>
 
 #define MPAGE_DA_EXTENT_TAIL 0x01
 
@@ -1182,6 +1183,16 @@
 	pgoff_t index;
 	unsigned from, to;
 
+	if (trace_android_fs_datawrite_start_enabled()) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_datawrite_start(inode, pos, len,
+						 current->pid, path,
+						 current->comm);
+	}
 	trace_ext4_write_begin(inode, pos, len, flags);
 	/*
 	 * Reserve one block more for addition to orphan list in case
@@ -1320,6 +1331,7 @@
 	int i_size_changed = 0;
 	int inline_data = ext4_has_inline_data(inode);
 
+	trace_android_fs_datawrite_end(inode, pos, len);
 	trace_ext4_write_end(inode, pos, len, copied);
 	if (inline_data) {
 		ret = ext4_write_inline_data_end(inode, pos, len,
@@ -1425,6 +1437,7 @@
 	int size_changed = 0;
 	int inline_data = ext4_has_inline_data(inode);
 
+	trace_android_fs_datawrite_end(inode, pos, len);
 	trace_ext4_journalled_write_end(inode, pos, len, copied);
 	from = pos & (PAGE_SIZE - 1);
 	to = from + len;
@@ -2922,6 +2935,16 @@
 					len, flags, pagep, fsdata);
 	}
 	*fsdata = (void *)0;
+	if (trace_android_fs_datawrite_start_enabled()) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_datawrite_start(inode, pos, len,
+						 current->pid,
+						 path, current->comm);
+	}
 	trace_ext4_da_write_begin(inode, pos, len, flags);
 
 	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -3040,6 +3063,7 @@
 		return ext4_write_end(file, mapping, pos,
 				      len, copied, page, fsdata);
 
+	trace_android_fs_datawrite_end(inode, pos, len);
 	trace_ext4_da_write_end(inode, pos, len, copied);
 	start = pos & (PAGE_SIZE - 1);
 	end = start + copied - 1;
@@ -3603,6 +3627,7 @@
 	size_t count = iov_iter_count(iter);
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
+	int rw = iov_iter_rw(iter);
 
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 	if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
@@ -3619,12 +3644,42 @@
 	if (ext4_has_inline_data(inode))
 		return 0;
 
+	if (trace_android_fs_dataread_start_enabled() &&
+	    (rw == READ)) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_dataread_start(inode, offset, count,
+						current->pid, path,
+						current->comm);
+	}
+	if (trace_android_fs_datawrite_start_enabled() &&
+	    (rw == WRITE)) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_datawrite_start(inode, offset, count,
+						 current->pid, path,
+						 current->comm);
+	}
 	trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
 	if (iov_iter_rw(iter) == READ)
 		ret = ext4_direct_IO_read(iocb, iter);
 	else
 		ret = ext4_direct_IO_write(iocb, iter);
 	trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
+
+	if (trace_android_fs_dataread_start_enabled() &&
+	    (rw == READ))
+		trace_android_fs_dataread_end(inode, offset, count);
+	if (trace_android_fs_datawrite_start_enabled() &&
+	    (rw == WRITE))
+		trace_android_fs_datawrite_end(inode, offset, count);
+
 	return ret;
 }
 
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 2880e01..e3ad9d4 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -737,11 +737,13 @@
 		return err;
 	}
 
+	case FIDTRIM:
 	case FITRIM:
 	{
 		struct request_queue *q = bdev_get_queue(sb->s_bdev);
 		struct fstrim_range range;
 		int ret = 0;
+		int flags  = cmd == FIDTRIM ? BLKDEV_DISCARD_SECURE : 0;
 
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
@@ -749,13 +751,16 @@
 		if (!blk_queue_discard(q))
 			return -EOPNOTSUPP;
 
+		if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secure_erase(q))
+			return -EOPNOTSUPP;
+
 		if (copy_from_user(&range, (struct fstrim_range __user *)arg,
 		    sizeof(range)))
 			return -EFAULT;
 
 		range.minlen = max((unsigned int)range.minlen,
 				   q->limits.discard_granularity);
-		ret = ext4_trim_fs(sb, &range);
+		ret = ext4_trim_fs(sb, &range, flags);
 		if (ret < 0)
 			return ret;
 
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a49d0e5d7..3d6f73e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2775,7 +2775,8 @@
 }
 
 static inline int ext4_issue_discard(struct super_block *sb,
-		ext4_group_t block_group, ext4_grpblk_t cluster, int count)
+		ext4_group_t block_group, ext4_grpblk_t cluster, int count,
+		unsigned long flags)
 {
 	ext4_fsblk_t discard_block;
 
@@ -2784,7 +2785,7 @@
 	count = EXT4_C2B(EXT4_SB(sb), count);
 	trace_ext4_discard_blocks(sb,
 			(unsigned long long) discard_block, count);
-	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags);
 }
 
 /*
@@ -2806,7 +2807,7 @@
 	if (test_opt(sb, DISCARD)) {
 		err = ext4_issue_discard(sb, entry->efd_group,
 					 entry->efd_start_cluster,
-					 entry->efd_count);
+					 entry->efd_count, 0);
 		if (err && err != -EOPNOTSUPP)
 			ext4_msg(sb, KERN_WARNING, "discard request in"
 				 " group:%d block:%d count:%d failed"
@@ -4865,7 +4866,8 @@
 		 * them with group lock_held
 		 */
 		if (test_opt(sb, DISCARD)) {
-			err = ext4_issue_discard(sb, block_group, bit, count);
+			err = ext4_issue_discard(sb, block_group, bit, count,
+						 0);
 			if (err && err != -EOPNOTSUPP)
 				ext4_msg(sb, KERN_WARNING, "discard request in"
 					 " group:%d block:%d count:%lu failed"
@@ -5061,13 +5063,15 @@
  * @count:	number of blocks to TRIM
  * @group:	alloc. group we are working with
  * @e4b:	ext4 buddy for the group
+ * @blkdev_flags: flags for the block device
  *
  * Trim "count" blocks starting at "start" in the "group". To assure that no
  * one will allocate those blocks, mark it as used in buddy bitmap. This must
  * be called with under the group lock.
  */
 static int ext4_trim_extent(struct super_block *sb, int start, int count,
-			     ext4_group_t group, struct ext4_buddy *e4b)
+			    ext4_group_t group, struct ext4_buddy *e4b,
+			    unsigned long blkdev_flags)
 __releases(bitlock)
 __acquires(bitlock)
 {
@@ -5088,7 +5092,7 @@
 	 */
 	mb_mark_used(e4b, &ex);
 	ext4_unlock_group(sb, group);
-	ret = ext4_issue_discard(sb, group, start, count);
+	ret = ext4_issue_discard(sb, group, start, count, blkdev_flags);
 	ext4_lock_group(sb, group);
 	mb_free_blocks(NULL, e4b, start, ex.fe_len);
 	return ret;
@@ -5101,6 +5105,7 @@
  * @start:		first group block to examine
  * @max:		last group block to examine
  * @minblocks:		minimum extent block count
+ * @blkdev_flags:	flags for the block device
  *
  * ext4_trim_all_free walks through group's buddy bitmap searching for free
  * extents. When the free block is found, ext4_trim_extent is called to TRIM
@@ -5115,7 +5120,7 @@
 static ext4_grpblk_t
 ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 		   ext4_grpblk_t start, ext4_grpblk_t max,
-		   ext4_grpblk_t minblocks)
+		   ext4_grpblk_t minblocks, unsigned long blkdev_flags)
 {
 	void *bitmap;
 	ext4_grpblk_t next, count = 0, free_count = 0;
@@ -5148,7 +5153,8 @@
 
 		if ((next - start) >= minblocks) {
 			ret = ext4_trim_extent(sb, start,
-					       next - start, group, &e4b);
+					       next - start, group, &e4b,
+					       blkdev_flags);
 			if (ret && ret != -EOPNOTSUPP)
 				break;
 			ret = 0;
@@ -5190,6 +5196,7 @@
  * ext4_trim_fs() -- trim ioctl handle function
  * @sb:			superblock for filesystem
  * @range:		fstrim_range structure
+ * @blkdev_flags:	flags for the block device
  *
  * start:	First Byte to trim
  * len:		number of Bytes to trim from start
@@ -5198,7 +5205,8 @@
  * start to start+len. For each such a group ext4_trim_all_free function
  * is invoked to trim all free space.
  */
-int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
+			unsigned long blkdev_flags)
 {
 	struct ext4_group_info *grp;
 	ext4_group_t group, first_group, last_group;
@@ -5254,7 +5262,7 @@
 
 		if (grp->bb_free >= minlen) {
 			cnt = ext4_trim_all_free(sb, group, first_cluster,
-						end, minlen);
+						end, minlen, blkdev_flags);
 			if (cnt < 0) {
 				ret = cnt;
 				break;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index a81b829..2531cc1 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -45,6 +45,7 @@
 #include <linux/cleancache.h>
 
 #include "ext4.h"
+#include <trace/events/android_fs.h>
 
 static inline bool ext4_bio_encrypted(struct bio *bio)
 {
@@ -55,6 +56,17 @@
 #endif
 }
 
+static void
+ext4_trace_read_completion(struct bio *bio)
+{
+	struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+	if (first_page != NULL)
+		trace_android_fs_dataread_end(first_page->mapping->host,
+					      page_offset(first_page),
+					      bio->bi_iter.bi_size);
+}
+
 /*
  * I/O completion handler for multipage BIOs.
  *
@@ -72,6 +84,9 @@
 	struct bio_vec *bv;
 	int i;
 
+	if (trace_android_fs_dataread_start_enabled())
+		ext4_trace_read_completion(bio);
+
 	if (ext4_bio_encrypted(bio)) {
 		if (bio->bi_error) {
 			fscrypt_release_ctx(bio->bi_private);
@@ -95,6 +110,30 @@
 	bio_put(bio);
 }
 
+static void
+ext4_submit_bio_read(struct bio *bio)
+{
+	if (trace_android_fs_dataread_start_enabled()) {
+		struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+		if (first_page != NULL) {
+			char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+			path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    first_page->mapping->host);
+			trace_android_fs_dataread_start(
+				first_page->mapping->host,
+				page_offset(first_page),
+				bio->bi_iter.bi_size,
+				current->pid,
+				path,
+				current->comm);
+		}
+	}
+	submit_bio(bio);
+}
+
 int ext4_mpage_readpages(struct address_space *mapping,
 			 struct list_head *pages, struct page *page,
 			 unsigned nr_pages)
@@ -235,7 +274,7 @@
 		 */
 		if (bio && (last_block_in_bio != blocks[0] - 1)) {
 		submit_and_realloc:
-			submit_bio(bio);
+			ext4_submit_bio_read(bio);
 			bio = NULL;
 		}
 		if (bio == NULL) {
@@ -268,14 +307,14 @@
 		if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
 		     (relative_block == map.m_len)) ||
 		    (first_hole != blocks_per_page)) {
-			submit_bio(bio);
+			ext4_submit_bio_read(bio);
 			bio = NULL;
 		} else
 			last_block_in_bio = blocks[blocks_per_page - 1];
 		goto next_page;
 	confused:
 		if (bio) {
-			submit_bio(bio);
+			ext4_submit_bio_read(bio);
 			bio = NULL;
 		}
 		if (!PageUptodate(page))
@@ -288,6 +327,6 @@
 	}
 	BUG_ON(pages && !list_empty(pages));
 	if (bio)
-		submit_bio(bio);
+		ext4_submit_bio_read(bio);
 	return 0;
 }
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9041805..9ea4813 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -28,6 +28,7 @@
 #include "segment.h"
 #include "trace.h"
 #include <trace/events/f2fs.h>
+#include <trace/events/android_fs.h>
 
 static void f2fs_read_end_io(struct bio *bio)
 {
@@ -1629,6 +1630,16 @@
 	block_t blkaddr = NULL_ADDR;
 	int err = 0;
 
+	if (trace_android_fs_datawrite_start_enabled()) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_datawrite_start(inode, pos, len,
+						 current->pid, path,
+						 current->comm);
+	}
 	trace_f2fs_write_begin(inode, pos, len, flags);
 
 	/*
@@ -1725,6 +1736,7 @@
 {
 	struct inode *inode = page->mapping->host;
 
+	trace_android_fs_datawrite_end(inode, pos, len);
 	trace_f2fs_write_end(inode, pos, len, copied);
 
 	/*
@@ -1786,6 +1798,29 @@
 
 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
 
+	if (trace_android_fs_dataread_start_enabled() &&
+	    (rw == READ)) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_dataread_start(inode, offset,
+						count, current->pid, path,
+						current->comm);
+	}
+	if (trace_android_fs_datawrite_start_enabled() &&
+	    (rw == WRITE)) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_datawrite_start(inode, offset, count,
+						 current->pid, path,
+						 current->comm);
+	}
+
 	down_read(&F2FS_I(inode)->dio_rwsem[rw]);
 	err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
 	up_read(&F2FS_I(inode)->dio_rwsem[rw]);
@@ -1797,6 +1832,13 @@
 			f2fs_write_failed(mapping, offset + count);
 	}
 
+	if (trace_android_fs_dataread_start_enabled() &&
+	    (rw == READ))
+		trace_android_fs_dataread_end(inode, offset, count);
+	if (trace_android_fs_datawrite_start_enabled() &&
+	    (rw == WRITE))
+		trace_android_fs_datawrite_end(inode, offset, count);
+
 	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
 
 	return err;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 482888e..066201a 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -13,6 +13,7 @@
 
 #include "f2fs.h"
 #include "node.h"
+#include <trace/events/android_fs.h>
 
 bool f2fs_may_inline_data(struct inode *inode)
 {
@@ -82,14 +83,29 @@
 {
 	struct page *ipage;
 
+	if (trace_android_fs_dataread_start_enabled()) {
+		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+		path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    inode);
+		trace_android_fs_dataread_start(inode, page_offset(page),
+						PAGE_SIZE, current->pid,
+						path, current->comm);
+	}
+
 	ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
 	if (IS_ERR(ipage)) {
+		trace_android_fs_dataread_end(inode, page_offset(page),
+					      PAGE_SIZE);
 		unlock_page(page);
 		return PTR_ERR(ipage);
 	}
 
 	if (!f2fs_has_inline_data(inode)) {
 		f2fs_put_page(ipage, 1);
+		trace_android_fs_dataread_end(inode, page_offset(page),
+					      PAGE_SIZE);
 		return -EAGAIN;
 	}
 
@@ -101,6 +117,8 @@
 	if (!PageUptodate(page))
 		SetPageUptodate(page);
 	f2fs_put_page(ipage, 1);
+	trace_android_fs_dataread_end(inode, page_offset(page),
+				      PAGE_SIZE);
 	unlock_page(page);
 	return 0;
 }
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 83a9633..51328db 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1528,13 +1528,15 @@
 
 	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
 		if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
-			le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
+		    le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) {
 			return 1;
+		}
 	}
 	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
 		if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs ||
-			le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg)
+		    le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) {
 			return 1;
+		}
 	}
 
 	sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f3aea1b..17ad41d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2112,7 +2112,7 @@
 	    (dirtytime && (inode->i_state & I_DIRTY_INODE)))
 		return;
 
-	if (unlikely(block_dump))
+	if (unlikely(block_dump > 1))
 		block_dump___mark_inode_dirty(inode);
 
 	spin_lock(&inode->i_lock);
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 7dca743..940c683 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -44,6 +44,7 @@
 	if (old_pwd.dentry)
 		path_put(&old_pwd);
 }
+EXPORT_SYMBOL(set_fs_pwd);
 
 static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
 {
@@ -89,6 +90,7 @@
 	path_put(&fs->pwd);
 	kmem_cache_free(fs_cachep, fs);
 }
+EXPORT_SYMBOL(free_fs_struct);
 
 void exit_fs(struct task_struct *tsk)
 {
@@ -127,6 +129,7 @@
 	}
 	return fs;
 }
+EXPORT_SYMBOL_GPL(copy_fs_struct);
 
 int unshare_fs_struct(void)
 {
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 411f161..4501a69 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -13,12 +13,14 @@
 #include <linux/poll.h>
 #include <linux/uio.h>
 #include <linux/miscdevice.h>
+#include <linux/namei.h>
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/slab.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/swap.h>
 #include <linux/splice.h>
+#include <linux/freezer.h>
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
@@ -471,7 +473,9 @@
 	 * Either request is already in userspace, or it was forced.
 	 * Wait it out.
 	 */
-	wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
+	while (!test_bit(FR_FINISHED, &req->flags))
+		wait_event_freezable(req->waitq,
+				test_bit(FR_FINISHED, &req->flags));
 }
 
 static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
@@ -1906,6 +1910,10 @@
 		cs->move_pages = 0;
 
 	err = copy_out_args(cs, &req->out, nbytes);
+	if (req->in.h.opcode == FUSE_CANONICAL_PATH) {
+		req->out.h.error = kern_path((char *)req->out.args[0].value, 0,
+							req->canonical_path);
+	}
 	fuse_copy_finish(cs);
 
 	spin_lock(&fpq->lock);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 60dd2bc..547a324 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -262,6 +262,50 @@
 	goto out;
 }
 
+/*
+ * Get the canonical path. Since we must translate to a path, this must be done
+ * in the context of the userspace daemon, however, the userspace daemon cannot
+ * look up paths on its own. Instead, we handle the lookup as a special case
+ * inside of the write request.
+ */
+static void fuse_dentry_canonical_path(const struct path *path, struct path *canonical_path) {
+	struct inode *inode = path->dentry->d_inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	int err;
+	char *path_name;
+
+	req = fuse_get_req(fc, 1);
+	err = PTR_ERR(req);
+	if (IS_ERR(req))
+		goto default_path;
+
+	path_name = (char*)__get_free_page(GFP_KERNEL);
+	if (!path_name) {
+		fuse_put_request(fc, req);
+		goto default_path;
+	}
+
+	req->in.h.opcode = FUSE_CANONICAL_PATH;
+	req->in.h.nodeid = get_node_id(inode);
+	req->in.numargs = 0;
+	req->out.numargs = 1;
+	req->out.args[0].size = PATH_MAX;
+	req->out.args[0].value = path_name;
+	req->canonical_path = canonical_path;
+	req->out.argvar = 1;
+	fuse_request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	free_page((unsigned long)path_name);
+	if (!err)
+		return;
+default_path:
+	canonical_path->dentry = path->dentry;
+	canonical_path->mnt = path->mnt;
+	path_get(canonical_path);
+}
+
 static int invalid_nodeid(u64 nodeid)
 {
 	return !nodeid || nodeid == FUSE_ROOT_ID;
@@ -284,11 +328,13 @@
 	.d_revalidate	= fuse_dentry_revalidate,
 	.d_init		= fuse_dentry_init,
 	.d_release	= fuse_dentry_release,
+	.d_canonical_path = fuse_dentry_canonical_path,
 };
 
 const struct dentry_operations fuse_root_dentry_operations = {
 	.d_init		= fuse_dentry_init,
 	.d_release	= fuse_dentry_release,
+	.d_canonical_path = fuse_dentry_canonical_path,
 };
 
 int fuse_valid_type(int m)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1c905c7..a10c56e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -368,6 +368,9 @@
 	/** Inode used in the request or NULL */
 	struct inode *inode;
 
+	/** Path used for completing d_canonical_path */
+	struct path *canonical_path;
+
 	/** AIO control block */
 	struct fuse_io_priv *io;
 
diff --git a/fs/inode.c b/fs/inode.c
index 2071ff5..1d1a957 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1781,7 +1781,7 @@
 	return mask;
 }
 
-static int __remove_privs(struct dentry *dentry, int kill)
+static int __remove_privs(struct vfsmount *mnt, struct dentry *dentry, int kill)
 {
 	struct iattr newattrs;
 
@@ -1790,7 +1790,7 @@
 	 * Note we call this on write, so notify_change will not
 	 * encounter any conflicting delegations:
 	 */
-	return notify_change(dentry, &newattrs, NULL);
+	return notify_change2(mnt, dentry, &newattrs, NULL);
 }
 
 /*
@@ -1812,7 +1812,7 @@
 	if (kill < 0)
 		return kill;
 	if (kill)
-		error = __remove_privs(dentry, kill);
+		error = __remove_privs(file->f_path.mnt, dentry, kill);
 	if (!error)
 		inode_has_no_xattr(inode);
 
diff --git a/fs/internal.h b/fs/internal.h
index 8b7143b..3e58863 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -88,9 +88,11 @@
  * super.c
  */
 extern int do_remount_sb(struct super_block *, int, void *, int);
+extern int do_remount_sb2(struct vfsmount *, struct super_block *, int,
+								void *, int);
 extern bool trylock_super(struct super_block *sb);
 extern struct dentry *mount_fs(struct file_system_type *,
-			       int, const char *, void *);
+			       int, const char *, struct vfsmount *, void *);
 extern struct super_block *user_get_super(dev_t);
 
 /*
diff --git a/fs/mpage.c b/fs/mpage.c
index e2ea442..d4e17c88 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -31,6 +31,14 @@
 #include <linux/cleancache.h>
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/android_fs.h>
+
+EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_start);
+EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_end);
+EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_start);
+EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_end);
+
 /*
  * I/O completion handler for multipage BIOs.
  *
@@ -48,6 +56,16 @@
 	struct bio_vec *bv;
 	int i;
 
+	if (trace_android_fs_dataread_end_enabled() &&
+	    (bio_data_dir(bio) == READ)) {
+		struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+		if (first_page != NULL)
+			trace_android_fs_dataread_end(first_page->mapping->host,
+						      page_offset(first_page),
+						      bio->bi_iter.bi_size);
+	}
+
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
 		page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
@@ -58,6 +76,24 @@
 
 static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
 {
+	if (trace_android_fs_dataread_start_enabled() && (op == REQ_OP_READ)) {
+		struct page *first_page = bio->bi_io_vec[0].bv_page;
+
+		if (first_page != NULL) {
+			char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+			path = android_fstrace_get_pathname(pathbuf,
+						    MAX_TRACE_PATHBUF_LEN,
+						    first_page->mapping->host);
+			trace_android_fs_dataread_start(
+				first_page->mapping->host,
+				page_offset(first_page),
+				bio->bi_iter.bi_size,
+				current->pid,
+				path,
+				current->comm);
+		}
+	}
 	bio->bi_end_io = mpage_end_io;
 	bio_set_op_attrs(bio, op, op_flags);
 	guard_bio_eod(op, bio);
diff --git a/fs/namei.c b/fs/namei.c
index eb4626b..88eb41c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -376,9 +376,11 @@
  * flag in inode->i_opflags, that says "this has not special
  * permission function, use the fast case".
  */
-static inline int do_inode_permission(struct inode *inode, int mask)
+static inline int do_inode_permission(struct vfsmount *mnt, struct inode *inode, int mask)
 {
 	if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
+		if (likely(mnt && inode->i_op->permission2))
+			return inode->i_op->permission2(mnt, inode, mask);
 		if (likely(inode->i_op->permission))
 			return inode->i_op->permission(inode, mask);
 
@@ -402,7 +404,7 @@
  * This does not check for a read-only file system.  You probably want
  * inode_permission().
  */
-int __inode_permission(struct inode *inode, int mask)
+int __inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask)
 {
 	int retval;
 
@@ -422,7 +424,7 @@
 			return -EACCES;
 	}
 
-	retval = do_inode_permission(inode, mask);
+	retval = do_inode_permission(mnt, inode, mask);
 	if (retval)
 		return retval;
 
@@ -430,7 +432,14 @@
 	if (retval)
 		return retval;
 
-	return security_inode_permission(inode, mask);
+	retval = security_inode_permission(inode, mask);
+	return retval;
+}
+EXPORT_SYMBOL(__inode_permission2);
+
+int __inode_permission(struct inode *inode, int mask)
+{
+	return __inode_permission2(NULL, inode, mask);
 }
 EXPORT_SYMBOL(__inode_permission);
 
@@ -466,14 +475,20 @@
  *
  * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
  */
-int inode_permission(struct inode *inode, int mask)
+int inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask)
 {
 	int retval;
 
 	retval = sb_permission(inode->i_sb, inode, mask);
 	if (retval)
 		return retval;
-	return __inode_permission(inode, mask);
+	return __inode_permission2(mnt, inode, mask);
+}
+EXPORT_SYMBOL(inode_permission2);
+
+int inode_permission(struct inode *inode, int mask)
+{
+	return inode_permission2(NULL, inode, mask);
 }
 EXPORT_SYMBOL(inode_permission);
 
@@ -1706,13 +1721,13 @@
 static inline int may_lookup(struct nameidata *nd)
 {
 	if (nd->flags & LOOKUP_RCU) {
-		int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
+		int err = inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
 		if (err != -ECHILD)
 			return err;
 		if (unlazy_walk(nd, NULL, 0))
 			return -ECHILD;
 	}
-	return inode_permission(nd->inode, MAY_EXEC);
+	return inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC);
 }
 
 static inline int handle_dots(struct nameidata *nd, int type)
@@ -2186,11 +2201,12 @@
 	nd->depth = 0;
 	if (flags & LOOKUP_ROOT) {
 		struct dentry *root = nd->root.dentry;
+		struct vfsmount *mnt = nd->root.mnt;
 		struct inode *inode = root->d_inode;
 		if (*s) {
 			if (!d_can_lookup(root))
 				return ERR_PTR(-ENOTDIR);
-			retval = inode_permission(inode, MAY_EXEC);
+			retval = inode_permission2(mnt, inode, MAY_EXEC);
 			if (retval)
 				return ERR_PTR(retval);
 		}
@@ -2455,6 +2471,7 @@
 /**
  * lookup_one_len - filesystem helper to lookup single pathname component
  * @name:	pathname component to lookup
+ * @mnt:	mount we are looking up on
  * @base:	base directory to lookup from
  * @len:	maximum length @len should be interpreted to
  *
@@ -2463,7 +2480,7 @@
  *
  * The caller must hold base->i_mutex.
  */
-struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
+struct dentry *lookup_one_len2(const char *name, struct vfsmount *mnt, struct dentry *base, int len)
 {
 	struct qstr this;
 	unsigned int c;
@@ -2497,12 +2514,18 @@
 			return ERR_PTR(err);
 	}
 
-	err = inode_permission(base->d_inode, MAY_EXEC);
+	err = inode_permission2(mnt, base->d_inode, MAY_EXEC);
 	if (err)
 		return ERR_PTR(err);
 
 	return __lookup_hash(&this, base, 0);
 }
+EXPORT_SYMBOL(lookup_one_len2);
+
+struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
+{
+	return lookup_one_len2(name, NULL, base, len);
+}
 EXPORT_SYMBOL(lookup_one_len);
 
 /**
@@ -2805,7 +2828,7 @@
  * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
  *     nfs_async_unlink().
  */
-static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
+static int may_delete(struct vfsmount *mnt, struct inode *dir, struct dentry *victim, bool isdir)
 {
 	struct inode *inode = d_backing_inode(victim);
 	int error;
@@ -2817,7 +2840,7 @@
 	BUG_ON(victim->d_parent->d_inode != dir);
 	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
 
-	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+	error = inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC);
 	if (error)
 		return error;
 	if (IS_APPEND(dir))
@@ -2849,7 +2872,7 @@
  *  4. We should have write and exec permissions on dir
  *  5. We can't do it if dir is immutable (done in permission())
  */
-static inline int may_create(struct inode *dir, struct dentry *child)
+static inline int may_create(struct vfsmount *mnt, struct inode *dir, struct dentry *child)
 {
 	struct user_namespace *s_user_ns;
 	audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
@@ -2861,7 +2884,7 @@
 	if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
 	    !kgid_has_mapping(s_user_ns, current_fsgid()))
 		return -EOVERFLOW;
-	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+	return inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC);
 }
 
 /*
@@ -2908,10 +2931,10 @@
 }
 EXPORT_SYMBOL(unlock_rename);
 
-int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		bool want_excl)
+int vfs_create2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry,
+		umode_t mode, bool want_excl)
 {
-	int error = may_create(dir, dentry);
+	int error = may_create(mnt, dir, dentry);
 	if (error)
 		return error;
 
@@ -2927,6 +2950,13 @@
 		fsnotify_create(dir, dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_create2);
+
+int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+		bool want_excl)
+{
+	return vfs_create2(NULL, dir, dentry, mode, want_excl);
+}
 EXPORT_SYMBOL(vfs_create);
 
 bool may_open_dev(const struct path *path)
@@ -2938,6 +2968,7 @@
 static int may_open(struct path *path, int acc_mode, int flag)
 {
 	struct dentry *dentry = path->dentry;
+	struct vfsmount *mnt = path->mnt;
 	struct inode *inode = dentry->d_inode;
 	int error;
 
@@ -2962,7 +2993,7 @@
 		break;
 	}
 
-	error = inode_permission(inode, MAY_OPEN | acc_mode);
+	error = inode_permission2(mnt, inode, MAY_OPEN | acc_mode);
 	if (error)
 		return error;
 
@@ -2997,7 +3028,7 @@
 	if (!error)
 		error = security_path_truncate(path);
 	if (!error) {
-		error = do_truncate(path->dentry, 0,
+		error = do_truncate2(path->mnt, path->dentry, 0,
 				    ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
 				    filp);
 	}
@@ -3024,7 +3055,7 @@
 	    !kgid_has_mapping(s_user_ns, current_fsgid()))
 		return -EOVERFLOW;
 
-	error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
+	error = inode_permission2(dir->mnt, dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
 	if (error)
 		return error;
 
@@ -3461,7 +3492,7 @@
 		goto out;
 	dir = path.dentry->d_inode;
 	/* we want directory to be writable */
-	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+	error = inode_permission2(nd->path.mnt, dir, MAY_WRITE | MAY_EXEC);
 	if (error)
 		goto out2;
 	if (!dir->i_op->tmpfile) {
@@ -3714,9 +3745,9 @@
 }
 EXPORT_SYMBOL(user_path_create);
 
-int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+int vfs_mknod2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
-	int error = may_create(dir, dentry);
+	int error = may_create(mnt, dir, dentry);
 
 	if (error)
 		return error;
@@ -3740,6 +3771,12 @@
 		fsnotify_create(dir, dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_mknod2);
+
+int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+{
+	return vfs_mknod2(NULL, dir, dentry, mode, dev);
+}
 EXPORT_SYMBOL(vfs_mknod);
 
 static int may_mknod(umode_t mode)
@@ -3782,12 +3819,12 @@
 		goto out;
 	switch (mode & S_IFMT) {
 		case 0: case S_IFREG:
-			error = vfs_create(path.dentry->d_inode,dentry,mode,true);
+			error = vfs_create2(path.mnt, path.dentry->d_inode,dentry,mode,true);
 			if (!error)
 				ima_post_path_mknod(dentry);
 			break;
 		case S_IFCHR: case S_IFBLK:
-			error = vfs_mknod(path.dentry->d_inode,dentry,mode,
+			error = vfs_mknod2(path.mnt, path.dentry->d_inode,dentry,mode,
 					new_decode_dev(dev));
 			break;
 		case S_IFIFO: case S_IFSOCK:
@@ -3808,9 +3845,9 @@
 	return sys_mknodat(AT_FDCWD, filename, mode, dev);
 }
 
-int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+int vfs_mkdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode)
 {
-	int error = may_create(dir, dentry);
+	int error = may_create(mnt, dir, dentry);
 	unsigned max_links = dir->i_sb->s_max_links;
 
 	if (error)
@@ -3832,6 +3869,12 @@
 		fsnotify_mkdir(dir, dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_mkdir2);
+
+int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	return vfs_mkdir2(NULL, dir, dentry, mode);
+}
 EXPORT_SYMBOL(vfs_mkdir);
 
 SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
@@ -3850,7 +3893,7 @@
 		mode &= ~current_umask();
 	error = security_path_mkdir(&path, dentry, mode);
 	if (!error)
-		error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+		error = vfs_mkdir2(path.mnt, path.dentry->d_inode, dentry, mode);
 	done_path_create(&path, dentry);
 	if (retry_estale(error, lookup_flags)) {
 		lookup_flags |= LOOKUP_REVAL;
@@ -3864,9 +3907,9 @@
 	return sys_mkdirat(AT_FDCWD, pathname, mode);
 }
 
-int vfs_rmdir(struct inode *dir, struct dentry *dentry)
+int vfs_rmdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry)
 {
-	int error = may_delete(dir, dentry, 1);
+	int error = may_delete(mnt, dir, dentry, 1);
 
 	if (error)
 		return error;
@@ -3901,6 +3944,12 @@
 		d_delete(dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_rmdir2);
+
+int vfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	return vfs_rmdir2(NULL, dir, dentry);
+}
 EXPORT_SYMBOL(vfs_rmdir);
 
 static long do_rmdir(int dfd, const char __user *pathname)
@@ -3946,7 +3995,7 @@
 	error = security_path_rmdir(&path, dentry);
 	if (error)
 		goto exit3;
-	error = vfs_rmdir(path.dentry->d_inode, dentry);
+	error = vfs_rmdir2(path.mnt, path.dentry->d_inode, dentry);
 exit3:
 	dput(dentry);
 exit2:
@@ -3985,10 +4034,10 @@
  * be appropriate for callers that expect the underlying filesystem not
  * to be NFS exported.
  */
-int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
+int vfs_unlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
 {
 	struct inode *target = dentry->d_inode;
-	int error = may_delete(dir, dentry, 0);
+	int error = may_delete(mnt, dir, dentry, 0);
 
 	if (error)
 		return error;
@@ -4023,6 +4072,12 @@
 
 	return error;
 }
+EXPORT_SYMBOL(vfs_unlink2);
+
+int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
+{
+	return vfs_unlink2(NULL, dir, dentry, delegated_inode);
+}
 EXPORT_SYMBOL(vfs_unlink);
 
 /*
@@ -4070,7 +4125,7 @@
 		error = security_path_unlink(&path, dentry);
 		if (error)
 			goto exit2;
-		error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode);
+		error = vfs_unlink2(path.mnt, path.dentry->d_inode, dentry, &delegated_inode);
 exit2:
 		dput(dentry);
 	}
@@ -4120,9 +4175,9 @@
 	return do_unlinkat(AT_FDCWD, pathname);
 }
 
-int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
+int vfs_symlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, const char *oldname)
 {
-	int error = may_create(dir, dentry);
+	int error = may_create(mnt, dir, dentry);
 
 	if (error)
 		return error;
@@ -4139,6 +4194,12 @@
 		fsnotify_create(dir, dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_symlink2);
+
+int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
+{
+	return vfs_symlink2(NULL, dir, dentry, oldname);
+}
 EXPORT_SYMBOL(vfs_symlink);
 
 SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
@@ -4161,7 +4222,7 @@
 
 	error = security_path_symlink(&path, dentry, from->name);
 	if (!error)
-		error = vfs_symlink(path.dentry->d_inode, dentry, from->name);
+		error = vfs_symlink2(path.mnt, path.dentry->d_inode, dentry, from->name);
 	done_path_create(&path, dentry);
 	if (retry_estale(error, lookup_flags)) {
 		lookup_flags |= LOOKUP_REVAL;
@@ -4196,7 +4257,7 @@
  * be appropriate for callers that expect the underlying filesystem not
  * to be NFS exported.
  */
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
+int vfs_link2(struct vfsmount *mnt, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
 {
 	struct inode *inode = old_dentry->d_inode;
 	unsigned max_links = dir->i_sb->s_max_links;
@@ -4205,7 +4266,7 @@
 	if (!inode)
 		return -ENOENT;
 
-	error = may_create(dir, new_dentry);
+	error = may_create(mnt, dir, new_dentry);
 	if (error)
 		return error;
 
@@ -4255,6 +4316,12 @@
 		fsnotify_link(dir, inode, new_dentry);
 	return error;
 }
+EXPORT_SYMBOL(vfs_link2);
+
+int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
+{
+	return vfs_link2(NULL, old_dentry, dir, new_dentry, delegated_inode);
+}
 EXPORT_SYMBOL(vfs_link);
 
 /*
@@ -4310,7 +4377,7 @@
 	error = security_path_link(old_path.dentry, &new_path, new_dentry);
 	if (error)
 		goto out_dput;
-	error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
+	error = vfs_link2(old_path.mnt, old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
 out_dput:
 	done_path_create(&new_path, new_dentry);
 	if (delegated_inode) {
@@ -4385,7 +4452,8 @@
  *	   ->i_mutex on parents, which works but leads to some truly excessive
  *	   locking].
  */
-int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+int vfs_rename2(struct vfsmount *mnt,
+	       struct inode *old_dir, struct dentry *old_dentry,
 	       struct inode *new_dir, struct dentry *new_dentry,
 	       struct inode **delegated_inode, unsigned int flags)
 {
@@ -4404,19 +4472,19 @@
 	if (d_real_inode(old_dentry) == d_real_inode(new_dentry))
 		return 0;
 
-	error = may_delete(old_dir, old_dentry, is_dir);
+	error = may_delete(mnt, old_dir, old_dentry, is_dir);
 	if (error)
 		return error;
 
 	if (!target) {
-		error = may_create(new_dir, new_dentry);
+		error = may_create(mnt, new_dir, new_dentry);
 	} else {
 		new_is_dir = d_is_dir(new_dentry);
 
 		if (!(flags & RENAME_EXCHANGE))
-			error = may_delete(new_dir, new_dentry, is_dir);
+			error = may_delete(mnt, new_dir, new_dentry, is_dir);
 		else
-			error = may_delete(new_dir, new_dentry, new_is_dir);
+			error = may_delete(mnt, new_dir, new_dentry, new_is_dir);
 	}
 	if (error)
 		return error;
@@ -4430,12 +4498,12 @@
 	 */
 	if (new_dir != old_dir) {
 		if (is_dir) {
-			error = inode_permission(source, MAY_WRITE);
+			error = inode_permission2(mnt, source, MAY_WRITE);
 			if (error)
 				return error;
 		}
 		if ((flags & RENAME_EXCHANGE) && new_is_dir) {
-			error = inode_permission(target, MAY_WRITE);
+			error = inode_permission2(mnt, target, MAY_WRITE);
 			if (error)
 				return error;
 		}
@@ -4512,6 +4580,14 @@
 
 	return error;
 }
+EXPORT_SYMBOL(vfs_rename2);
+
+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+	       struct inode *new_dir, struct dentry *new_dentry,
+	       struct inode **delegated_inode, unsigned int flags)
+{
+	return vfs_rename2(NULL, old_dir, old_dentry, new_dir, new_dentry, delegated_inode, flags);
+}
 EXPORT_SYMBOL(vfs_rename);
 
 SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
@@ -4625,7 +4701,7 @@
 				     &new_path, new_dentry, flags);
 	if (error)
 		goto exit5;
-	error = vfs_rename(old_path.dentry->d_inode, old_dentry,
+	error = vfs_rename2(old_path.mnt, old_path.dentry->d_inode, old_dentry,
 			   new_path.dentry->d_inode, new_dentry,
 			   &delegated_inode, flags);
 exit5:
@@ -4670,7 +4746,7 @@
 
 int vfs_whiteout(struct inode *dir, struct dentry *dentry)
 {
-	int error = may_create(dir, dentry);
+	int error = may_create(NULL, dir, dentry);
 	if (error)
 		return error;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 41f906a..77b46bf4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -227,6 +227,7 @@
 		mnt->mnt_count = 1;
 		mnt->mnt_writers = 0;
 #endif
+		mnt->mnt.data = NULL;
 
 		INIT_HLIST_NODE(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
@@ -581,6 +582,7 @@
 
 static void free_vfsmnt(struct mount *mnt)
 {
+	kfree(mnt->mnt.data);
 	kfree_const(mnt->mnt_devname);
 #ifdef CONFIG_SMP
 	free_percpu(mnt->mnt_pcp);
@@ -984,10 +986,18 @@
 	if (!mnt)
 		return ERR_PTR(-ENOMEM);
 
+	if (type->alloc_mnt_data) {
+		mnt->mnt.data = type->alloc_mnt_data();
+		if (!mnt->mnt.data) {
+			mnt_free_id(mnt);
+			free_vfsmnt(mnt);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
 	if (flags & MS_KERNMOUNT)
 		mnt->mnt.mnt_flags = MNT_INTERNAL;
 
-	root = mount_fs(type, flags, name, data);
+	root = mount_fs(type, flags, name, &mnt->mnt, data);
 	if (IS_ERR(root)) {
 		mnt_free_id(mnt);
 		free_vfsmnt(mnt);
@@ -1031,6 +1041,14 @@
 	if (!mnt)
 		return ERR_PTR(-ENOMEM);
 
+	if (sb->s_op->clone_mnt_data) {
+		mnt->mnt.data = sb->s_op->clone_mnt_data(old->mnt.data);
+		if (!mnt->mnt.data) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+	}
+
 	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
 		mnt->mnt_group_id = 0; /* not a peer of original */
 	else
@@ -2342,8 +2360,14 @@
 		err = change_mount_flags(path->mnt, flags);
 	else if (!capable(CAP_SYS_ADMIN))
 		err = -EPERM;
-	else
-		err = do_remount_sb(sb, flags, data, 0);
+	else {
+		err = do_remount_sb2(path->mnt, sb, flags, data, 0);
+		namespace_lock();
+		lock_mount_hash();
+		propagate_remount(mnt);
+		unlock_mount_hash();
+		namespace_unlock();
+	}
 	if (!err) {
 		lock_mount_hash();
 		mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 258e8f6..cef9885 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -498,7 +498,7 @@
 	}
 
 	/* you can only watch an inode if you have read permissions on it */
-	ret = inode_permission(path->dentry->d_inode, MAY_READ);
+	ret = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ);
 	if (ret)
 		path_put(path);
 out:
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 69d1ea3..4da5c6a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -337,7 +337,7 @@
 	if (error)
 		return error;
 	/* you can only watch an inode if you have read permissions on it */
-	error = inode_permission(path->dentry->d_inode, MAY_READ);
+	error = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ);
 	if (error)
 		path_put(path);
 	return error;
@@ -702,6 +702,8 @@
 	struct fsnotify_group *group;
 	struct inode *inode;
 	struct path path;
+	struct path alteredpath;
+	struct path *canonical_path = &path;
 	struct fd f;
 	int ret;
 	unsigned flags = 0;
@@ -741,13 +743,22 @@
 	if (ret)
 		goto fput_and_out;
 
+	/* support stacked filesystems */
+	if(path.dentry && path.dentry->d_op) {
+		if (path.dentry->d_op->d_canonical_path) {
+			path.dentry->d_op->d_canonical_path(&path, &alteredpath);
+			canonical_path = &alteredpath;
+			path_put(&path);
+		}
+	}
+
 	/* inode held in place by reference to path; group by fget on fd */
-	inode = path.dentry->d_inode;
+	inode = canonical_path->dentry->d_inode;
 	group = f.file->private_data;
 
 	/* create/update an inode mark */
 	ret = inotify_update_watch(group, inode, mask);
-	path_put(&path);
+	path_put(canonical_path);
 fput_and_out:
 	fdput(f);
 	return ret;
diff --git a/fs/open.c b/fs/open.c
index f1deb36..e8818ea 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -34,8 +34,8 @@
 
 #include "internal.h"
 
-int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
-	struct file *filp)
+int do_truncate2(struct vfsmount *mnt, struct dentry *dentry, loff_t length,
+		unsigned int time_attrs, struct file *filp)
 {
 	int ret;
 	struct iattr newattrs;
@@ -60,18 +60,25 @@
 
 	inode_lock(dentry->d_inode);
 	/* Note any delegations or leases have already been broken: */
-	ret = notify_change(dentry, &newattrs, NULL);
+	ret = notify_change2(mnt, dentry, &newattrs, NULL);
 	inode_unlock(dentry->d_inode);
 	return ret;
 }
+int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
+	struct file *filp)
+{
+	return do_truncate2(NULL, dentry, length, time_attrs, filp);
+}
 
 long vfs_truncate(const struct path *path, loff_t length)
 {
 	struct inode *inode;
+	struct vfsmount *mnt;
 	struct dentry *upperdentry;
 	long error;
 
 	inode = path->dentry->d_inode;
+	mnt = path->mnt;
 
 	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
 	if (S_ISDIR(inode->i_mode))
@@ -83,7 +90,7 @@
 	if (error)
 		goto out;
 
-	error = inode_permission(inode, MAY_WRITE);
+	error = inode_permission2(mnt, inode, MAY_WRITE);
 	if (error)
 		goto mnt_drop_write_and_out;
 
@@ -117,7 +124,7 @@
 	if (!error)
 		error = security_path_truncate(path);
 	if (!error)
-		error = do_truncate(path->dentry, length, 0, NULL);
+		error = do_truncate2(mnt, path->dentry, length, 0, NULL);
 
 put_write_and_out:
 	put_write_access(upperdentry->d_inode);
@@ -166,6 +173,7 @@
 {
 	struct inode *inode;
 	struct dentry *dentry;
+	struct vfsmount *mnt;
 	struct fd f;
 	int error;
 
@@ -182,6 +190,7 @@
 		small = 0;
 
 	dentry = f.file->f_path.dentry;
+	mnt = f.file->f_path.mnt;
 	inode = dentry->d_inode;
 	error = -EINVAL;
 	if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
@@ -201,7 +210,7 @@
 	if (!error)
 		error = security_path_truncate(&f.file->f_path);
 	if (!error)
-		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
+		error = do_truncate2(mnt, dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
 	sb_end_write(inode->i_sb);
 out_putf:
 	fdput(f);
@@ -357,6 +366,7 @@
 	struct cred *override_cred;
 	struct path path;
 	struct inode *inode;
+	struct vfsmount *mnt;
 	int res;
 	unsigned int lookup_flags = LOOKUP_FOLLOW;
 
@@ -387,6 +397,7 @@
 		goto out;
 
 	inode = d_backing_inode(path.dentry);
+	mnt = path.mnt;
 
 	if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
 		/*
@@ -398,7 +409,7 @@
 			goto out_path_release;
 	}
 
-	res = inode_permission(inode, mode | MAY_ACCESS);
+	res = inode_permission2(mnt, inode, mode | MAY_ACCESS);
 	/* SuS v2 requires we report a read only fs too */
 	if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
 		goto out_path_release;
@@ -442,7 +453,7 @@
 	if (error)
 		goto out;
 
-	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
 	if (error)
 		goto dput_and_out;
 
@@ -462,6 +473,7 @@
 {
 	struct fd f = fdget_raw(fd);
 	struct inode *inode;
+	struct vfsmount *mnt;
 	int error = -EBADF;
 
 	error = -EBADF;
@@ -469,12 +481,13 @@
 		goto out;
 
 	inode = file_inode(f.file);
+	mnt = f.file->f_path.mnt;
 
 	error = -ENOTDIR;
 	if (!S_ISDIR(inode->i_mode))
 		goto out_putf;
 
-	error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission2(mnt, inode, MAY_EXEC | MAY_CHDIR);
 	if (!error)
 		set_fs_pwd(current->fs, &f.file->f_path);
 out_putf:
@@ -493,7 +506,7 @@
 	if (error)
 		goto out;
 
-	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
 	if (error)
 		goto dput_and_out;
 
@@ -533,7 +546,7 @@
 		goto out_unlock;
 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-	error = notify_change(path->dentry, &newattrs, &delegated_inode);
+	error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode);
 out_unlock:
 	inode_unlock(inode);
 	if (delegated_inode) {
@@ -613,7 +626,7 @@
 	inode_lock(inode);
 	error = security_path_chown(path, uid, gid);
 	if (!error)
-		error = notify_change(path->dentry, &newattrs, &delegated_inode);
+		error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode);
 	inode_unlock(inode);
 	if (delegated_inode) {
 		error = break_deleg_wait(&delegated_inode);
diff --git a/fs/pnode.c b/fs/pnode.c
index d15c63e..ddb846f8 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -609,3 +609,37 @@
 
 	return 0;
 }
+
+/*
+ *  Iterates over all slaves, and slaves of slaves.
+ */
+static struct mount *next_descendent(struct mount *root, struct mount *cur)
+{
+	if (!IS_MNT_NEW(cur) && !list_empty(&cur->mnt_slave_list))
+		return first_slave(cur);
+	do {
+		struct mount *master = cur->mnt_master;
+
+		if (!master || cur->mnt_slave.next != &master->mnt_slave_list) {
+			struct mount *next = next_slave(cur);
+
+			return (next == root) ? NULL : next;
+		}
+		cur = master;
+	} while (cur != root);
+	return NULL;
+}
+
+void propagate_remount(struct mount *mnt)
+{
+	struct mount *m = mnt;
+	struct super_block *sb = mnt->mnt.mnt_sb;
+
+	if (sb->s_op->copy_mnt_data) {
+		m = next_descendent(mnt, m);
+		while (m) {
+			sb->s_op->copy_mnt_data(m->mnt.data, mnt->mnt.data);
+			m = next_descendent(mnt, m);
+		}
+	}
+}
diff --git a/fs/pnode.h b/fs/pnode.h
index dc87e65..a9a6576 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -44,6 +44,7 @@
 int propagate_umount(struct list_head *);
 int propagate_mount_busy(struct mount *, int);
 void propagate_mount_unlock(struct mount *);
+void propagate_remount(struct mount *);
 void mnt_release_group_id(struct mount *);
 int get_dominating_id(struct mount *mnt, const struct path *root);
 unsigned int mnt_get_count(struct mount *mnt);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b9e4183..c30792f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2949,8 +2949,8 @@
 	ONE("cgroup",  S_IRUGO, proc_cgroup_show),
 #endif
 	ONE("oom_score",  S_IRUGO, proc_oom_score),
-	REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
-	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
+	REG("oom_adj",    S_IRUSR, proc_oom_adj_operations),
+	REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
 #ifdef CONFIG_AUDITSYSCALL
 	REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
 	REG("sessionid",  S_IRUGO, proc_sessionid_operations),
@@ -3340,8 +3340,8 @@
 	ONE("cgroup",  S_IRUGO, proc_cgroup_show),
 #endif
 	ONE("oom_score", S_IRUGO, proc_oom_score),
-	REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
-	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
+	REG("oom_adj",   S_IRUSR, proc_oom_adj_operations),
+	REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
 #ifdef CONFIG_AUDITSYSCALL
 	REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
 	REG("sessionid",  S_IRUGO, proc_sessionid_operations),
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5138e78..9182f84 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -127,6 +127,56 @@
 }
 #endif
 
+static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
+{
+	const char __user *name = vma_get_anon_name(vma);
+	struct mm_struct *mm = vma->vm_mm;
+
+	unsigned long page_start_vaddr;
+	unsigned long page_offset;
+	unsigned long num_pages;
+	unsigned long max_len = NAME_MAX;
+	int i;
+
+	page_start_vaddr = (unsigned long)name & PAGE_MASK;
+	page_offset = (unsigned long)name - page_start_vaddr;
+	num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE);
+
+	seq_puts(m, "[anon:");
+
+	for (i = 0; i < num_pages; i++) {
+		int len;
+		int write_len;
+		const char *kaddr;
+		long pages_pinned;
+		struct page *page;
+
+		pages_pinned = get_user_pages_remote(current, mm,
+				page_start_vaddr, 1, 0, &page, NULL);
+		if (pages_pinned < 1) {
+			seq_puts(m, "<fault>]");
+			return;
+		}
+
+		kaddr = (const char *)kmap(page);
+		len = min(max_len, PAGE_SIZE - page_offset);
+		write_len = strnlen(kaddr + page_offset, len);
+		seq_write(m, kaddr + page_offset, write_len);
+		kunmap(page);
+		put_page(page);
+
+		/* if strnlen hit a null terminator then we're done */
+		if (write_len != len)
+			break;
+
+		max_len -= len;
+		page_offset = 0;
+		page_start_vaddr += PAGE_SIZE;
+	}
+
+	seq_putc(m, ']');
+}
+
 static void vma_stop(struct proc_maps_private *priv)
 {
 	struct mm_struct *mm = priv->mm;
@@ -341,8 +391,15 @@
 			goto done;
 		}
 
-		if (is_stack(priv, vma))
+		if (is_stack(priv, vma)) {
 			name = "[stack]";
+			goto done;
+		}
+
+		if (vma_get_anon_name(vma)) {
+			seq_pad(m, ' ');
+			seq_print_vma_name(m, vma);
+		}
 	}
 
 done:
@@ -756,6 +813,12 @@
 
 	show_map_vma(m, vma, is_pid);
 
+	if (vma_get_anon_name(vma)) {
+		seq_puts(m, "Name:           ");
+		seq_print_vma_name(m, vma);
+		seq_putc(m, '\n');
+	}
+
 	seq_printf(m,
 		   "Size:           %8lu kB\n"
 		   "Rss:            %8lu kB\n"
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 3f1190d..6863773 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -118,7 +118,9 @@
 	if (err)
 		goto out;
 	show_mnt_opts(m, mnt);
-	if (sb->s_op->show_options)
+	if (sb->s_op->show_options2)
+			err = sb->s_op->show_options2(mnt, m, mnt_path.dentry);
+	else if (sb->s_op->show_options)
 		err = sb->s_op->show_options(m, mnt_path.dentry);
 	seq_puts(m, " 0 0\n");
 out:
@@ -180,7 +182,9 @@
 	err = show_sb_opts(m, sb);
 	if (err)
 		goto out;
-	if (sb->s_op->show_options)
+	if (sb->s_op->show_options2) {
+		err = sb->s_op->show_options2(mnt, m, mnt->mnt_root);
+	} else if (sb->s_op->show_options)
 		err = sb->s_op->show_options(m, mnt->mnt_root);
 	seq_putc(m, '\n');
 out:
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 8b09271..8e151fb 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -550,6 +550,12 @@
 	return 0;
 }
 
+void notrace ramoops_console_write_buf(const char *buf, size_t size)
+{
+	struct ramoops_context *cxt = &oops_cxt;
+	persistent_ram_write(cxt->cprz, buf, size);
+}
+
 static int ramoops_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
diff --git a/fs/sdcardfs/Kconfig b/fs/sdcardfs/Kconfig
new file mode 100644
index 0000000..a1c1033
--- /dev/null
+++ b/fs/sdcardfs/Kconfig
@@ -0,0 +1,13 @@
+config SDCARD_FS
+	tristate "sdcard file system"
+	depends on CONFIGFS_FS
+	default n
+	help
+	  Sdcardfs is based on Wrapfs file system.
+
+config SDCARD_FS_FADV_NOACTIVE
+	bool "sdcardfs fadvise noactive support"
+	depends on FADV_NOACTIVE
+	default y
+	help
+	  Sdcardfs supports fadvise noactive mode.
diff --git a/fs/sdcardfs/Makefile b/fs/sdcardfs/Makefile
new file mode 100644
index 0000000..b84fbb2
--- /dev/null
+++ b/fs/sdcardfs/Makefile
@@ -0,0 +1,7 @@
+SDCARDFS_VERSION="0.1"
+
+EXTRA_CFLAGS += -DSDCARDFS_VERSION=\"$(SDCARDFS_VERSION)\"
+
+obj-$(CONFIG_SDCARD_FS) += sdcardfs.o
+
+sdcardfs-y := dentry.o file.o inode.o main.o super.o lookup.o mmap.o packagelist.o derived_perm.o
diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c
new file mode 100644
index 0000000..e9426a6
--- /dev/null
+++ b/fs/sdcardfs/dentry.c
@@ -0,0 +1,193 @@
+/*
+ * fs/sdcardfs/dentry.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include "linux/ctype.h"
+
+/*
+ * returns: -ERRNO if error (returned to user)
+ *          0: tell VFS to invalidate dentry
+ *          1: dentry is valid
+ */
+static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	int err = 1;
+	struct path parent_lower_path, lower_path;
+	struct dentry *parent_dentry = NULL;
+	struct dentry *parent_lower_dentry = NULL;
+	struct dentry *lower_cur_parent_dentry = NULL;
+	struct dentry *lower_dentry = NULL;
+	struct inode *inode;
+	struct sdcardfs_inode_data *data;
+
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	spin_lock(&dentry->d_lock);
+	if (IS_ROOT(dentry)) {
+		spin_unlock(&dentry->d_lock);
+		return 1;
+	}
+	spin_unlock(&dentry->d_lock);
+
+	/* check uninitialized obb_dentry and
+	 * whether the base obbpath has been changed or not
+	 */
+	if (is_obbpath_invalid(dentry)) {
+		d_drop(dentry);
+		return 0;
+	}
+
+	parent_dentry = dget_parent(dentry);
+	sdcardfs_get_lower_path(parent_dentry, &parent_lower_path);
+	sdcardfs_get_real_lower(dentry, &lower_path);
+	parent_lower_dentry = parent_lower_path.dentry;
+	lower_dentry = lower_path.dentry;
+	lower_cur_parent_dentry = dget_parent(lower_dentry);
+
+	if ((lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+		err = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
+		if (err == 0) {
+			d_drop(dentry);
+			goto out;
+		}
+	}
+
+	spin_lock(&lower_dentry->d_lock);
+	if (d_unhashed(lower_dentry)) {
+		spin_unlock(&lower_dentry->d_lock);
+		d_drop(dentry);
+		err = 0;
+		goto out;
+	}
+	spin_unlock(&lower_dentry->d_lock);
+
+	if (parent_lower_dentry != lower_cur_parent_dentry) {
+		d_drop(dentry);
+		err = 0;
+		goto out;
+	}
+
+	if (dentry < lower_dentry) {
+		spin_lock(&dentry->d_lock);
+		spin_lock_nested(&lower_dentry->d_lock, DENTRY_D_LOCK_NESTED);
+	} else {
+		spin_lock(&lower_dentry->d_lock);
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+	}
+
+	if (!qstr_case_eq(&dentry->d_name, &lower_dentry->d_name)) {
+		__d_drop(dentry);
+		err = 0;
+	}
+
+	if (dentry < lower_dentry) {
+		spin_unlock(&lower_dentry->d_lock);
+		spin_unlock(&dentry->d_lock);
+	} else {
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&lower_dentry->d_lock);
+	}
+	if (!err)
+		goto out;
+
+	/* If our top's inode is gone, we may be out of date */
+	inode = igrab(d_inode(dentry));
+	if (inode) {
+		data = top_data_get(SDCARDFS_I(inode));
+		if (!data || data->abandoned) {
+			d_drop(dentry);
+			err = 0;
+		}
+		if (data)
+			data_put(data);
+		iput(inode);
+	}
+
+out:
+	dput(parent_dentry);
+	dput(lower_cur_parent_dentry);
+	sdcardfs_put_lower_path(parent_dentry, &parent_lower_path);
+	sdcardfs_put_real_lower(dentry, &lower_path);
+	return err;
+}
+
+static void sdcardfs_d_release(struct dentry *dentry)
+{
+	/* release and reset the lower paths */
+	if (has_graft_path(dentry))
+		sdcardfs_put_reset_orig_path(dentry);
+	sdcardfs_put_reset_lower_path(dentry);
+	free_dentry_private_data(dentry);
+}
+
+static int sdcardfs_hash_ci(const struct dentry *dentry,
+				struct qstr *qstr)
+{
+	/*
+	 * This function is copy of vfat_hashi.
+	 * FIXME Should we support national language?
+	 *       Refer to vfat_hashi()
+	 * struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
+	 */
+	const unsigned char *name;
+	unsigned int len;
+	unsigned long hash;
+
+	name = qstr->name;
+	len = qstr->len;
+
+	hash = init_name_hash(dentry);
+	while (len--)
+		hash = partial_name_hash(tolower(*name++), hash);
+	qstr->hash = end_name_hash(hash);
+
+	return 0;
+}
+
+/*
+ * Case insensitive compare of two vfat names.
+ */
+static int sdcardfs_cmp_ci(const struct dentry *dentry,
+		unsigned int len, const char *str, const struct qstr *name)
+{
+	/* FIXME Should we support national language? */
+
+	if (name->len == len) {
+		if (str_n_case_eq(name->name, str, len))
+			return 0;
+	}
+	return 1;
+}
+
+static void sdcardfs_canonical_path(const struct path *path,
+				struct path *actual_path)
+{
+	sdcardfs_get_real_lower(path->dentry, actual_path);
+}
+
+const struct dentry_operations sdcardfs_ci_dops = {
+	.d_revalidate	= sdcardfs_d_revalidate,
+	.d_release	= sdcardfs_d_release,
+	.d_hash	= sdcardfs_hash_ci,
+	.d_compare	= sdcardfs_cmp_ci,
+	.d_canonical_path = sdcardfs_canonical_path,
+};
+
diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c
new file mode 100644
index 0000000..1239d1c
--- /dev/null
+++ b/fs/sdcardfs/derived_perm.c
@@ -0,0 +1,471 @@
+/*
+ * fs/sdcardfs/derived_perm.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+/* copy derived state from parent inode */
+static void inherit_derived_state(struct inode *parent, struct inode *child)
+{
+	struct sdcardfs_inode_info *pi = SDCARDFS_I(parent);
+	struct sdcardfs_inode_info *ci = SDCARDFS_I(child);
+
+	ci->data->perm = PERM_INHERIT;
+	ci->data->userid = pi->data->userid;
+	ci->data->d_uid = pi->data->d_uid;
+	ci->data->under_android = pi->data->under_android;
+	ci->data->under_cache = pi->data->under_cache;
+	ci->data->under_obb = pi->data->under_obb;
+	set_top(ci, pi->top_data);
+}
+
+/* helper function for derived state */
+void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid,
+					uid_t uid, bool under_android,
+					struct sdcardfs_inode_data *top)
+{
+	struct sdcardfs_inode_info *info = SDCARDFS_I(inode);
+
+	info->data->perm = perm;
+	info->data->userid = userid;
+	info->data->d_uid = uid;
+	info->data->under_android = under_android;
+	info->data->under_cache = false;
+	info->data->under_obb = false;
+	set_top(info, top);
+}
+
+/* While renaming, there is a point where we want the path from dentry,
+ * but the name from newdentry
+ */
+void get_derived_permission_new(struct dentry *parent, struct dentry *dentry,
+				const struct qstr *name)
+{
+	struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry));
+	struct sdcardfs_inode_data *parent_data =
+			SDCARDFS_I(d_inode(parent))->data;
+	appid_t appid;
+	unsigned long user_num;
+	int err;
+	struct qstr q_Android = QSTR_LITERAL("Android");
+	struct qstr q_data = QSTR_LITERAL("data");
+	struct qstr q_obb = QSTR_LITERAL("obb");
+	struct qstr q_media = QSTR_LITERAL("media");
+	struct qstr q_cache = QSTR_LITERAL("cache");
+
+	/* By default, each inode inherits from its parent.
+	 * the properties are maintained on its private fields
+	 * because the inode attributes will be modified with that of
+	 * its lower inode.
+	 * These values are used by our custom permission call instead
+	 * of using the inode permissions.
+	 */
+
+	inherit_derived_state(d_inode(parent), d_inode(dentry));
+
+	/* Files don't get special labels */
+	if (!S_ISDIR(d_inode(dentry)->i_mode))
+		return;
+	/* Derive custom permissions based on parent and current node */
+	switch (parent_data->perm) {
+	case PERM_INHERIT:
+	case PERM_ANDROID_PACKAGE_CACHE:
+		/* Already inherited above */
+		break;
+	case PERM_PRE_ROOT:
+		/* Legacy internal layout places users at top level */
+		info->data->perm = PERM_ROOT;
+		err = kstrtoul(name->name, 10, &user_num);
+		if (err)
+			info->data->userid = 0;
+		else
+			info->data->userid = user_num;
+		set_top(info, info->data);
+		break;
+	case PERM_ROOT:
+		/* Assume masked off by default. */
+		if (qstr_case_eq(name, &q_Android)) {
+			/* App-specific directories inside; let anyone traverse */
+			info->data->perm = PERM_ANDROID;
+			info->data->under_android = true;
+			set_top(info, info->data);
+		}
+		break;
+	case PERM_ANDROID:
+		if (qstr_case_eq(name, &q_data)) {
+			/* App-specific directories inside; let anyone traverse */
+			info->data->perm = PERM_ANDROID_DATA;
+			set_top(info, info->data);
+		} else if (qstr_case_eq(name, &q_obb)) {
+			/* App-specific directories inside; let anyone traverse */
+			info->data->perm = PERM_ANDROID_OBB;
+			info->data->under_obb = true;
+			set_top(info, info->data);
+			/* Single OBB directory is always shared */
+		} else if (qstr_case_eq(name, &q_media)) {
+			/* App-specific directories inside; let anyone traverse */
+			info->data->perm = PERM_ANDROID_MEDIA;
+			set_top(info, info->data);
+		}
+		break;
+	case PERM_ANDROID_OBB:
+	case PERM_ANDROID_DATA:
+	case PERM_ANDROID_MEDIA:
+		info->data->perm = PERM_ANDROID_PACKAGE;
+		appid = get_appid(name->name);
+		if (appid != 0 && !is_excluded(name->name, parent_data->userid))
+			info->data->d_uid =
+				multiuser_get_uid(parent_data->userid, appid);
+		set_top(info, info->data);
+		break;
+	case PERM_ANDROID_PACKAGE:
+		if (qstr_case_eq(name, &q_cache)) {
+			info->data->perm = PERM_ANDROID_PACKAGE_CACHE;
+			info->data->under_cache = true;
+		}
+		break;
+	}
+}
+
+void get_derived_permission(struct dentry *parent, struct dentry *dentry)
+{
+	get_derived_permission_new(parent, dentry, &dentry->d_name);
+}
+
+static appid_t get_type(const char *name)
+{
+	const char *ext = strrchr(name, '.');
+	appid_t id;
+
+	if (ext && ext[0]) {
+		ext = &ext[1];
+		id = get_ext_gid(ext);
+		return id?:AID_MEDIA_RW;
+	}
+	return AID_MEDIA_RW;
+}
+
+void fixup_lower_ownership(struct dentry *dentry, const char *name)
+{
+	struct path path;
+	struct inode *inode;
+	struct inode *delegated_inode = NULL;
+	int error;
+	struct sdcardfs_inode_info *info;
+	struct sdcardfs_inode_data *info_d;
+	struct sdcardfs_inode_data *info_top;
+	perm_t perm;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	uid_t uid = sbi->options.fs_low_uid;
+	gid_t gid = sbi->options.fs_low_gid;
+	struct iattr newattrs;
+
+	info = SDCARDFS_I(d_inode(dentry));
+	info_d = info->data;
+	perm = info_d->perm;
+	if (info_d->under_obb) {
+		perm = PERM_ANDROID_OBB;
+	} else if (info_d->under_cache) {
+		perm = PERM_ANDROID_PACKAGE_CACHE;
+	} else if (perm == PERM_INHERIT) {
+		info_top = top_data_get(info);
+		perm = info_top->perm;
+		data_put(info_top);
+	}
+
+	switch (perm) {
+	case PERM_ROOT:
+	case PERM_ANDROID:
+	case PERM_ANDROID_DATA:
+	case PERM_ANDROID_MEDIA:
+	case PERM_ANDROID_PACKAGE:
+	case PERM_ANDROID_PACKAGE_CACHE:
+		uid = multiuser_get_uid(info_d->userid, uid);
+		break;
+	case PERM_ANDROID_OBB:
+		uid = AID_MEDIA_OBB;
+		break;
+	case PERM_PRE_ROOT:
+	default:
+		break;
+	}
+	switch (perm) {
+	case PERM_ROOT:
+	case PERM_ANDROID:
+	case PERM_ANDROID_DATA:
+	case PERM_ANDROID_MEDIA:
+		if (S_ISDIR(d_inode(dentry)->i_mode))
+			gid = multiuser_get_uid(info_d->userid, AID_MEDIA_RW);
+		else
+			gid = multiuser_get_uid(info_d->userid, get_type(name));
+		break;
+	case PERM_ANDROID_OBB:
+		gid = AID_MEDIA_OBB;
+		break;
+	case PERM_ANDROID_PACKAGE:
+		if (uid_is_app(info_d->d_uid))
+			gid = multiuser_get_ext_gid(info_d->d_uid);
+		else
+			gid = multiuser_get_uid(info_d->userid, AID_MEDIA_RW);
+		break;
+	case PERM_ANDROID_PACKAGE_CACHE:
+		if (uid_is_app(info_d->d_uid))
+			gid = multiuser_get_ext_cache_gid(info_d->d_uid);
+		else
+			gid = multiuser_get_uid(info_d->userid, AID_MEDIA_RW);
+		break;
+	case PERM_PRE_ROOT:
+	default:
+		break;
+	}
+
+	sdcardfs_get_lower_path(dentry, &path);
+	inode = d_inode(path.dentry);
+	if (d_inode(path.dentry)->i_gid.val != gid || d_inode(path.dentry)->i_uid.val != uid) {
+retry_deleg:
+		newattrs.ia_valid = ATTR_GID | ATTR_UID | ATTR_FORCE;
+		newattrs.ia_uid = make_kuid(current_user_ns(), uid);
+		newattrs.ia_gid = make_kgid(current_user_ns(), gid);
+		if (!S_ISDIR(inode->i_mode))
+			newattrs.ia_valid |=
+				ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+		inode_lock(inode);
+		error = security_path_chown(&path, newattrs.ia_uid, newattrs.ia_gid);
+		if (!error)
+			error = notify_change2(path.mnt, path.dentry, &newattrs, &delegated_inode);
+		inode_unlock(inode);
+		if (delegated_inode) {
+			error = break_deleg_wait(&delegated_inode);
+			if (!error)
+				goto retry_deleg;
+		}
+		if (error)
+			pr_debug("sdcardfs: Failed to touch up lower fs gid/uid for %s\n", name);
+	}
+	sdcardfs_put_lower_path(dentry, &path);
+}
+
+static int descendant_may_need_fixup(struct sdcardfs_inode_data *data,
+		struct limit_search *limit)
+{
+	if (data->perm == PERM_ROOT)
+		return (limit->flags & BY_USERID) ?
+				data->userid == limit->userid : 1;
+	if (data->perm == PERM_PRE_ROOT || data->perm == PERM_ANDROID)
+		return 1;
+	return 0;
+}
+
+static int needs_fixup(perm_t perm)
+{
+	if (perm == PERM_ANDROID_DATA || perm == PERM_ANDROID_OBB
+			|| perm == PERM_ANDROID_MEDIA)
+		return 1;
+	return 0;
+}
+
+static void __fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit, int depth)
+{
+	struct dentry *child;
+	struct sdcardfs_inode_info *info;
+
+	/*
+	 * All paths will terminate their recursion on hitting PERM_ANDROID_OBB,
+	 * PERM_ANDROID_MEDIA, or PERM_ANDROID_DATA. This happens at a depth of
+	 * at most 3.
+	 */
+	WARN(depth > 3, "%s: Max expected depth exceeded!\n", __func__);
+	spin_lock_nested(&dentry->d_lock, depth);
+	if (!d_inode(dentry)) {
+		spin_unlock(&dentry->d_lock);
+		return;
+	}
+	info = SDCARDFS_I(d_inode(dentry));
+
+	if (needs_fixup(info->data->perm)) {
+		list_for_each_entry(child, &dentry->d_subdirs, d_child) {
+			spin_lock_nested(&child->d_lock, depth + 1);
+			if (!(limit->flags & BY_NAME) || qstr_case_eq(&child->d_name, &limit->name)) {
+				if (d_inode(child)) {
+					get_derived_permission(dentry, child);
+					fixup_tmp_permissions(d_inode(child));
+					spin_unlock(&child->d_lock);
+					break;
+				}
+			}
+			spin_unlock(&child->d_lock);
+		}
+	} else if (descendant_may_need_fixup(info->data, limit)) {
+		list_for_each_entry(child, &dentry->d_subdirs, d_child) {
+			__fixup_perms_recursive(child, limit, depth + 1);
+		}
+	}
+	spin_unlock(&dentry->d_lock);
+}
+
+void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit)
+{
+	__fixup_perms_recursive(dentry, limit, 0);
+}
+
+/* main function for updating derived permission */
+inline void update_derived_permission_lock(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	if (!dentry || !d_inode(dentry)) {
+		pr_err("sdcardfs: %s: invalid dentry\n", __func__);
+		return;
+	}
+	/* FIXME:
+	 * 1. need to check whether the dentry is updated or not
+	 * 2. remove the root dentry update
+	 */
+	if (!IS_ROOT(dentry)) {
+		parent = dget_parent(dentry);
+		if (parent) {
+			get_derived_permission(parent, dentry);
+			dput(parent);
+		}
+	}
+	fixup_tmp_permissions(d_inode(dentry));
+}
+
+int need_graft_path(struct dentry *dentry)
+{
+	int ret = 0;
+	struct dentry *parent = dget_parent(dentry);
+	struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent));
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	struct qstr obb = QSTR_LITERAL("obb");
+
+	if (parent_info->data->perm == PERM_ANDROID &&
+			qstr_case_eq(&dentry->d_name, &obb)) {
+
+		/* /Android/obb is the base obbpath of DERIVED_UNIFIED */
+		if (!(sbi->options.multiuser == false
+				&& parent_info->data->userid == 0)) {
+			ret = 1;
+		}
+	}
+	dput(parent);
+	return ret;
+}
+
+int is_obbpath_invalid(struct dentry *dent)
+{
+	int ret = 0;
+	struct sdcardfs_dentry_info *di = SDCARDFS_D(dent);
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dent->d_sb);
+	char *path_buf, *obbpath_s;
+	int need_put = 0;
+	struct path lower_path;
+
+	/* check the base obbpath has been changed.
+	 * this routine can check an uninitialized obb dentry as well.
+	 * regarding the uninitialized obb, refer to the sdcardfs_mkdir()
+	 */
+	spin_lock(&di->lock);
+	if (di->orig_path.dentry) {
+		if (!di->lower_path.dentry) {
+			ret = 1;
+		} else {
+			path_get(&di->lower_path);
+
+			path_buf = kmalloc(PATH_MAX, GFP_ATOMIC);
+			if (!path_buf) {
+				ret = 1;
+				pr_err("sdcardfs: fail to allocate path_buf in %s.\n", __func__);
+			} else {
+				obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX);
+				if (d_unhashed(di->lower_path.dentry) ||
+					!str_case_eq(sbi->obbpath_s, obbpath_s)) {
+					ret = 1;
+				}
+				kfree(path_buf);
+			}
+
+			pathcpy(&lower_path, &di->lower_path);
+			need_put = 1;
+		}
+	}
+	spin_unlock(&di->lock);
+	if (need_put)
+		path_put(&lower_path);
+	return ret;
+}
+
+int is_base_obbpath(struct dentry *dentry)
+{
+	int ret = 0;
+	struct dentry *parent = dget_parent(dentry);
+	struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent));
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	struct qstr q_obb = QSTR_LITERAL("obb");
+
+	spin_lock(&SDCARDFS_D(dentry)->lock);
+	if (sbi->options.multiuser) {
+		if (parent_info->data->perm == PERM_PRE_ROOT &&
+				qstr_case_eq(&dentry->d_name, &q_obb)) {
+			ret = 1;
+		}
+	} else  if (parent_info->data->perm == PERM_ANDROID &&
+			qstr_case_eq(&dentry->d_name, &q_obb)) {
+		ret = 1;
+	}
+	spin_unlock(&SDCARDFS_D(dentry)->lock);
+	return ret;
+}
+
+/* The lower_path will be stored to the dentry's orig_path
+ * and the base obbpath will be copyed to the lower_path variable.
+ * if an error returned, there's no change in the lower_path
+ * returns: -ERRNO if error (0: no error)
+ */
+int setup_obb_dentry(struct dentry *dentry, struct path *lower_path)
+{
+	int err = 0;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	struct path obbpath;
+
+	/* A local obb dentry must have its own orig_path to support rmdir
+	 * and mkdir of itself. Usually, we expect that the sbi->obbpath
+	 * is avaiable on this stage.
+	 */
+	sdcardfs_set_orig_path(dentry, lower_path);
+
+	err = kern_path(sbi->obbpath_s,
+			LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &obbpath);
+
+	if (!err) {
+		/* the obbpath base has been found */
+		pathcpy(lower_path, &obbpath);
+	} else {
+		/* if the sbi->obbpath is not available, we can optionally
+		 * setup the lower_path with its orig_path.
+		 * but, the current implementation just returns an error
+		 * because the sdcard daemon also regards this case as
+		 * a lookup fail.
+		 */
+		pr_info("sdcardfs: the sbi->obbpath is not available\n");
+	}
+	return err;
+}
+
+
diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c
new file mode 100644
index 0000000..399531b
--- /dev/null
+++ b/fs/sdcardfs/file.c
@@ -0,0 +1,461 @@
+/*
+ * fs/sdcardfs/file.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
+#include <linux/backing-dev.h>
+#endif
+
+static ssize_t sdcardfs_read(struct file *file, char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	int err;
+	struct file *lower_file;
+	struct dentry *dentry = file->f_path.dentry;
+#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
+	struct backing_dev_info *bdi;
+#endif
+
+	lower_file = sdcardfs_lower_file(file);
+
+#ifdef CONFIG_SDCARD_FS_FADV_NOACTIVE
+	if (file->f_mode & FMODE_NOACTIVE) {
+		if (!(lower_file->f_mode & FMODE_NOACTIVE)) {
+			bdi = lower_file->f_mapping->backing_dev_info;
+			lower_file->f_ra.ra_pages = bdi->ra_pages * 2;
+			spin_lock(&lower_file->f_lock);
+			lower_file->f_mode |= FMODE_NOACTIVE;
+			spin_unlock(&lower_file->f_lock);
+		}
+	}
+#endif
+
+	err = vfs_read(lower_file, buf, count, ppos);
+	/* update our inode atime upon a successful lower read */
+	if (err >= 0)
+		fsstack_copy_attr_atime(d_inode(dentry),
+					file_inode(lower_file));
+
+	return err;
+}
+
+static ssize_t sdcardfs_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	int err;
+	struct file *lower_file;
+	struct dentry *dentry = file->f_path.dentry;
+
+	/* check disk space */
+	if (!check_min_free_space(dentry, count, 0)) {
+		pr_err("No minimum free space.\n");
+		return -ENOSPC;
+	}
+
+	lower_file = sdcardfs_lower_file(file);
+	err = vfs_write(lower_file, buf, count, ppos);
+	/* update our inode times+sizes upon a successful lower write */
+	if (err >= 0) {
+		fsstack_copy_inode_size(d_inode(dentry),
+					file_inode(lower_file));
+		fsstack_copy_attr_times(d_inode(dentry),
+					file_inode(lower_file));
+	}
+
+	return err;
+}
+
+static int sdcardfs_readdir(struct file *file, struct dir_context *ctx)
+{
+	int err;
+	struct file *lower_file = NULL;
+	struct dentry *dentry = file->f_path.dentry;
+
+	lower_file = sdcardfs_lower_file(file);
+
+	lower_file->f_pos = file->f_pos;
+	err = iterate_dir(lower_file, ctx);
+	file->f_pos = lower_file->f_pos;
+	if (err >= 0)		/* copy the atime */
+		fsstack_copy_attr_atime(d_inode(dentry),
+					file_inode(lower_file));
+	return err;
+}
+
+static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd,
+				  unsigned long arg)
+{
+	long err = -ENOTTY;
+	struct file *lower_file;
+	const struct cred *saved_cred = NULL;
+	struct dentry *dentry = file->f_path.dentry;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+	lower_file = sdcardfs_lower_file(file);
+
+	/* XXX: use vfs_ioctl if/when VFS exports it */
+	if (!lower_file || !lower_file->f_op)
+		goto out;
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(sbi, SDCARDFS_I(file_inode(file))->data);
+	if (!saved_cred) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (lower_file->f_op->unlocked_ioctl)
+		err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
+
+	/* some ioctls can change inode attributes (EXT2_IOC_SETFLAGS) */
+	if (!err)
+		sdcardfs_copy_and_fix_attrs(file_inode(file),
+				      file_inode(lower_file));
+	revert_fsids(saved_cred);
+out:
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+static long sdcardfs_compat_ioctl(struct file *file, unsigned int cmd,
+				unsigned long arg)
+{
+	long err = -ENOTTY;
+	struct file *lower_file;
+	const struct cred *saved_cred = NULL;
+	struct dentry *dentry = file->f_path.dentry;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+	lower_file = sdcardfs_lower_file(file);
+
+	/* XXX: use vfs_ioctl if/when VFS exports it */
+	if (!lower_file || !lower_file->f_op)
+		goto out;
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(sbi, SDCARDFS_I(file_inode(file))->data);
+	if (!saved_cred) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (lower_file->f_op->compat_ioctl)
+		err = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
+
+	revert_fsids(saved_cred);
+out:
+	return err;
+}
+#endif
+
+static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int err = 0;
+	bool willwrite;
+	struct file *lower_file;
+	const struct vm_operations_struct *saved_vm_ops = NULL;
+
+	/* this might be deferred to mmap's writepage */
+	willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
+
+	/*
+	 * File systems which do not implement ->writepage may use
+	 * generic_file_readonly_mmap as their ->mmap op.  If you call
+	 * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
+	 * But we cannot call the lower ->mmap op, so we can't tell that
+	 * writeable mappings won't work.  Therefore, our only choice is to
+	 * check if the lower file system supports the ->writepage, and if
+	 * not, return EINVAL (the same error that
+	 * generic_file_readonly_mmap returns in that case).
+	 */
+	lower_file = sdcardfs_lower_file(file);
+	if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
+		err = -EINVAL;
+		pr_err("sdcardfs: lower file system does not support writeable mmap\n");
+		goto out;
+	}
+
+	/*
+	 * find and save lower vm_ops.
+	 *
+	 * XXX: the VFS should have a cleaner way of finding the lower vm_ops
+	 */
+	if (!SDCARDFS_F(file)->lower_vm_ops) {
+		err = lower_file->f_op->mmap(lower_file, vma);
+		if (err) {
+			pr_err("sdcardfs: lower mmap failed %d\n", err);
+			goto out;
+		}
+		saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */
+	}
+
+	/*
+	 * Next 3 lines are all I need from generic_file_mmap.  I definitely
+	 * don't want its test for ->readpage which returns -ENOEXEC.
+	 */
+	file_accessed(file);
+	vma->vm_ops = &sdcardfs_vm_ops;
+
+	file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */
+	if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */
+		SDCARDFS_F(file)->lower_vm_ops = saved_vm_ops;
+	vma->vm_private_data = file;
+	get_file(lower_file);
+	vma->vm_file = lower_file;
+
+out:
+	return err;
+}
+
+static int sdcardfs_open(struct inode *inode, struct file *file)
+{
+	int err = 0;
+	struct file *lower_file = NULL;
+	struct path lower_path;
+	struct dentry *dentry = file->f_path.dentry;
+	struct dentry *parent = dget_parent(dentry);
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	const struct cred *saved_cred = NULL;
+
+	/* don't open unhashed/deleted files */
+	if (d_unhashed(dentry)) {
+		err = -ENOENT;
+		goto out_err;
+	}
+
+	if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) {
+		err = -EACCES;
+		goto out_err;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(sbi, SDCARDFS_I(inode)->data);
+	if (!saved_cred) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+
+	file->private_data =
+		kzalloc(sizeof(struct sdcardfs_file_info), GFP_KERNEL);
+	if (!SDCARDFS_F(file)) {
+		err = -ENOMEM;
+		goto out_revert_cred;
+	}
+
+	/* open lower object and link sdcardfs's file struct to lower's */
+	sdcardfs_get_lower_path(file->f_path.dentry, &lower_path);
+	lower_file = dentry_open(&lower_path, file->f_flags, current_cred());
+	path_put(&lower_path);
+	if (IS_ERR(lower_file)) {
+		err = PTR_ERR(lower_file);
+		lower_file = sdcardfs_lower_file(file);
+		if (lower_file) {
+			sdcardfs_set_lower_file(file, NULL);
+			fput(lower_file); /* fput calls dput for lower_dentry */
+		}
+	} else {
+		sdcardfs_set_lower_file(file, lower_file);
+	}
+
+	if (err)
+		kfree(SDCARDFS_F(file));
+	else
+		sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode));
+
+out_revert_cred:
+	revert_fsids(saved_cred);
+out_err:
+	dput(parent);
+	return err;
+}
+
+static int sdcardfs_flush(struct file *file, fl_owner_t id)
+{
+	int err = 0;
+	struct file *lower_file = NULL;
+
+	lower_file = sdcardfs_lower_file(file);
+	if (lower_file && lower_file->f_op && lower_file->f_op->flush) {
+		filemap_write_and_wait(file->f_mapping);
+		err = lower_file->f_op->flush(lower_file, id);
+	}
+
+	return err;
+}
+
+/* release all lower object references & free the file info structure */
+static int sdcardfs_file_release(struct inode *inode, struct file *file)
+{
+	struct file *lower_file;
+
+	lower_file = sdcardfs_lower_file(file);
+	if (lower_file) {
+		sdcardfs_set_lower_file(file, NULL);
+		fput(lower_file);
+	}
+
+	kfree(SDCARDFS_F(file));
+	return 0;
+}
+
+static int sdcardfs_fsync(struct file *file, loff_t start, loff_t end,
+			int datasync)
+{
+	int err;
+	struct file *lower_file;
+	struct path lower_path;
+	struct dentry *dentry = file->f_path.dentry;
+
+	err = __generic_file_fsync(file, start, end, datasync);
+	if (err)
+		goto out;
+
+	lower_file = sdcardfs_lower_file(file);
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	err = vfs_fsync_range(lower_file, start, end, datasync);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+out:
+	return err;
+}
+
+static int sdcardfs_fasync(int fd, struct file *file, int flag)
+{
+	int err = 0;
+	struct file *lower_file = NULL;
+
+	lower_file = sdcardfs_lower_file(file);
+	if (lower_file->f_op && lower_file->f_op->fasync)
+		err = lower_file->f_op->fasync(fd, lower_file, flag);
+
+	return err;
+}
+
+/*
+ * Sdcardfs cannot use generic_file_llseek as ->llseek, because it would
+ * only set the offset of the upper file.  So we have to implement our
+ * own method to set both the upper and lower file offsets
+ * consistently.
+ */
+static loff_t sdcardfs_file_llseek(struct file *file, loff_t offset, int whence)
+{
+	int err;
+	struct file *lower_file;
+
+	err = generic_file_llseek(file, offset, whence);
+	if (err < 0)
+		goto out;
+
+	lower_file = sdcardfs_lower_file(file);
+	err = generic_file_llseek(lower_file, offset, whence);
+
+out:
+	return err;
+}
+
+/*
+ * Sdcardfs read_iter, redirect modified iocb to lower read_iter
+ */
+ssize_t sdcardfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	int err;
+	struct file *file = iocb->ki_filp, *lower_file;
+
+	lower_file = sdcardfs_lower_file(file);
+	if (!lower_file->f_op->read_iter) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	get_file(lower_file); /* prevent lower_file from being released */
+	iocb->ki_filp = lower_file;
+	err = lower_file->f_op->read_iter(iocb, iter);
+	iocb->ki_filp = file;
+	fput(lower_file);
+	/* update upper inode atime as needed */
+	if (err >= 0 || err == -EIOCBQUEUED)
+		fsstack_copy_attr_atime(file->f_path.dentry->d_inode,
+					file_inode(lower_file));
+out:
+	return err;
+}
+
+/*
+ * Sdcardfs write_iter, redirect modified iocb to lower write_iter
+ */
+ssize_t sdcardfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	int err;
+	struct file *file = iocb->ki_filp, *lower_file;
+
+	lower_file = sdcardfs_lower_file(file);
+	if (!lower_file->f_op->write_iter) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	get_file(lower_file); /* prevent lower_file from being released */
+	iocb->ki_filp = lower_file;
+	err = lower_file->f_op->write_iter(iocb, iter);
+	iocb->ki_filp = file;
+	fput(lower_file);
+	/* update upper inode times/sizes as needed */
+	if (err >= 0 || err == -EIOCBQUEUED) {
+		fsstack_copy_inode_size(file->f_path.dentry->d_inode,
+					file_inode(lower_file));
+		fsstack_copy_attr_times(file->f_path.dentry->d_inode,
+					file_inode(lower_file));
+	}
+out:
+	return err;
+}
+
+const struct file_operations sdcardfs_main_fops = {
+	.llseek		= generic_file_llseek,
+	.read		= sdcardfs_read,
+	.write		= sdcardfs_write,
+	.unlocked_ioctl	= sdcardfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= sdcardfs_compat_ioctl,
+#endif
+	.mmap		= sdcardfs_mmap,
+	.open		= sdcardfs_open,
+	.flush		= sdcardfs_flush,
+	.release	= sdcardfs_file_release,
+	.fsync		= sdcardfs_fsync,
+	.fasync		= sdcardfs_fasync,
+	.read_iter	= sdcardfs_read_iter,
+	.write_iter	= sdcardfs_write_iter,
+};
+
+/* trimmed directory options */
+const struct file_operations sdcardfs_dir_fops = {
+	.llseek		= sdcardfs_file_llseek,
+	.read		= generic_read_dir,
+	.iterate	= sdcardfs_readdir,
+	.unlocked_ioctl	= sdcardfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= sdcardfs_compat_ioctl,
+#endif
+	.open		= sdcardfs_open,
+	.release	= sdcardfs_file_release,
+	.flush		= sdcardfs_flush,
+	.fsync		= sdcardfs_fsync,
+	.fasync		= sdcardfs_fasync,
+};
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
new file mode 100644
index 0000000..5318bc6
--- /dev/null
+++ b/fs/sdcardfs/inode.c
@@ -0,0 +1,808 @@
+/*
+ * fs/sdcardfs/inode.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include <linux/fs_struct.h>
+#include <linux/ratelimit.h>
+
+const struct cred *override_fsids(struct sdcardfs_sb_info *sbi,
+		struct sdcardfs_inode_data *data)
+{
+	struct cred *cred;
+	const struct cred *old_cred;
+	uid_t uid;
+
+	cred = prepare_creds();
+	if (!cred)
+		return NULL;
+
+	if (data->under_obb)
+		uid = AID_MEDIA_OBB;
+	else
+		uid = multiuser_get_uid(data->userid, sbi->options.fs_low_uid);
+	cred->fsuid = make_kuid(&init_user_ns, uid);
+	cred->fsgid = make_kgid(&init_user_ns, sbi->options.fs_low_gid);
+
+	old_cred = override_creds(cred);
+
+	return old_cred;
+}
+
+void revert_fsids(const struct cred *old_cred)
+{
+	const struct cred *cur_cred;
+
+	cur_cred = current->cred;
+	revert_creds(old_cred);
+	put_cred(cur_cred);
+}
+
+static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
+			 umode_t mode, bool want_excl)
+{
+	int err;
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_dentry_mnt;
+	struct dentry *lower_parent_dentry = NULL;
+	struct path lower_path;
+	const struct cred *saved_cred = NULL;
+	struct fs_struct *saved_fs;
+	struct fs_struct *copied_fs;
+
+	if (!check_caller_access_to_name(dir, &dentry->d_name)) {
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+					SDCARDFS_I(dir)->data);
+	if (!saved_cred)
+		return -ENOMEM;
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_dentry_mnt = lower_path.mnt;
+	lower_parent_dentry = lock_parent(lower_dentry);
+
+	/* set last 16bytes of mode field to 0664 */
+	mode = (mode & S_IFMT) | 00664;
+
+	/* temporarily change umask for lower fs write */
+	saved_fs = current->fs;
+	copied_fs = copy_fs_struct(current->fs);
+	if (!copied_fs) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+	copied_fs->umask = 0;
+	task_lock(current);
+	current->fs = copied_fs;
+	task_unlock(current);
+
+	err = vfs_create2(lower_dentry_mnt, d_inode(lower_parent_dentry), lower_dentry, mode, want_excl);
+	if (err)
+		goto out;
+
+	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path,
+			SDCARDFS_I(dir)->data->userid);
+	if (err)
+		goto out;
+	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
+	fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
+	fixup_lower_ownership(dentry, dentry->d_name.name);
+
+out:
+	task_lock(current);
+	current->fs = saved_fs;
+	task_unlock(current);
+	free_fs_struct(copied_fs);
+out_unlock:
+	unlock_dir(lower_parent_dentry);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	revert_fsids(saved_cred);
+out_eacces:
+	return err;
+}
+
+static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int err;
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_mnt;
+	struct inode *lower_dir_inode = sdcardfs_lower_inode(dir);
+	struct dentry *lower_dir_dentry;
+	struct path lower_path;
+	const struct cred *saved_cred = NULL;
+
+	if (!check_caller_access_to_name(dir, &dentry->d_name)) {
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+						SDCARDFS_I(dir)->data);
+	if (!saved_cred)
+		return -ENOMEM;
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_mnt = lower_path.mnt;
+	dget(lower_dentry);
+	lower_dir_dentry = lock_parent(lower_dentry);
+
+	err = vfs_unlink2(lower_mnt, lower_dir_inode, lower_dentry, NULL);
+
+	/*
+	 * Note: unlinking on top of NFS can cause silly-renamed files.
+	 * Trying to delete such files results in EBUSY from NFS
+	 * below.  Silly-renamed files will get deleted by NFS later on, so
+	 * we just need to detect them here and treat such EBUSY errors as
+	 * if the upper file was successfully deleted.
+	 */
+	if (err == -EBUSY && lower_dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		err = 0;
+	if (err)
+		goto out;
+	fsstack_copy_attr_times(dir, lower_dir_inode);
+	fsstack_copy_inode_size(dir, lower_dir_inode);
+	set_nlink(d_inode(dentry),
+		  sdcardfs_lower_inode(d_inode(dentry))->i_nlink);
+	d_inode(dentry)->i_ctime = dir->i_ctime;
+	d_drop(dentry); /* this is needed, else LTP fails (VFS won't do it) */
+out:
+	unlock_dir(lower_dir_dentry);
+	dput(lower_dentry);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	revert_fsids(saved_cred);
+out_eacces:
+	return err;
+}
+
+static int touch(char *abs_path, mode_t mode)
+{
+	struct file *filp = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode);
+
+	if (IS_ERR(filp)) {
+		if (PTR_ERR(filp) == -EEXIST) {
+			return 0;
+		} else {
+			pr_err("sdcardfs: failed to open(%s): %ld\n",
+						abs_path, PTR_ERR(filp));
+			return PTR_ERR(filp);
+		}
+	}
+	filp_close(filp, current->files);
+	return 0;
+}
+
+static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	int err;
+	int make_nomedia_in_obb = 0;
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_mnt;
+	struct dentry *lower_parent_dentry = NULL;
+	struct path lower_path;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+	const struct cred *saved_cred = NULL;
+	struct sdcardfs_inode_data *pd = SDCARDFS_I(dir)->data;
+	int touch_err = 0;
+	struct fs_struct *saved_fs;
+	struct fs_struct *copied_fs;
+	struct qstr q_obb = QSTR_LITERAL("obb");
+	struct qstr q_data = QSTR_LITERAL("data");
+
+	if (!check_caller_access_to_name(dir, &dentry->d_name)) {
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+						SDCARDFS_I(dir)->data);
+	if (!saved_cred)
+		return -ENOMEM;
+
+	/* check disk space */
+	if (!check_min_free_space(dentry, 0, 1)) {
+		pr_err("sdcardfs: No minimum free space.\n");
+		err = -ENOSPC;
+		goto out_revert;
+	}
+
+	/* the lower_dentry is negative here */
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_mnt = lower_path.mnt;
+	lower_parent_dentry = lock_parent(lower_dentry);
+
+	/* set last 16bytes of mode field to 0775 */
+	mode = (mode & S_IFMT) | 00775;
+
+	/* temporarily change umask for lower fs write */
+	saved_fs = current->fs;
+	copied_fs = copy_fs_struct(current->fs);
+	if (!copied_fs) {
+		err = -ENOMEM;
+		unlock_dir(lower_parent_dentry);
+		goto out_unlock;
+	}
+	copied_fs->umask = 0;
+	task_lock(current);
+	current->fs = copied_fs;
+	task_unlock(current);
+
+	err = vfs_mkdir2(lower_mnt, d_inode(lower_parent_dentry), lower_dentry, mode);
+
+	if (err) {
+		unlock_dir(lower_parent_dentry);
+		goto out;
+	}
+
+	/* if it is a local obb dentry, setup it with the base obbpath */
+	if (need_graft_path(dentry)) {
+
+		err = setup_obb_dentry(dentry, &lower_path);
+		if (err) {
+			/* if the sbi->obbpath is not available, the lower_path won't be
+			 * changed by setup_obb_dentry() but the lower path is saved to
+			 * its orig_path. this dentry will be revalidated later.
+			 * but now, the lower_path should be NULL
+			 */
+			sdcardfs_put_reset_lower_path(dentry);
+
+			/* the newly created lower path which saved to its orig_path or
+			 * the lower_path is the base obbpath.
+			 * therefore, an additional path_get is required
+			 */
+			path_get(&lower_path);
+		} else
+			make_nomedia_in_obb = 1;
+	}
+
+	err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pd->userid);
+	if (err) {
+		unlock_dir(lower_parent_dentry);
+		goto out;
+	}
+
+	fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
+	fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
+	/* update number of links on parent directory */
+	set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink);
+	fixup_lower_ownership(dentry, dentry->d_name.name);
+	unlock_dir(lower_parent_dentry);
+	if ((!sbi->options.multiuser) && (qstr_case_eq(&dentry->d_name, &q_obb))
+		&& (pd->perm == PERM_ANDROID) && (pd->userid == 0))
+		make_nomedia_in_obb = 1;
+
+	/* When creating /Android/data and /Android/obb, mark them as .nomedia */
+	if (make_nomedia_in_obb ||
+		((pd->perm == PERM_ANDROID)
+				&& (qstr_case_eq(&dentry->d_name, &q_data)))) {
+		revert_fsids(saved_cred);
+		saved_cred = override_fsids(sbi,
+					SDCARDFS_I(d_inode(dentry))->data);
+		if (!saved_cred) {
+			pr_err("sdcardfs: failed to set up .nomedia in %s: %d\n",
+						lower_path.dentry->d_name.name,
+						-ENOMEM);
+			goto out;
+		}
+		set_fs_pwd(current->fs, &lower_path);
+		touch_err = touch(".nomedia", 0664);
+		if (touch_err) {
+			pr_err("sdcardfs: failed to create .nomedia in %s: %d\n",
+						lower_path.dentry->d_name.name,
+						touch_err);
+			goto out;
+		}
+	}
+out:
+	task_lock(current);
+	current->fs = saved_fs;
+	task_unlock(current);
+
+	free_fs_struct(copied_fs);
+out_unlock:
+	sdcardfs_put_lower_path(dentry, &lower_path);
+out_revert:
+	revert_fsids(saved_cred);
+out_eacces:
+	return err;
+}
+
+static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct dentry *lower_dentry;
+	struct dentry *lower_dir_dentry;
+	struct vfsmount *lower_mnt;
+	int err;
+	struct path lower_path;
+	const struct cred *saved_cred = NULL;
+
+	if (!check_caller_access_to_name(dir, &dentry->d_name)) {
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+						SDCARDFS_I(dir)->data);
+	if (!saved_cred)
+		return -ENOMEM;
+
+	/* sdcardfs_get_real_lower(): in case of remove an user's obb dentry
+	 * the dentry on the original path should be deleted.
+	 */
+	sdcardfs_get_real_lower(dentry, &lower_path);
+
+	lower_dentry = lower_path.dentry;
+	lower_mnt = lower_path.mnt;
+	lower_dir_dentry = lock_parent(lower_dentry);
+
+	err = vfs_rmdir2(lower_mnt, d_inode(lower_dir_dentry), lower_dentry);
+	if (err)
+		goto out;
+
+	d_drop(dentry);	/* drop our dentry on success (why not VFS's job?) */
+	if (d_inode(dentry))
+		clear_nlink(d_inode(dentry));
+	fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+	fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry));
+	set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink);
+
+out:
+	unlock_dir(lower_dir_dentry);
+	sdcardfs_put_real_lower(dentry, &lower_path);
+	revert_fsids(saved_cred);
+out_eacces:
+	return err;
+}
+
+/*
+ * The locking rules in sdcardfs_rename are complex.  We could use a simpler
+ * superblock-level name-space lock for renames and copy-ups.
+ */
+static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+			 struct inode *new_dir, struct dentry *new_dentry,
+			 unsigned int flags)
+{
+	int err = 0;
+	struct dentry *lower_old_dentry = NULL;
+	struct dentry *lower_new_dentry = NULL;
+	struct dentry *lower_old_dir_dentry = NULL;
+	struct dentry *lower_new_dir_dentry = NULL;
+	struct vfsmount *lower_mnt = NULL;
+	struct dentry *trap = NULL;
+	struct path lower_old_path, lower_new_path;
+	const struct cred *saved_cred = NULL;
+
+	if (flags)
+		return -EINVAL;
+
+	if (!check_caller_access_to_name(old_dir, &old_dentry->d_name) ||
+		!check_caller_access_to_name(new_dir, &new_dentry->d_name)) {
+		err = -EACCES;
+		goto out_eacces;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(old_dir->i_sb),
+						SDCARDFS_I(new_dir)->data);
+	if (!saved_cred)
+		return -ENOMEM;
+
+	sdcardfs_get_real_lower(old_dentry, &lower_old_path);
+	sdcardfs_get_lower_path(new_dentry, &lower_new_path);
+	lower_old_dentry = lower_old_path.dentry;
+	lower_new_dentry = lower_new_path.dentry;
+	lower_mnt = lower_old_path.mnt;
+	lower_old_dir_dentry = dget_parent(lower_old_dentry);
+	lower_new_dir_dentry = dget_parent(lower_new_dentry);
+
+	trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	/* source should not be ancestor of target */
+	if (trap == lower_old_dentry) {
+		err = -EINVAL;
+		goto out;
+	}
+	/* target should not be ancestor of source */
+	if (trap == lower_new_dentry) {
+		err = -ENOTEMPTY;
+		goto out;
+	}
+
+	err = vfs_rename2(lower_mnt,
+			 d_inode(lower_old_dir_dentry), lower_old_dentry,
+			 d_inode(lower_new_dir_dentry), lower_new_dentry,
+			 NULL, 0);
+	if (err)
+		goto out;
+
+	/* Copy attrs from lower dir, but i_uid/i_gid */
+	sdcardfs_copy_and_fix_attrs(new_dir, d_inode(lower_new_dir_dentry));
+	fsstack_copy_inode_size(new_dir, d_inode(lower_new_dir_dentry));
+
+	if (new_dir != old_dir) {
+		sdcardfs_copy_and_fix_attrs(old_dir, d_inode(lower_old_dir_dentry));
+		fsstack_copy_inode_size(old_dir, d_inode(lower_old_dir_dentry));
+	}
+	get_derived_permission_new(new_dentry->d_parent, old_dentry, &new_dentry->d_name);
+	fixup_tmp_permissions(d_inode(old_dentry));
+	fixup_lower_ownership(old_dentry, new_dentry->d_name.name);
+	d_invalidate(old_dentry); /* Can't fixup ownership recursively :( */
+out:
+	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	dput(lower_old_dir_dentry);
+	dput(lower_new_dir_dentry);
+	sdcardfs_put_real_lower(old_dentry, &lower_old_path);
+	sdcardfs_put_lower_path(new_dentry, &lower_new_path);
+	revert_fsids(saved_cred);
+out_eacces:
+	return err;
+}
+
+#if 0
+static int sdcardfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+{
+	int err;
+	struct dentry *lower_dentry;
+	struct path lower_path;
+	/* XXX readlink does not requires overriding credential */
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	if (!d_inode(lower_dentry)->i_op ||
+	    !d_inode(lower_dentry)->i_op->readlink) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = d_inode(lower_dentry)->i_op->readlink(lower_dentry,
+						    buf, bufsiz);
+	if (err < 0)
+		goto out;
+	fsstack_copy_attr_atime(d_inode(dentry), d_inode(lower_dentry));
+
+out:
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	return err;
+}
+#endif
+
+#if 0
+static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie)
+{
+	char *buf;
+	int len = PAGE_SIZE, err;
+	mm_segment_t old_fs;
+
+	/* This is freed by the put_link method assuming a successful call. */
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf) {
+		buf = ERR_PTR(-ENOMEM);
+		return buf;
+	}
+
+	/* read the symlink, and then we will follow it */
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = sdcardfs_readlink(dentry, buf, len);
+	set_fs(old_fs);
+	if (err < 0) {
+		kfree(buf);
+		buf = ERR_PTR(err);
+	} else {
+		buf[err] = '\0';
+	}
+	return *cookie = buf;
+}
+#endif
+
+static int sdcardfs_permission_wrn(struct inode *inode, int mask)
+{
+	WARN_RATELIMIT(1, "sdcardfs does not support permission. Use permission2.\n");
+	return -EINVAL;
+}
+
+void copy_attrs(struct inode *dest, const struct inode *src)
+{
+	dest->i_mode = src->i_mode;
+	dest->i_uid = src->i_uid;
+	dest->i_gid = src->i_gid;
+	dest->i_rdev = src->i_rdev;
+	dest->i_atime = src->i_atime;
+	dest->i_mtime = src->i_mtime;
+	dest->i_ctime = src->i_ctime;
+	dest->i_blkbits = src->i_blkbits;
+	dest->i_flags = src->i_flags;
+#ifdef CONFIG_FS_POSIX_ACL
+	dest->i_acl = src->i_acl;
+#endif
+#ifdef CONFIG_SECURITY
+	dest->i_security = src->i_security;
+#endif
+}
+
+static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int mask)
+{
+	int err;
+	struct inode tmp;
+	struct sdcardfs_inode_data *top = top_data_get(SDCARDFS_I(inode));
+
+	if (!top)
+		return -EINVAL;
+
+	/*
+	 * Permission check on sdcardfs inode.
+	 * Calling process should have AID_SDCARD_RW permission
+	 * Since generic_permission only needs i_mode, i_uid,
+	 * i_gid, and i_sb, we can create a fake inode to pass
+	 * this information down in.
+	 *
+	 * The underlying code may attempt to take locks in some
+	 * cases for features we're not using, but if that changes,
+	 * locks must be dealt with to avoid undefined behavior.
+	 */
+	copy_attrs(&tmp, inode);
+	tmp.i_uid = make_kuid(&init_user_ns, top->d_uid);
+	tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, top));
+	tmp.i_mode = (inode->i_mode & S_IFMT)
+			| get_mode(mnt, SDCARDFS_I(inode), top);
+	data_put(top);
+	tmp.i_sb = inode->i_sb;
+	if (IS_POSIXACL(inode))
+		pr_warn("%s: This may be undefined behavior...\n", __func__);
+	err = generic_permission(&tmp, mask);
+	return err;
+}
+
+static int sdcardfs_setattr_wrn(struct dentry *dentry, struct iattr *ia)
+{
+	WARN_RATELIMIT(1, "sdcardfs does not support setattr. User setattr2.\n");
+	return -EINVAL;
+}
+
+static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct iattr *ia)
+{
+	int err;
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_mnt;
+	struct inode *inode;
+	struct inode *lower_inode;
+	struct path lower_path;
+	struct iattr lower_ia;
+	struct dentry *parent;
+	struct inode tmp;
+	struct dentry tmp_d;
+	struct sdcardfs_inode_data *top;
+
+	const struct cred *saved_cred = NULL;
+
+	inode = d_inode(dentry);
+	top = top_data_get(SDCARDFS_I(inode));
+
+	if (!top)
+		return -EINVAL;
+
+	/*
+	 * Permission check on sdcardfs inode.
+	 * Calling process should have AID_SDCARD_RW permission
+	 * Since generic_permission only needs i_mode, i_uid,
+	 * i_gid, and i_sb, we can create a fake inode to pass
+	 * this information down in.
+	 *
+	 * The underlying code may attempt to take locks in some
+	 * cases for features we're not using, but if that changes,
+	 * locks must be dealt with to avoid undefined behavior.
+	 *
+	 */
+	copy_attrs(&tmp, inode);
+	tmp.i_uid = make_kuid(&init_user_ns, top->d_uid);
+	tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, top));
+	tmp.i_mode = (inode->i_mode & S_IFMT)
+			| get_mode(mnt, SDCARDFS_I(inode), top);
+	tmp.i_size = i_size_read(inode);
+	data_put(top);
+	tmp.i_sb = inode->i_sb;
+	tmp_d.d_inode = &tmp;
+
+	/*
+	 * Check if user has permission to change dentry.  We don't check if
+	 * this user can change the lower inode: that should happen when
+	 * calling notify_change on the lower inode.
+	 */
+	/* prepare our own lower struct iattr (with the lower file) */
+	memcpy(&lower_ia, ia, sizeof(lower_ia));
+	/* Allow touch updating timestamps. A previous permission check ensures
+	 * we have write access. Changes to mode, owner, and group are ignored
+	 */
+	ia->ia_valid |= ATTR_FORCE;
+	err = setattr_prepare(&tmp_d, ia);
+
+	if (!err) {
+		/* check the Android group ID */
+		parent = dget_parent(dentry);
+		if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name))
+			err = -EACCES;
+		dput(parent);
+	}
+
+	if (err)
+		goto out_err;
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(dentry->d_sb),
+						SDCARDFS_I(inode)->data);
+	if (!saved_cred)
+		return -ENOMEM;
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	lower_dentry = lower_path.dentry;
+	lower_mnt = lower_path.mnt;
+	lower_inode = sdcardfs_lower_inode(inode);
+
+	if (ia->ia_valid & ATTR_FILE)
+		lower_ia.ia_file = sdcardfs_lower_file(ia->ia_file);
+
+	lower_ia.ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE);
+
+	/*
+	 * If shrinking, first truncate upper level to cancel writing dirty
+	 * pages beyond the new eof; and also if its' maxbytes is more
+	 * limiting (fail with -EFBIG before making any change to the lower
+	 * level).  There is no need to vmtruncate the upper level
+	 * afterwards in the other cases: we fsstack_copy_inode_size from
+	 * the lower level.
+	 */
+	if (ia->ia_valid & ATTR_SIZE) {
+		err = inode_newsize_ok(&tmp, ia->ia_size);
+		if (err) {
+			goto out;
+		}
+		truncate_setsize(inode, ia->ia_size);
+	}
+
+	/*
+	 * mode change is for clearing setuid/setgid bits. Allow lower fs
+	 * to interpret this in its own way.
+	 */
+	if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+		lower_ia.ia_valid &= ~ATTR_MODE;
+
+	/* notify the (possibly copied-up) lower inode */
+	/*
+	 * Note: we use d_inode(lower_dentry), because lower_inode may be
+	 * unlinked (no inode->i_sb and i_ino==0.  This happens if someone
+	 * tries to open(), unlink(), then ftruncate() a file.
+	 */
+	inode_lock(d_inode(lower_dentry));
+	err = notify_change2(lower_mnt, lower_dentry, &lower_ia, /* note: lower_ia */
+			NULL);
+	inode_unlock(d_inode(lower_dentry));
+	if (err)
+		goto out;
+
+	/* get attributes from the lower inode and update derived permissions */
+	sdcardfs_copy_and_fix_attrs(inode, lower_inode);
+
+	/*
+	 * Not running fsstack_copy_inode_size(inode, lower_inode), because
+	 * VFS should update our inode size, and notify_change on
+	 * lower_inode should update its size.
+	 */
+
+out:
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	revert_fsids(saved_cred);
+out_err:
+	return err;
+}
+
+static int sdcardfs_fillattr(struct vfsmount *mnt,
+				struct inode *inode, struct kstat *stat)
+{
+	struct sdcardfs_inode_info *info = SDCARDFS_I(inode);
+	struct sdcardfs_inode_data *top = top_data_get(info);
+
+	if (!top)
+		return -EINVAL;
+
+	stat->dev = inode->i_sb->s_dev;
+	stat->ino = inode->i_ino;
+	stat->mode = (inode->i_mode  & S_IFMT) | get_mode(mnt, info, top);
+	stat->nlink = inode->i_nlink;
+	stat->uid = make_kuid(&init_user_ns, top->d_uid);
+	stat->gid = make_kgid(&init_user_ns, get_gid(mnt, top));
+	stat->rdev = inode->i_rdev;
+	stat->size = i_size_read(inode);
+	stat->atime = inode->i_atime;
+	stat->mtime = inode->i_mtime;
+	stat->ctime = inode->i_ctime;
+	stat->blksize = (1 << inode->i_blkbits);
+	stat->blocks = inode->i_blocks;
+	data_put(top);
+	return 0;
+}
+
+static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+		 struct kstat *stat)
+{
+	struct kstat lower_stat;
+	struct path lower_path;
+	struct dentry *parent;
+	int err;
+
+	parent = dget_parent(dentry);
+	if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) {
+		dput(parent);
+		return -EACCES;
+	}
+	dput(parent);
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	err = vfs_getattr(&lower_path, &lower_stat);
+	if (err)
+		goto out;
+	sdcardfs_copy_and_fix_attrs(d_inode(dentry),
+			      d_inode(lower_path.dentry));
+	err = sdcardfs_fillattr(mnt, d_inode(dentry), stat);
+	stat->blocks = lower_stat.blocks;
+out:
+	sdcardfs_put_lower_path(dentry, &lower_path);
+	return err;
+}
+
+const struct inode_operations sdcardfs_symlink_iops = {
+	.permission2	= sdcardfs_permission,
+	.setattr2	= sdcardfs_setattr,
+	/* XXX Following operations are implemented,
+	 *     but FUSE(sdcard) or FAT does not support them
+	 *     These methods are *NOT* perfectly tested.
+	.readlink	= sdcardfs_readlink,
+	.follow_link	= sdcardfs_follow_link,
+	.put_link	= kfree_put_link,
+	 */
+};
+
+const struct inode_operations sdcardfs_dir_iops = {
+	.create		= sdcardfs_create,
+	.lookup		= sdcardfs_lookup,
+	.permission	= sdcardfs_permission_wrn,
+	.permission2	= sdcardfs_permission,
+	.unlink		= sdcardfs_unlink,
+	.mkdir		= sdcardfs_mkdir,
+	.rmdir		= sdcardfs_rmdir,
+	.rename		= sdcardfs_rename,
+	.setattr	= sdcardfs_setattr_wrn,
+	.setattr2	= sdcardfs_setattr,
+	.getattr	= sdcardfs_getattr,
+};
+
+const struct inode_operations sdcardfs_main_iops = {
+	.permission	= sdcardfs_permission_wrn,
+	.permission2	= sdcardfs_permission,
+	.setattr	= sdcardfs_setattr_wrn,
+	.setattr2	= sdcardfs_setattr,
+	.getattr	= sdcardfs_getattr,
+};
diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c
new file mode 100644
index 0000000..e1bff0d
--- /dev/null
+++ b/fs/sdcardfs/lookup.c
@@ -0,0 +1,471 @@
+/*
+ * fs/sdcardfs/lookup.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include "linux/delay.h"
+
+/* The dentry cache is just so we have properly sized dentries */
+static struct kmem_cache *sdcardfs_dentry_cachep;
+
+int sdcardfs_init_dentry_cache(void)
+{
+	sdcardfs_dentry_cachep =
+		kmem_cache_create("sdcardfs_dentry",
+				  sizeof(struct sdcardfs_dentry_info),
+				  0, SLAB_RECLAIM_ACCOUNT, NULL);
+
+	return sdcardfs_dentry_cachep ? 0 : -ENOMEM;
+}
+
+void sdcardfs_destroy_dentry_cache(void)
+{
+	kmem_cache_destroy(sdcardfs_dentry_cachep);
+}
+
+void free_dentry_private_data(struct dentry *dentry)
+{
+	if (!dentry || !dentry->d_fsdata)
+		return;
+	kmem_cache_free(sdcardfs_dentry_cachep, dentry->d_fsdata);
+	dentry->d_fsdata = NULL;
+}
+
+/* allocate new dentry private data */
+int new_dentry_private_data(struct dentry *dentry)
+{
+	struct sdcardfs_dentry_info *info = SDCARDFS_D(dentry);
+
+	/* use zalloc to init dentry_info.lower_path */
+	info = kmem_cache_zalloc(sdcardfs_dentry_cachep, GFP_ATOMIC);
+	if (!info)
+		return -ENOMEM;
+
+	spin_lock_init(&info->lock);
+	dentry->d_fsdata = info;
+
+	return 0;
+}
+
+struct inode_data {
+	struct inode *lower_inode;
+	userid_t id;
+};
+
+static int sdcardfs_inode_test(struct inode *inode, void *candidate_data/*void *candidate_lower_inode*/)
+{
+	struct inode *current_lower_inode = sdcardfs_lower_inode(inode);
+	userid_t current_userid = SDCARDFS_I(inode)->data->userid;
+
+	if (current_lower_inode == ((struct inode_data *)candidate_data)->lower_inode &&
+			current_userid == ((struct inode_data *)candidate_data)->id)
+		return 1; /* found a match */
+	else
+		return 0; /* no match */
+}
+
+static int sdcardfs_inode_set(struct inode *inode, void *lower_inode)
+{
+	/* we do actual inode initialization in sdcardfs_iget */
+	return 0;
+}
+
+struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, userid_t id)
+{
+	struct sdcardfs_inode_info *info;
+	struct inode_data data;
+	struct inode *inode; /* the new inode to return */
+
+	if (!igrab(lower_inode))
+		return ERR_PTR(-ESTALE);
+
+	data.id = id;
+	data.lower_inode = lower_inode;
+	inode = iget5_locked(sb, /* our superblock */
+			     /*
+			      * hashval: we use inode number, but we can
+			      * also use "(unsigned long)lower_inode"
+			      * instead.
+			      */
+			     lower_inode->i_ino, /* hashval */
+			     sdcardfs_inode_test, /* inode comparison function */
+			     sdcardfs_inode_set, /* inode init function */
+			     &data); /* data passed to test+set fxns */
+	if (!inode) {
+		iput(lower_inode);
+		return ERR_PTR(-ENOMEM);
+	}
+	/* if found a cached inode, then just return it (after iput) */
+	if (!(inode->i_state & I_NEW)) {
+		iput(lower_inode);
+		return inode;
+	}
+
+	/* initialize new inode */
+	info = SDCARDFS_I(inode);
+
+	inode->i_ino = lower_inode->i_ino;
+	sdcardfs_set_lower_inode(inode, lower_inode);
+
+	inode->i_version++;
+
+	/* use different set of inode ops for symlinks & directories */
+	if (S_ISDIR(lower_inode->i_mode))
+		inode->i_op = &sdcardfs_dir_iops;
+	else if (S_ISLNK(lower_inode->i_mode))
+		inode->i_op = &sdcardfs_symlink_iops;
+	else
+		inode->i_op = &sdcardfs_main_iops;
+
+	/* use different set of file ops for directories */
+	if (S_ISDIR(lower_inode->i_mode))
+		inode->i_fop = &sdcardfs_dir_fops;
+	else
+		inode->i_fop = &sdcardfs_main_fops;
+
+	inode->i_mapping->a_ops = &sdcardfs_aops;
+
+	inode->i_atime.tv_sec = 0;
+	inode->i_atime.tv_nsec = 0;
+	inode->i_mtime.tv_sec = 0;
+	inode->i_mtime.tv_nsec = 0;
+	inode->i_ctime.tv_sec = 0;
+	inode->i_ctime.tv_nsec = 0;
+
+	/* properly initialize special inodes */
+	if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
+	    S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
+		init_special_inode(inode, lower_inode->i_mode,
+				   lower_inode->i_rdev);
+
+	/* all well, copy inode attributes */
+	sdcardfs_copy_and_fix_attrs(inode, lower_inode);
+	fsstack_copy_inode_size(inode, lower_inode);
+
+	unlock_new_inode(inode);
+	return inode;
+}
+
+/*
+ * Helper interpose routine, called directly by ->lookup to handle
+ * spliced dentries.
+ */
+static struct dentry *__sdcardfs_interpose(struct dentry *dentry,
+					 struct super_block *sb,
+					 struct path *lower_path,
+					 userid_t id)
+{
+	struct inode *inode;
+	struct inode *lower_inode;
+	struct super_block *lower_sb;
+	struct dentry *ret_dentry;
+
+	lower_inode = d_inode(lower_path->dentry);
+	lower_sb = sdcardfs_lower_super(sb);
+
+	/* check that the lower file system didn't cross a mount point */
+	if (lower_inode->i_sb != lower_sb) {
+		ret_dentry = ERR_PTR(-EXDEV);
+		goto out;
+	}
+
+	/*
+	 * We allocate our new inode below by calling sdcardfs_iget,
+	 * which will initialize some of the new inode's fields
+	 */
+
+	/* inherit lower inode number for sdcardfs's inode */
+	inode = sdcardfs_iget(sb, lower_inode, id);
+	if (IS_ERR(inode)) {
+		ret_dentry = ERR_CAST(inode);
+		goto out;
+	}
+
+	ret_dentry = d_splice_alias(inode, dentry);
+	dentry = ret_dentry ?: dentry;
+	if (!IS_ERR(dentry))
+		update_derived_permission_lock(dentry);
+out:
+	return ret_dentry;
+}
+
+/*
+ * Connect an sdcardfs inode dentry/inode with several lower ones.  This is
+ * the classic stackable file system "vnode interposition" action.
+ *
+ * @dentry: sdcardfs's dentry which interposes on lower one
+ * @sb: sdcardfs's super_block
+ * @lower_path: the lower path (caller does path_get/put)
+ */
+int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
+		     struct path *lower_path, userid_t id)
+{
+	struct dentry *ret_dentry;
+
+	ret_dentry = __sdcardfs_interpose(dentry, sb, lower_path, id);
+	return PTR_ERR(ret_dentry);
+}
+
+struct sdcardfs_name_data {
+	struct dir_context ctx;
+	const struct qstr *to_find;
+	char *name;
+	bool found;
+};
+
+static int sdcardfs_name_match(struct dir_context *ctx, const char *name,
+		int namelen, loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct sdcardfs_name_data *buf = container_of(ctx, struct sdcardfs_name_data, ctx);
+	struct qstr candidate = QSTR_INIT(name, namelen);
+
+	if (qstr_case_eq(buf->to_find, &candidate)) {
+		memcpy(buf->name, name, namelen);
+		buf->name[namelen] = 0;
+		buf->found = true;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Main driver function for sdcardfs's lookup.
+ *
+ * Returns: NULL (ok), ERR_PTR if an error occurred.
+ * Fills in lower_parent_path with <dentry,mnt> on success.
+ */
+static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
+		unsigned int flags, struct path *lower_parent_path, userid_t id)
+{
+	int err = 0;
+	struct vfsmount *lower_dir_mnt;
+	struct dentry *lower_dir_dentry = NULL;
+	struct dentry *lower_dentry;
+	const struct qstr *name;
+	struct path lower_path;
+	struct qstr dname;
+	struct dentry *ret_dentry = NULL;
+	struct sdcardfs_sb_info *sbi;
+
+	sbi = SDCARDFS_SB(dentry->d_sb);
+	/* must initialize dentry operations */
+	d_set_d_op(dentry, &sdcardfs_ci_dops);
+
+	if (IS_ROOT(dentry))
+		goto out;
+
+	name = &dentry->d_name;
+
+	/* now start the actual lookup procedure */
+	lower_dir_dentry = lower_parent_path->dentry;
+	lower_dir_mnt = lower_parent_path->mnt;
+
+	/* Use vfs_path_lookup to check if the dentry exists or not */
+	err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name->name, 0,
+				&lower_path);
+	/* check for other cases */
+	if (err == -ENOENT) {
+		struct file *file;
+		const struct cred *cred = current_cred();
+
+		struct sdcardfs_name_data buffer = {
+			.ctx.actor = sdcardfs_name_match,
+			.to_find = name,
+			.name = __getname(),
+			.found = false,
+		};
+
+		if (!buffer.name) {
+			err = -ENOMEM;
+			goto out;
+		}
+		file = dentry_open(lower_parent_path, O_RDONLY, cred);
+		if (IS_ERR(file)) {
+			err = PTR_ERR(file);
+			goto put_name;
+		}
+		err = iterate_dir(file, &buffer.ctx);
+		fput(file);
+		if (err)
+			goto put_name;
+
+		if (buffer.found)
+			err = vfs_path_lookup(lower_dir_dentry,
+						lower_dir_mnt,
+						buffer.name, 0,
+						&lower_path);
+		else
+			err = -ENOENT;
+put_name:
+		__putname(buffer.name);
+	}
+
+	/* no error: handle positive dentries */
+	if (!err) {
+		/* check if the dentry is an obb dentry
+		 * if true, the lower_inode must be replaced with
+		 * the inode of the graft path
+		 */
+
+		if (need_graft_path(dentry)) {
+
+			/* setup_obb_dentry()
+			 * The lower_path will be stored to the dentry's orig_path
+			 * and the base obbpath will be copyed to the lower_path variable.
+			 * if an error returned, there's no change in the lower_path
+			 * returns: -ERRNO if error (0: no error)
+			 */
+			err = setup_obb_dentry(dentry, &lower_path);
+
+			if (err) {
+				/* if the sbi->obbpath is not available, we can optionally
+				 * setup the lower_path with its orig_path.
+				 * but, the current implementation just returns an error
+				 * because the sdcard daemon also regards this case as
+				 * a lookup fail.
+				 */
+				pr_info("sdcardfs: base obbpath is not available\n");
+				sdcardfs_put_reset_orig_path(dentry);
+				goto out;
+			}
+		}
+
+		sdcardfs_set_lower_path(dentry, &lower_path);
+		ret_dentry =
+			__sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id);
+		if (IS_ERR(ret_dentry)) {
+			err = PTR_ERR(ret_dentry);
+			 /* path_put underlying path on error */
+			sdcardfs_put_reset_lower_path(dentry);
+		}
+		goto out;
+	}
+
+	/*
+	 * We don't consider ENOENT an error, and we want to return a
+	 * negative dentry.
+	 */
+	if (err && err != -ENOENT)
+		goto out;
+
+	/* instatiate a new negative dentry */
+	dname.name = name->name;
+	dname.len = name->len;
+
+	/* See if the low-level filesystem might want
+	 * to use its own hash
+	 */
+	lower_dentry = d_hash_and_lookup(lower_dir_dentry, &dname);
+	if (IS_ERR(lower_dentry))
+		return lower_dentry;
+	if (!lower_dentry) {
+		/* We called vfs_path_lookup earlier, and did not get a negative
+		 * dentry then. Don't confuse the lower filesystem by forcing
+		 * one on it now...
+		 */
+		err = -ENOENT;
+		goto out;
+	}
+
+	lower_path.dentry = lower_dentry;
+	lower_path.mnt = mntget(lower_dir_mnt);
+	sdcardfs_set_lower_path(dentry, &lower_path);
+
+	/*
+	 * If the intent is to create a file, then don't return an error, so
+	 * the VFS will continue the process of making this negative dentry
+	 * into a positive one.
+	 */
+	if (flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET))
+		err = 0;
+
+out:
+	if (err)
+		return ERR_PTR(err);
+	return ret_dentry;
+}
+
+/*
+ * On success:
+ * fills dentry object appropriate values and returns NULL.
+ * On fail (== error)
+ * returns error ptr
+ *
+ * @dir : Parent inode.
+ * @dentry : Target dentry to lookup. we should set each of fields.
+ *	     (dentry->d_name is initialized already)
+ * @nd : nameidata of parent inode
+ */
+struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
+			     unsigned int flags)
+{
+	struct dentry *ret = NULL, *parent;
+	struct path lower_parent_path;
+	int err = 0;
+	const struct cred *saved_cred = NULL;
+
+	parent = dget_parent(dentry);
+
+	if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) {
+		ret = ERR_PTR(-EACCES);
+		goto out_err;
+	}
+
+	/* save current_cred and override it */
+	saved_cred = override_fsids(SDCARDFS_SB(dir->i_sb),
+						SDCARDFS_I(dir)->data);
+	if (!saved_cred) {
+		ret = ERR_PTR(-ENOMEM);
+		goto out_err;
+	}
+
+	sdcardfs_get_lower_path(parent, &lower_parent_path);
+
+	/* allocate dentry private data.  We free it in ->d_release */
+	err = new_dentry_private_data(dentry);
+	if (err) {
+		ret = ERR_PTR(err);
+		goto out;
+	}
+
+	ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path,
+				SDCARDFS_I(dir)->data->userid);
+	if (IS_ERR(ret))
+		goto out;
+	if (ret)
+		dentry = ret;
+	if (d_inode(dentry)) {
+		fsstack_copy_attr_times(d_inode(dentry),
+					sdcardfs_lower_inode(d_inode(dentry)));
+		/* get derived permission */
+		get_derived_permission(parent, dentry);
+		fixup_tmp_permissions(d_inode(dentry));
+		fixup_lower_ownership(dentry, dentry->d_name.name);
+	}
+	/* update parent directory's atime */
+	fsstack_copy_attr_atime(d_inode(parent),
+				sdcardfs_lower_inode(d_inode(parent)));
+
+out:
+	sdcardfs_put_lower_path(parent, &lower_parent_path);
+	revert_fsids(saved_cred);
+out_err:
+	dput(parent);
+	return ret;
+}
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
new file mode 100644
index 0000000..80825b2
--- /dev/null
+++ b/fs/sdcardfs/main.c
@@ -0,0 +1,479 @@
+/*
+ * fs/sdcardfs/main.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/parser.h>
+
+enum {
+	Opt_fsuid,
+	Opt_fsgid,
+	Opt_gid,
+	Opt_debug,
+	Opt_mask,
+	Opt_multiuser,
+	Opt_userid,
+	Opt_reserved_mb,
+	Opt_err,
+};
+
+static const match_table_t sdcardfs_tokens = {
+	{Opt_fsuid, "fsuid=%u"},
+	{Opt_fsgid, "fsgid=%u"},
+	{Opt_gid, "gid=%u"},
+	{Opt_debug, "debug"},
+	{Opt_mask, "mask=%u"},
+	{Opt_userid, "userid=%d"},
+	{Opt_multiuser, "multiuser"},
+	{Opt_reserved_mb, "reserved_mb=%u"},
+	{Opt_err, NULL}
+};
+
+static int parse_options(struct super_block *sb, char *options, int silent,
+				int *debug, struct sdcardfs_vfsmount_options *vfsopts,
+				struct sdcardfs_mount_options *opts)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+
+	/* by default, we use AID_MEDIA_RW as uid, gid */
+	opts->fs_low_uid = AID_MEDIA_RW;
+	opts->fs_low_gid = AID_MEDIA_RW;
+	vfsopts->mask = 0;
+	opts->multiuser = false;
+	opts->fs_user_id = 0;
+	vfsopts->gid = 0;
+	/* by default, 0MB is reserved */
+	opts->reserved_mb = 0;
+
+	*debug = 0;
+
+	if (!options)
+		return 0;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, sdcardfs_tokens, args);
+
+		switch (token) {
+		case Opt_debug:
+			*debug = 1;
+			break;
+		case Opt_fsuid:
+			if (match_int(&args[0], &option))
+				return 0;
+			opts->fs_low_uid = option;
+			break;
+		case Opt_fsgid:
+			if (match_int(&args[0], &option))
+				return 0;
+			opts->fs_low_gid = option;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				return 0;
+			vfsopts->gid = option;
+			break;
+		case Opt_userid:
+			if (match_int(&args[0], &option))
+				return 0;
+			opts->fs_user_id = option;
+			break;
+		case Opt_mask:
+			if (match_int(&args[0], &option))
+				return 0;
+			vfsopts->mask = option;
+			break;
+		case Opt_multiuser:
+			opts->multiuser = true;
+			break;
+		case Opt_reserved_mb:
+			if (match_int(&args[0], &option))
+				return 0;
+			opts->reserved_mb = option;
+			break;
+		/* unknown option */
+		default:
+			if (!silent)
+				pr_err("Unrecognized mount option \"%s\" or missing value", p);
+			return -EINVAL;
+		}
+	}
+
+	if (*debug) {
+		pr_info("sdcardfs : options - debug:%d\n", *debug);
+		pr_info("sdcardfs : options - uid:%d\n",
+							opts->fs_low_uid);
+		pr_info("sdcardfs : options - gid:%d\n",
+							opts->fs_low_gid);
+	}
+
+	return 0;
+}
+
+int parse_options_remount(struct super_block *sb, char *options, int silent,
+				struct sdcardfs_vfsmount_options *vfsopts)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+	int debug;
+
+	if (!options)
+		return 0;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, sdcardfs_tokens, args);
+
+		switch (token) {
+		case Opt_debug:
+			debug = 1;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				return 0;
+			vfsopts->gid = option;
+
+			break;
+		case Opt_mask:
+			if (match_int(&args[0], &option))
+				return 0;
+			vfsopts->mask = option;
+			break;
+		case Opt_multiuser:
+		case Opt_userid:
+		case Opt_fsuid:
+		case Opt_fsgid:
+		case Opt_reserved_mb:
+			pr_warn("Option \"%s\" can't be changed during remount\n", p);
+			break;
+		/* unknown option */
+		default:
+			if (!silent)
+				pr_err("Unrecognized mount option \"%s\" or missing value", p);
+			return -EINVAL;
+		}
+	}
+
+	if (debug) {
+		pr_info("sdcardfs : options - debug:%d\n", debug);
+		pr_info("sdcardfs : options - gid:%d\n", vfsopts->gid);
+		pr_info("sdcardfs : options - mask:%d\n", vfsopts->mask);
+	}
+
+	return 0;
+}
+
+#if 0
+/*
+ * our custom d_alloc_root work-alike
+ *
+ * we can't use d_alloc_root if we want to use our own interpose function
+ * unchanged, so we simply call our own "fake" d_alloc_root
+ */
+static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb)
+{
+	struct dentry *ret = NULL;
+
+	if (sb) {
+		static const struct qstr name = {
+			.name = "/",
+			.len = 1
+		};
+
+		ret = d_alloc(NULL, &name);
+		if (ret) {
+			d_set_d_op(ret, &sdcardfs_ci_dops);
+			ret->d_sb = sb;
+			ret->d_parent = ret;
+		}
+	}
+	return ret;
+}
+#endif
+
+DEFINE_MUTEX(sdcardfs_super_list_lock);
+EXPORT_SYMBOL_GPL(sdcardfs_super_list_lock);
+LIST_HEAD(sdcardfs_super_list);
+EXPORT_SYMBOL_GPL(sdcardfs_super_list);
+
+/*
+ * There is no need to lock the sdcardfs_super_info's rwsem as there is no
+ * way anyone can have a reference to the superblock at this point in time.
+ */
+static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb,
+		const char *dev_name, void *raw_data, int silent)
+{
+	int err = 0;
+	int debug;
+	struct super_block *lower_sb;
+	struct path lower_path;
+	struct sdcardfs_sb_info *sb_info;
+	struct sdcardfs_vfsmount_options *mnt_opt = mnt->data;
+	struct inode *inode;
+
+	pr_info("sdcardfs version 2.0\n");
+
+	if (!dev_name) {
+		pr_err("sdcardfs: read_super: missing dev_name argument\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	pr_info("sdcardfs: dev_name -> %s\n", dev_name);
+	pr_info("sdcardfs: options -> %s\n", (char *)raw_data);
+	pr_info("sdcardfs: mnt -> %p\n", mnt);
+
+	/* parse lower path */
+	err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
+			&lower_path);
+	if (err) {
+		pr_err("sdcardfs: error accessing lower directory '%s'\n", dev_name);
+		goto out;
+	}
+
+	/* allocate superblock private data */
+	sb->s_fs_info = kzalloc(sizeof(struct sdcardfs_sb_info), GFP_KERNEL);
+	if (!SDCARDFS_SB(sb)) {
+		pr_crit("sdcardfs: read_super: out of memory\n");
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	sb_info = sb->s_fs_info;
+	/* parse options */
+	err = parse_options(sb, raw_data, silent, &debug, mnt_opt, &sb_info->options);
+	if (err) {
+		pr_err("sdcardfs: invalid options\n");
+		goto out_freesbi;
+	}
+
+	/* set the lower superblock field of upper superblock */
+	lower_sb = lower_path.dentry->d_sb;
+	atomic_inc(&lower_sb->s_active);
+	sdcardfs_set_lower_super(sb, lower_sb);
+
+	/* inherit maxbytes from lower file system */
+	sb->s_maxbytes = lower_sb->s_maxbytes;
+
+	/*
+	 * Our c/m/atime granularity is 1 ns because we may stack on file
+	 * systems whose granularity is as good.
+	 */
+	sb->s_time_gran = 1;
+
+	sb->s_magic = SDCARDFS_SUPER_MAGIC;
+	sb->s_op = &sdcardfs_sops;
+
+	/* get a new inode and allocate our root dentry */
+	inode = sdcardfs_iget(sb, d_inode(lower_path.dentry), 0);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out_sput;
+	}
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root) {
+		err = -ENOMEM;
+		goto out_iput;
+	}
+	d_set_d_op(sb->s_root, &sdcardfs_ci_dops);
+
+	/* link the upper and lower dentries */
+	sb->s_root->d_fsdata = NULL;
+	err = new_dentry_private_data(sb->s_root);
+	if (err)
+		goto out_freeroot;
+
+	/* set the lower dentries for s_root */
+	sdcardfs_set_lower_path(sb->s_root, &lower_path);
+
+	/*
+	 * No need to call interpose because we already have a positive
+	 * dentry, which was instantiated by d_make_root.  Just need to
+	 * d_rehash it.
+	 */
+	d_rehash(sb->s_root);
+
+	/* setup permission policy */
+	sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
+	mutex_lock(&sdcardfs_super_list_lock);
+	if (sb_info->options.multiuser) {
+		setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT,
+				sb_info->options.fs_user_id, AID_ROOT,
+				false, SDCARDFS_I(d_inode(sb->s_root))->data);
+		snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name);
+	} else {
+		setup_derived_state(d_inode(sb->s_root), PERM_ROOT,
+				sb_info->options.fs_user_id, AID_ROOT,
+				false, SDCARDFS_I(d_inode(sb->s_root))->data);
+		snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name);
+	}
+	fixup_tmp_permissions(d_inode(sb->s_root));
+	sb_info->sb = sb;
+	list_add(&sb_info->list, &sdcardfs_super_list);
+	mutex_unlock(&sdcardfs_super_list_lock);
+
+	if (!silent)
+		pr_info("sdcardfs: mounted on top of %s type %s\n",
+				dev_name, lower_sb->s_type->name);
+	goto out; /* all is well */
+
+	/* no longer needed: free_dentry_private_data(sb->s_root); */
+out_freeroot:
+	dput(sb->s_root);
+out_iput:
+	iput(inode);
+out_sput:
+	/* drop refs we took earlier */
+	atomic_dec(&lower_sb->s_active);
+out_freesbi:
+	kfree(SDCARDFS_SB(sb));
+	sb->s_fs_info = NULL;
+out_free:
+	path_put(&lower_path);
+
+out:
+	return err;
+}
+
+struct sdcardfs_mount_private {
+	struct vfsmount *mnt;
+	const char *dev_name;
+	void *raw_data;
+};
+
+static int __sdcardfs_fill_super(
+	struct super_block *sb,
+	void *_priv, int silent)
+{
+	struct sdcardfs_mount_private *priv = _priv;
+
+	return sdcardfs_read_super(priv->mnt,
+		sb, priv->dev_name, priv->raw_data, silent);
+}
+
+static struct dentry *sdcardfs_mount(struct vfsmount *mnt,
+		struct file_system_type *fs_type, int flags,
+			    const char *dev_name, void *raw_data)
+{
+	struct sdcardfs_mount_private priv = {
+		.mnt = mnt,
+		.dev_name = dev_name,
+		.raw_data = raw_data
+	};
+
+	return mount_nodev(fs_type, flags,
+		&priv, __sdcardfs_fill_super);
+}
+
+static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type,
+		    int flags, const char *dev_name, void *raw_data)
+{
+	WARN(1, "sdcardfs does not support mount. Use mount2.\n");
+	return ERR_PTR(-EINVAL);
+}
+
+void *sdcardfs_alloc_mnt_data(void)
+{
+	return kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL);
+}
+
+void sdcardfs_kill_sb(struct super_block *sb)
+{
+	struct sdcardfs_sb_info *sbi;
+
+	if (sb->s_magic == SDCARDFS_SUPER_MAGIC) {
+		sbi = SDCARDFS_SB(sb);
+		mutex_lock(&sdcardfs_super_list_lock);
+		list_del(&sbi->list);
+		mutex_unlock(&sdcardfs_super_list_lock);
+	}
+	kill_anon_super(sb);
+}
+
+static struct file_system_type sdcardfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= SDCARDFS_NAME,
+	.mount		= sdcardfs_mount_wrn,
+	.mount2		= sdcardfs_mount,
+	.alloc_mnt_data = sdcardfs_alloc_mnt_data,
+	.kill_sb	= sdcardfs_kill_sb,
+	.fs_flags	= 0,
+};
+MODULE_ALIAS_FS(SDCARDFS_NAME);
+
+static int __init init_sdcardfs_fs(void)
+{
+	int err;
+
+	pr_info("Registering sdcardfs " SDCARDFS_VERSION "\n");
+
+	err = sdcardfs_init_inode_cache();
+	if (err)
+		goto out;
+	err = sdcardfs_init_dentry_cache();
+	if (err)
+		goto out;
+	err = packagelist_init();
+	if (err)
+		goto out;
+	err = register_filesystem(&sdcardfs_fs_type);
+out:
+	if (err) {
+		sdcardfs_destroy_inode_cache();
+		sdcardfs_destroy_dentry_cache();
+		packagelist_exit();
+	}
+	return err;
+}
+
+static void __exit exit_sdcardfs_fs(void)
+{
+	sdcardfs_destroy_inode_cache();
+	sdcardfs_destroy_dentry_cache();
+	packagelist_exit();
+	unregister_filesystem(&sdcardfs_fs_type);
+	pr_info("Completed sdcardfs module unload\n");
+}
+
+/* Original wrapfs authors */
+MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University (http://www.fsl.cs.sunysb.edu/)");
+
+/* Original sdcardfs authors */
+MODULE_AUTHOR("Woojoong Lee, Daeho Jeong, Kitae Lee, Yeongjin Gil System Memory Lab., Samsung Electronics");
+
+/* Current maintainer */
+MODULE_AUTHOR("Daniel Rosenberg, Google");
+MODULE_DESCRIPTION("Sdcardfs " SDCARDFS_VERSION);
+MODULE_LICENSE("GPL");
+
+module_init(init_sdcardfs_fs);
+module_exit(exit_sdcardfs_fs);
diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c
new file mode 100644
index 0000000..391d2a7
--- /dev/null
+++ b/fs/sdcardfs/mmap.c
@@ -0,0 +1,88 @@
+/*
+ * fs/sdcardfs/mmap.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	int err;
+	struct file *file;
+	const struct vm_operations_struct *lower_vm_ops;
+
+	file = (struct file *)vma->vm_private_data;
+	lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops;
+	BUG_ON(!lower_vm_ops);
+
+	err = lower_vm_ops->fault(vma, vmf);
+	return err;
+}
+
+static void sdcardfs_vm_open(struct vm_area_struct *vma)
+{
+	struct file *file = (struct file *)vma->vm_private_data;
+
+	get_file(file);
+}
+
+static void sdcardfs_vm_close(struct vm_area_struct *vma)
+{
+	struct file *file = (struct file *)vma->vm_private_data;
+
+	fput(file);
+}
+
+static int sdcardfs_page_mkwrite(struct vm_area_struct *vma,
+			       struct vm_fault *vmf)
+{
+	int err = 0;
+	struct file *file;
+	const struct vm_operations_struct *lower_vm_ops;
+
+	file = (struct file *)vma->vm_private_data;
+	lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops;
+	BUG_ON(!lower_vm_ops);
+	if (!lower_vm_ops->page_mkwrite)
+		goto out;
+
+	err = lower_vm_ops->page_mkwrite(vma, vmf);
+out:
+	return err;
+}
+
+static ssize_t sdcardfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+	/*
+	 * This function should never be called directly.  We need it
+	 * to exist, to get past a check in open_check_o_direct(),
+	 * which is called from do_last().
+	 */
+	return -EINVAL;
+}
+
+const struct address_space_operations sdcardfs_aops = {
+	.direct_IO	= sdcardfs_direct_IO,
+};
+
+const struct vm_operations_struct sdcardfs_vm_ops = {
+	.fault		= sdcardfs_fault,
+	.page_mkwrite	= sdcardfs_page_mkwrite,
+	.open		= sdcardfs_vm_open,
+	.close		= sdcardfs_vm_close,
+};
diff --git a/fs/sdcardfs/multiuser.h b/fs/sdcardfs/multiuser.h
new file mode 100644
index 0000000..85341e7
--- /dev/null
+++ b/fs/sdcardfs/multiuser.h
@@ -0,0 +1,53 @@
+/*
+ * fs/sdcardfs/multiuser.h
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#define AID_USER_OFFSET     100000 /* offset for uid ranges for each user */
+#define AID_APP_START        10000 /* first app user */
+#define AID_APP_END          19999 /* last app user */
+#define AID_CACHE_GID_START  20000 /* start of gids for apps to mark cached data */
+#define AID_EXT_GID_START    30000 /* start of gids for apps to mark external data */
+#define AID_EXT_CACHE_GID_START 40000 /* start of gids for apps to mark external cached data */
+#define AID_EXT_CACHE_GID_END 49999   /* end of gids for apps to mark external cached data */
+#define AID_SHARED_GID_START 50000 /* start of gids for apps in each user to share */
+
+typedef uid_t userid_t;
+typedef uid_t appid_t;
+
+static inline uid_t multiuser_get_uid(userid_t user_id, appid_t app_id)
+{
+	return (user_id * AID_USER_OFFSET) + (app_id % AID_USER_OFFSET);
+}
+
+static inline bool uid_is_app(uid_t uid)
+{
+	appid_t appid = uid % AID_USER_OFFSET;
+
+	return appid >= AID_APP_START && appid <= AID_APP_END;
+}
+
+static inline gid_t multiuser_get_ext_cache_gid(uid_t uid)
+{
+	return uid - AID_APP_START + AID_EXT_CACHE_GID_START;
+}
+
+static inline gid_t multiuser_get_ext_gid(uid_t uid)
+{
+	return uid - AID_APP_START + AID_EXT_GID_START;
+}
diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c
new file mode 100644
index 0000000..6da0c21
--- /dev/null
+++ b/fs/sdcardfs/packagelist.c
@@ -0,0 +1,881 @@
+/*
+ * fs/sdcardfs/packagelist.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+#include <linux/hashtable.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/radix-tree.h>
+#include <linux/dcache.h>
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+
+struct hashtable_entry {
+	struct hlist_node hlist;
+	struct hlist_node dlist; /* for deletion cleanup */
+	struct qstr key;
+	atomic_t value;
+};
+
+static DEFINE_HASHTABLE(package_to_appid, 8);
+static DEFINE_HASHTABLE(package_to_userid, 8);
+static DEFINE_HASHTABLE(ext_to_groupid, 8);
+
+
+static struct kmem_cache *hashtable_entry_cachep;
+
+static unsigned int full_name_case_hash(const void *salt, const unsigned char *name, unsigned int len)
+{
+	unsigned long hash = init_name_hash(salt);
+
+	while (len--)
+		hash = partial_name_hash(tolower(*name++), hash);
+	return end_name_hash(hash);
+}
+
+static inline void qstr_init(struct qstr *q, const char *name)
+{
+	q->name = name;
+	q->len = strlen(q->name);
+	q->hash = full_name_case_hash(0, q->name, q->len);
+}
+
+static inline int qstr_copy(const struct qstr *src, struct qstr *dest)
+{
+	dest->name = kstrdup(src->name, GFP_KERNEL);
+	dest->hash_len = src->hash_len;
+	return !!dest->name;
+}
+
+
+static appid_t __get_appid(const struct qstr *key)
+{
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = key->hash;
+	appid_t ret_id;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key)) {
+			ret_id = atomic_read(&hash_cur->value);
+			rcu_read_unlock();
+			return ret_id;
+		}
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+appid_t get_appid(const char *key)
+{
+	struct qstr q;
+
+	qstr_init(&q, key);
+	return __get_appid(&q);
+}
+
+static appid_t __get_ext_gid(const struct qstr *key)
+{
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = key->hash;
+	appid_t ret_id;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key)) {
+			ret_id = atomic_read(&hash_cur->value);
+			rcu_read_unlock();
+			return ret_id;
+		}
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+appid_t get_ext_gid(const char *key)
+{
+	struct qstr q;
+
+	qstr_init(&q, key);
+	return __get_ext_gid(&q);
+}
+
+static appid_t __is_excluded(const struct qstr *app_name, userid_t user)
+{
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = app_name->hash;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+		if (atomic_read(&hash_cur->value) == user &&
+				qstr_case_eq(app_name, &hash_cur->key)) {
+			rcu_read_unlock();
+			return 1;
+		}
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+appid_t is_excluded(const char *key, userid_t user)
+{
+	struct qstr q;
+	qstr_init(&q, key);
+	return __is_excluded(&q, user);
+}
+
+/* Kernel has already enforced everything we returned through
+ * derive_permissions_locked(), so this is used to lock down access
+ * even further, such as enforcing that apps hold sdcard_rw.
+ */
+int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name)
+{
+	struct qstr q_autorun = QSTR_LITERAL("autorun.inf");
+	struct qstr q__android_secure = QSTR_LITERAL(".android_secure");
+	struct qstr q_android_secure = QSTR_LITERAL("android_secure");
+
+	/* Always block security-sensitive files at root */
+	if (parent_node && SDCARDFS_I(parent_node)->data->perm == PERM_ROOT) {
+		if (qstr_case_eq(name, &q_autorun)
+			|| qstr_case_eq(name, &q__android_secure)
+			|| qstr_case_eq(name, &q_android_secure)) {
+			return 0;
+		}
+	}
+
+	/* Root always has access; access for any other UIDs should always
+	 * be controlled through packages.list.
+	 */
+	if (from_kuid(&init_user_ns, current_fsuid()) == 0)
+		return 1;
+
+	/* No extra permissions to enforce */
+	return 1;
+}
+
+static struct hashtable_entry *alloc_hashtable_entry(const struct qstr *key,
+		appid_t value)
+{
+	struct hashtable_entry *ret = kmem_cache_alloc(hashtable_entry_cachep,
+			GFP_KERNEL);
+	if (!ret)
+		return NULL;
+	INIT_HLIST_NODE(&ret->dlist);
+	INIT_HLIST_NODE(&ret->hlist);
+
+	if (!qstr_copy(key, &ret->key)) {
+		kmem_cache_free(hashtable_entry_cachep, ret);
+		return NULL;
+	}
+
+	atomic_set(&ret->value, value);
+	return ret;
+}
+
+static int insert_packagelist_appid_entry_locked(const struct qstr *key, appid_t value)
+{
+	struct hashtable_entry *hash_cur;
+	struct hashtable_entry *new_entry;
+	unsigned int hash = key->hash;
+
+	hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key)) {
+			atomic_set(&hash_cur->value, value);
+			return 0;
+		}
+	}
+	new_entry = alloc_hashtable_entry(key, value);
+	if (!new_entry)
+		return -ENOMEM;
+	hash_add_rcu(package_to_appid, &new_entry->hlist, hash);
+	return 0;
+}
+
+static int insert_ext_gid_entry_locked(const struct qstr *key, appid_t value)
+{
+	struct hashtable_entry *hash_cur;
+	struct hashtable_entry *new_entry;
+	unsigned int hash = key->hash;
+
+	/* An extension can only belong to one gid */
+	hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key))
+			return -EINVAL;
+	}
+	new_entry = alloc_hashtable_entry(key, value);
+	if (!new_entry)
+		return -ENOMEM;
+	hash_add_rcu(ext_to_groupid, &new_entry->hlist, hash);
+	return 0;
+}
+
+static int insert_userid_exclude_entry_locked(const struct qstr *key, userid_t value)
+{
+	struct hashtable_entry *hash_cur;
+	struct hashtable_entry *new_entry;
+	unsigned int hash = key->hash;
+
+	/* Only insert if not already present */
+	hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+		if (atomic_read(&hash_cur->value) == value &&
+				qstr_case_eq(key, &hash_cur->key))
+			return 0;
+	}
+	new_entry = alloc_hashtable_entry(key, value);
+	if (!new_entry)
+		return -ENOMEM;
+	hash_add_rcu(package_to_userid, &new_entry->hlist, hash);
+	return 0;
+}
+
+static void fixup_all_perms_name(const struct qstr *key)
+{
+	struct sdcardfs_sb_info *sbinfo;
+	struct limit_search limit = {
+		.flags = BY_NAME,
+		.name = QSTR_INIT(key->name, key->len),
+	};
+	list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
+		if (sbinfo_has_sdcard_magic(sbinfo))
+			fixup_perms_recursive(sbinfo->sb->s_root, &limit);
+	}
+}
+
+static void fixup_all_perms_name_userid(const struct qstr *key, userid_t userid)
+{
+	struct sdcardfs_sb_info *sbinfo;
+	struct limit_search limit = {
+		.flags = BY_NAME | BY_USERID,
+		.name = QSTR_INIT(key->name, key->len),
+		.userid = userid,
+	};
+	list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
+		if (sbinfo_has_sdcard_magic(sbinfo))
+			fixup_perms_recursive(sbinfo->sb->s_root, &limit);
+	}
+}
+
+static void fixup_all_perms_userid(userid_t userid)
+{
+	struct sdcardfs_sb_info *sbinfo;
+	struct limit_search limit = {
+		.flags = BY_USERID,
+		.userid = userid,
+	};
+	list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
+		if (sbinfo_has_sdcard_magic(sbinfo))
+			fixup_perms_recursive(sbinfo->sb->s_root, &limit);
+	}
+}
+
+static int insert_packagelist_entry(const struct qstr *key, appid_t value)
+{
+	int err;
+
+	mutex_lock(&sdcardfs_super_list_lock);
+	err = insert_packagelist_appid_entry_locked(key, value);
+	if (!err)
+		fixup_all_perms_name(key);
+	mutex_unlock(&sdcardfs_super_list_lock);
+
+	return err;
+}
+
+static int insert_ext_gid_entry(const struct qstr *key, appid_t value)
+{
+	int err;
+
+	mutex_lock(&sdcardfs_super_list_lock);
+	err = insert_ext_gid_entry_locked(key, value);
+	mutex_unlock(&sdcardfs_super_list_lock);
+
+	return err;
+}
+
+static int insert_userid_exclude_entry(const struct qstr *key, userid_t value)
+{
+	int err;
+
+	mutex_lock(&sdcardfs_super_list_lock);
+	err = insert_userid_exclude_entry_locked(key, value);
+	if (!err)
+		fixup_all_perms_name_userid(key, value);
+	mutex_unlock(&sdcardfs_super_list_lock);
+
+	return err;
+}
+
+static void free_hashtable_entry(struct hashtable_entry *entry)
+{
+	kfree(entry->key.name);
+	kmem_cache_free(hashtable_entry_cachep, entry);
+}
+
+static void remove_packagelist_entry_locked(const struct qstr *key)
+{
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = key->hash;
+	struct hlist_node *h_t;
+	HLIST_HEAD(free_list);
+
+	hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key)) {
+			hash_del_rcu(&hash_cur->hlist);
+			hlist_add_head(&hash_cur->dlist, &free_list);
+		}
+	}
+	hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key)) {
+			hash_del_rcu(&hash_cur->hlist);
+			hlist_add_head(&hash_cur->dlist, &free_list);
+			break;
+		}
+	}
+	synchronize_rcu();
+	hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist)
+		free_hashtable_entry(hash_cur);
+}
+
+static void remove_packagelist_entry(const struct qstr *key)
+{
+	mutex_lock(&sdcardfs_super_list_lock);
+	remove_packagelist_entry_locked(key);
+	fixup_all_perms_name(key);
+	mutex_unlock(&sdcardfs_super_list_lock);
+}
+
+static void remove_ext_gid_entry_locked(const struct qstr *key, gid_t group)
+{
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = key->hash;
+
+	hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key) && atomic_read(&hash_cur->value) == group) {
+			hash_del_rcu(&hash_cur->hlist);
+			synchronize_rcu();
+			free_hashtable_entry(hash_cur);
+			break;
+		}
+	}
+}
+
+static void remove_ext_gid_entry(const struct qstr *key, gid_t group)
+{
+	mutex_lock(&sdcardfs_super_list_lock);
+	remove_ext_gid_entry_locked(key, group);
+	mutex_unlock(&sdcardfs_super_list_lock);
+}
+
+static void remove_userid_all_entry_locked(userid_t userid)
+{
+	struct hashtable_entry *hash_cur;
+	struct hlist_node *h_t;
+	HLIST_HEAD(free_list);
+	int i;
+
+	hash_for_each_rcu(package_to_userid, i, hash_cur, hlist) {
+		if (atomic_read(&hash_cur->value) == userid) {
+			hash_del_rcu(&hash_cur->hlist);
+			hlist_add_head(&hash_cur->dlist, &free_list);
+		}
+	}
+	synchronize_rcu();
+	hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist) {
+		free_hashtable_entry(hash_cur);
+	}
+}
+
+static void remove_userid_all_entry(userid_t userid)
+{
+	mutex_lock(&sdcardfs_super_list_lock);
+	remove_userid_all_entry_locked(userid);
+	fixup_all_perms_userid(userid);
+	mutex_unlock(&sdcardfs_super_list_lock);
+}
+
+static void remove_userid_exclude_entry_locked(const struct qstr *key, userid_t userid)
+{
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = key->hash;
+
+	hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(key, &hash_cur->key) &&
+				atomic_read(&hash_cur->value) == userid) {
+			hash_del_rcu(&hash_cur->hlist);
+			synchronize_rcu();
+			free_hashtable_entry(hash_cur);
+			break;
+		}
+	}
+}
+
+static void remove_userid_exclude_entry(const struct qstr *key, userid_t userid)
+{
+	mutex_lock(&sdcardfs_super_list_lock);
+	remove_userid_exclude_entry_locked(key, userid);
+	fixup_all_perms_name_userid(key, userid);
+	mutex_unlock(&sdcardfs_super_list_lock);
+}
+
+static void packagelist_destroy(void)
+{
+	struct hashtable_entry *hash_cur;
+	struct hlist_node *h_t;
+	HLIST_HEAD(free_list);
+	int i;
+
+	mutex_lock(&sdcardfs_super_list_lock);
+	hash_for_each_rcu(package_to_appid, i, hash_cur, hlist) {
+		hash_del_rcu(&hash_cur->hlist);
+		hlist_add_head(&hash_cur->dlist, &free_list);
+	}
+	hash_for_each_rcu(package_to_userid, i, hash_cur, hlist) {
+		hash_del_rcu(&hash_cur->hlist);
+		hlist_add_head(&hash_cur->dlist, &free_list);
+	}
+	synchronize_rcu();
+	hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist)
+		free_hashtable_entry(hash_cur);
+	mutex_unlock(&sdcardfs_super_list_lock);
+	pr_info("sdcardfs: destroyed packagelist pkgld\n");
+}
+
+#define SDCARDFS_CONFIGFS_ATTR(_pfx, _name)			\
+static struct configfs_attribute _pfx##attr_##_name = {	\
+	.ca_name	= __stringify(_name),		\
+	.ca_mode	= S_IRUGO | S_IWUGO,		\
+	.ca_owner	= THIS_MODULE,			\
+	.show		= _pfx##_name##_show,		\
+	.store		= _pfx##_name##_store,		\
+}
+
+#define SDCARDFS_CONFIGFS_ATTR_RO(_pfx, _name)			\
+static struct configfs_attribute _pfx##attr_##_name = {	\
+	.ca_name	= __stringify(_name),		\
+	.ca_mode	= S_IRUGO,			\
+	.ca_owner	= THIS_MODULE,			\
+	.show		= _pfx##_name##_show,		\
+}
+
+#define SDCARDFS_CONFIGFS_ATTR_WO(_pfx, _name)			\
+static struct configfs_attribute _pfx##attr_##_name = {	\
+	.ca_name	= __stringify(_name),		\
+	.ca_mode	= S_IWUGO,			\
+	.ca_owner	= THIS_MODULE,			\
+	.store		= _pfx##_name##_store,		\
+}
+
+struct package_details {
+	struct config_item item;
+	struct qstr name;
+};
+
+static inline struct package_details *to_package_details(struct config_item *item)
+{
+	return item ? container_of(item, struct package_details, item) : NULL;
+}
+
+static ssize_t package_details_appid_show(struct config_item *item, char *page)
+{
+	return scnprintf(page, PAGE_SIZE, "%u\n", __get_appid(&to_package_details(item)->name));
+}
+
+static ssize_t package_details_appid_store(struct config_item *item,
+				       const char *page, size_t count)
+{
+	unsigned int tmp;
+	int ret;
+
+	ret = kstrtouint(page, 10, &tmp);
+	if (ret)
+		return ret;
+
+	ret = insert_packagelist_entry(&to_package_details(item)->name, tmp);
+
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static ssize_t package_details_excluded_userids_show(struct config_item *item,
+				      char *page)
+{
+	struct package_details *package_details = to_package_details(item);
+	struct hashtable_entry *hash_cur;
+	unsigned int hash = package_details->name.hash;
+	int count = 0;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
+		if (qstr_case_eq(&package_details->name, &hash_cur->key))
+			count += scnprintf(page + count, PAGE_SIZE - count,
+					"%d ", atomic_read(&hash_cur->value));
+	}
+	rcu_read_unlock();
+	if (count)
+		count--;
+	count += scnprintf(page + count, PAGE_SIZE - count, "\n");
+	return count;
+}
+
+static ssize_t package_details_excluded_userids_store(struct config_item *item,
+				       const char *page, size_t count)
+{
+	unsigned int tmp;
+	int ret;
+
+	ret = kstrtouint(page, 10, &tmp);
+	if (ret)
+		return ret;
+
+	ret = insert_userid_exclude_entry(&to_package_details(item)->name, tmp);
+
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static ssize_t package_details_clear_userid_store(struct config_item *item,
+				       const char *page, size_t count)
+{
+	unsigned int tmp;
+	int ret;
+
+	ret = kstrtouint(page, 10, &tmp);
+	if (ret)
+		return ret;
+	remove_userid_exclude_entry(&to_package_details(item)->name, tmp);
+	return count;
+}
+
+static void package_details_release(struct config_item *item)
+{
+	struct package_details *package_details = to_package_details(item);
+
+	pr_info("sdcardfs: removing %s\n", package_details->name.name);
+	remove_packagelist_entry(&package_details->name);
+	kfree(package_details->name.name);
+	kfree(package_details);
+}
+
+SDCARDFS_CONFIGFS_ATTR(package_details_, appid);
+SDCARDFS_CONFIGFS_ATTR(package_details_, excluded_userids);
+SDCARDFS_CONFIGFS_ATTR_WO(package_details_, clear_userid);
+
+static struct configfs_attribute *package_details_attrs[] = {
+	&package_details_attr_appid,
+	&package_details_attr_excluded_userids,
+	&package_details_attr_clear_userid,
+	NULL,
+};
+
+static struct configfs_item_operations package_details_item_ops = {
+	.release = package_details_release,
+};
+
+static struct config_item_type package_appid_type = {
+	.ct_item_ops	= &package_details_item_ops,
+	.ct_attrs	= package_details_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+struct extensions_value {
+	struct config_group group;
+	unsigned int num;
+};
+
+struct extension_details {
+	struct config_item item;
+	struct qstr name;
+	unsigned int num;
+};
+
+static inline struct extensions_value *to_extensions_value(struct config_item *item)
+{
+	return item ? container_of(to_config_group(item), struct extensions_value, group) : NULL;
+}
+
+static inline struct extension_details *to_extension_details(struct config_item *item)
+{
+	return item ? container_of(item, struct extension_details, item) : NULL;
+}
+
+static void extension_details_release(struct config_item *item)
+{
+	struct extension_details *extension_details = to_extension_details(item);
+
+	pr_info("sdcardfs: No longer mapping %s files to gid %d\n",
+			extension_details->name.name, extension_details->num);
+	remove_ext_gid_entry(&extension_details->name, extension_details->num);
+	kfree(extension_details->name.name);
+	kfree(extension_details);
+}
+
+static struct configfs_item_operations extension_details_item_ops = {
+	.release = extension_details_release,
+};
+
+static struct config_item_type extension_details_type = {
+	.ct_item_ops = &extension_details_item_ops,
+	.ct_owner = THIS_MODULE,
+};
+
+static struct config_item *extension_details_make_item(struct config_group *group, const char *name)
+{
+	struct extensions_value *extensions_value = to_extensions_value(&group->cg_item);
+	struct extension_details *extension_details = kzalloc(sizeof(struct extension_details), GFP_KERNEL);
+	const char *tmp;
+	int ret;
+
+	if (!extension_details)
+		return ERR_PTR(-ENOMEM);
+
+	tmp = kstrdup(name, GFP_KERNEL);
+	if (!tmp) {
+		kfree(extension_details);
+		return ERR_PTR(-ENOMEM);
+	}
+	qstr_init(&extension_details->name, tmp);
+	ret = insert_ext_gid_entry(&extension_details->name, extensions_value->num);
+
+	if (ret) {
+		kfree(extension_details->name.name);
+		kfree(extension_details);
+		return ERR_PTR(ret);
+	}
+	config_item_init_type_name(&extension_details->item, name, &extension_details_type);
+
+	return &extension_details->item;
+}
+
+static struct configfs_group_operations extensions_value_group_ops = {
+	.make_item = extension_details_make_item,
+};
+
+static struct config_item_type extensions_name_type = {
+	.ct_group_ops	= &extensions_value_group_ops,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct config_group *extensions_make_group(struct config_group *group, const char *name)
+{
+	struct extensions_value *extensions_value;
+	unsigned int tmp;
+	int ret;
+
+	extensions_value = kzalloc(sizeof(struct extensions_value), GFP_KERNEL);
+	if (!extensions_value)
+		return ERR_PTR(-ENOMEM);
+	ret = kstrtouint(name, 10, &tmp);
+	if (ret) {
+		kfree(extensions_value);
+		return ERR_PTR(ret);
+	}
+
+	extensions_value->num = tmp;
+	config_group_init_type_name(&extensions_value->group, name,
+						&extensions_name_type);
+	return &extensions_value->group;
+}
+
+static void extensions_drop_group(struct config_group *group, struct config_item *item)
+{
+	struct extensions_value *value = to_extensions_value(item);
+
+	pr_info("sdcardfs: No longer mapping any files to gid %d\n", value->num);
+	kfree(value);
+}
+
+static struct configfs_group_operations extensions_group_ops = {
+	.make_group	= extensions_make_group,
+	.drop_item	= extensions_drop_group,
+};
+
+static struct config_item_type extensions_type = {
+	.ct_group_ops	= &extensions_group_ops,
+	.ct_owner	= THIS_MODULE,
+};
+
+struct config_group extension_group = {
+	.cg_item = {
+		.ci_namebuf = "extensions",
+		.ci_type = &extensions_type,
+	},
+};
+
+static struct config_item *packages_make_item(struct config_group *group, const char *name)
+{
+	struct package_details *package_details;
+	const char *tmp;
+
+	package_details = kzalloc(sizeof(struct package_details), GFP_KERNEL);
+	if (!package_details)
+		return ERR_PTR(-ENOMEM);
+	tmp = kstrdup(name, GFP_KERNEL);
+	if (!tmp) {
+		kfree(package_details);
+		return ERR_PTR(-ENOMEM);
+	}
+	qstr_init(&package_details->name, tmp);
+	config_item_init_type_name(&package_details->item, name,
+						&package_appid_type);
+
+	return &package_details->item;
+}
+
+static ssize_t packages_list_show(struct config_item *item, char *page)
+{
+	struct hashtable_entry *hash_cur_app;
+	struct hashtable_entry *hash_cur_user;
+	int i;
+	int count = 0, written = 0;
+	const char errormsg[] = "<truncated>\n";
+	unsigned int hash;
+
+	rcu_read_lock();
+	hash_for_each_rcu(package_to_appid, i, hash_cur_app, hlist) {
+		written = scnprintf(page + count, PAGE_SIZE - sizeof(errormsg) - count, "%s %d\n",
+					hash_cur_app->key.name, atomic_read(&hash_cur_app->value));
+		hash = hash_cur_app->key.hash;
+		hash_for_each_possible_rcu(package_to_userid, hash_cur_user, hlist, hash) {
+			if (qstr_case_eq(&hash_cur_app->key, &hash_cur_user->key)) {
+				written += scnprintf(page + count + written - 1,
+					PAGE_SIZE - sizeof(errormsg) - count - written + 1,
+					" %d\n", atomic_read(&hash_cur_user->value)) - 1;
+			}
+		}
+		if (count + written == PAGE_SIZE - sizeof(errormsg) - 1) {
+			count += scnprintf(page + count, PAGE_SIZE - count, errormsg);
+			break;
+		}
+		count += written;
+	}
+	rcu_read_unlock();
+
+	return count;
+}
+
+static ssize_t packages_remove_userid_store(struct config_item *item,
+				       const char *page, size_t count)
+{
+	unsigned int tmp;
+	int ret;
+
+	ret = kstrtouint(page, 10, &tmp);
+	if (ret)
+		return ret;
+	remove_userid_all_entry(tmp);
+	return count;
+}
+
+static struct configfs_attribute packages_attr_packages_gid_list = {
+	.ca_name	= "packages_gid.list",
+	.ca_mode	= S_IRUGO,
+	.ca_owner	= THIS_MODULE,
+	.show		= packages_list_show,
+};
+
+SDCARDFS_CONFIGFS_ATTR_WO(packages_, remove_userid);
+
+static struct configfs_attribute *packages_attrs[] = {
+	&packages_attr_packages_gid_list,
+	&packages_attr_remove_userid,
+	NULL,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations packages_group_ops = {
+	.make_item	= packages_make_item,
+};
+
+static struct config_item_type packages_type = {
+	.ct_group_ops	= &packages_group_ops,
+	.ct_attrs	= packages_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+struct config_group *sd_default_groups[] = {
+	&extension_group,
+	NULL,
+};
+
+static struct configfs_subsystem sdcardfs_packages = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "sdcardfs",
+			.ci_type = &packages_type,
+		},
+	},
+};
+
+static int configfs_sdcardfs_init(void)
+{
+	int ret, i;
+	struct configfs_subsystem *subsys = &sdcardfs_packages;
+
+	config_group_init(&subsys->su_group);
+	for (i = 0; sd_default_groups[i]; i++) {
+		config_group_init(sd_default_groups[i]);
+		configfs_add_default_group(sd_default_groups[i], &subsys->su_group);
+	}
+	mutex_init(&subsys->su_mutex);
+	ret = configfs_register_subsystem(subsys);
+	if (ret) {
+		pr_err("Error %d while registering subsystem %s\n",
+		       ret,
+		       subsys->su_group.cg_item.ci_namebuf);
+	}
+	return ret;
+}
+
+static void configfs_sdcardfs_exit(void)
+{
+	configfs_unregister_subsystem(&sdcardfs_packages);
+}
+
+int packagelist_init(void)
+{
+	hashtable_entry_cachep =
+		kmem_cache_create("packagelist_hashtable_entry",
+					sizeof(struct hashtable_entry), 0, 0, NULL);
+	if (!hashtable_entry_cachep) {
+		pr_err("sdcardfs: failed creating pkgl_hashtable entry slab cache\n");
+		return -ENOMEM;
+	}
+
+	configfs_sdcardfs_init();
+	return 0;
+}
+
+void packagelist_exit(void)
+{
+	configfs_sdcardfs_exit();
+	packagelist_destroy();
+	kmem_cache_destroy(hashtable_entry_cachep);
+}
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
new file mode 100644
index 0000000..c6f63de
--- /dev/null
+++ b/fs/sdcardfs/sdcardfs.h
@@ -0,0 +1,638 @@
+/*
+ * fs/sdcardfs/sdcardfs.h
+ *
+ * The sdcardfs v2.0
+ *   This file system replaces the sdcard daemon on Android
+ *   On version 2.0, some of the daemon functions have been ported
+ *   to support the multi-user concepts of Android 4.4
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#ifndef _SDCARDFS_H_
+#define _SDCARDFS_H_
+
+#include <linux/dcache.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/aio.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+#include <linux/fs_stack.h>
+#include <linux/magic.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/security.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include "multiuser.h"
+
+/* the file system name */
+#define SDCARDFS_NAME "sdcardfs"
+
+/* sdcardfs root inode number */
+#define SDCARDFS_ROOT_INO     1
+
+/* useful for tracking code reachability */
+#define UDBG pr_default("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__)
+
+#define SDCARDFS_DIRENT_SIZE 256
+
+/* temporary static uid settings for development */
+#define AID_ROOT             0	/* uid for accessing /mnt/sdcard & extSdcard */
+#define AID_MEDIA_RW      1023	/* internal media storage write access */
+
+#define AID_SDCARD_RW     1015	/* external storage write access */
+#define AID_SDCARD_R      1028	/* external storage read access */
+#define AID_SDCARD_PICS   1033	/* external storage photos access */
+#define AID_SDCARD_AV     1034	/* external storage audio/video access */
+#define AID_SDCARD_ALL    1035	/* access all users external storage */
+#define AID_MEDIA_OBB     1059  /* obb files */
+
+#define AID_SDCARD_IMAGE  1057
+
+#define AID_PACKAGE_INFO  1027
+
+
+/*
+ * Permissions are handled by our permission function.
+ * We don't want anyone who happens to look at our inode value to prematurely
+ * block access, so store more permissive values. These are probably never
+ * used.
+ */
+#define fixup_tmp_permissions(x)	\
+	do {						\
+		(x)->i_uid = make_kuid(&init_user_ns,	\
+				SDCARDFS_I(x)->data->d_uid);	\
+		(x)->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW);	\
+		(x)->i_mode = ((x)->i_mode & S_IFMT) | 0775;\
+	} while (0)
+
+/* Android 5.0 support */
+
+/* Permission mode for a specific node. Controls how file permissions
+ * are derived for children nodes.
+ */
+typedef enum {
+	/* Nothing special; this node should just inherit from its parent. */
+	PERM_INHERIT,
+	/* This node is one level above a normal root; used for legacy layouts
+	 * which use the first level to represent user_id.
+	 */
+	PERM_PRE_ROOT,
+	/* This node is "/" */
+	PERM_ROOT,
+	/* This node is "/Android" */
+	PERM_ANDROID,
+	/* This node is "/Android/data" */
+	PERM_ANDROID_DATA,
+	/* This node is "/Android/obb" */
+	PERM_ANDROID_OBB,
+	/* This node is "/Android/media" */
+	PERM_ANDROID_MEDIA,
+	/* This node is "/Android/[data|media|obb]/[package]" */
+	PERM_ANDROID_PACKAGE,
+	/* This node is "/Android/[data|media|obb]/[package]/cache" */
+	PERM_ANDROID_PACKAGE_CACHE,
+} perm_t;
+
+struct sdcardfs_sb_info;
+struct sdcardfs_mount_options;
+struct sdcardfs_inode_info;
+struct sdcardfs_inode_data;
+
+/* Do not directly use this function. Use OVERRIDE_CRED() instead. */
+const struct cred *override_fsids(struct sdcardfs_sb_info *sbi,
+			struct sdcardfs_inode_data *data);
+/* Do not directly use this function, use REVERT_CRED() instead. */
+void revert_fsids(const struct cred *old_cred);
+
+/* operations vectors defined in specific files */
+extern const struct file_operations sdcardfs_main_fops;
+extern const struct file_operations sdcardfs_dir_fops;
+extern const struct inode_operations sdcardfs_main_iops;
+extern const struct inode_operations sdcardfs_dir_iops;
+extern const struct inode_operations sdcardfs_symlink_iops;
+extern const struct super_operations sdcardfs_sops;
+extern const struct dentry_operations sdcardfs_ci_dops;
+extern const struct address_space_operations sdcardfs_aops, sdcardfs_dummy_aops;
+extern const struct vm_operations_struct sdcardfs_vm_ops;
+
+extern int sdcardfs_init_inode_cache(void);
+extern void sdcardfs_destroy_inode_cache(void);
+extern int sdcardfs_init_dentry_cache(void);
+extern void sdcardfs_destroy_dentry_cache(void);
+extern int new_dentry_private_data(struct dentry *dentry);
+extern void free_dentry_private_data(struct dentry *dentry);
+extern struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
+				unsigned int flags);
+extern struct inode *sdcardfs_iget(struct super_block *sb,
+				 struct inode *lower_inode, userid_t id);
+extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
+			    struct path *lower_path, userid_t id);
+
+/* file private data */
+struct sdcardfs_file_info {
+	struct file *lower_file;
+	const struct vm_operations_struct *lower_vm_ops;
+};
+
+struct sdcardfs_inode_data {
+	struct kref refcount;
+	bool abandoned;
+
+	perm_t perm;
+	userid_t userid;
+	uid_t d_uid;
+	bool under_android;
+	bool under_cache;
+	bool under_obb;
+};
+
+/* sdcardfs inode data in memory */
+struct sdcardfs_inode_info {
+	struct inode *lower_inode;
+	/* state derived based on current position in hierarchy */
+	struct sdcardfs_inode_data *data;
+
+	/* top folder for ownership */
+	struct sdcardfs_inode_data *top_data;
+
+	struct inode vfs_inode;
+};
+
+
+/* sdcardfs dentry data in memory */
+struct sdcardfs_dentry_info {
+	spinlock_t lock;	/* protects lower_path */
+	struct path lower_path;
+	struct path orig_path;
+};
+
+struct sdcardfs_mount_options {
+	uid_t fs_low_uid;
+	gid_t fs_low_gid;
+	userid_t fs_user_id;
+	bool multiuser;
+	unsigned int reserved_mb;
+};
+
+struct sdcardfs_vfsmount_options {
+	gid_t gid;
+	mode_t mask;
+};
+
+extern int parse_options_remount(struct super_block *sb, char *options, int silent,
+		struct sdcardfs_vfsmount_options *vfsopts);
+
+/* sdcardfs super-block data in memory */
+struct sdcardfs_sb_info {
+	struct super_block *sb;
+	struct super_block *lower_sb;
+	/* derived perm policy : some of options have been added
+	 * to sdcardfs_mount_options (Android 4.4 support)
+	 */
+	struct sdcardfs_mount_options options;
+	spinlock_t lock;	/* protects obbpath */
+	char *obbpath_s;
+	struct path obbpath;
+	void *pkgl_id;
+	struct list_head list;
+};
+
+/*
+ * inode to private data
+ *
+ * Since we use containers and the struct inode is _inside_ the
+ * sdcardfs_inode_info structure, SDCARDFS_I will always (given a non-NULL
+ * inode pointer), return a valid non-NULL pointer.
+ */
+static inline struct sdcardfs_inode_info *SDCARDFS_I(const struct inode *inode)
+{
+	return container_of(inode, struct sdcardfs_inode_info, vfs_inode);
+}
+
+/* dentry to private data */
+#define SDCARDFS_D(dent) ((struct sdcardfs_dentry_info *)(dent)->d_fsdata)
+
+/* superblock to private data */
+#define SDCARDFS_SB(super) ((struct sdcardfs_sb_info *)(super)->s_fs_info)
+
+/* file to private Data */
+#define SDCARDFS_F(file) ((struct sdcardfs_file_info *)((file)->private_data))
+
+/* file to lower file */
+static inline struct file *sdcardfs_lower_file(const struct file *f)
+{
+	return SDCARDFS_F(f)->lower_file;
+}
+
+static inline void sdcardfs_set_lower_file(struct file *f, struct file *val)
+{
+	SDCARDFS_F(f)->lower_file = val;
+}
+
+/* inode to lower inode. */
+static inline struct inode *sdcardfs_lower_inode(const struct inode *i)
+{
+	return SDCARDFS_I(i)->lower_inode;
+}
+
+static inline void sdcardfs_set_lower_inode(struct inode *i, struct inode *val)
+{
+	SDCARDFS_I(i)->lower_inode = val;
+}
+
+/* superblock to lower superblock */
+static inline struct super_block *sdcardfs_lower_super(
+	const struct super_block *sb)
+{
+	return SDCARDFS_SB(sb)->lower_sb;
+}
+
+static inline void sdcardfs_set_lower_super(struct super_block *sb,
+					  struct super_block *val)
+{
+	SDCARDFS_SB(sb)->lower_sb = val;
+}
+
+/* path based (dentry/mnt) macros */
+static inline void pathcpy(struct path *dst, const struct path *src)
+{
+	dst->dentry = src->dentry;
+	dst->mnt = src->mnt;
+}
+
+/* sdcardfs_get_pname functions calls path_get()
+ * therefore, the caller must call "proper" path_put functions
+ */
+#define SDCARDFS_DENT_FUNC(pname) \
+static inline void sdcardfs_get_##pname(const struct dentry *dent, \
+					struct path *pname) \
+{ \
+	spin_lock(&SDCARDFS_D(dent)->lock); \
+	pathcpy(pname, &SDCARDFS_D(dent)->pname); \
+	path_get(pname); \
+	spin_unlock(&SDCARDFS_D(dent)->lock); \
+	return; \
+} \
+static inline void sdcardfs_put_##pname(const struct dentry *dent, \
+					struct path *pname) \
+{ \
+	path_put(pname); \
+	return; \
+} \
+static inline void sdcardfs_set_##pname(const struct dentry *dent, \
+					struct path *pname) \
+{ \
+	spin_lock(&SDCARDFS_D(dent)->lock); \
+	pathcpy(&SDCARDFS_D(dent)->pname, pname); \
+	spin_unlock(&SDCARDFS_D(dent)->lock); \
+	return; \
+} \
+static inline void sdcardfs_reset_##pname(const struct dentry *dent) \
+{ \
+	spin_lock(&SDCARDFS_D(dent)->lock); \
+	SDCARDFS_D(dent)->pname.dentry = NULL; \
+	SDCARDFS_D(dent)->pname.mnt = NULL; \
+	spin_unlock(&SDCARDFS_D(dent)->lock); \
+	return; \
+} \
+static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \
+{ \
+	struct path pname; \
+	spin_lock(&SDCARDFS_D(dent)->lock); \
+	if (SDCARDFS_D(dent)->pname.dentry) { \
+		pathcpy(&pname, &SDCARDFS_D(dent)->pname); \
+		SDCARDFS_D(dent)->pname.dentry = NULL; \
+		SDCARDFS_D(dent)->pname.mnt = NULL; \
+		spin_unlock(&SDCARDFS_D(dent)->lock); \
+		path_put(&pname); \
+	} else \
+		spin_unlock(&SDCARDFS_D(dent)->lock); \
+	return; \
+}
+
+SDCARDFS_DENT_FUNC(lower_path)
+SDCARDFS_DENT_FUNC(orig_path)
+
+static inline bool sbinfo_has_sdcard_magic(struct sdcardfs_sb_info *sbinfo)
+{
+	return sbinfo && sbinfo->sb
+			&& sbinfo->sb->s_magic == SDCARDFS_SUPER_MAGIC;
+}
+
+static inline struct sdcardfs_inode_data *data_get(
+		struct sdcardfs_inode_data *data)
+{
+	if (data)
+		kref_get(&data->refcount);
+	return data;
+}
+
+static inline struct sdcardfs_inode_data *top_data_get(
+		struct sdcardfs_inode_info *info)
+{
+	return data_get(info->top_data);
+}
+
+extern void data_release(struct kref *ref);
+
+static inline void data_put(struct sdcardfs_inode_data *data)
+{
+	kref_put(&data->refcount, data_release);
+}
+
+static inline void release_own_data(struct sdcardfs_inode_info *info)
+{
+	/*
+	 * This happens exactly once per inode. At this point, the inode that
+	 * originally held this data is about to be freed, and all references
+	 * to it are held as a top value, and will likely be released soon.
+	 */
+	info->data->abandoned = true;
+	data_put(info->data);
+}
+
+static inline void set_top(struct sdcardfs_inode_info *info,
+			struct sdcardfs_inode_data *top)
+{
+	struct sdcardfs_inode_data *old_top = info->top_data;
+
+	if (top)
+		data_get(top);
+	info->top_data = top;
+	if (old_top)
+		data_put(old_top);
+}
+
+static inline int get_gid(struct vfsmount *mnt,
+		struct sdcardfs_inode_data *data)
+{
+	struct sdcardfs_vfsmount_options *opts = mnt->data;
+
+	if (opts->gid == AID_SDCARD_RW)
+		/* As an optimization, certain trusted system components only run
+		 * as owner but operate across all users. Since we're now handing
+		 * out the sdcard_rw GID only to trusted apps, we're okay relaxing
+		 * the user boundary enforcement for the default view. The UIDs
+		 * assigned to app directories are still multiuser aware.
+		 */
+		return AID_SDCARD_RW;
+	else
+		return multiuser_get_uid(data->userid, opts->gid);
+}
+
+static inline int get_mode(struct vfsmount *mnt,
+		struct sdcardfs_inode_info *info,
+		struct sdcardfs_inode_data *data)
+{
+	int owner_mode;
+	int filtered_mode;
+	struct sdcardfs_vfsmount_options *opts = mnt->data;
+	int visible_mode = 0775 & ~opts->mask;
+
+
+	if (data->perm == PERM_PRE_ROOT) {
+		/* Top of multi-user view should always be visible to ensure
+		* secondary users can traverse inside.
+		*/
+		visible_mode = 0711;
+	} else if (data->under_android) {
+		/* Block "other" access to Android directories, since only apps
+		* belonging to a specific user should be in there; we still
+		* leave +x open for the default view.
+		*/
+		if (opts->gid == AID_SDCARD_RW)
+			visible_mode = visible_mode & ~0006;
+		else
+			visible_mode = visible_mode & ~0007;
+	}
+	owner_mode = info->lower_inode->i_mode & 0700;
+	filtered_mode = visible_mode & (owner_mode | (owner_mode >> 3) | (owner_mode >> 6));
+	return filtered_mode;
+}
+
+static inline int has_graft_path(const struct dentry *dent)
+{
+	int ret = 0;
+
+	spin_lock(&SDCARDFS_D(dent)->lock);
+	if (SDCARDFS_D(dent)->orig_path.dentry != NULL)
+		ret = 1;
+	spin_unlock(&SDCARDFS_D(dent)->lock);
+
+	return ret;
+}
+
+static inline void sdcardfs_get_real_lower(const struct dentry *dent,
+						struct path *real_lower)
+{
+	/* in case of a local obb dentry
+	 * the orig_path should be returned
+	 */
+	if (has_graft_path(dent))
+		sdcardfs_get_orig_path(dent, real_lower);
+	else
+		sdcardfs_get_lower_path(dent, real_lower);
+}
+
+static inline void sdcardfs_put_real_lower(const struct dentry *dent,
+						struct path *real_lower)
+{
+	if (has_graft_path(dent))
+		sdcardfs_put_orig_path(dent, real_lower);
+	else
+		sdcardfs_put_lower_path(dent, real_lower);
+}
+
+extern struct mutex sdcardfs_super_list_lock;
+extern struct list_head sdcardfs_super_list;
+
+/* for packagelist.c */
+extern appid_t get_appid(const char *app_name);
+extern appid_t get_ext_gid(const char *app_name);
+extern appid_t is_excluded(const char *app_name, userid_t userid);
+extern int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name);
+extern int packagelist_init(void);
+extern void packagelist_exit(void);
+
+/* for derived_perm.c */
+#define BY_NAME		(1 << 0)
+#define BY_USERID	(1 << 1)
+struct limit_search {
+	unsigned int flags;
+	struct qstr name;
+	userid_t userid;
+};
+
+extern void setup_derived_state(struct inode *inode, perm_t perm,
+		userid_t userid, uid_t uid, bool under_android,
+		struct sdcardfs_inode_data *top);
+extern void get_derived_permission(struct dentry *parent, struct dentry *dentry);
+extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name);
+extern void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit);
+
+extern void update_derived_permission_lock(struct dentry *dentry);
+void fixup_lower_ownership(struct dentry *dentry, const char *name);
+extern int need_graft_path(struct dentry *dentry);
+extern int is_base_obbpath(struct dentry *dentry);
+extern int is_obbpath_invalid(struct dentry *dentry);
+extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path);
+
+/* locking helpers */
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+	struct dentry *dir = dget_parent(dentry);
+
+	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+	return dir;
+}
+
+static inline void unlock_dir(struct dentry *dir)
+{
+	inode_unlock(d_inode(dir));
+	dput(dir);
+}
+
+static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t mode)
+{
+	int err;
+	struct dentry *dent;
+	struct iattr attrs;
+	struct path parent;
+
+	dent = kern_path_locked(path_s, &parent);
+	if (IS_ERR(dent)) {
+		err = PTR_ERR(dent);
+		if (err == -EEXIST)
+			err = 0;
+		goto out_unlock;
+	}
+
+	err = vfs_mkdir2(parent.mnt, d_inode(parent.dentry), dent, mode);
+	if (err) {
+		if (err == -EEXIST)
+			err = 0;
+		goto out_dput;
+	}
+
+	attrs.ia_uid = make_kuid(&init_user_ns, uid);
+	attrs.ia_gid = make_kgid(&init_user_ns, gid);
+	attrs.ia_valid = ATTR_UID | ATTR_GID;
+	inode_lock(d_inode(dent));
+	notify_change2(parent.mnt, dent, &attrs, NULL);
+	inode_unlock(d_inode(dent));
+
+out_dput:
+	dput(dent);
+
+out_unlock:
+	/* parent dentry locked by lookup_create */
+	inode_unlock(d_inode(parent.dentry));
+	path_put(&parent);
+	return err;
+}
+
+/*
+ * Return 1, if a disk has enough free space, otherwise 0.
+ * We assume that any files can not be overwritten.
+ */
+static inline int check_min_free_space(struct dentry *dentry, size_t size, int dir)
+{
+	int err;
+	struct path lower_path;
+	struct kstatfs statfs;
+	u64 avail;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+	if (sbi->options.reserved_mb) {
+		/* Get fs stat of lower filesystem. */
+		sdcardfs_get_lower_path(dentry, &lower_path);
+		err = vfs_statfs(&lower_path, &statfs);
+		sdcardfs_put_lower_path(dentry, &lower_path);
+
+		if (unlikely(err))
+			return 0;
+
+		/* Invalid statfs informations. */
+		if (unlikely(statfs.f_bsize == 0))
+			return 0;
+
+		/* if you are checking directory, set size to f_bsize. */
+		if (unlikely(dir))
+			size = statfs.f_bsize;
+
+		/* available size */
+		avail = statfs.f_bavail * statfs.f_bsize;
+
+		/* not enough space */
+		if ((u64)size > avail)
+			return 0;
+
+		/* enough space */
+		if ((avail - size) > (sbi->options.reserved_mb * 1024 * 1024))
+			return 1;
+
+		return 0;
+	} else
+		return 1;
+}
+
+/*
+ * Copies attrs and maintains sdcardfs managed attrs
+ * Since our permission check handles all special permissions, set those to be open
+ */
+static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct inode *src)
+{
+	dest->i_mode = (src->i_mode  & S_IFMT) | S_IRWXU | S_IRWXG |
+			S_IROTH | S_IXOTH; /* 0775 */
+	dest->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(dest)->data->d_uid);
+	dest->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW);
+	dest->i_rdev = src->i_rdev;
+	dest->i_atime = src->i_atime;
+	dest->i_mtime = src->i_mtime;
+	dest->i_ctime = src->i_ctime;
+	dest->i_blkbits = src->i_blkbits;
+	dest->i_flags = src->i_flags;
+	set_nlink(dest, src->i_nlink);
+}
+
+static inline bool str_case_eq(const char *s1, const char *s2)
+{
+	return !strcasecmp(s1, s2);
+}
+
+static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len)
+{
+	return !strncasecmp(s1, s2, len);
+}
+
+static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2)
+{
+	return q1->len == q2->len && str_case_eq(q1->name, q2->name);
+}
+
+#define QSTR_LITERAL(string) QSTR_INIT(string, sizeof(string)-1)
+
+#endif	/* not _SDCARDFS_H_ */
diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c
new file mode 100644
index 0000000..7f4539b
--- /dev/null
+++ b/fs/sdcardfs/super.c
@@ -0,0 +1,324 @@
+/*
+ * fs/sdcardfs/super.c
+ *
+ * Copyright (c) 2013 Samsung Electronics Co. Ltd
+ *   Authors: Daeho Jeong, Woojoong Lee, Seunghwan Hyun,
+ *               Sunghwan Yun, Sungjong Seo
+ *
+ * This program has been developed as a stackable file system based on
+ * the WrapFS which written by
+ *
+ * Copyright (c) 1998-2011 Erez Zadok
+ * Copyright (c) 2009     Shrikar Archak
+ * Copyright (c) 2003-2011 Stony Brook University
+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
+ *
+ * This file is dual licensed.  It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#include "sdcardfs.h"
+
+/*
+ * The inode cache is used with alloc_inode for both our inode info and the
+ * vfs inode.
+ */
+static struct kmem_cache *sdcardfs_inode_cachep;
+
+/*
+ * To support the top references, we must track some data separately.
+ * An sdcardfs_inode_info always has a reference to its data, and once set up,
+ * also has a reference to its top. The top may be itself, in which case it
+ * holds two references to its data. When top is changed, it takes a ref to the
+ * new data and then drops the ref to the old data.
+ */
+static struct kmem_cache *sdcardfs_inode_data_cachep;
+
+void data_release(struct kref *ref)
+{
+	struct sdcardfs_inode_data *data =
+		container_of(ref, struct sdcardfs_inode_data, refcount);
+
+	kmem_cache_free(sdcardfs_inode_data_cachep, data);
+}
+
+/* final actions when unmounting a file system */
+static void sdcardfs_put_super(struct super_block *sb)
+{
+	struct sdcardfs_sb_info *spd;
+	struct super_block *s;
+
+	spd = SDCARDFS_SB(sb);
+	if (!spd)
+		return;
+
+	if (spd->obbpath_s) {
+		kfree(spd->obbpath_s);
+		path_put(&spd->obbpath);
+	}
+
+	/* decrement lower super references */
+	s = sdcardfs_lower_super(sb);
+	sdcardfs_set_lower_super(sb, NULL);
+	atomic_dec(&s->s_active);
+
+	kfree(spd);
+	sb->s_fs_info = NULL;
+}
+
+static int sdcardfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	int err;
+	struct path lower_path;
+	u32 min_blocks;
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
+
+	sdcardfs_get_lower_path(dentry, &lower_path);
+	err = vfs_statfs(&lower_path, buf);
+	sdcardfs_put_lower_path(dentry, &lower_path);
+
+	if (sbi->options.reserved_mb) {
+		/* Invalid statfs informations. */
+		if (buf->f_bsize == 0) {
+			pr_err("Returned block size is zero.\n");
+			return -EINVAL;
+		}
+
+		min_blocks = ((sbi->options.reserved_mb * 1024 * 1024)/buf->f_bsize);
+		buf->f_blocks -= min_blocks;
+
+		if (buf->f_bavail > min_blocks)
+			buf->f_bavail -= min_blocks;
+		else
+			buf->f_bavail = 0;
+
+		/* Make reserved blocks invisiable to media storage */
+		buf->f_bfree = buf->f_bavail;
+	}
+
+	/* set return buf to our f/s to avoid confusing user-level utils */
+	buf->f_type = SDCARDFS_SUPER_MAGIC;
+
+	return err;
+}
+
+/*
+ * @flags: numeric mount options
+ * @options: mount options string
+ */
+static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options)
+{
+	int err = 0;
+
+	/*
+	 * The VFS will take care of "ro" and "rw" flags among others.  We
+	 * can safely accept a few flags (RDONLY, MANDLOCK), and honor
+	 * SILENT, but anything else left over is an error.
+	 */
+	if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT)) != 0) {
+		pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags);
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+/*
+ * @mnt: mount point we are remounting
+ * @sb: superblock we are remounting
+ * @flags: numeric mount options
+ * @options: mount options string
+ */
+static int sdcardfs_remount_fs2(struct vfsmount *mnt, struct super_block *sb,
+						int *flags, char *options)
+{
+	int err = 0;
+
+	/*
+	 * The VFS will take care of "ro" and "rw" flags among others.  We
+	 * can safely accept a few flags (RDONLY, MANDLOCK), and honor
+	 * SILENT, but anything else left over is an error.
+	 */
+	if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT | MS_REMOUNT)) != 0) {
+		pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags);
+		err = -EINVAL;
+	}
+	pr_info("Remount options were %s for vfsmnt %p.\n", options, mnt);
+	err = parse_options_remount(sb, options, *flags & ~MS_SILENT, mnt->data);
+
+
+	return err;
+}
+
+static void *sdcardfs_clone_mnt_data(void *data)
+{
+	struct sdcardfs_vfsmount_options *opt = kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL);
+	struct sdcardfs_vfsmount_options *old = data;
+
+	if (!opt)
+		return NULL;
+	opt->gid = old->gid;
+	opt->mask = old->mask;
+	return opt;
+}
+
+static void sdcardfs_copy_mnt_data(void *data, void *newdata)
+{
+	struct sdcardfs_vfsmount_options *old = data;
+	struct sdcardfs_vfsmount_options *new = newdata;
+
+	old->gid = new->gid;
+	old->mask = new->mask;
+}
+
+/*
+ * Called by iput() when the inode reference count reached zero
+ * and the inode is not hashed anywhere.  Used to clear anything
+ * that needs to be, before the inode is completely destroyed and put
+ * on the inode free list.
+ */
+static void sdcardfs_evict_inode(struct inode *inode)
+{
+	struct inode *lower_inode;
+
+	truncate_inode_pages(&inode->i_data, 0);
+	set_top(SDCARDFS_I(inode), NULL);
+	clear_inode(inode);
+	/*
+	 * Decrement a reference to a lower_inode, which was incremented
+	 * by our read_inode when it was created initially.
+	 */
+	lower_inode = sdcardfs_lower_inode(inode);
+	sdcardfs_set_lower_inode(inode, NULL);
+	iput(lower_inode);
+}
+
+static struct inode *sdcardfs_alloc_inode(struct super_block *sb)
+{
+	struct sdcardfs_inode_info *i;
+	struct sdcardfs_inode_data *d;
+
+	i = kmem_cache_alloc(sdcardfs_inode_cachep, GFP_KERNEL);
+	if (!i)
+		return NULL;
+
+	/* memset everything up to the inode to 0 */
+	memset(i, 0, offsetof(struct sdcardfs_inode_info, vfs_inode));
+
+	d = kmem_cache_alloc(sdcardfs_inode_data_cachep,
+					GFP_KERNEL | __GFP_ZERO);
+	if (!d) {
+		kmem_cache_free(sdcardfs_inode_cachep, i);
+		return NULL;
+	}
+
+	i->data = d;
+	kref_init(&d->refcount);
+
+	i->vfs_inode.i_version = 1;
+	return &i->vfs_inode;
+}
+
+static void i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+
+	release_own_data(SDCARDFS_I(inode));
+	kmem_cache_free(sdcardfs_inode_cachep, SDCARDFS_I(inode));
+}
+
+static void sdcardfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, i_callback);
+}
+
+/* sdcardfs inode cache constructor */
+static void init_once(void *obj)
+{
+	struct sdcardfs_inode_info *i = obj;
+
+	inode_init_once(&i->vfs_inode);
+}
+
+int sdcardfs_init_inode_cache(void)
+{
+	sdcardfs_inode_cachep =
+		kmem_cache_create("sdcardfs_inode_cache",
+				  sizeof(struct sdcardfs_inode_info), 0,
+				  SLAB_RECLAIM_ACCOUNT, init_once);
+
+	if (!sdcardfs_inode_cachep)
+		return -ENOMEM;
+
+	sdcardfs_inode_data_cachep =
+		kmem_cache_create("sdcardfs_inode_data_cache",
+				  sizeof(struct sdcardfs_inode_data), 0,
+				  SLAB_RECLAIM_ACCOUNT, NULL);
+	if (!sdcardfs_inode_data_cachep) {
+		kmem_cache_destroy(sdcardfs_inode_cachep);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/* sdcardfs inode cache destructor */
+void sdcardfs_destroy_inode_cache(void)
+{
+	kmem_cache_destroy(sdcardfs_inode_data_cachep);
+	kmem_cache_destroy(sdcardfs_inode_cachep);
+}
+
+/*
+ * Used only in nfs, to kill any pending RPC tasks, so that subsequent
+ * code can actually succeed and won't leave tasks that need handling.
+ */
+static void sdcardfs_umount_begin(struct super_block *sb)
+{
+	struct super_block *lower_sb;
+
+	lower_sb = sdcardfs_lower_super(sb);
+	if (lower_sb && lower_sb->s_op && lower_sb->s_op->umount_begin)
+		lower_sb->s_op->umount_begin(lower_sb);
+}
+
+static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m,
+			struct dentry *root)
+{
+	struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb);
+	struct sdcardfs_mount_options *opts = &sbi->options;
+	struct sdcardfs_vfsmount_options *vfsopts = mnt->data;
+
+	if (opts->fs_low_uid != 0)
+		seq_printf(m, ",fsuid=%u", opts->fs_low_uid);
+	if (opts->fs_low_gid != 0)
+		seq_printf(m, ",fsgid=%u", opts->fs_low_gid);
+	if (vfsopts->gid != 0)
+		seq_printf(m, ",gid=%u", vfsopts->gid);
+	if (opts->multiuser)
+		seq_puts(m, ",multiuser");
+	if (vfsopts->mask)
+		seq_printf(m, ",mask=%u", vfsopts->mask);
+	if (opts->fs_user_id)
+		seq_printf(m, ",userid=%u", opts->fs_user_id);
+	if (opts->reserved_mb != 0)
+		seq_printf(m, ",reserved=%uMB", opts->reserved_mb);
+
+	return 0;
+};
+
+const struct super_operations sdcardfs_sops = {
+	.put_super	= sdcardfs_put_super,
+	.statfs		= sdcardfs_statfs,
+	.remount_fs	= sdcardfs_remount_fs,
+	.remount_fs2	= sdcardfs_remount_fs2,
+	.clone_mnt_data	= sdcardfs_clone_mnt_data,
+	.copy_mnt_data	= sdcardfs_copy_mnt_data,
+	.evict_inode	= sdcardfs_evict_inode,
+	.umount_begin	= sdcardfs_umount_begin,
+	.show_options2	= sdcardfs_show_options,
+	.alloc_inode	= sdcardfs_alloc_inode,
+	.destroy_inode	= sdcardfs_destroy_inode,
+	.drop_inode	= generic_delete_inode,
+};
diff --git a/fs/super.c b/fs/super.c
index abe2541..de136ed 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -771,7 +771,8 @@
 }
 
 /**
- *	do_remount_sb - asks filesystem to change mount options.
+ *	do_remount_sb2 - asks filesystem to change mount options.
+ *	@mnt:   mount we are looking at
  *	@sb:	superblock in question
  *	@flags:	numeric part of options
  *	@data:	the rest of options
@@ -779,7 +780,7 @@
  *
  *	Alters the mount options of a mounted file system.
  */
-int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
+int do_remount_sb2(struct vfsmount *mnt, struct super_block *sb, int flags, void *data, int force)
 {
 	int retval;
 	int remount_ro;
@@ -821,7 +822,16 @@
 		}
 	}
 
-	if (sb->s_op->remount_fs) {
+	if (mnt && sb->s_op->remount_fs2) {
+		retval = sb->s_op->remount_fs2(mnt, sb, &flags, data);
+		if (retval) {
+			if (!force)
+				goto cancel_readonly;
+			/* If forced remount, go ahead despite any errors */
+			WARN(1, "forced remount of a %s fs returned %i\n",
+			     sb->s_type->name, retval);
+		}
+	} else if (sb->s_op->remount_fs) {
 		retval = sb->s_op->remount_fs(sb, &flags, data);
 		if (retval) {
 			if (!force)
@@ -853,12 +863,17 @@
 	return retval;
 }
 
+int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
+{
+	return do_remount_sb2(NULL, sb, flags, data, force);
+}
+
 static void do_emergency_remount(struct work_struct *work)
 {
 	struct super_block *sb, *p = NULL;
 
 	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &super_blocks, s_list) {
+	list_for_each_entry_reverse(sb, &super_blocks, s_list) {
 		if (hlist_unhashed(&sb->s_instances))
 			continue;
 		sb->s_count++;
@@ -1178,7 +1193,7 @@
 EXPORT_SYMBOL(mount_single);
 
 struct dentry *
-mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
+mount_fs(struct file_system_type *type, int flags, const char *name, struct vfsmount *mnt, void *data)
 {
 	struct dentry *root;
 	struct super_block *sb;
@@ -1195,7 +1210,10 @@
 			goto out_free_secdata;
 	}
 
-	root = type->mount(type, flags, name, data);
+	if (type->mount2)
+		root = type->mount2(mnt, type, flags, name, data);
+	else
+		root = type->mount(type, flags, name, data);
 	if (IS_ERR(root)) {
 		error = PTR_ERR(root);
 		goto out_free_secdata;
diff --git a/fs/sync.c b/fs/sync.c
index 2a54c1f..5c2420c 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -218,6 +218,7 @@
 	if (f.file) {
 		ret = vfs_fsync(f.file, datasync);
 		fdput(f);
+		inc_syscfs(current);
 	}
 	return ret;
 }
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 784d667..9d9c032 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -493,7 +493,8 @@
 				 new_flags, vma->anon_vma,
 				 vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
-				 NULL_VM_UFFD_CTX);
+				 NULL_VM_UFFD_CTX,
+				 vma_get_anon_name(vma));
 		if (prev)
 			vma = prev;
 		else
@@ -872,7 +873,8 @@
 		prev = vma_merge(mm, prev, start, vma_end, new_flags,
 				 vma->anon_vma, vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
-				 ((struct vm_userfaultfd_ctx){ ctx }));
+				 ((struct vm_userfaultfd_ctx){ ctx }),
+				 vma_get_anon_name(vma));
 		if (prev) {
 			vma = prev;
 			goto next;
@@ -1009,7 +1011,8 @@
 		prev = vma_merge(mm, prev, start, vma_end, new_flags,
 				 vma->anon_vma, vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
-				 NULL_VM_UFFD_CTX);
+				 NULL_VM_UFFD_CTX,
+				 vma_get_anon_name(vma));
 		if (prev) {
 			vma = prev;
 			goto next;
diff --git a/fs/utimes.c b/fs/utimes.c
index 22307cd..87ce37b 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -91,7 +91,7 @@
 	}
 retry_deleg:
 	inode_lock(inode);
-	error = notify_change(path->dentry, &newattrs, &delegated_inode);
+	error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode);
 	inode_unlock(inode);
 	if (delegated_inode) {
 		error = break_deleg_wait(&delegated_inode);
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index 9701f2d..a5696c1 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -144,6 +144,7 @@
 	struct drm_crtc *ptr;
 	struct drm_crtc_state *state;
 	struct drm_crtc_commit *commit;
+	s32 __user *out_fence_ptr;
 };
 
 struct __drm_connnectors_state {
@@ -316,6 +317,8 @@
 			      struct drm_crtc *crtc);
 void drm_atomic_set_fb_for_plane(struct drm_plane_state *plane_state,
 				 struct drm_framebuffer *fb);
+void drm_atomic_set_fence_for_plane(struct drm_plane_state *plane_state,
+				    struct fence *fence);
 int __must_check
 drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state,
 				  struct drm_crtc *crtc);
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 0aa2925..f3d58c7 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -680,6 +680,35 @@
 	 * context.
 	 */
 	struct drm_modeset_acquire_ctx *acquire_ctx;
+
+	/**
+	 * @fence_context:
+	 *
+	 * timeline context used for fence operations.
+	 */
+	unsigned int fence_context;
+
+	/**
+	 * @fence_lock:
+	 *
+	 * spinlock to protect the fences in the fence_context.
+	 */
+
+	spinlock_t fence_lock;
+	/**
+	 * @fence_seqno:
+	 *
+	 * Seqno variable used as monotonic counter for the fences
+	 * created on the CRTC's timeline.
+	 */
+	unsigned long fence_seqno;
+
+	/**
+	 * @timeline_name:
+	 *
+	 * The name of the CRTC's fence timeline.
+	 */
+	char timeline_name[32];
 };
 
 /**
@@ -1160,6 +1189,17 @@
 	 */
 	struct drm_property *prop_fb_id;
 	/**
+	 * @prop_in_fence_fd: Sync File fd representing the incoming fences
+	 * for a Plane.
+	 */
+	struct drm_property *prop_in_fence_fd;
+	/**
+	 * @prop_out_fence_ptr: Sync File fd pointer representing the
+	 * outgoing fences for a CRTC. Userspace should provide a pointer to a
+	 * value of type s32, and then cast that pointer to u64.
+	 */
+	struct drm_property *prop_out_fence_ptr;
+	/**
 	 * @prop_crtc_id: Default atomic plane property to specify the
 	 * &drm_crtc.
 	 */
diff --git a/include/drm/drm_fb_cma_helper.h b/include/drm/drm_fb_cma_helper.h
index f313211f..3b00f64 100644
--- a/include/drm/drm_fb_cma_helper.h
+++ b/include/drm/drm_fb_cma_helper.h
@@ -12,6 +12,8 @@
 struct drm_device;
 struct drm_file;
 struct drm_mode_fb_cmd2;
+struct drm_plane;
+struct drm_plane_state;
 
 struct drm_fbdev_cma *drm_fbdev_cma_init_with_funcs(struct drm_device *dev,
 	unsigned int preferred_bpp, unsigned int num_crtc,
@@ -41,6 +43,9 @@
 struct drm_gem_cma_object *drm_fb_cma_get_gem_obj(struct drm_framebuffer *fb,
 	unsigned int plane);
 
+int drm_fb_cma_prepare_fb(struct drm_plane *plane,
+			  struct drm_plane_state *state);
+
 #ifdef CONFIG_DEBUG_FS
 struct seq_file;
 
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 8b4dc62..952ef84 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -65,7 +65,7 @@
 
 	struct drm_crtc *crtc;   /* do not write directly, use drm_atomic_set_crtc_for_plane() */
 	struct drm_framebuffer *fb;  /* do not write directly, use drm_atomic_set_fb_for_plane() */
-	struct fence *fence;
+	struct fence *fence; /* do not write directly, use drm_atomic_set_fence_for_plane() */
 
 	/* Signed dest location allows it to be partially off screen */
 	int32_t crtc_x, crtc_y;
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
new file mode 100644
index 0000000..a460889
--- /dev/null
+++ b/include/linux/Kbuild
@@ -0,0 +1,2 @@
+header-y += if_pppolac.h
+header-y += if_pppopns.h
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index 8c98113..eff56cb 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -5,6 +5,15 @@
 #define AMBA_MMCI_H
 
 #include <linux/mmc/host.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/sdio_func.h>
+
+struct embedded_sdio_data {
+        struct sdio_cis cis;
+        struct sdio_cccr cccr;
+        struct sdio_embedded_func *funcs;
+        int num_funcs;
+};
 
 /**
  * struct mmci_platform_data - platform configuration for the MMCI
@@ -31,6 +40,7 @@
 	int	gpio_wp;
 	int	gpio_cd;
 	bool	cd_invert;
+	struct embedded_sdio_data *embedded_sdio;
 };
 
 #endif
diff --git a/include/linux/android_aid.h b/include/linux/android_aid.h
new file mode 100644
index 0000000..6f1fa179
--- /dev/null
+++ b/include/linux/android_aid.h
@@ -0,0 +1,28 @@
+/* include/linux/android_aid.h
+ *
+ * Copyright (C) 2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_ANDROID_AID_H
+#define _LINUX_ANDROID_AID_H
+
+/* AIDs that the kernel treats differently */
+#define AID_OBSOLETE_000 KGIDT_INIT(3001)  /* was NET_BT_ADMIN */
+#define AID_OBSOLETE_001 KGIDT_INIT(3002)  /* was NET_BT */
+#define AID_INET         KGIDT_INIT(3003)
+#define AID_NET_RAW      KGIDT_INIT(3004)
+#define AID_NET_ADMIN    KGIDT_INIT(3005)
+#define AID_NET_BW_STATS KGIDT_INIT(3006)  /* read bandwidth statistics */
+#define AID_NET_BW_ACCT  KGIDT_INIT(3007)  /* change bandwidth statistics accounting */
+
+#endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bd738aa..238261e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -196,6 +196,9 @@
 
 	/* for bidi */
 	struct request *next_rq;
+
+	ktime_t			lat_hist_io_start;
+	int			lat_hist_enabled;
 };
 
 #define REQ_OP_SHIFT (8 * sizeof(u64) - REQ_OP_BITS)
@@ -1700,6 +1703,79 @@
 extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *);
 extern int bdev_dax_supported(struct super_block *, int);
 extern bool bdev_dax_capable(struct block_device *);
+
+/*
+ * X-axis for IO latency histogram support.
+ */
+static const u_int64_t latency_x_axis_us[] = {
+	100,
+	200,
+	300,
+	400,
+	500,
+	600,
+	700,
+	800,
+	900,
+	1000,
+	1200,
+	1400,
+	1600,
+	1800,
+	2000,
+	2500,
+	3000,
+	4000,
+	5000,
+	6000,
+	7000,
+	9000,
+	10000
+};
+
+#define BLK_IO_LAT_HIST_DISABLE         0
+#define BLK_IO_LAT_HIST_ENABLE          1
+#define BLK_IO_LAT_HIST_ZERO            2
+
+struct io_latency_state {
+	u_int64_t	latency_y_axis_read[ARRAY_SIZE(latency_x_axis_us) + 1];
+	u_int64_t	latency_reads_elems;
+	u_int64_t	latency_y_axis_write[ARRAY_SIZE(latency_x_axis_us) + 1];
+	u_int64_t	latency_writes_elems;
+};
+
+static inline void
+blk_update_latency_hist(struct io_latency_state *s,
+			int read,
+			u_int64_t delta_us)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++) {
+		if (delta_us < (u_int64_t)latency_x_axis_us[i]) {
+			if (read)
+				s->latency_y_axis_read[i]++;
+			else
+				s->latency_y_axis_write[i]++;
+			break;
+		}
+	}
+	if (i == ARRAY_SIZE(latency_x_axis_us)) {
+		/* Overflowed the histogram */
+		if (read)
+			s->latency_y_axis_read[i]++;
+		else
+			s->latency_y_axis_write[i]++;
+	}
+	if (read)
+		s->latency_reads_elems++;
+	else
+		s->latency_writes_elems++;
+}
+
+void blk_zero_latency_hist(struct io_latency_state *s);
+ssize_t blk_latency_hist_show(struct io_latency_state *s, char *buf);
+
 #else /* CONFIG_BLOCK */
 
 struct block_device;
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
new file mode 100644
index 0000000..ace92fc
--- /dev/null
+++ b/include/linux/bpf-cgroup.h
@@ -0,0 +1,77 @@
+#ifndef _BPF_CGROUP_H
+#define _BPF_CGROUP_H
+
+#include <linux/jump_label.h>
+#include <uapi/linux/bpf.h>
+
+struct sock;
+struct cgroup;
+struct sk_buff;
+
+#ifdef CONFIG_CGROUP_BPF
+
+extern struct static_key_false cgroup_bpf_enabled_key;
+#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
+
+struct cgroup_bpf {
+	/*
+	 * Store two sets of bpf_prog pointers, one for programs that are
+	 * pinned directly to this cgroup, and one for those that are effective
+	 * when this cgroup is accessed.
+	 */
+	struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
+	struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE];
+	bool disallow_override[MAX_BPF_ATTACH_TYPE];
+};
+
+void cgroup_bpf_put(struct cgroup *cgrp);
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
+
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+			struct bpf_prog *prog, enum bpf_attach_type type,
+			bool overridable);
+
+/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+		      enum bpf_attach_type type, bool overridable);
+
+int __cgroup_bpf_run_filter(struct sock *sk,
+			    struct sk_buff *skb,
+			    enum bpf_attach_type type);
+
+/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */
+#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb)			\
+({									\
+	int __ret = 0;							\
+	if (cgroup_bpf_enabled)						\
+		__ret = __cgroup_bpf_run_filter(sk, skb,		\
+						BPF_CGROUP_INET_INGRESS); \
+									\
+	__ret;								\
+})
+
+#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb)				\
+({									\
+	int __ret = 0;							\
+	if (cgroup_bpf_enabled && sk && sk == skb->sk) {		\
+		typeof(sk) __sk = sk_to_full_sk(sk);			\
+		if (sk_fullsock(__sk))					\
+			__ret = __cgroup_bpf_run_filter(__sk, skb,	\
+						BPF_CGROUP_INET_EGRESS); \
+	}								\
+	__ret;								\
+})
+
+#else
+
+struct cgroup_bpf {};
+static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
+				      struct cgroup *parent) {}
+
+#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
+
+#endif /* CONFIG_CGROUP_BPF */
+
+#endif /* _BPF_CGROUP_H */
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1619a32..9c72b21 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -16,6 +16,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/workqueue.h>
+#include <linux/bpf-cgroup.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -301,6 +302,9 @@
 	/* used to schedule release agent */
 	struct work_struct release_agent_work;
 
+	/* used to store eBPF programs */
+	struct cgroup_bpf bpf;
+
 	/* ids of the ancestors at each level including self */
 	int ancestor_ids[];
 };
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 0df0336a..7f4a2a5 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -20,6 +20,10 @@
 SUBSYS(cpuacct)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_SCHEDTUNE)
+SUBSYS(schedtune)
+#endif
+
 #if IS_ENABLED(CONFIG_BLK_CGROUP)
 SUBSYS(io)
 #endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index ae5ac89..fdf5be4 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -276,4 +276,11 @@
 static inline void cpu_smt_check_topology(void) { }
 #endif
 
+#define IDLE_START 1
+#define IDLE_END 2
+
+void idle_notifier_register(struct notifier_block *n);
+void idle_notifier_unregister(struct notifier_block *n);
+void idle_notifier_call_chain(unsigned long val);
+
 #endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 9d9e0b5..96c93b6 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -177,6 +177,7 @@
 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
 int cpufreq_update_policy(unsigned int cpu);
 bool have_governor_per_policy(void);
+bool cpufreq_driver_is_slow(void);
 struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
 void cpufreq_enable_fast_switch(struct cpufreq_policy *policy);
 void cpufreq_disable_fast_switch(struct cpufreq_policy *policy);
@@ -351,6 +352,14 @@
  */
 #define CPUFREQ_NEED_INITIAL_FREQ_CHECK	(1 << 5)
 
+/*
+ * Indicates that it is safe to call cpufreq_driver_target from
+ * non-interruptable context in scheduler hot paths.  Drivers must
+ * opt-in to this flag, as the safe default is that they might sleep
+ * or be too slow for hot path use.
+ */
+#define CPUFREQ_DRIVER_FAST		(1 << 6)
+
 int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 
@@ -545,6 +554,32 @@
 	ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf,
 			 size_t count);
 };
+/* CPUFREQ DEFAULT GOVERNOR */
+/*
+ * Performance governor is fallback governor if any other gov failed to auto
+ * load due latency restrictions
+ */
+#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
+extern struct cpufreq_governor cpufreq_gov_performance;
+#endif
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
+#define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_performance)
+#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE)
+extern struct cpufreq_governor cpufreq_gov_powersave;
+#define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_powersave)
+#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE)
+extern struct cpufreq_governor cpufreq_gov_userspace;
+#define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_userspace)
+#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND)
+extern struct cpufreq_governor cpufreq_gov_ondemand;
+#define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_ondemand)
+#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE)
+extern struct cpufreq_governor cpufreq_gov_conservative;
+#define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_conservative)
+#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED)
+extern struct cpufreq_governor cpufreq_gov_sched;
+#define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_sched)
+#endif
 
 /*********************************************************************
  *                     FREQUENCY TABLE HELPERS                       *
@@ -878,4 +913,8 @@
 int cpufreq_generic_init(struct cpufreq_policy *policy,
 		struct cpufreq_frequency_table *table,
 		unsigned int transition_latency);
+
+struct sched_domain;
+unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu);
+unsigned long cpufreq_scale_max_freq_capacity(int cpu);
 #endif /* _LINUX_CPUFREQ_H */
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index bb31373..9a8eec9 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -207,7 +207,7 @@
 #endif
 
 /* kernel/sched/idle.c */
-extern void sched_idle_set_state(struct cpuidle_state *idle_state);
+extern void sched_idle_set_state(struct cpuidle_state *idle_state, int index);
 extern void default_idle_call(void);
 
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index b757ee4..014d7f9 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -142,6 +142,7 @@
 	int (*d_manage)(struct dentry *, bool);
 	struct dentry *(*d_real)(struct dentry *, const struct inode *,
 				 unsigned int);
+	void (*d_canonical_path)(const struct path *, struct path *);
 } ____cacheline_aligned;
 
 /*
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 9661bb2..9d30c77 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -408,6 +408,12 @@
 void *dm_get_mdptr(struct mapped_device *md);
 
 /*
+ * Export the device via the ioctl interface (uses mdptr).
+ */
+int dm_ioctl_export(struct mapped_device *md, const char *name,
+		    const char *uuid);
+
+/*
  * A device can still be used while suspended, but I/O is deferred.
  */
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags);
@@ -434,6 +440,13 @@
 
 struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
 
+void dm_lock_md_type(struct mapped_device *md);
+void dm_unlock_md_type(struct mapped_device *md);
+void dm_set_md_type(struct mapped_device *md, unsigned type);
+unsigned dm_get_md_type(struct mapped_device *md);
+int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
+unsigned dm_table_get_type(struct dm_table *t);
+
 /*
  * Geometry functions.
  */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 80b1b8f..2877ccb 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1427,7 +1427,7 @@
 				  unsigned long *load_addr,
 				  unsigned long *load_size);
 
-efi_status_t efi_parse_options(char *cmdline);
+efi_status_t efi_parse_options(char const *cmdline);
 
 efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
 			   struct screen_info *si, efi_guid_t *proto,
diff --git a/include/linux/fence.h b/include/linux/fence.h
index 9bb2c0c..7c9b78c 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -108,6 +108,7 @@
  * @get_driver_name: returns the driver name.
  * @get_timeline_name: return the name of the context this fence belongs to.
  * @enable_signaling: enable software signaling of fence.
+ * @disable_signaling: disable software signaling of fence (optional).
  * @signaled: [optional] peek whether the fence is signaled, can be null.
  * @wait: custom wait implementation, or fence_default_wait.
  * @release: [optional] called on destruction of fence, can be null
@@ -167,6 +168,7 @@
 	const char * (*get_driver_name)(struct fence *fence);
 	const char * (*get_timeline_name)(struct fence *fence);
 	bool (*enable_signaling)(struct fence *fence);
+	void (*disable_signaling)(struct fence *fence);
 	bool (*signaled)(struct fence *fence);
 	signed long (*wait)(struct fence *fence, bool intr, signed long timeout);
 	void (*release)(struct fence *fence);
@@ -183,6 +185,16 @@
 void fence_free(struct fence *fence);
 
 /**
+ * fence_put - decreases refcount of the fence
+ * @fence:	[in]	fence to reduce refcount of
+ */
+static inline void fence_put(struct fence *fence)
+{
+	if (fence)
+		kref_put(&fence->refcount, fence_release);
+}
+
+/**
  * fence_get - increases refcount of the fence
  * @fence:	[in]	fence to increase refcount of
  *
@@ -210,13 +222,49 @@
 }
 
 /**
- * fence_put - decreases refcount of the fence
- * @fence:	[in]	fence to reduce refcount of
+ * fence_get_rcu_safe  - acquire a reference to an RCU tracked fence
+ * @fence:	[in]	pointer to fence to increase refcount of
+ *
+ * Function returns NULL if no refcount could be obtained, or the fence.
+ * This function handles acquiring a reference to a fence that may be
+ * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU),
+ * so long as the caller is using RCU on the pointer to the fence.
+ *
+ * An alternative mechanism is to employ a seqlock to protect a bunch of
+ * fences, such as used by struct reservation_object. When using a seqlock,
+ * the seqlock must be taken before and checked after a reference to the
+ * fence is acquired (as shown here).
+ *
+ * The caller is required to hold the RCU read lock.
  */
-static inline void fence_put(struct fence *fence)
+static inline struct fence *fence_get_rcu_safe(struct fence * __rcu *fencep)
 {
-	if (fence)
-		kref_put(&fence->refcount, fence_release);
+	do {
+		struct fence *fence;
+
+		fence = rcu_dereference(*fencep);
+		if (!fence || !fence_get_rcu(fence))
+			return NULL;
+
+		/* The atomic_inc_not_zero() inside fence_get_rcu()
+		 * provides a full memory barrier upon success (such as now).
+		 * This is paired with the write barrier from assigning
+		 * to the __rcu protected fence pointer so that if that
+		 * pointer still matches the current fence, we know we
+		 * have successfully acquire a reference to it. If it no
+		 * longer matches, we are holding a reference to some other
+		 * reallocated pointer. This is possible if the allocator
+		 * is using a freelist like SLAB_DESTROY_BY_RCU where the
+		 * fence remains valid for the RCU grace period, but it
+		 * may be reallocated. When using such allocators, we are
+		 * responsible for ensuring the reference we get is to
+		 * the right fence, as below.
+		 */
+		if (fence == rcu_access_pointer(*fencep))
+			return rcu_pointer_handoff(fence);
+
+		fence_put(fence);
+	} while (1);
 }
 
 int fence_signal(struct fence *fence);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bcad2b9..ba61781 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1612,13 +1612,21 @@
  * VFS helper functions..
  */
 extern int vfs_create(struct inode *, struct dentry *, umode_t, bool);
+extern int vfs_create2(struct vfsmount *, struct inode *, struct dentry *, umode_t, bool);
 extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
+extern int vfs_mkdir2(struct vfsmount *, struct inode *, struct dentry *, umode_t);
 extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
+extern int vfs_mknod2(struct vfsmount *, struct inode *, struct dentry *, umode_t, dev_t);
 extern int vfs_symlink(struct inode *, struct dentry *, const char *);
+extern int vfs_symlink2(struct vfsmount *, struct inode *, struct dentry *, const char *);
 extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
+extern int vfs_link2(struct vfsmount *, struct dentry *, struct inode *, struct dentry *, struct inode **);
 extern int vfs_rmdir(struct inode *, struct dentry *);
+extern int vfs_rmdir2(struct vfsmount *, struct inode *, struct dentry *);
 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
+extern int vfs_unlink2(struct vfsmount *, struct inode *, struct dentry *, struct inode **);
 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
+extern int vfs_rename2(struct vfsmount *, struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
 extern int vfs_whiteout(struct inode *, struct dentry *);
 
 /*
@@ -1746,6 +1754,7 @@
 	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
 	const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
 	int (*permission) (struct inode *, int);
+	int (*permission2) (struct vfsmount *, struct inode *, int);
 	struct posix_acl * (*get_acl)(struct inode *, int);
 
 	int (*readlink) (struct dentry *, char __user *,int);
@@ -1760,6 +1769,7 @@
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *, unsigned int);
 	int (*setattr) (struct dentry *, struct iattr *);
+	int (*setattr2) (struct vfsmount *, struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
@@ -1808,9 +1818,13 @@
 	int (*unfreeze_fs) (struct super_block *);
 	int (*statfs) (struct dentry *, struct kstatfs *);
 	int (*remount_fs) (struct super_block *, int *, char *);
+	int (*remount_fs2) (struct vfsmount *, struct super_block *, int *, char *);
+	void *(*clone_mnt_data) (void *);
+	void (*copy_mnt_data) (void *, void *);
 	void (*umount_begin) (struct super_block *);
 
 	int (*show_options)(struct seq_file *, struct dentry *);
+	int (*show_options2)(struct vfsmount *,struct seq_file *, struct dentry *);
 	int (*show_devname)(struct seq_file *, struct dentry *);
 	int (*show_path)(struct seq_file *, struct dentry *);
 	int (*show_stats)(struct seq_file *, struct dentry *);
@@ -2044,6 +2058,9 @@
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	struct dentry *(*mount) (struct file_system_type *, int,
 		       const char *, void *);
+	struct dentry *(*mount2) (struct vfsmount *, struct file_system_type *, int,
+			       const char *, void *);
+	void *(*alloc_mnt_data) (void);
 	void (*kill_sb) (struct super_block *);
 	struct module *owner;
 	struct file_system_type * next;
@@ -2342,6 +2359,8 @@
 extern long vfs_truncate(const struct path *, loff_t);
 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
 		       struct file *filp);
+extern int do_truncate2(struct vfsmount *, struct dentry *, loff_t start,
+			unsigned int time_attrs, struct file *filp);
 extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
 			loff_t len);
 extern long do_sys_open(int dfd, const char __user *filename, int flags,
@@ -2584,8 +2603,11 @@
 extern sector_t bmap(struct inode *, sector_t);
 #endif
 extern int notify_change(struct dentry *, struct iattr *, struct inode **);
+extern int notify_change2(struct vfsmount *, struct dentry *, struct iattr *, struct inode **);
 extern int inode_permission(struct inode *, int);
+extern int inode_permission2(struct vfsmount *, struct inode *, int);
 extern int __inode_permission(struct inode *, int);
+extern int __inode_permission2(struct vfsmount *, struct inode *, int);
 extern int generic_permission(struct inode *, int);
 extern int __check_sticky(struct inode *dir, struct inode *inode);
 
diff --git a/include/linux/gpio_event.h b/include/linux/gpio_event.h
new file mode 100644
index 0000000..2613fc5
--- /dev/null
+++ b/include/linux/gpio_event.h
@@ -0,0 +1,170 @@
+/* include/linux/gpio_event.h
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_GPIO_EVENT_H
+#define _LINUX_GPIO_EVENT_H
+
+#include <linux/input.h>
+
+struct gpio_event_input_devs {
+	int count;
+	struct input_dev *dev[];
+};
+enum {
+	GPIO_EVENT_FUNC_UNINIT  = 0x0,
+	GPIO_EVENT_FUNC_INIT    = 0x1,
+	GPIO_EVENT_FUNC_SUSPEND = 0x2,
+	GPIO_EVENT_FUNC_RESUME  = 0x3,
+};
+struct gpio_event_info {
+	int (*func)(struct gpio_event_input_devs *input_devs,
+		    struct gpio_event_info *info,
+		    void **data, int func);
+	int (*event)(struct gpio_event_input_devs *input_devs,
+		     struct gpio_event_info *info,
+		     void **data, unsigned int dev, unsigned int type,
+		     unsigned int code, int value); /* out events */
+	bool no_suspend;
+};
+
+struct gpio_event_platform_data {
+	const char *name;
+	struct gpio_event_info **info;
+	size_t info_count;
+	int (*power)(const struct gpio_event_platform_data *pdata, bool on);
+	const char *names[]; /* If name is NULL, names contain a NULL */
+			     /* terminated list of input devices to create */
+};
+
+#define GPIO_EVENT_DEV_NAME "gpio-event"
+
+/* Key matrix */
+
+enum gpio_event_matrix_flags {
+	/* unset: drive active output low, set: drive active output high */
+	GPIOKPF_ACTIVE_HIGH              = 1U << 0,
+	GPIOKPF_DEBOUNCE                 = 1U << 1,
+	GPIOKPF_REMOVE_SOME_PHANTOM_KEYS = 1U << 2,
+	GPIOKPF_REMOVE_PHANTOM_KEYS      = GPIOKPF_REMOVE_SOME_PHANTOM_KEYS |
+					   GPIOKPF_DEBOUNCE,
+	GPIOKPF_DRIVE_INACTIVE           = 1U << 3,
+	GPIOKPF_LEVEL_TRIGGERED_IRQ      = 1U << 4,
+	GPIOKPF_PRINT_UNMAPPED_KEYS      = 1U << 16,
+	GPIOKPF_PRINT_MAPPED_KEYS        = 1U << 17,
+	GPIOKPF_PRINT_PHANTOM_KEYS       = 1U << 18,
+};
+
+#define MATRIX_CODE_BITS (10)
+#define MATRIX_KEY_MASK ((1U << MATRIX_CODE_BITS) - 1)
+#define MATRIX_KEY(dev, code) \
+	(((dev) << MATRIX_CODE_BITS) | (code & MATRIX_KEY_MASK))
+
+extern int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs,
+			struct gpio_event_info *info, void **data, int func);
+struct gpio_event_matrix_info {
+	/* initialize to gpio_event_matrix_func */
+	struct gpio_event_info info;
+	/* size must be ninputs * noutputs */
+	const unsigned short *keymap;
+	unsigned int *input_gpios;
+	unsigned int *output_gpios;
+	unsigned int ninputs;
+	unsigned int noutputs;
+	/* time to wait before reading inputs after driving each output */
+	ktime_t settle_time;
+	/* time to wait before scanning the keypad a second time */
+	ktime_t debounce_delay;
+	ktime_t poll_time;
+	unsigned flags;
+};
+
+/* Directly connected inputs and outputs */
+
+enum gpio_event_direct_flags {
+	GPIOEDF_ACTIVE_HIGH         = 1U << 0,
+/*	GPIOEDF_USE_DOWN_IRQ        = 1U << 1, */
+/*	GPIOEDF_USE_IRQ             = (1U << 2) | GPIOIDF_USE_DOWN_IRQ, */
+	GPIOEDF_PRINT_KEYS          = 1U << 8,
+	GPIOEDF_PRINT_KEY_DEBOUNCE  = 1U << 9,
+	GPIOEDF_PRINT_KEY_UNSTABLE  = 1U << 10,
+};
+
+struct gpio_event_direct_entry {
+	uint32_t gpio:16;
+	uint32_t code:10;
+	uint32_t dev:6;
+};
+
+/* inputs */
+extern int gpio_event_input_func(struct gpio_event_input_devs *input_devs,
+			struct gpio_event_info *info, void **data, int func);
+struct gpio_event_input_info {
+	/* initialize to gpio_event_input_func */
+	struct gpio_event_info info;
+	ktime_t debounce_time;
+	ktime_t poll_time;
+	uint16_t flags;
+	uint16_t type;
+	const struct gpio_event_direct_entry *keymap;
+	size_t keymap_size;
+};
+
+/* outputs */
+extern int gpio_event_output_func(struct gpio_event_input_devs *input_devs,
+			struct gpio_event_info *info, void **data, int func);
+extern int gpio_event_output_event(struct gpio_event_input_devs *input_devs,
+			struct gpio_event_info *info, void **data,
+			unsigned int dev, unsigned int type,
+			unsigned int code, int value);
+struct gpio_event_output_info {
+	/* initialize to gpio_event_output_func and gpio_event_output_event */
+	struct gpio_event_info info;
+	uint16_t flags;
+	uint16_t type;
+	const struct gpio_event_direct_entry *keymap;
+	size_t keymap_size;
+};
+
+
+/* axes */
+
+enum gpio_event_axis_flags {
+	GPIOEAF_PRINT_UNKNOWN_DIRECTION  = 1U << 16,
+	GPIOEAF_PRINT_RAW                = 1U << 17,
+	GPIOEAF_PRINT_EVENT              = 1U << 18,
+};
+
+extern int gpio_event_axis_func(struct gpio_event_input_devs *input_devs,
+			struct gpio_event_info *info, void **data, int func);
+struct gpio_event_axis_info {
+	/* initialize to gpio_event_axis_func */
+	struct gpio_event_info info;
+	uint8_t  count; /* number of gpios for this axis */
+	uint8_t  dev; /* device index when using multiple input devices */
+	uint8_t  type; /* EV_REL or EV_ABS */
+	uint16_t code;
+	uint16_t decoded_size;
+	uint16_t (*map)(struct gpio_event_axis_info *info, uint16_t in);
+	uint32_t *gpio;
+	uint32_t flags;
+};
+#define gpio_axis_2bit_gray_map gpio_axis_4bit_gray_map
+#define gpio_axis_3bit_gray_map gpio_axis_4bit_gray_map
+uint16_t gpio_axis_4bit_gray_map(
+			struct gpio_event_axis_info *info, uint16_t in);
+uint16_t gpio_axis_5bit_singletrack_map(
+			struct gpio_event_axis_info *info, uint16_t in);
+
+#endif
diff --git a/include/linux/if_pppolac.h b/include/linux/if_pppolac.h
new file mode 100644
index 0000000..e40aa10
--- /dev/null
+++ b/include/linux/if_pppolac.h
@@ -0,0 +1,23 @@
+/* include/linux/if_pppolac.h
+ *
+ * Header for PPP on L2TP Access Concentrator / PPPoLAC Socket (RFC 2661)
+ *
+ * Copyright (C) 2009 Google, Inc.
+ * Author: Chia-chi Yeh <chiachi@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_IF_PPPOLAC_H
+#define __LINUX_IF_PPPOLAC_H
+
+#include <uapi/linux/if_pppolac.h>
+
+#endif /* __LINUX_IF_PPPOLAC_H */
diff --git a/include/linux/if_pppopns.h b/include/linux/if_pppopns.h
new file mode 100644
index 0000000..4ac621a9
--- /dev/null
+++ b/include/linux/if_pppopns.h
@@ -0,0 +1,23 @@
+/* include/linux/if_pppopns.h
+ *
+ * Header for PPP on PPTP Network Server / PPPoPNS Socket (RFC 2637)
+ *
+ * Copyright (C) 2009 Google, Inc.
+ * Author: Chia-chi Yeh <chiachi@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_IF_PPPOPNS_H
+#define __LINUX_IF_PPPOPNS_H
+
+#include <uapi/linux/if_pppopns.h>
+
+#endif /* __LINUX_IF_PPPOPNS_H */
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index ba7a9b0..325727a 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -43,6 +43,25 @@
 	u32 seq_sent, seq_recv;
 	int ppp_flags;
 };
+
+struct pppolac_opt {
+	__u32		local;
+	__u32		remote;
+	__u32		recv_sequence;
+	__u32		xmit_sequence;
+	atomic_t	sequencing;
+	int		(*backlog_rcv)(struct sock *sk_udp, struct sk_buff *skb);
+};
+
+struct pppopns_opt {
+	__u16		local;
+	__u16		remote;
+	__u32		recv_sequence;
+	__u32		xmit_sequence;
+	void		(*data_ready)(struct sock *sk_raw);
+	int		(*backlog_rcv)(struct sock *sk_raw, struct sk_buff *skb);
+};
+
 #include <net/sock.h>
 
 struct pppox_sock {
@@ -53,6 +72,8 @@
 	union {
 		struct pppoe_opt pppoe;
 		struct pptp_opt  pptp;
+		struct pppolac_opt lac;
+		struct pppopns_opt pns;
 	} proto;
 	__be16			num;
 };
diff --git a/include/linux/initramfs.h b/include/linux/initramfs.h
new file mode 100644
index 0000000..fc7da63
--- /dev/null
+++ b/include/linux/initramfs.h
@@ -0,0 +1,32 @@
+/*
+ * include/linux/initramfs.h
+ *
+ * Copyright (C) 2015, Google
+ * Rom Lemarchand <romlem@android.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LINUX_INITRAMFS_H
+#define _LINUX_INITRAMFS_H
+
+#include <linux/kconfig.h>
+
+#if IS_BUILTIN(CONFIG_BLK_DEV_INITRD)
+
+int __init default_rootfs(void);
+
+#endif
+
+#endif /* _LINUX_INITRAMFS_H */
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index b9dfca55..11ff751 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -37,9 +37,11 @@
 	__s32		accept_ra_rtr_pref;
 	__s32		rtr_probe_interval;
 #ifdef CONFIG_IPV6_ROUTE_INFO
+	__s32		accept_ra_rt_info_min_plen;
 	__s32		accept_ra_rt_info_max_plen;
 #endif
 #endif
+	__s32		accept_ra_rt_table;
 	__s32		proxy_ndp;
 	__s32		accept_source_route;
 	__s32		accept_ra_from_local;
diff --git a/include/linux/keychord.h b/include/linux/keychord.h
new file mode 100644
index 0000000..08cf540
--- /dev/null
+++ b/include/linux/keychord.h
@@ -0,0 +1,23 @@
+/*
+ *  Key chord input driver
+ *
+ * Copyright (C) 2008 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+*/
+
+#ifndef __LINUX_KEYCHORD_H_
+#define __LINUX_KEYCHORD_H_
+
+#include <uapi/linux/keychord.h>
+
+#endif	/* __LINUX_KEYCHORD_H_ */
diff --git a/include/linux/keycombo.h b/include/linux/keycombo.h
new file mode 100644
index 0000000..c6db262
--- /dev/null
+++ b/include/linux/keycombo.h
@@ -0,0 +1,36 @@
+/*
+ * include/linux/keycombo.h - platform data structure for keycombo driver
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_KEYCOMBO_H
+#define _LINUX_KEYCOMBO_H
+
+#define KEYCOMBO_NAME "keycombo"
+
+/*
+ * if key_down_fn and key_up_fn are both present, you are guaranteed that
+ * key_down_fn will return before key_up_fn is called, and that key_up_fn
+ * is called iff key_down_fn is called.
+ */
+struct keycombo_platform_data {
+	void (*key_down_fn)(void *);
+	void (*key_up_fn)(void *);
+	void *priv;
+	int key_down_delay; /* Time in ms */
+	int *keys_up;
+	int keys_down[]; /* 0 terminated */
+};
+
+#endif /* _LINUX_KEYCOMBO_H */
diff --git a/include/linux/keyreset.h b/include/linux/keyreset.h
new file mode 100644
index 0000000..2e34afa
--- /dev/null
+++ b/include/linux/keyreset.h
@@ -0,0 +1,29 @@
+/*
+ * include/linux/keyreset.h - platform data structure for resetkeys driver
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_KEYRESET_H
+#define _LINUX_KEYRESET_H
+
+#define KEYRESET_NAME "keyreset"
+
+struct keyreset_platform_data {
+	int (*reset_fn)(void);
+	int key_down_delay;
+	int *keys_up;
+	int keys_down[]; /* 0 terminated */
+};
+
+#endif /* _LINUX_KEYRESET_H */
diff --git a/include/linux/memory-state-time.h b/include/linux/memory-state-time.h
new file mode 100644
index 0000000..d2212b0
--- /dev/null
+++ b/include/linux/memory-state-time.h
@@ -0,0 +1,42 @@
+/* include/linux/memory-state-time.h
+ *
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/workqueue.h>
+
+#define UPDATE_MEMORY_STATE(BLOCK, VALUE) BLOCK->update_call(BLOCK, VALUE)
+
+struct memory_state_update_block;
+
+typedef void (*memory_state_update_fn_t)(struct memory_state_update_block *ub,
+		int value);
+
+/* This struct is populated when you pass it to a memory_state_register*
+ * function. The update_call function is used for an update and defined in the
+ * typedef memory_state_update_fn_t
+ */
+struct memory_state_update_block {
+	memory_state_update_fn_t update_call;
+	int id;
+};
+
+/* Register a frequency struct memory_state_update_block to provide updates to
+ * memory_state_time about frequency changes using its update_call function.
+ */
+struct memory_state_update_block *memory_state_register_frequency_source(void);
+
+/* Register a bandwidth struct memory_state_update_block to provide updates to
+ * memory_state_time about bandwidth changes using its update_call function.
+ */
+struct memory_state_update_block *memory_state_register_bandwidth_source(void);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 11a5a46..5013b3a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1143,6 +1143,7 @@
 extern void show_free_areas(unsigned int flags);
 extern bool skip_free_areas_node(unsigned int flags, int nid);
 
+void shmem_set_file(struct vm_area_struct *vma, struct file *file);
 int shmem_zero_setup(struct vm_area_struct *);
 #ifdef CONFIG_SHMEM
 bool shmem_mapping(struct address_space *mapping);
@@ -1966,7 +1967,7 @@
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
 	unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
-	struct mempolicy *, struct vm_userfaultfd_ctx);
+	struct mempolicy *, struct vm_userfaultfd_ctx, const char __user *);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
 extern int split_vma(struct mm_struct *,
 	struct vm_area_struct *, unsigned long addr, int new_below);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8d6decd..35daed7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -326,11 +326,18 @@
 	/*
 	 * For areas with an address space and backing store,
 	 * linkage into the address_space->i_mmap interval tree.
+	 *
+	 * For private anonymous mappings, a pointer to a null terminated string
+	 * in the user process containing the name given to the vma, or NULL
+	 * if unnamed.
 	 */
-	struct {
-		struct rb_node rb;
-		unsigned long rb_subtree_last;
-	} shared;
+	union {
+		struct {
+			struct rb_node rb;
+			unsigned long rb_subtree_last;
+		} shared;
+		const char __user *anon_name;
+	};
 
 	/*
 	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
@@ -620,4 +627,13 @@
 	unsigned long val;
 } swp_entry_t;
 
+/* Return the name for an anonymous mapping or NULL for a file-backed mapping */
+static inline const char __user *vma_get_anon_name(struct vm_area_struct *vma)
+{
+	if (vma->vm_file)
+		return NULL;
+
+	return vma->anon_name;
+}
+
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 73fad83..510a73a 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -119,6 +119,9 @@
 	u8			raw_pwr_cl_ddr_200_360;	/* 253 */
 	u8			raw_bkops_status;	/* 246 */
 	u8			raw_sectors[4];		/* 212 - 4 bytes */
+	u8			pre_eol_info;		/* 267 */
+	u8			device_life_time_est_typ_a;	/* 268 */
+	u8			device_life_time_est_typ_b;	/* 269 */
 
 	unsigned int            feature_support;
 #define MMC_DISCARD_FEATURE	BIT(0)                  /* CMD38 feature */
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 2b953eb..46a4b79 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -142,6 +142,10 @@
 
 	/* Allow other commands during this ongoing data transfer or busy wait */
 	bool			cap_cmd_during_tfr;
+	ktime_t			io_start;
+#ifdef CONFIG_BLOCK
+	int			lat_hist_enabled;
+#endif
 };
 
 struct mmc_card;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0b24394..fac3b5c 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -16,6 +16,7 @@
 #include <linux/sched.h>
 #include <linux/device.h>
 #include <linux/fault-inject.h>
+#include <linux/blkdev.h>
 
 #include <linux/mmc/core.h>
 #include <linux/mmc/card.h>
@@ -397,6 +398,20 @@
 	int			dsr_req;	/* DSR value is valid */
 	u32			dsr;	/* optional driver stage (DSR) value */
 
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+	struct {
+		struct sdio_cis			*cis;
+		struct sdio_cccr		*cccr;
+		struct sdio_embedded_func	*funcs;
+		int				num_funcs;
+	} embedded_sdio_data;
+#endif
+
+#ifdef CONFIG_BLOCK
+	int			latency_hist_enabled;
+	struct io_latency_state io_lat_s;
+#endif
+
 	unsigned long		private[0] ____cacheline_aligned;
 };
 
@@ -406,6 +421,14 @@
 void mmc_free_host(struct mmc_host *);
 int mmc_of_parse(struct mmc_host *host);
 
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+extern void mmc_set_embedded_sdio_data(struct mmc_host *host,
+				       struct sdio_cis *cis,
+				       struct sdio_cccr *cccr,
+				       struct sdio_embedded_func *funcs,
+				       int num_funcs);
+#endif
+
 static inline void *mmc_priv(struct mmc_host *host)
 {
 	return (void *)host->private;
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index c376209..a034d07 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -331,6 +331,9 @@
 #define EXT_CSD_CACHE_SIZE		249	/* RO, 4 bytes */
 #define EXT_CSD_PWR_CL_DDR_200_360	253	/* RO */
 #define EXT_CSD_FIRMWARE_VERSION	254	/* RO, 8 bytes */
+#define EXT_CSD_PRE_EOL_INFO		267	/* RO */
+#define EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_A	268	/* RO */
+#define EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_B	269	/* RO */
 #define EXT_CSD_SUPPORTED_MODE		493	/* RO */
 #define EXT_CSD_TAG_UNIT_SIZE		498	/* RO */
 #define EXT_CSD_DATA_TAG_SUPPORT	499	/* RO */
diff --git a/include/linux/mmc/pm.h b/include/linux/mmc/pm.h
index 4a13920..6e2d6a1 100644
--- a/include/linux/mmc/pm.h
+++ b/include/linux/mmc/pm.h
@@ -26,5 +26,6 @@
 
 #define MMC_PM_KEEP_POWER	(1 << 0)	/* preserve card power during suspend */
 #define MMC_PM_WAKE_SDIO_IRQ	(1 << 1)	/* wake up host system on SDIO IRQ assertion */
+#define MMC_PM_IGNORE_PM_NOTIFY	(1 << 2)	/* ignore mmc pm notify */
 
 #endif /* LINUX_MMC_PM_H */
diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h
index 97ca105..f563bcf 100644
--- a/include/linux/mmc/sdio_func.h
+++ b/include/linux/mmc/sdio_func.h
@@ -23,6 +23,14 @@
 typedef void (sdio_irq_handler_t)(struct sdio_func *);
 
 /*
+ * Structure used to hold embedded SDIO device data from platform layer
+ */
+struct sdio_embedded_func {
+	uint8_t f_class;
+	uint32_t f_maxblksize;
+};
+
+/*
  * SDIO function CIS tuple (unknown to the core)
  */
 struct sdio_func_tuple {
@@ -128,6 +136,8 @@
 extern unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz);
 
 extern u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret);
+extern u8 sdio_readb_ext(struct sdio_func *func, unsigned int addr, int *err_ret,
+	unsigned in);
 extern u16 sdio_readw(struct sdio_func *func, unsigned int addr, int *err_ret);
 extern u32 sdio_readl(struct sdio_func *func, unsigned int addr, int *err_ret);
 
diff --git a/include/linux/mount.h b/include/linux/mount.h
index e0f3a82..5615a9e 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -67,6 +67,7 @@
 	struct dentry *mnt_root;	/* root of the mounted tree */
 	struct super_block *mnt_sb;	/* pointer to superblock */
 	int mnt_flags;
+	void *data;
 };
 
 struct file; /* forward dec */
diff --git a/include/linux/namei.h b/include/linux/namei.h
index f29abda3..cf437f5 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -78,8 +78,11 @@
 extern void done_path_create(struct path *, struct dentry *);
 extern struct dentry *kern_path_locked(const char *, struct path *);
 extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int);
+extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
+		const char *, unsigned int, struct path *);
 
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
+extern struct dentry *lookup_one_len2(const char *, struct vfsmount *mnt, struct dentry *, int);
 extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int);
 
 extern int follow_down_one(struct path *);
diff --git a/include/linux/netfilter/xt_qtaguid.h b/include/linux/netfilter/xt_qtaguid.h
new file mode 100644
index 0000000..1c67155
--- /dev/null
+++ b/include/linux/netfilter/xt_qtaguid.h
@@ -0,0 +1,14 @@
+#ifndef _XT_QTAGUID_MATCH_H
+#define _XT_QTAGUID_MATCH_H
+
+/* For now we just replace the xt_owner.
+ * FIXME: make iptables aware of qtaguid. */
+#include <linux/netfilter/xt_owner.h>
+
+#define XT_QTAGUID_UID    XT_OWNER_UID
+#define XT_QTAGUID_GID    XT_OWNER_GID
+#define XT_QTAGUID_SOCKET XT_OWNER_SOCKET
+#define xt_qtaguid_match_info xt_owner_match_info
+
+int qtaguid_untag(struct socket *sock, bool kernel);
+#endif /* _XT_QTAGUID_MATCH_H */
diff --git a/include/linux/netfilter/xt_quota2.h b/include/linux/netfilter/xt_quota2.h
new file mode 100644
index 0000000..eadc69033
--- /dev/null
+++ b/include/linux/netfilter/xt_quota2.h
@@ -0,0 +1,25 @@
+#ifndef _XT_QUOTA_H
+#define _XT_QUOTA_H
+
+enum xt_quota_flags {
+	XT_QUOTA_INVERT    = 1 << 0,
+	XT_QUOTA_GROW      = 1 << 1,
+	XT_QUOTA_PACKET    = 1 << 2,
+	XT_QUOTA_NO_CHANGE = 1 << 3,
+	XT_QUOTA_MASK      = 0x0F,
+};
+
+struct xt_quota_counter;
+
+struct xt_quota_mtinfo2 {
+	char name[15];
+	u_int8_t flags;
+
+	/* Comparison-invariant */
+	aligned_u64 quota;
+
+	/* Used internally by the kernel */
+	struct xt_quota_counter *master __attribute__((aligned(8)));
+};
+
+#endif /* _XT_QUOTA_H */
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 4341f32..501d461 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -63,6 +63,27 @@
 extern unsigned long of_get_flat_dt_root(void);
 extern int of_get_flat_dt_size(void);
 
+/*
+ * early_init_dt_scan_chosen - scan the device tree for ramdisk and bootargs
+ *
+ * The boot arguments will be placed into the memory pointed to by @data.
+ * That memory should be COMMAND_LINE_SIZE big and initialized to be a valid
+ * (possibly empty) string.  Logic for what will be in @data after this
+ * function finishes:
+ *
+ * - CONFIG_CMDLINE_FORCE=true
+ *     CONFIG_CMDLINE
+ * - CONFIG_CMDLINE_EXTEND=true, @data is non-empty string
+ *     @data + dt bootargs (even if dt bootargs are empty)
+ * - CONFIG_CMDLINE_EXTEND=true, @data is empty string
+ *     CONFIG_CMDLINE + dt bootargs (even if dt bootargs are empty)
+ * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=non-empty:
+ *     dt bootargs
+ * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=empty, @data is non-empty string
+ *     @data is left unchanged
+ * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=empty, @data is empty string
+ *     CONFIG_CMDLINE (or "" if that's not defined)
+ */
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ae8ecf8..de7c4d2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1173,6 +1173,11 @@
 int perf_event_max_stack_handler(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp, loff_t *ppos);
 
+static inline bool perf_paranoid_any(void)
+{
+	return sysctl_perf_event_paranoid > 2;
+}
+
 static inline bool perf_paranoid_tracepoint_raw(void)
 {
 	return sysctl_perf_event_paranoid > -1;
diff --git a/include/linux/platform_data/ds2482.h b/include/linux/platform_data/ds2482.h
new file mode 100644
index 0000000..5a6879e2a
--- /dev/null
+++ b/include/linux/platform_data/ds2482.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2012 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __PLATFORM_DATA_DS2482__
+#define __PLATFORM_DATA_DS2482__
+
+struct ds2482_platform_data {
+	int		slpz_gpio;
+};
+
+#endif /* __PLATFORM_DATA_DS2482__ */
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index ad97baf..29fc01a 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -18,6 +18,7 @@
 #include <linux/leds.h>
 #include <linux/spinlock.h>
 #include <linux/notifier.h>
+#include <linux/types.h>
 
 /*
  * All voltages, currents, charges, energies, time and temperatures in uV,
@@ -148,6 +149,12 @@
 	POWER_SUPPLY_PROP_SCOPE,
 	POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT,
 	POWER_SUPPLY_PROP_CALIBRATE,
+	/* Local extensions */
+	POWER_SUPPLY_PROP_USB_HC,
+	POWER_SUPPLY_PROP_USB_OTG,
+	POWER_SUPPLY_PROP_CHARGE_ENABLED,
+	/* Local extensions of type int64_t */
+	POWER_SUPPLY_PROP_CHARGE_COUNTER_EXT,
 	/* Properties of type `const char *' */
 	POWER_SUPPLY_PROP_MODEL_NAME,
 	POWER_SUPPLY_PROP_MANUFACTURER,
@@ -175,6 +182,7 @@
 union power_supply_propval {
 	int intval;
 	const char *strval;
+	int64_t int64val;
 };
 
 struct device_node;
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 4058bf9..cb5edd6 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -80,6 +80,8 @@
 ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
 	char *str, size_t len);
 
+void ramoops_console_write_buf(const char *buf, size_t size);
+
 /*
  * Ramoops platform data
  * @mem_size	memory size for ramoops
diff --git a/include/linux/reservation.h b/include/linux/reservation.h
index b0f305e..bad7710 100644
--- a/include/linux/reservation.h
+++ b/include/linux/reservation.h
@@ -177,17 +177,14 @@
 reservation_object_get_excl_rcu(struct reservation_object *obj)
 {
 	struct fence *fence;
-	unsigned seq;
-retry:
-	seq = read_seqcount_begin(&obj->seq);
+
+	if (!rcu_access_pointer(obj->fence_excl))
+		return NULL;
+
 	rcu_read_lock();
-	fence = rcu_dereference(obj->fence_excl);
-	if (read_seqcount_retry(&obj->seq, seq)) {
-		rcu_read_unlock();
-		goto retry;
-	}
-	fence = fence_get(fence);
+	fence = fence_get_rcu_safe(&obj->fence_excl);
 	rcu_read_unlock();
+
 	return fence;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ebd0afb..3aa25ee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -173,6 +173,9 @@
 extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
 extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
+#ifdef CONFIG_CPU_QUIET
+extern u64 nr_running_integral(unsigned int cpu);
+#endif
 
 extern void calc_global_load(unsigned long ticks);
 
@@ -315,6 +318,15 @@
 /* Task command name length */
 #define TASK_COMM_LEN 16
 
+enum task_event {
+	PUT_PREV_TASK   = 0,
+	PICK_NEXT_TASK  = 1,
+	TASK_WAKE       = 2,
+	TASK_MIGRATE    = 3,
+	TASK_UPDATE     = 4,
+	IRQ_UPDATE	= 5,
+};
+
 #include <linux/spinlock.h>
 
 /*
@@ -983,6 +995,14 @@
 #define SCHED_CAPACITY_SHIFT	SCHED_FIXEDPOINT_SHIFT
 #define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)
 
+struct sched_capacity_reqs {
+	unsigned long cfs;
+	unsigned long rt;
+	unsigned long dl;
+
+	unsigned long total;
+};
+
 /*
  * Wake-queues are lists of tasks with a pending wakeup, whose
  * callers have already marked the task as woken internally,
@@ -1046,6 +1066,7 @@
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
 #define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
 #define SD_NUMA			0x4000	/* cross-node balancing */
+#define SD_SHARE_CAP_STATES	0x8000  /* Domain members share capacity state */
 
 #ifdef CONFIG_SCHED_SMT
 static inline int cpu_smt_flags(void)
@@ -1078,8 +1099,57 @@
 
 extern int sched_domain_level_max;
 
+struct capacity_state {
+	unsigned long cap;	/* compute capacity */
+	unsigned long power;	/* power consumption at this compute capacity */
+};
+
+struct idle_state {
+	unsigned long power;	 /* power consumption in this idle state */
+};
+
+struct sched_group_energy {
+	unsigned int nr_idle_states;	/* number of idle states */
+	struct idle_state *idle_states;	/* ptr to idle state array */
+	unsigned int nr_cap_states;	/* number of capacity states */
+	struct capacity_state *cap_states; /* ptr to capacity state array */
+};
+
+unsigned long capacity_curr_of(int cpu);
+
 struct sched_group;
 
+struct eas_stats {
+	/* select_idle_sibling() stats */
+	u64 sis_attempts;
+	u64 sis_idle;
+	u64 sis_cache_affine;
+	u64 sis_suff_cap;
+	u64 sis_idle_cpu;
+	u64 sis_count;
+
+	/* select_energy_cpu_brute() stats */
+	u64 secb_attempts;
+	u64 secb_sync;
+	u64 secb_idle_bt;
+	u64 secb_insuff_cap;
+	u64 secb_no_nrg_sav;
+	u64 secb_nrg_sav;
+	u64 secb_count;
+
+	/* find_best_target() stats */
+	u64 fbt_attempts;
+	u64 fbt_no_cpu;
+	u64 fbt_no_sd;
+	u64 fbt_pref_idle;
+	u64 fbt_count;
+
+	/* cas */
+	/* select_task_rq_fair() stats */
+	u64 cas_attempts;
+	u64 cas_count;
+};
+
 struct sched_domain_shared {
 	atomic_t	ref;
 	atomic_t	nr_busy_cpus;
@@ -1148,6 +1218,8 @@
 	unsigned int ttwu_wake_remote;
 	unsigned int ttwu_move_affine;
 	unsigned int ttwu_move_balance;
+
+	struct eas_stats eas_stats;
 #endif
 #ifdef CONFIG_SCHED_DEBUG
 	char *name;
@@ -1185,6 +1257,8 @@
 
 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
 typedef int (*sched_domain_flags_f)(void);
+typedef
+const struct sched_group_energy * const(*sched_domain_energy_f)(int cpu);
 
 #define SDTL_OVERLAP	0x01
 
@@ -1198,6 +1272,7 @@
 struct sched_domain_topology_level {
 	sched_domain_mask_f mask;
 	sched_domain_flags_f sd_flags;
+	sched_domain_energy_f energy;
 	int		    flags;
 	int		    numa_level;
 	struct sd_data      data;
@@ -1343,6 +1418,70 @@
 	u64			nr_wakeups_affine_attempts;
 	u64			nr_wakeups_passive;
 	u64			nr_wakeups_idle;
+
+	/* select_idle_sibling() */
+	u64			nr_wakeups_sis_attempts;
+	u64			nr_wakeups_sis_idle;
+	u64			nr_wakeups_sis_cache_affine;
+	u64			nr_wakeups_sis_suff_cap;
+	u64			nr_wakeups_sis_idle_cpu;
+	u64			nr_wakeups_sis_count;
+
+	/* energy_aware_wake_cpu() */
+	u64			nr_wakeups_secb_attempts;
+	u64			nr_wakeups_secb_sync;
+	u64			nr_wakeups_secb_idle_bt;
+	u64			nr_wakeups_secb_insuff_cap;
+	u64			nr_wakeups_secb_no_nrg_sav;
+	u64			nr_wakeups_secb_nrg_sav;
+	u64			nr_wakeups_secb_count;
+
+	/* find_best_target() */
+	u64			nr_wakeups_fbt_attempts;
+	u64			nr_wakeups_fbt_no_cpu;
+	u64			nr_wakeups_fbt_no_sd;
+	u64			nr_wakeups_fbt_pref_idle;
+	u64			nr_wakeups_fbt_count;
+
+	/* cas */
+	/* select_task_rq_fair() */
+	u64			nr_wakeups_cas_attempts;
+	u64			nr_wakeups_cas_count;
+};
+#endif
+
+#ifdef CONFIG_SCHED_WALT
+#define RAVG_HIST_SIZE_MAX  5
+
+/* ravg represents frequency scaled cpu-demand of tasks */
+struct ravg {
+	/*
+	 * 'mark_start' marks the beginning of an event (task waking up, task
+	 * starting to execute, task being preempted) within a window
+	 *
+	 * 'sum' represents how runnable a task has been within current
+	 * window. It incorporates both running time and wait time and is
+	 * frequency scaled.
+	 *
+	 * 'sum_history' keeps track of history of 'sum' seen over previous
+	 * RAVG_HIST_SIZE windows. Windows where task was entirely sleeping are
+	 * ignored.
+	 *
+	 * 'demand' represents maximum sum seen over previous
+	 * sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency
+	 * demand for tasks.
+	 *
+	 * 'curr_window' represents task's contribution to cpu busy time
+	 * statistics (rq->curr_runnable_sum) in current window
+	 *
+	 * 'prev_window' represents task's contribution to cpu busy time
+	 * statistics (rq->prev_runnable_sum) in previous window
+	 */
+	u64 mark_start;
+	u32 sum, demand;
+	u32 sum_history[RAVG_HIST_SIZE_MAX];
+	u32 curr_window, prev_window;
+	u16 active_windows;
 };
 #endif
 
@@ -1517,6 +1656,15 @@
 	const struct sched_class *sched_class;
 	struct sched_entity se;
 	struct sched_rt_entity rt;
+#ifdef CONFIG_SCHED_WALT
+	struct ravg ravg;
+	/*
+	 * 'init_load_pct' represents the initial task load assigned to children
+	 * of this task
+	 */
+	u32 init_load_pct;
+#endif
+
 #ifdef CONFIG_CGROUP_SCHED
 	struct task_group *sched_task_group;
 #endif
@@ -3572,6 +3720,11 @@
 {
 	tsk->ioac.syscw++;
 }
+
+static inline void inc_syscfs(struct task_struct *tsk)
+{
+	tsk->ioac.syscfs++;
+}
 #else
 static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
 {
@@ -3588,6 +3741,9 @@
 static inline void inc_syscw(struct task_struct *tsk)
 {
 }
+static inline void inc_syscfs(struct task_struct *tsk)
+{
+}
 #endif
 
 #ifndef TASK_SIZE_OF
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 05e8b6e..e0e4af7 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -18,6 +18,15 @@
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
+extern unsigned int sysctl_sched_sync_hint_enable;
+extern unsigned int sysctl_sched_initial_task_util;
+extern unsigned int sysctl_sched_cstate_aware;
+#ifdef CONFIG_SCHED_WALT
+extern unsigned int sysctl_sched_use_walt_cpu_util;
+extern unsigned int sysctl_sched_use_walt_task_util;
+extern unsigned int sysctl_sched_walt_init_task_load_pct;
+extern unsigned int sysctl_sched_walt_cpu_high_irqload;
+#endif
 
 enum sched_tunable_scaling {
 	SCHED_TUNABLESCALING_NONE,
@@ -56,6 +65,22 @@
 extern unsigned int sysctl_sched_cfs_bandwidth_slice;
 #endif
 
+#ifdef CONFIG_SCHED_TUNE
+extern unsigned int sysctl_sched_cfs_boost;
+int sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write,
+				   void __user *buffer, size_t *length,
+				   loff_t *ppos);
+static inline unsigned int get_sysctl_sched_cfs_boost(void)
+{
+	return sysctl_sched_cfs_boost;
+}
+#else
+static inline unsigned int get_sysctl_sched_cfs_boost(void)
+{
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_SCHED_AUTOGROUP
 extern unsigned int sysctl_sched_autogroup_enabled;
 #endif
diff --git a/include/linux/sched_energy.h b/include/linux/sched_energy.h
new file mode 100644
index 0000000..1daf3e1
--- /dev/null
+++ b/include/linux/sched_energy.h
@@ -0,0 +1,44 @@
+#ifndef _LINUX_SCHED_ENERGY_H
+#define _LINUX_SCHED_ENERGY_H
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+/*
+ * There doesn't seem to be an NR_CPUS style max number of sched domain
+ * levels so here's an arbitrary constant one for the moment.
+ *
+ * The levels alluded to here correspond to entries in struct
+ * sched_domain_topology_level that are meant to be populated by arch
+ * specific code (topology.c).
+ */
+#define NR_SD_LEVELS 8
+
+#define SD_LEVEL0   0
+#define SD_LEVEL1   1
+#define SD_LEVEL2   2
+#define SD_LEVEL3   3
+#define SD_LEVEL4   4
+#define SD_LEVEL5   5
+#define SD_LEVEL6   6
+#define SD_LEVEL7   7
+
+/*
+ * Convenience macro for iterating through said sd levels.
+ */
+#define for_each_possible_sd_level(level)		    \
+	for (level = 0; level < NR_SD_LEVELS; level++)
+
+#ifdef CONFIG_SMP
+
+extern struct sched_group_energy *sge_array[NR_CPUS][NR_SD_LEVELS];
+
+void init_sched_energy_costs(void);
+
+#else
+
+#define init_sched_energy_costs() do { } while (0)
+
+#endif /* CONFIG_SMP */
+
+#endif
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index eb4f645..c7dff69 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -66,6 +66,7 @@
 	void		(*set_ldisc)(struct uart_port *, struct ktermios *);
 	void		(*pm)(struct uart_port *, unsigned int state,
 			      unsigned int oldstate);
+	void		(*wake_peer)(struct uart_port *);
 
 	/*
 	 * Return a string describing the type of the port
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index a0596ca0..a2f8109 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -24,6 +24,7 @@
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 
+u64 sock_gen_cookie(struct sock *sk);
 int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie);
 void sock_diag_save_cookie(struct sock *sk, __u32 *cookie);
 
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 249dafc..90d8569 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -436,6 +436,7 @@
 extern bool pm_save_wakeup_count(unsigned int count);
 extern void pm_wakep_autosleep_enabled(bool set);
 extern void pm_print_active_wakeup_sources(void);
+extern void pm_get_active_wakeup_sources(char *pending_sources, size_t max);
 
 static inline void lock_system_sleep(void)
 {
diff --git a/include/linux/task_io_accounting.h b/include/linux/task_io_accounting.h
index bdf855c..2dd338f 100644
--- a/include/linux/task_io_accounting.h
+++ b/include/linux/task_io_accounting.h
@@ -18,6 +18,8 @@
 	u64 syscr;
 	/* # of write syscalls */
 	u64 syscw;
+	/* # of fsync syscalls */
+	u64 syscfs;
 #endif /* CONFIG_TASK_XACCT */
 
 #ifdef CONFIG_TASK_IO_ACCOUNTING
diff --git a/include/linux/task_io_accounting_ops.h b/include/linux/task_io_accounting_ops.h
index 4d090f9..1b505c8 100644
--- a/include/linux/task_io_accounting_ops.h
+++ b/include/linux/task_io_accounting_ops.h
@@ -96,6 +96,7 @@
 	dst->wchar += src->wchar;
 	dst->syscr += src->syscr;
 	dst->syscw += src->syscw;
+	dst->syscfs += src->syscfs;
 }
 #else
 static inline void task_chr_io_accounting_add(struct task_io_accounting *dst,
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 62be0786..78ec2eb 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -117,6 +117,7 @@
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
+extern unsigned long tick_nohz_get_idle_calls(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 #else /* !CONFIG_NO_HZ_COMMON */
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 09168c5..361f8bf 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -249,6 +249,7 @@
 
 extern u64 ktime_get_mono_fast_ns(void);
 extern u64 ktime_get_raw_fast_ns(void);
+extern u64 ktime_get_boot_fast_ns(void);
 
 /*
  * Timespec interfaces utilizing the ktime based ones
diff --git a/include/linux/usb/class-dual-role.h b/include/linux/usb/class-dual-role.h
new file mode 100644
index 0000000..c6df223
--- /dev/null
+++ b/include/linux/usb/class-dual-role.h
@@ -0,0 +1,129 @@
+#ifndef __LINUX_CLASS_DUAL_ROLE_H__
+#define __LINUX_CLASS_DUAL_ROLE_H__
+
+#include <linux/workqueue.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct device;
+
+enum dual_role_supported_modes {
+	DUAL_ROLE_SUPPORTED_MODES_DFP_AND_UFP = 0,
+	DUAL_ROLE_SUPPORTED_MODES_DFP,
+	DUAL_ROLE_SUPPORTED_MODES_UFP,
+/*The following should be the last element*/
+	DUAL_ROLE_PROP_SUPPORTED_MODES_TOTAL,
+};
+
+enum {
+	DUAL_ROLE_PROP_MODE_UFP = 0,
+	DUAL_ROLE_PROP_MODE_DFP,
+	DUAL_ROLE_PROP_MODE_NONE,
+/*The following should be the last element*/
+	DUAL_ROLE_PROP_MODE_TOTAL,
+};
+
+enum {
+	DUAL_ROLE_PROP_PR_SRC = 0,
+	DUAL_ROLE_PROP_PR_SNK,
+	DUAL_ROLE_PROP_PR_NONE,
+/*The following should be the last element*/
+	DUAL_ROLE_PROP_PR_TOTAL,
+
+};
+
+enum {
+	DUAL_ROLE_PROP_DR_HOST = 0,
+	DUAL_ROLE_PROP_DR_DEVICE,
+	DUAL_ROLE_PROP_DR_NONE,
+/*The following should be the last element*/
+	DUAL_ROLE_PROP_DR_TOTAL,
+};
+
+enum {
+	DUAL_ROLE_PROP_VCONN_SUPPLY_NO = 0,
+	DUAL_ROLE_PROP_VCONN_SUPPLY_YES,
+/*The following should be the last element*/
+	DUAL_ROLE_PROP_VCONN_SUPPLY_TOTAL,
+};
+
+enum dual_role_property {
+	DUAL_ROLE_PROP_SUPPORTED_MODES = 0,
+	DUAL_ROLE_PROP_MODE,
+	DUAL_ROLE_PROP_PR,
+	DUAL_ROLE_PROP_DR,
+	DUAL_ROLE_PROP_VCONN_SUPPLY,
+};
+
+struct dual_role_phy_instance;
+
+/* Description of typec port */
+struct dual_role_phy_desc {
+	/* /sys/class/dual_role_usb/<name>/ */
+	const char *name;
+	enum dual_role_supported_modes supported_modes;
+	enum dual_role_property *properties;
+	size_t num_properties;
+
+	/* Callback for "cat /sys/class/dual_role_usb/<name>/<property>" */
+	int (*get_property)(struct dual_role_phy_instance *dual_role,
+			     enum dual_role_property prop,
+			     unsigned int *val);
+	/* Callback for "echo <value> >
+	 *                      /sys/class/dual_role_usb/<name>/<property>" */
+	int (*set_property)(struct dual_role_phy_instance *dual_role,
+			     enum dual_role_property prop,
+			     const unsigned int *val);
+	/* Decides whether userspace can change a specific property */
+	int (*property_is_writeable)(struct dual_role_phy_instance *dual_role,
+				      enum dual_role_property prop);
+};
+
+struct dual_role_phy_instance {
+	const struct dual_role_phy_desc *desc;
+
+	/* Driver private data */
+	void *drv_data;
+
+	struct device dev;
+	struct work_struct changed_work;
+};
+
+#if IS_ENABLED(CONFIG_DUAL_ROLE_USB_INTF)
+extern void dual_role_instance_changed(struct dual_role_phy_instance
+				       *dual_role);
+extern struct dual_role_phy_instance *__must_check
+devm_dual_role_instance_register(struct device *parent,
+				 const struct dual_role_phy_desc *desc);
+extern void devm_dual_role_instance_unregister(struct device *dev,
+					       struct dual_role_phy_instance
+					       *dual_role);
+extern int dual_role_get_property(struct dual_role_phy_instance *dual_role,
+				  enum dual_role_property prop,
+				  unsigned int *val);
+extern int dual_role_set_property(struct dual_role_phy_instance *dual_role,
+				  enum dual_role_property prop,
+				  const unsigned int *val);
+extern int dual_role_property_is_writeable(struct dual_role_phy_instance
+					   *dual_role,
+					   enum dual_role_property prop);
+extern void *dual_role_get_drvdata(struct dual_role_phy_instance *dual_role);
+#else /* CONFIG_DUAL_ROLE_USB_INTF */
+static inline void dual_role_instance_changed(struct dual_role_phy_instance
+				       *dual_role){}
+static inline struct dual_role_phy_instance *__must_check
+devm_dual_role_instance_register(struct device *parent,
+				 const struct dual_role_phy_desc *desc)
+{
+	return ERR_PTR(-ENOSYS);
+}
+static inline void devm_dual_role_instance_unregister(struct device *dev,
+					       struct dual_role_phy_instance
+					       *dual_role){}
+static inline void *dual_role_get_drvdata(struct dual_role_phy_instance
+		*dual_role)
+{
+	return ERR_PTR(-ENOSYS);
+}
+#endif /* CONFIG_DUAL_ROLE_USB_INTF */
+#endif /* __LINUX_CLASS_DUAL_ROLE_H__ */
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 667d204..74f97ce 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -584,6 +584,7 @@
 	struct config_group group;
 	struct list_head cfs_list;
 	struct usb_function_driver *fd;
+	struct usb_function *f;
 	int (*set_inst_name)(struct usb_function_instance *inst,
 			      const char *name);
 	void (*free_func_inst)(struct usb_function_instance *inst);
diff --git a/include/linux/usb/f_accessory.h b/include/linux/usb/f_accessory.h
new file mode 100644
index 0000000..ebe3c4d
--- /dev/null
+++ b/include/linux/usb/f_accessory.h
@@ -0,0 +1,23 @@
+/*
+ * Gadget Function Driver for Android USB accessories
+ *
+ * Copyright (C) 2011 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __LINUX_USB_F_ACCESSORY_H
+#define __LINUX_USB_F_ACCESSORY_H
+
+#include <uapi/linux/usb/f_accessory.h>
+
+#endif /* __LINUX_USB_F_ACCESSORY_H */
diff --git a/include/linux/usb/f_mtp.h b/include/linux/usb/f_mtp.h
new file mode 100644
index 0000000..4e84177
--- /dev/null
+++ b/include/linux/usb/f_mtp.h
@@ -0,0 +1,23 @@
+/*
+ * Gadget Function Driver for MTP
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __LINUX_USB_F_MTP_H
+#define __LINUX_USB_F_MTP_H
+
+#include <uapi/linux/usb/f_mtp.h>
+
+#endif /* __LINUX_USB_F_MTP_H */
diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
new file mode 100644
index 0000000..d84d8c3
--- /dev/null
+++ b/include/linux/wakeup_reason.h
@@ -0,0 +1,32 @@
+/*
+ * include/linux/wakeup_reason.h
+ *
+ * Logs the reason which caused the kernel to resume
+ * from the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_WAKEUP_REASON_H
+#define _LINUX_WAKEUP_REASON_H
+
+#define MAX_SUSPEND_ABORT_LEN 256
+
+void log_wakeup_reason(int irq);
+int check_wakeup_reason(int irq);
+
+#ifdef CONFIG_SUSPEND
+void log_suspend_abort_reason(const char *fmt, ...);
+#else
+static inline void log_suspend_abort_reason(const char *fmt, ...) { }
+#endif
+
+#endif /* _LINUX_WAKEUP_REASON_H */
diff --git a/include/linux/wlan_plat.h b/include/linux/wlan_plat.h
new file mode 100644
index 0000000..8e8b06f
--- /dev/null
+++ b/include/linux/wlan_plat.h
@@ -0,0 +1,30 @@
+/* include/linux/wlan_plat.h
+ *
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _LINUX_WLAN_PLAT_H_
+#define _LINUX_WLAN_PLAT_H_
+
+#define WLAN_PLAT_NODFS_FLAG	0x01
+
+struct wifi_platform_data {
+	int (*set_power)(int val);
+	int (*set_reset)(int val);
+	int (*set_carddetect)(int val);
+	void *(*mem_prealloc)(int section, unsigned long size);
+	int (*get_mac_addr)(unsigned char *buf);
+	int (*get_wake_irq)(void);
+	void *(*get_country_code)(char *ccode, u32 flags);
+};
+
+#endif
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index b8ee8a1..858f308 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -244,6 +244,8 @@
 void addrconf_prefix_rcv(struct net_device *dev,
 			 u8 *opt, int len, bool sllao);
 
+u32 addrconf_rt_table(const struct net_device *dev, u32 default_table);
+
 /*
  *	anycast prototypes (anycast.c)
  */
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 456e4a6..8dbfdf7 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -8,6 +8,11 @@
 #include <net/flow.h>
 #include <net/rtnetlink.h>
 
+struct fib_kuid_range {
+	kuid_t start;
+	kuid_t end;
+};
+
 struct fib_rule {
 	struct list_head	list;
 	int			iifindex;
@@ -30,6 +35,7 @@
 	int			suppress_prefixlen;
 	char			iifname[IFNAMSIZ];
 	char			oifname[IFNAMSIZ];
+	struct fib_kuid_range	uid_range;
 	struct rcu_head		rcu;
 };
 
@@ -92,7 +98,8 @@
 	[FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \
 	[FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \
 	[FRA_GOTO]	= { .type = NLA_U32 }, \
-	[FRA_L3MDEV]	= { .type = NLA_U8 }
+	[FRA_L3MDEV]	= { .type = NLA_U8 }, \
+	[FRA_UID_RANGE]	= { .len = sizeof(struct fib_rule_uid_range) }
 
 static inline void fib_rule_get(struct fib_rule *rule)
 {
diff --git a/include/net/flow.h b/include/net/flow.h
index 035aa77..6bbbca8 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -11,6 +11,7 @@
 #include <linux/in6.h>
 #include <linux/atomic.h>
 #include <net/flow_dissector.h>
+#include <linux/uidgid.h>
 
 /*
  * ifindex generation is per-net namespace, and loopback is
@@ -37,6 +38,7 @@
 #define FLOWI_FLAG_SKIP_NH_OIF		0x04
 	__u32	flowic_secid;
 	struct flowi_tunnel flowic_tun_key;
+	kuid_t  flowic_uid;
 };
 
 union flowi_uli {
@@ -74,6 +76,7 @@
 #define flowi4_flags		__fl_common.flowic_flags
 #define flowi4_secid		__fl_common.flowic_secid
 #define flowi4_tun_key		__fl_common.flowic_tun_key
+#define flowi4_uid		__fl_common.flowic_uid
 
 	/* (saddr,daddr) must be grouped, same order as in IP header */
 	__be32			saddr;
@@ -93,7 +96,8 @@
 				      __u32 mark, __u8 tos, __u8 scope,
 				      __u8 proto, __u8 flags,
 				      __be32 daddr, __be32 saddr,
-				      __be16 dport, __be16 sport)
+				      __be16 dport, __be16 sport,
+				      kuid_t uid)
 {
 	fl4->flowi4_oif = oif;
 	fl4->flowi4_iif = LOOPBACK_IFINDEX;
@@ -104,6 +108,7 @@
 	fl4->flowi4_flags = flags;
 	fl4->flowi4_secid = 0;
 	fl4->flowi4_tun_key.tun_id = 0;
+	fl4->flowi4_uid = uid;
 	fl4->daddr = daddr;
 	fl4->saddr = saddr;
 	fl4->fl4_dport = dport;
@@ -131,6 +136,7 @@
 #define flowi6_flags		__fl_common.flowic_flags
 #define flowi6_secid		__fl_common.flowic_secid
 #define flowi6_tun_key		__fl_common.flowic_tun_key
+#define flowi6_uid		__fl_common.flowic_uid
 	struct in6_addr		daddr;
 	struct in6_addr		saddr;
 	/* Note: flowi6_tos is encoded in flowlabel, too. */
@@ -176,6 +182,7 @@
 #define flowi_flags	u.__fl_common.flowic_flags
 #define flowi_secid	u.__fl_common.flowic_secid
 #define flowi_tun_key	u.__fl_common.flowic_tun_key
+#define flowi_uid	u.__fl_common.flowic_uid
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
diff --git a/include/net/ip.h b/include/net/ip.h
index f06cd30..1581986 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -180,6 +180,7 @@
 				/* -1 if not needed */ 
 	int	    bound_dev_if;
 	u8  	    tos;
+	kuid_t	    uid;
 }; 
 
 #define IP_REPLY_ARG_NOSRCCHECK 1
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 2c43993..4341731 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -142,9 +142,10 @@
 		  const struct in6_addr *gwaddr);
 
 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif,
-		     u32 mark);
+		     u32 mark, kuid_t uid);
 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu);
-void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark);
+void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
+		  kuid_t uid);
 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
 			    u32 mark);
 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk);
diff --git a/include/net/route.h b/include/net/route.h
index b8488ef..2702b7a 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -154,7 +154,7 @@
 	flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
 			   RT_SCOPE_UNIVERSE, proto,
 			   sk ? inet_sk_flowi_flags(sk) : 0,
-			   daddr, saddr, dport, sport);
+			   daddr, saddr, dport, sport, sock_net_uid(net, sk));
 	if (sk)
 		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
 	return ip_route_output_flow(net, fl4, sk);
@@ -270,7 +270,8 @@
 		flow_flags |= FLOWI_FLAG_ANYSRC;
 
 	flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
-			   protocol, flow_flags, dst, src, dport, sport);
+			   protocol, flow_flags, dst, src, dport, sport,
+			   sk->sk_uid);
 }
 
 static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
diff --git a/include/net/sock.h b/include/net/sock.h
index 1163086..d1177e6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -420,6 +420,7 @@
 	u32			sk_max_ack_backlog;
 	__u32			sk_priority;
 	__u32			sk_mark;
+	kuid_t			sk_uid;
 	struct pid		*sk_peer_pid;
 	const struct cred	*sk_peer_cred;
 	long			sk_rcvtimeo;
@@ -1657,6 +1658,7 @@
 	sk->sk_wq = parent->wq;
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
+	sk->sk_uid = SOCK_INODE(parent)->i_uid;
 	security_sock_graft(sk, parent);
 	write_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1664,6 +1666,11 @@
 kuid_t sock_i_uid(struct sock *sk);
 unsigned long sock_i_ino(struct sock *sk);
 
+static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
+{
+	return sk ? sk->sk_uid : make_kuid(net->user_ns, 0);
+}
+
 static inline u32 net_tx_rndhash(void)
 {
 	u32 v = prandom_u32();
diff --git a/include/net/tcp.h b/include/net/tcp.h
index fed2a78..e4b297a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -271,6 +271,7 @@
 extern int sysctl_tcp_invalid_ratelimit;
 extern int sysctl_tcp_pacing_ss_ratio;
 extern int sysctl_tcp_pacing_ca_ratio;
+extern int sysctl_tcp_default_init_rwnd;
 
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
diff --git a/include/trace/events/android_fs.h b/include/trace/events/android_fs.h
new file mode 100644
index 0000000..4950953
--- /dev/null
+++ b/include/trace/events/android_fs.h
@@ -0,0 +1,65 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM android_fs
+
+#if !defined(_TRACE_ANDROID_FS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ANDROID_FS_H
+
+#include <linux/tracepoint.h>
+#include <trace/events/android_fs_template.h>
+
+DEFINE_EVENT(android_fs_data_start_template, android_fs_dataread_start,
+	TP_PROTO(struct inode *inode, loff_t offset, int bytes,
+		 pid_t pid, char *pathname, char *command),
+	TP_ARGS(inode, offset, bytes, pid, pathname, command));
+
+DEFINE_EVENT(android_fs_data_end_template, android_fs_dataread_end,
+	TP_PROTO(struct inode *inode, loff_t offset, int bytes),
+	TP_ARGS(inode, offset, bytes));
+
+DEFINE_EVENT(android_fs_data_start_template, android_fs_datawrite_start,
+	TP_PROTO(struct inode *inode, loff_t offset, int bytes,
+		 pid_t pid, char *pathname, char *command),
+	TP_ARGS(inode, offset, bytes, pid, pathname, command));
+
+DEFINE_EVENT(android_fs_data_end_template, android_fs_datawrite_end,
+	TP_PROTO(struct inode *inode, loff_t offset, int bytes),
+	     TP_ARGS(inode, offset, bytes));
+
+#endif /* _TRACE_ANDROID_FS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
+#ifndef ANDROID_FSTRACE_GET_PATHNAME
+#define ANDROID_FSTRACE_GET_PATHNAME
+
+/* Sizes an on-stack array, so careful if sizing this up ! */
+#define MAX_TRACE_PATHBUF_LEN	256
+
+static inline char *
+android_fstrace_get_pathname(char *buf, int buflen, struct inode *inode)
+{
+	char *path;
+	struct dentry *d;
+
+	/*
+	 * d_obtain_alias() will either iput() if it locates an existing
+	 * dentry or transfer the reference to the new dentry created.
+	 * So get an extra reference here.
+	 */
+	ihold(inode);
+	d = d_obtain_alias(inode);
+	if (likely(!IS_ERR(d))) {
+		path = dentry_path_raw(d, buf, buflen);
+		if (unlikely(IS_ERR(path))) {
+			strcpy(buf, "ERROR");
+			path = buf;
+		}
+		dput(d);
+	} else {
+		strcpy(buf, "ERROR");
+		path = buf;
+	}
+	return path;
+}
+#endif
diff --git a/include/trace/events/android_fs_template.h b/include/trace/events/android_fs_template.h
new file mode 100644
index 0000000..b23d17b
--- /dev/null
+++ b/include/trace/events/android_fs_template.h
@@ -0,0 +1,64 @@
+#if !defined(_TRACE_ANDROID_FS_TEMPLATE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ANDROID_FS_TEMPLATE_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(android_fs_data_start_template,
+	TP_PROTO(struct inode *inode, loff_t offset, int bytes,
+		 pid_t pid, char *pathname, char *command),
+	TP_ARGS(inode, offset, bytes, pid, pathname, command),
+	TP_STRUCT__entry(
+		__string(pathbuf, pathname);
+		__field(loff_t,	offset);
+		__field(int,	bytes);
+		__field(loff_t,	i_size);
+		__string(cmdline, command);
+		__field(pid_t,	pid);
+		__field(ino_t,	ino);
+	),
+	TP_fast_assign(
+		{
+			/*
+			 * Replace the spaces in filenames and cmdlines
+			 * because this screws up the tooling that parses
+			 * the traces.
+			 */
+			__assign_str(pathbuf, pathname);
+			(void)strreplace(__get_str(pathbuf), ' ', '_');
+			__entry->offset		= offset;
+			__entry->bytes		= bytes;
+			__entry->i_size		= i_size_read(inode);
+			__assign_str(cmdline, command);
+			(void)strreplace(__get_str(cmdline), ' ', '_');
+			__entry->pid		= pid;
+			__entry->ino		= inode->i_ino;
+		}
+	),
+	TP_printk("entry_name %s, offset %llu, bytes %d, cmdline %s,"
+		  " pid %d, i_size %llu, ino %lu",
+		  __get_str(pathbuf), __entry->offset, __entry->bytes,
+		  __get_str(cmdline), __entry->pid, __entry->i_size,
+		  (unsigned long) __entry->ino)
+);
+
+DECLARE_EVENT_CLASS(android_fs_data_end_template,
+	TP_PROTO(struct inode *inode, loff_t offset, int bytes),
+	TP_ARGS(inode, offset, bytes),
+	TP_STRUCT__entry(
+		__field(ino_t,	ino);
+		__field(loff_t,	offset);
+		__field(int,	bytes);
+	),
+	TP_fast_assign(
+		{
+			__entry->ino		= inode->i_ino;
+			__entry->offset		= offset;
+			__entry->bytes		= bytes;
+		}
+	),
+	TP_printk("ino %lu, offset %llu, bytes %d",
+		  (unsigned long) __entry->ino,
+		  __entry->offset, __entry->bytes)
+);
+
+#endif /* _TRACE_ANDROID_FS_TEMPLATE_H */
diff --git a/include/trace/events/cpufreq_interactive.h b/include/trace/events/cpufreq_interactive.h
new file mode 100644
index 0000000..faecc0b
--- /dev/null
+++ b/include/trace/events/cpufreq_interactive.h
@@ -0,0 +1,112 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cpufreq_interactive
+
+#if !defined(_TRACE_CPUFREQ_INTERACTIVE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CPUFREQ_INTERACTIVE_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(set,
+	TP_PROTO(u32 cpu_id, unsigned long targfreq,
+		 unsigned long actualfreq),
+	TP_ARGS(cpu_id, targfreq, actualfreq),
+
+	TP_STRUCT__entry(
+		__field(u32, cpu_id)
+		__field(unsigned long, targfreq)
+		__field(unsigned long, actualfreq)
+	),
+
+	TP_fast_assign(
+		__entry->cpu_id = (u32)cpu_id;
+		__entry->targfreq = targfreq;
+		__entry->actualfreq = actualfreq;
+	),
+
+	TP_printk("cpu=%u targ=%lu actual=%lu",
+		__entry->cpu_id, __entry->targfreq,
+		__entry->actualfreq)
+);
+
+DEFINE_EVENT(set, cpufreq_interactive_setspeed,
+	TP_PROTO(u32 cpu_id, unsigned long targfreq,
+		 unsigned long actualfreq),
+	TP_ARGS(cpu_id, targfreq, actualfreq)
+);
+
+DECLARE_EVENT_CLASS(loadeval,
+	TP_PROTO(unsigned long cpu_id, unsigned long load,
+		 unsigned long curtarg, unsigned long curactual,
+		 unsigned long newtarg),
+	TP_ARGS(cpu_id, load, curtarg, curactual, newtarg),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, cpu_id)
+		__field(unsigned long, load)
+		__field(unsigned long, curtarg)
+		__field(unsigned long, curactual)
+		__field(unsigned long, newtarg)
+	),
+
+	TP_fast_assign(
+		__entry->cpu_id = cpu_id;
+		__entry->load = load;
+		__entry->curtarg = curtarg;
+		__entry->curactual = curactual;
+		__entry->newtarg = newtarg;
+	),
+
+	TP_printk("cpu=%lu load=%lu cur=%lu actual=%lu targ=%lu",
+		  __entry->cpu_id, __entry->load, __entry->curtarg,
+		  __entry->curactual, __entry->newtarg)
+);
+
+DEFINE_EVENT(loadeval, cpufreq_interactive_target,
+	TP_PROTO(unsigned long cpu_id, unsigned long load,
+		 unsigned long curtarg, unsigned long curactual,
+		 unsigned long newtarg),
+	TP_ARGS(cpu_id, load, curtarg, curactual, newtarg)
+);
+
+DEFINE_EVENT(loadeval, cpufreq_interactive_already,
+	TP_PROTO(unsigned long cpu_id, unsigned long load,
+		 unsigned long curtarg, unsigned long curactual,
+		 unsigned long newtarg),
+	TP_ARGS(cpu_id, load, curtarg, curactual, newtarg)
+);
+
+DEFINE_EVENT(loadeval, cpufreq_interactive_notyet,
+	TP_PROTO(unsigned long cpu_id, unsigned long load,
+		 unsigned long curtarg, unsigned long curactual,
+		 unsigned long newtarg),
+	TP_ARGS(cpu_id, load, curtarg, curactual, newtarg)
+);
+
+TRACE_EVENT(cpufreq_interactive_boost,
+	TP_PROTO(const char *s),
+	TP_ARGS(s),
+	TP_STRUCT__entry(
+		__string(s, s)
+	),
+	TP_fast_assign(
+		__assign_str(s, s);
+	),
+	TP_printk("%s", __get_str(s))
+);
+
+TRACE_EVENT(cpufreq_interactive_unboost,
+	TP_PROTO(const char *s),
+	TP_ARGS(s),
+	TP_STRUCT__entry(
+		__string(s, s)
+	),
+	TP_fast_assign(
+		__assign_str(s, s);
+	),
+	TP_printk("%s", __get_str(s))
+);
+
+#endif /* _TRACE_CPUFREQ_INTERACTIVE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/cpufreq_sched.h b/include/trace/events/cpufreq_sched.h
new file mode 100644
index 0000000..a46cd08
--- /dev/null
+++ b/include/trace/events/cpufreq_sched.h
@@ -0,0 +1,87 @@
+/*
+ *  Copyright (C)  2015 Steve Muckle <smuckle@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cpufreq_sched
+
+#if !defined(_TRACE_CPUFREQ_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CPUFREQ_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(cpufreq_sched_throttled,
+	    TP_PROTO(unsigned int rem),
+	    TP_ARGS(rem),
+	    TP_STRUCT__entry(
+		    __field(	unsigned int,	rem)
+	    ),
+	    TP_fast_assign(
+		    __entry->rem = rem;
+	    ),
+	    TP_printk("throttled - %d usec remaining", __entry->rem)
+);
+
+TRACE_EVENT(cpufreq_sched_request_opp,
+	    TP_PROTO(int cpu,
+		     unsigned long capacity,
+		     unsigned int freq_new,
+		     unsigned int requested_freq),
+	    TP_ARGS(cpu, capacity, freq_new, requested_freq),
+	    TP_STRUCT__entry(
+		    __field(	int,		cpu)
+		    __field(	unsigned long,	capacity)
+		    __field(	unsigned int,	freq_new)
+		    __field(	unsigned int,	requested_freq)
+		    ),
+	    TP_fast_assign(
+		    __entry->cpu = cpu;
+		    __entry->capacity = capacity;
+		    __entry->freq_new = freq_new;
+		    __entry->requested_freq = requested_freq;
+		    ),
+	    TP_printk("cpu %d cap change, cluster cap request %ld => OPP %d "
+		      "(cur %d)",
+		      __entry->cpu, __entry->capacity, __entry->freq_new,
+		      __entry->requested_freq)
+);
+
+TRACE_EVENT(cpufreq_sched_update_capacity,
+	    TP_PROTO(int cpu,
+		     bool request,
+		     struct sched_capacity_reqs *scr,
+		     unsigned long new_capacity),
+	    TP_ARGS(cpu, request, scr, new_capacity),
+	    TP_STRUCT__entry(
+		    __field(	int,		cpu)
+		    __field(	bool,		request)
+		    __field(	unsigned long,	cfs)
+		    __field(	unsigned long,	rt)
+		    __field(	unsigned long,	dl)
+		    __field(	unsigned long,	total)
+		    __field(	unsigned long,	new_total)
+	    ),
+	    TP_fast_assign(
+		    __entry->cpu = cpu;
+		    __entry->request = request;
+		    __entry->cfs = scr->cfs;
+		    __entry->rt = scr->rt;
+		    __entry->dl = scr->dl;
+		    __entry->total = scr->total;
+		    __entry->new_total = new_capacity;
+	    ),
+	    TP_printk("cpu=%d set_cap=%d cfs=%ld rt=%ld dl=%ld old_tot=%ld "
+		      "new_tot=%ld",
+		      __entry->cpu, __entry->request, __entry->cfs, __entry->rt,
+		      __entry->dl, __entry->total, __entry->new_total)
+);
+
+#endif /* _TRACE_CPUFREQ_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/gpu.h b/include/trace/events/gpu.h
new file mode 100644
index 0000000..7e15cdf
--- /dev/null
+++ b/include/trace/events/gpu.h
@@ -0,0 +1,143 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gpu
+
+#if !defined(_TRACE_GPU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GPU_H
+
+#include <linux/tracepoint.h>
+#include <linux/time.h>
+
+#define show_secs_from_ns(ns) \
+	({ \
+		u64 t = ns + (NSEC_PER_USEC / 2); \
+		do_div(t, NSEC_PER_SEC); \
+		t; \
+	})
+
+#define show_usecs_from_ns(ns) \
+	({ \
+		u64 t = ns + (NSEC_PER_USEC / 2) ; \
+		u32 rem; \
+		do_div(t, NSEC_PER_USEC); \
+		rem = do_div(t, USEC_PER_SEC); \
+	})
+
+/*
+ * The gpu_sched_switch event indicates that a switch from one GPU context to
+ * another occurred on one of the GPU hardware blocks.
+ *
+ * The gpu_name argument identifies the GPU hardware block.  Each independently
+ * scheduled GPU hardware block should have a different name.  This may be used
+ * in different ways for different GPUs.  For example, if a GPU includes
+ * multiple processing cores it may use names "GPU 0", "GPU 1", etc.  If a GPU
+ * includes a separately scheduled 2D and 3D hardware block, it might use the
+ * names "2D" and "3D".
+ *
+ * The timestamp argument is the timestamp at which the switch occurred on the
+ * GPU. These timestamps are in units of nanoseconds and must use
+ * approximately the same time as sched_clock, though they need not come from
+ * any CPU clock. The timestamps for a single hardware block must be
+ * monotonically nondecreasing.  This means that if a variable compensation
+ * offset is used to translate from some other clock to the sched_clock, then
+ * care must be taken when increasing that offset, and doing so may result in
+ * multiple events with the same timestamp.
+ *
+ * The next_ctx_id argument identifies the next context that was running on
+ * the GPU hardware block.  A value of 0 indicates that the hardware block
+ * will be idle.
+ *
+ * The next_prio argument indicates the priority of the next context at the
+ * time of the event.  The exact numeric values may mean different things for
+ * different GPUs, but they should follow the rule that lower values indicate a
+ * higher priority.
+ *
+ * The next_job_id argument identifies the batch of work that the GPU will be
+ * working on.  This should correspond to a job_id that was previously traced
+ * as a gpu_job_enqueue event when the batch of work was created.
+ */
+TRACE_EVENT(gpu_sched_switch,
+
+	TP_PROTO(const char *gpu_name, u64 timestamp,
+		u32 next_ctx_id, s32 next_prio, u32 next_job_id),
+
+	TP_ARGS(gpu_name, timestamp, next_ctx_id, next_prio, next_job_id),
+
+	TP_STRUCT__entry(
+		__string(       gpu_name,       gpu_name        )
+		__field(        u64,            timestamp       )
+		__field(        u32,            next_ctx_id     )
+		__field(        s32,            next_prio       )
+		__field(        u32,            next_job_id     )
+	),
+
+	TP_fast_assign(
+		__assign_str(gpu_name, gpu_name);
+		__entry->timestamp = timestamp;
+		__entry->next_ctx_id = next_ctx_id;
+		__entry->next_prio = next_prio;
+		__entry->next_job_id = next_job_id;
+	),
+
+	TP_printk("gpu_name=%s ts=%llu.%06lu next_ctx_id=%lu next_prio=%ld "
+		"next_job_id=%lu",
+		__get_str(gpu_name),
+		(unsigned long long)show_secs_from_ns(__entry->timestamp),
+		(unsigned long)show_usecs_from_ns(__entry->timestamp),
+		(unsigned long)__entry->next_ctx_id,
+		(long)__entry->next_prio,
+		(unsigned long)__entry->next_job_id)
+);
+
+/*
+ * The gpu_job_enqueue event indicates that a batch of work has been queued up
+ * to be processed by the GPU.  This event is not intended to indicate that
+ * the batch of work has been submitted to the GPU hardware, but rather that
+ * it has been submitted to the GPU kernel driver.
+ *
+ * This event should be traced on the thread that initiated the work being
+ * queued.  For example, if a batch of work is submitted to the kernel by a
+ * userland thread, the event should be traced on that thread.
+ *
+ * The ctx_id field identifies the GPU context in which the batch of work
+ * being queued is to be run.
+ *
+ * The job_id field identifies the batch of work being queued within the given
+ * GPU context.  The first batch of work submitted for a given GPU context
+ * should have a job_id of 0, and each subsequent batch of work should
+ * increment the job_id by 1.
+ *
+ * The type field identifies the type of the job being enqueued.  The job
+ * types may be different for different GPU hardware.  For example, a GPU may
+ * differentiate between "2D", "3D", and "compute" jobs.
+ */
+TRACE_EVENT(gpu_job_enqueue,
+
+	TP_PROTO(u32 ctx_id, u32 job_id, const char *type),
+
+	TP_ARGS(ctx_id, job_id, type),
+
+	TP_STRUCT__entry(
+		__field(        u32,            ctx_id          )
+		__field(        u32,            job_id          )
+		__string(       type,           type            )
+	),
+
+	TP_fast_assign(
+		__entry->ctx_id = ctx_id;
+		__entry->job_id = job_id;
+		__assign_str(type, type);
+	),
+
+	TP_printk("ctx_id=%lu job_id=%lu type=%s",
+		(unsigned long)__entry->ctx_id,
+		(unsigned long)__entry->job_id,
+		__get_str(type))
+);
+
+#undef show_secs_from_ns
+#undef show_usecs_from_ns
+
+#endif /* _TRACE_GPU_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 49cc7c3..89d009e 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -57,7 +57,7 @@
 		__entry->gso_type = skb_shinfo(skb)->gso_type;
 	),
 
-	TP_printk("dev=%s queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x",
+	TP_printk("dev=%s queue_mapping=%u skbaddr=%pK vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x",
 		  __get_str(name), __entry->queue_mapping, __entry->skbaddr,
 		  __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci,
 		  __entry->protocol, __entry->ip_summed, __entry->len,
@@ -90,7 +90,7 @@
 		__assign_str(name, dev->name);
 	),
 
-	TP_printk("dev=%s skbaddr=%p len=%u rc=%d",
+	TP_printk("dev=%s skbaddr=%pK len=%u rc=%d",
 		__get_str(name), __entry->skbaddr, __entry->len, __entry->rc)
 );
 
@@ -112,7 +112,7 @@
 		__assign_str(name, skb->dev->name);
 	),
 
-	TP_printk("dev=%s skbaddr=%p len=%u",
+	TP_printk("dev=%s skbaddr=%pK len=%u",
 		__get_str(name), __entry->skbaddr, __entry->len)
 )
 
@@ -191,7 +191,7 @@
 		__entry->gso_type = skb_shinfo(skb)->gso_type;
 	),
 
-	TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x",
+	TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%pK vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x",
 		  __get_str(name), __entry->napi_id, __entry->queue_mapping,
 		  __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto,
 		  __entry->vlan_tci, __entry->protocol, __entry->ip_summed,
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 54e3aad..ec6f815 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -147,6 +147,38 @@
 	TP_ARGS(frequency, cpu_id)
 );
 
+TRACE_EVENT(cpu_frequency_limits,
+
+	TP_PROTO(unsigned int max_freq, unsigned int min_freq,
+		unsigned int cpu_id),
+
+	TP_ARGS(max_freq, min_freq, cpu_id),
+
+	TP_STRUCT__entry(
+		__field(	u32,		min_freq	)
+		__field(	u32,		max_freq	)
+		__field(	u32,		cpu_id		)
+	),
+
+	TP_fast_assign(
+		__entry->min_freq = min_freq;
+		__entry->max_freq = max_freq;
+		__entry->cpu_id = cpu_id;
+	),
+
+	TP_printk("min=%lu max=%lu cpu_id=%lu",
+		  (unsigned long)__entry->min_freq,
+		  (unsigned long)__entry->max_freq,
+		  (unsigned long)__entry->cpu_id)
+);
+
+DEFINE_EVENT(cpu, cpu_capacity,
+
+	TP_PROTO(unsigned int capacity, unsigned int cpu_id),
+
+	TP_ARGS(capacity, cpu_id)
+);
+
 TRACE_EVENT(device_pm_callback_start,
 
 	TP_PROTO(struct device *dev, const char *pm_ops, int event),
@@ -300,6 +332,25 @@
 	TP_ARGS(name, state, cpu_id)
 );
 
+TRACE_EVENT(clock_set_parent,
+
+	TP_PROTO(const char *name, const char *parent_name),
+
+	TP_ARGS(name, parent_name),
+
+	TP_STRUCT__entry(
+		__string(       name,           name            )
+		__string(       parent_name,    parent_name     )
+	),
+
+	TP_fast_assign(
+		__assign_str(name, name);
+		__assign_str(parent_name, parent_name);
+	),
+
+	TP_printk("%s parent=%s", __get_str(name), __get_str(parent_name))
+);
+
 /*
  * The power domain events are used for power domains transitions
  */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9b90c57..da97ab5 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -219,7 +219,7 @@
 DEFINE_EVENT(sched_process_template, sched_process_free,
 	     TP_PROTO(struct task_struct *p),
 	     TP_ARGS(p));
-	     
+
 
 /*
  * Tracepoint for a task exiting:
@@ -374,6 +374,30 @@
 	     TP_ARGS(tsk, delay));
 
 /*
+ * Tracepoint for recording the cause of uninterruptible sleep.
+ */
+TRACE_EVENT(sched_blocked_reason,
+
+	TP_PROTO(struct task_struct *tsk),
+
+	TP_ARGS(tsk),
+
+	TP_STRUCT__entry(
+		__field( pid_t,	pid	)
+		__field( void*, caller	)
+		__field( bool, io_wait	)
+	),
+
+	TP_fast_assign(
+		__entry->pid	= tsk->pid;
+		__entry->caller = (void*)get_wchan(tsk);
+		__entry->io_wait = tsk->in_iowait;
+	),
+
+	TP_printk("pid=%d iowait=%d caller=%pS", __entry->pid, __entry->io_wait, __entry->caller)
+);
+
+/*
  * Tracepoint for accounting runtime (time the task is executing
  * on a CPU).
  */
@@ -562,6 +586,581 @@
 
 	TP_printk("cpu=%d", __entry->cpu)
 );
+
+TRACE_EVENT(sched_contrib_scale_f,
+
+	TP_PROTO(int cpu, unsigned long freq_scale_factor,
+		 unsigned long cpu_scale_factor),
+
+	TP_ARGS(cpu, freq_scale_factor, cpu_scale_factor),
+
+	TP_STRUCT__entry(
+		__field(int, cpu)
+		__field(unsigned long, freq_scale_factor)
+		__field(unsigned long, cpu_scale_factor)
+	),
+
+	TP_fast_assign(
+		__entry->cpu = cpu;
+		__entry->freq_scale_factor = freq_scale_factor;
+		__entry->cpu_scale_factor = cpu_scale_factor;
+	),
+
+	TP_printk("cpu=%d freq_scale_factor=%lu cpu_scale_factor=%lu",
+		  __entry->cpu, __entry->freq_scale_factor,
+		  __entry->cpu_scale_factor)
+);
+
+#ifdef CONFIG_SMP
+
+#ifdef CONFIG_SCHED_WALT
+extern unsigned int sysctl_sched_use_walt_cpu_util;
+extern unsigned int sysctl_sched_use_walt_task_util;
+extern unsigned int walt_ravg_window;
+extern unsigned int walt_disabled;
+#endif
+
+/*
+ * Tracepoint for accounting sched averages for tasks.
+ */
+TRACE_EVENT(sched_load_avg_task,
+
+	TP_PROTO(struct task_struct *tsk, struct sched_avg *avg, void *_ravg),
+
+	TP_ARGS(tsk, avg, _ravg),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN		)
+		__field( pid_t,	pid				)
+		__field( int,	cpu				)
+		__field( unsigned long,	load_avg		)
+		__field( unsigned long,	util_avg		)
+		__field( unsigned long,	util_avg_pelt		)
+		__field( unsigned long,	util_avg_walt		)
+		__field( u64,		load_sum		)
+		__field( u32,		util_sum		)
+		__field( u32,		period_contrib		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid			= tsk->pid;
+		__entry->cpu			= task_cpu(tsk);
+		__entry->load_avg		= avg->load_avg;
+		__entry->util_avg		= avg->util_avg;
+		__entry->load_sum		= avg->load_sum;
+		__entry->util_sum		= avg->util_sum;
+		__entry->period_contrib		= avg->period_contrib;
+		__entry->util_avg_pelt  = avg->util_avg;
+		__entry->util_avg_walt  = 0;
+#ifdef CONFIG_SCHED_WALT
+		__entry->util_avg_walt = (((unsigned long)((struct ravg*)_ravg)->demand) << NICE_0_LOAD_SHIFT);
+		do_div(__entry->util_avg_walt, walt_ravg_window);
+		if (!walt_disabled && sysctl_sched_use_walt_task_util)
+			__entry->util_avg = __entry->util_avg_walt;
+#endif
+	),
+	TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu util_avg=%lu "
+			"util_avg_pelt=%lu util_avg_walt=%lu load_sum=%llu"
+		  " util_sum=%u period_contrib=%u",
+		  __entry->comm,
+		  __entry->pid,
+		  __entry->cpu,
+		  __entry->load_avg,
+		  __entry->util_avg,
+		  __entry->util_avg_pelt,
+		  __entry->util_avg_walt,
+		  (u64)__entry->load_sum,
+		  (u32)__entry->util_sum,
+		  (u32)__entry->period_contrib)
+);
+
+/*
+ * Tracepoint for accounting sched averages for cpus.
+ */
+TRACE_EVENT(sched_load_avg_cpu,
+
+	TP_PROTO(int cpu, struct cfs_rq *cfs_rq),
+
+	TP_ARGS(cpu, cfs_rq),
+
+	TP_STRUCT__entry(
+		__field( int,	cpu				)
+		__field( unsigned long,	load_avg		)
+		__field( unsigned long,	util_avg		)
+		__field( unsigned long,	util_avg_pelt		)
+		__field( unsigned long,	util_avg_walt		)
+	),
+
+	TP_fast_assign(
+		__entry->cpu			= cpu;
+		__entry->load_avg		= cfs_rq->avg.load_avg;
+		__entry->util_avg		= cfs_rq->avg.util_avg;
+		__entry->util_avg_pelt	= cfs_rq->avg.util_avg;
+		__entry->util_avg_walt	= 0;
+#ifdef CONFIG_SCHED_WALT
+		__entry->util_avg_walt	=
+				cpu_rq(cpu)->prev_runnable_sum << NICE_0_LOAD_SHIFT;
+		do_div(__entry->util_avg_walt, walt_ravg_window);
+		if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+			__entry->util_avg		= __entry->util_avg_walt;
+#endif
+	),
+
+	TP_printk("cpu=%d load_avg=%lu util_avg=%lu "
+			  "util_avg_pelt=%lu util_avg_walt=%lu",
+		  __entry->cpu, __entry->load_avg, __entry->util_avg,
+		  __entry->util_avg_pelt, __entry->util_avg_walt)
+);
+
+/*
+ * Tracepoint for sched_tune_config settings
+ */
+TRACE_EVENT(sched_tune_config,
+
+	TP_PROTO(int boost),
+
+	TP_ARGS(boost),
+
+	TP_STRUCT__entry(
+		__field( int,	boost		)
+	),
+
+	TP_fast_assign(
+		__entry->boost 	= boost;
+	),
+
+	TP_printk("boost=%d ", __entry->boost)
+);
+
+/*
+ * Tracepoint for accounting CPU  boosted utilization
+ */
+TRACE_EVENT(sched_boost_cpu,
+
+	TP_PROTO(int cpu, unsigned long util, long margin),
+
+	TP_ARGS(cpu, util, margin),
+
+	TP_STRUCT__entry(
+		__field( int,		cpu			)
+		__field( unsigned long,	util			)
+		__field(long,		margin			)
+	),
+
+	TP_fast_assign(
+		__entry->cpu	= cpu;
+		__entry->util	= util;
+		__entry->margin	= margin;
+	),
+
+	TP_printk("cpu=%d util=%lu margin=%ld",
+		  __entry->cpu,
+		  __entry->util,
+		  __entry->margin)
+);
+
+/*
+ * Tracepoint for schedtune_tasks_update
+ */
+TRACE_EVENT(sched_tune_tasks_update,
+
+	TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx,
+		int boost, int max_boost),
+
+	TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN	)
+		__field( pid_t,		pid		)
+		__field( int,		cpu		)
+		__field( int,		tasks		)
+		__field( int,		idx		)
+		__field( int,		boost		)
+		__field( int,		max_boost	)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid		= tsk->pid;
+		__entry->cpu 		= cpu;
+		__entry->tasks		= tasks;
+		__entry->idx 		= idx;
+		__entry->boost		= boost;
+		__entry->max_boost	= max_boost;
+	),
+
+	TP_printk("pid=%d comm=%s "
+			"cpu=%d tasks=%d idx=%d boost=%d max_boost=%d",
+		__entry->pid, __entry->comm,
+		__entry->cpu, __entry->tasks, __entry->idx,
+		__entry->boost, __entry->max_boost)
+);
+
+/*
+ * Tracepoint for schedtune_boostgroup_update
+ */
+TRACE_EVENT(sched_tune_boostgroup_update,
+
+	TP_PROTO(int cpu, int variation, int max_boost),
+
+	TP_ARGS(cpu, variation, max_boost),
+
+	TP_STRUCT__entry(
+		__field( int,	cpu		)
+		__field( int,	variation	)
+		__field( int,	max_boost	)
+	),
+
+	TP_fast_assign(
+		__entry->cpu		= cpu;
+		__entry->variation	= variation;
+		__entry->max_boost	= max_boost;
+	),
+
+	TP_printk("cpu=%d variation=%d max_boost=%d",
+		__entry->cpu, __entry->variation, __entry->max_boost)
+);
+
+/*
+ * Tracepoint for accounting task boosted utilization
+ */
+TRACE_EVENT(sched_boost_task,
+
+	TP_PROTO(struct task_struct *tsk, unsigned long util, long margin),
+
+	TP_ARGS(tsk, util, margin),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN		)
+		__field( pid_t,		pid			)
+		__field( unsigned long,	util			)
+		__field( long,		margin			)
+
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid	= tsk->pid;
+		__entry->util	= util;
+		__entry->margin	= margin;
+	),
+
+	TP_printk("comm=%s pid=%d util=%lu margin=%ld",
+		  __entry->comm, __entry->pid,
+		  __entry->util,
+		  __entry->margin)
+);
+
+/*
+ * Tracepoint for find_best_target
+ */
+TRACE_EVENT(sched_find_best_target,
+
+	TP_PROTO(struct task_struct *tsk, bool prefer_idle,
+		unsigned long min_util, int start_cpu,
+		int best_idle, int best_active, int target),
+
+	TP_ARGS(tsk, prefer_idle, min_util, start_cpu,
+		best_idle, best_active, target),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN	)
+		__field( pid_t,	pid			)
+		__field( unsigned long,	min_util	)
+		__field( bool,	prefer_idle		)
+		__field( int,	start_cpu		)
+		__field( int,	best_idle		)
+		__field( int,	best_active		)
+		__field( int,	target			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid		= tsk->pid;
+		__entry->min_util	= min_util;
+		__entry->prefer_idle	= prefer_idle;
+		__entry->start_cpu 	= start_cpu;
+		__entry->best_idle	= best_idle;
+		__entry->best_active	= best_active;
+		__entry->target		= target;
+	),
+
+	TP_printk("pid=%d comm=%s prefer_idle=%d start_cpu=%d "
+		  "best_idle=%d best_active=%d target=%d",
+		__entry->pid, __entry->comm,
+		__entry->prefer_idle, __entry->start_cpu,
+		__entry->best_idle, __entry->best_active,
+		__entry->target)
+);
+
+/*
+ * Tracepoint for accounting sched group energy
+ */
+TRACE_EVENT(sched_energy_diff,
+
+	TP_PROTO(struct task_struct *tsk, int scpu, int dcpu, int udelta,
+		int nrgb, int nrga, int nrgd, int capb, int capa, int capd,
+		int nrgn, int nrgp),
+
+	TP_ARGS(tsk, scpu, dcpu, udelta,
+		nrgb, nrga, nrgd, capb, capa, capd,
+		nrgn, nrgp),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN	)
+		__field( pid_t,	pid	)
+		__field( int,	scpu	)
+		__field( int,	dcpu	)
+		__field( int,	udelta	)
+		__field( int,	nrgb	)
+		__field( int,	nrga	)
+		__field( int,	nrgd	)
+		__field( int,	capb	)
+		__field( int,	capa	)
+		__field( int,	capd	)
+		__field( int,	nrgn	)
+		__field( int,	nrgp	)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid		= tsk->pid;
+		__entry->scpu 		= scpu;
+		__entry->dcpu 		= dcpu;
+		__entry->udelta 	= udelta;
+		__entry->nrgb 		= nrgb;
+		__entry->nrga 		= nrga;
+		__entry->nrgd 		= nrgd;
+		__entry->capb 		= capb;
+		__entry->capa 		= capa;
+		__entry->capd 		= capd;
+		__entry->nrgn 		= nrgn;
+		__entry->nrgp 		= nrgp;
+	),
+
+	TP_printk("pid=%d comm=%s "
+			"src_cpu=%d dst_cpu=%d usage_delta=%d "
+			"nrg_before=%d nrg_after=%d nrg_diff=%d "
+			"cap_before=%d cap_after=%d cap_delta=%d "
+			"nrg_delta=%d nrg_payoff=%d",
+		__entry->pid, __entry->comm,
+		__entry->scpu, __entry->dcpu, __entry->udelta,
+		__entry->nrgb, __entry->nrga, __entry->nrgd,
+		__entry->capb, __entry->capa, __entry->capd,
+		__entry->nrgn, __entry->nrgp)
+);
+
+/*
+ * Tracepoint for schedtune_tasks_update
+ */
+TRACE_EVENT(sched_tune_filter,
+
+	TP_PROTO(int nrg_delta, int cap_delta,
+		 int nrg_gain,  int cap_gain,
+		 int payoff, int region),
+
+	TP_ARGS(nrg_delta, cap_delta, nrg_gain, cap_gain, payoff, region),
+
+	TP_STRUCT__entry(
+		__field( int,	nrg_delta	)
+		__field( int,	cap_delta	)
+		__field( int,	nrg_gain	)
+		__field( int,	cap_gain	)
+		__field( int,	payoff		)
+		__field( int,	region		)
+	),
+
+	TP_fast_assign(
+		__entry->nrg_delta	= nrg_delta;
+		__entry->cap_delta	= cap_delta;
+		__entry->nrg_gain	= nrg_gain;
+		__entry->cap_gain	= cap_gain;
+		__entry->payoff		= payoff;
+		__entry->region		= region;
+	),
+
+	TP_printk("nrg_delta=%d cap_delta=%d nrg_gain=%d cap_gain=%d payoff=%d region=%d",
+		__entry->nrg_delta, __entry->cap_delta,
+		__entry->nrg_gain, __entry->cap_gain,
+		__entry->payoff, __entry->region)
+);
+
+/*
+ * Tracepoint for system overutilized flag
+ */
+TRACE_EVENT(sched_overutilized,
+
+	TP_PROTO(bool overutilized),
+
+	TP_ARGS(overutilized),
+
+	TP_STRUCT__entry(
+		__field( bool,	overutilized	)
+	),
+
+	TP_fast_assign(
+		__entry->overutilized	= overutilized;
+	),
+
+	TP_printk("overutilized=%d",
+		__entry->overutilized ? 1 : 0)
+);
+#ifdef CONFIG_SCHED_WALT
+struct rq;
+
+TRACE_EVENT(walt_update_task_ravg,
+
+	TP_PROTO(struct task_struct *p, struct rq *rq, int evt,
+						u64 wallclock, u64 irqtime),
+
+	TP_ARGS(p, rq, evt, wallclock, irqtime),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,   TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	pid_t,	cur_pid			)
+		__field(unsigned int,	cur_freq		)
+		__field(	u64,	wallclock		)
+		__field(	u64,	mark_start		)
+		__field(	u64,	delta_m			)
+		__field(	u64,	win_start		)
+		__field(	u64,	delta			)
+		__field(	u64,	irqtime			)
+		__field(        int,    evt			)
+		__field(unsigned int,	demand			)
+		__field(unsigned int,	sum			)
+		__field(	 int,	cpu			)
+		__field(	u64,	cs			)
+		__field(	u64,	ps			)
+		__field(unsigned long,	util			)
+		__field(	u32,	curr_window		)
+		__field(	u32,	prev_window		)
+		__field(	u64,	nt_cs			)
+		__field(	u64,	nt_ps			)
+		__field(	u32,	active_windows		)
+	),
+
+	TP_fast_assign(
+		__entry->wallclock      = wallclock;
+		__entry->win_start      = rq->window_start;
+		__entry->delta          = (wallclock - rq->window_start);
+		__entry->evt            = evt;
+		__entry->cpu            = rq->cpu;
+		__entry->cur_pid        = rq->curr->pid;
+		__entry->cur_freq       = rq->cur_freq;
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid            = p->pid;
+		__entry->mark_start     = p->ravg.mark_start;
+		__entry->delta_m        = (wallclock - p->ravg.mark_start);
+		__entry->demand         = p->ravg.demand;
+		__entry->sum            = p->ravg.sum;
+		__entry->irqtime        = irqtime;
+		__entry->cs             = rq->curr_runnable_sum;
+		__entry->ps             = rq->prev_runnable_sum;
+		__entry->util           = rq->prev_runnable_sum << NICE_0_LOAD_SHIFT;
+		do_div(__entry->util, walt_ravg_window);
+		__entry->curr_window	= p->ravg.curr_window;
+		__entry->prev_window	= p->ravg.prev_window;
+		__entry->nt_cs		= rq->nt_curr_runnable_sum;
+		__entry->nt_ps		= rq->nt_prev_runnable_sum;
+		__entry->active_windows	= p->ravg.active_windows;
+	),
+
+	TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
+		" cs %llu ps %llu util %lu cur_window %u prev_window %u active_wins %u"
+		, __entry->wallclock, __entry->win_start, __entry->delta,
+		__entry->evt, __entry->cpu,
+		__entry->cur_freq, __entry->cur_pid,
+		__entry->pid, __entry->comm, __entry->mark_start,
+		__entry->delta_m, __entry->demand,
+		__entry->sum, __entry->irqtime,
+		__entry->cs, __entry->ps, __entry->util,
+		__entry->curr_window, __entry->prev_window,
+		  __entry->active_windows
+		)
+);
+
+TRACE_EVENT(walt_update_history,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples,
+			int evt),
+
+	TP_ARGS(rq, p, runtime, samples, evt),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,   TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(unsigned int,	runtime			)
+		__field(	 int,	samples			)
+		__field(	 int,	evt			)
+		__field(	 u64,	demand			)
+		__field(	 u64,	walt_avg		)
+		__field(unsigned int,	pelt_avg		)
+		__array(	 u32,	hist, RAVG_HIST_SIZE_MAX)
+		__field(	 int,	cpu			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid            = p->pid;
+		__entry->runtime        = runtime;
+		__entry->samples        = samples;
+		__entry->evt            = evt;
+		__entry->demand         = p->ravg.demand;
+		__entry->walt_avg	= (__entry->demand << 10);
+		__entry->walt_avg	= div_u64(__entry->walt_avg,
+						  walt_ravg_window);
+		__entry->pelt_avg	= p->se.avg.util_avg;
+		memcpy(__entry->hist, p->ravg.sum_history,
+					RAVG_HIST_SIZE_MAX * sizeof(u32));
+		__entry->cpu            = rq->cpu;
+	),
+
+	TP_printk("%d (%s): runtime %u samples %d event %d demand %llu"
+		" walt %llu pelt %u (hist: %u %u %u %u %u) cpu %d",
+		__entry->pid, __entry->comm,
+		__entry->runtime, __entry->samples, __entry->evt,
+		__entry->demand,
+		__entry->walt_avg,
+		__entry->pelt_avg,
+		__entry->hist[0], __entry->hist[1],
+		__entry->hist[2], __entry->hist[3],
+		__entry->hist[4], __entry->cpu)
+);
+
+TRACE_EVENT(walt_migration_update_sum,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p),
+
+	TP_ARGS(rq, p),
+
+	TP_STRUCT__entry(
+		__field(int,		cpu			)
+		__field(int,		pid			)
+		__field(	u64,	cs			)
+		__field(	u64,	ps			)
+		__field(	s64,	nt_cs			)
+		__field(	s64,	nt_ps			)
+	),
+
+	TP_fast_assign(
+		__entry->cpu		= cpu_of(rq);
+		__entry->cs		= rq->curr_runnable_sum;
+		__entry->ps		= rq->prev_runnable_sum;
+		__entry->nt_cs		= (s64)rq->nt_curr_runnable_sum;
+		__entry->nt_ps		= (s64)rq->nt_prev_runnable_sum;
+		__entry->pid		= p->pid;
+	),
+
+	TP_printk("cpu %d: cs %llu ps %llu nt_cs %lld nt_ps %lld pid %d",
+		  __entry->cpu, __entry->cs, __entry->ps,
+		  __entry->nt_cs, __entry->nt_ps, __entry->pid)
+);
+#endif /* CONFIG_SCHED_WALT */
+
+#endif /* CONFIG_SMP */
+
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 67d632f..2d078c20 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -92,4 +92,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_COOKIE		57
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index 41420e3..993bb46 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -33,11 +33,68 @@
 	BINDER_TYPE_HANDLE	= B_PACK_CHARS('s', 'h', '*', B_TYPE_LARGE),
 	BINDER_TYPE_WEAK_HANDLE	= B_PACK_CHARS('w', 'h', '*', B_TYPE_LARGE),
 	BINDER_TYPE_FD		= B_PACK_CHARS('f', 'd', '*', B_TYPE_LARGE),
+	BINDER_TYPE_FDA		= B_PACK_CHARS('f', 'd', 'a', B_TYPE_LARGE),
+	BINDER_TYPE_PTR		= B_PACK_CHARS('p', 't', '*', B_TYPE_LARGE),
 };
 
-enum {
+/**
+ * enum flat_binder_object_shifts: shift values for flat_binder_object_flags
+ * @FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT: shift for getting scheduler policy.
+ *
+ */
+enum flat_binder_object_shifts {
+	FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT = 9,
+};
+
+/**
+ * enum flat_binder_object_flags - flags for use in flat_binder_object.flags
+ */
+enum flat_binder_object_flags {
+	/**
+	 * @FLAT_BINDER_FLAG_PRIORITY_MASK: bit-mask for min scheduler priority
+	 *
+	 * These bits can be used to set the minimum scheduler priority
+	 * at which transactions into this node should run. Valid values
+	 * in these bits depend on the scheduler policy encoded in
+	 * @FLAT_BINDER_FLAG_SCHED_POLICY_MASK.
+	 *
+	 * For SCHED_NORMAL/SCHED_BATCH, the valid range is between [-20..19]
+	 * For SCHED_FIFO/SCHED_RR, the value can run between [1..99]
+	 */
 	FLAT_BINDER_FLAG_PRIORITY_MASK = 0xff,
+	/**
+	 * @FLAT_BINDER_FLAG_ACCEPTS_FDS: whether the node accepts fds.
+	 */
 	FLAT_BINDER_FLAG_ACCEPTS_FDS = 0x100,
+	/**
+	 * @FLAT_BINDER_FLAG_SCHED_POLICY_MASK: bit-mask for scheduling policy
+	 *
+	 * These two bits can be used to set the min scheduling policy at which
+	 * transactions on this node should run. These match the UAPI
+	 * scheduler policy values, eg:
+	 * 00b: SCHED_NORMAL
+	 * 01b: SCHED_FIFO
+	 * 10b: SCHED_RR
+	 * 11b: SCHED_BATCH
+	 */
+	FLAT_BINDER_FLAG_SCHED_POLICY_MASK =
+		3U << FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT,
+
+	/**
+	 * @FLAT_BINDER_FLAG_INHERIT_RT: whether the node inherits RT policy
+	 *
+	 * Only when set, calls into this node will inherit a real-time
+	 * scheduling policy from the caller (for synchronous transactions).
+	 */
+	FLAT_BINDER_FLAG_INHERIT_RT = 0x800,
+
+	/**
+	 * @FLAT_BINDER_FLAG_TXN_SECURITY_CTX: request security contexts
+	 *
+	 * Only when set, causes senders to include their security
+	 * context
+	 */
+	FLAT_BINDER_FLAG_TXN_SECURITY_CTX = 0x1000,
 };
 
 #ifdef BINDER_IPC_32BIT
@@ -48,6 +105,14 @@
 typedef __u64 binder_uintptr_t;
 #endif
 
+/**
+ * struct binder_object_header - header shared by all binder metadata objects.
+ * @type:	type of the object
+ */
+struct binder_object_header {
+	__u32        type;
+};
+
 /*
  * This is the flattened representation of a Binder object for transfer
  * between processes.  The 'offsets' supplied as part of a binder transaction
@@ -56,9 +121,8 @@
  * between processes.
  */
 struct flat_binder_object {
-	/* 8 bytes for large_flat_header. */
-	__u32		type;
-	__u32		flags;
+	struct binder_object_header	hdr;
+	__u32				flags;
 
 	/* 8 bytes of data. */
 	union {
@@ -70,6 +134,86 @@
 	binder_uintptr_t	cookie;
 };
 
+/**
+ * struct binder_fd_object - describes a filedescriptor to be fixed up.
+ * @hdr:	common header structure
+ * @pad_flags:	padding to remain compatible with old userspace code
+ * @pad_binder:	padding to remain compatible with old userspace code
+ * @fd:		file descriptor
+ * @cookie:	opaque data, used by user-space
+ */
+struct binder_fd_object {
+	struct binder_object_header	hdr;
+	__u32				pad_flags;
+	union {
+		binder_uintptr_t	pad_binder;
+		__u32			fd;
+	};
+
+	binder_uintptr_t		cookie;
+};
+
+/* struct binder_buffer_object - object describing a userspace buffer
+ * @hdr:		common header structure
+ * @flags:		one or more BINDER_BUFFER_* flags
+ * @buffer:		address of the buffer
+ * @length:		length of the buffer
+ * @parent:		index in offset array pointing to parent buffer
+ * @parent_offset:	offset in @parent pointing to this buffer
+ *
+ * A binder_buffer object represents an object that the
+ * binder kernel driver can copy verbatim to the target
+ * address space. A buffer itself may be pointed to from
+ * within another buffer, meaning that the pointer inside
+ * that other buffer needs to be fixed up as well. This
+ * can be done by setting the BINDER_BUFFER_FLAG_HAS_PARENT
+ * flag in @flags, by setting @parent buffer to the index
+ * in the offset array pointing to the parent binder_buffer_object,
+ * and by setting @parent_offset to the offset in the parent buffer
+ * at which the pointer to this buffer is located.
+ */
+struct binder_buffer_object {
+	struct binder_object_header	hdr;
+	__u32				flags;
+	binder_uintptr_t		buffer;
+	binder_size_t			length;
+	binder_size_t			parent;
+	binder_size_t			parent_offset;
+};
+
+enum {
+	BINDER_BUFFER_FLAG_HAS_PARENT = 0x01,
+};
+
+/* struct binder_fd_array_object - object describing an array of fds in a buffer
+ * @hdr:		common header structure
+ * @pad:		padding to ensure correct alignment
+ * @num_fds:		number of file descriptors in the buffer
+ * @parent:		index in offset array to buffer holding the fd array
+ * @parent_offset:	start offset of fd array in the buffer
+ *
+ * A binder_fd_array object represents an array of file
+ * descriptors embedded in a binder_buffer_object. It is
+ * different from a regular binder_buffer_object because it
+ * describes a list of file descriptors to fix up, not an opaque
+ * blob of memory, and hence the kernel needs to treat it differently.
+ *
+ * An example of how this would be used is with Android's
+ * native_handle_t object, which is a struct with a list of integers
+ * and a list of file descriptors. The native_handle_t struct itself
+ * will be represented by a struct binder_buffer_objct, whereas the
+ * embedded list of file descriptors is represented by a
+ * struct binder_fd_array_object with that binder_buffer_object as
+ * a parent.
+ */
+struct binder_fd_array_object {
+	struct binder_object_header	hdr;
+	__u32				pad;
+	binder_size_t			num_fds;
+	binder_size_t			parent;
+	binder_size_t			parent_offset;
+};
+
 /*
  * On 64-bit platforms where user code may run in 32-bits the driver must
  * translate the buffer (and local binder) addresses appropriately.
@@ -97,6 +241,19 @@
 #define BINDER_CURRENT_PROTOCOL_VERSION 8
 #endif
 
+/*
+ * Use with BINDER_GET_NODE_DEBUG_INFO, driver reads ptr, writes to all fields.
+ * Set ptr to NULL for the first call to get the info for the first node, and
+ * then repeat the call passing the previously returned value to get the next
+ * nodes.  ptr will be 0 when there are no more nodes.
+ */
+struct binder_node_debug_info {
+	binder_uintptr_t ptr;
+	binder_uintptr_t cookie;
+	__u32            has_strong_ref;
+	__u32            has_weak_ref;
+};
+
 #define BINDER_WRITE_READ		_IOWR('b', 1, struct binder_write_read)
 #define BINDER_SET_IDLE_TIMEOUT		_IOW('b', 3, __s64)
 #define BINDER_SET_MAX_THREADS		_IOW('b', 5, __u32)
@@ -104,6 +261,8 @@
 #define BINDER_SET_CONTEXT_MGR		_IOW('b', 7, __s32)
 #define BINDER_THREAD_EXIT		_IOW('b', 8, __s32)
 #define BINDER_VERSION			_IOWR('b', 9, struct binder_version)
+#define BINDER_GET_NODE_DEBUG_INFO	_IOWR('b', 11, struct binder_node_debug_info)
+#define BINDER_SET_CONTEXT_MGR_EXT	_IOW('b', 13, struct flat_binder_object)
 
 /*
  * NOTE: Two special error codes you should check for when calling
@@ -162,6 +321,16 @@
 	} data;
 };
 
+struct binder_transaction_data_secctx {
+	struct binder_transaction_data transaction_data;
+	binder_uintptr_t secctx;
+};
+
+struct binder_transaction_data_sg {
+	struct binder_transaction_data transaction_data;
+	binder_size_t buffers_size;
+};
+
 struct binder_ptr_cookie {
 	binder_uintptr_t ptr;
 	binder_uintptr_t cookie;
@@ -192,6 +361,11 @@
 	BR_OK = _IO('r', 1),
 	/* No parameters! */
 
+	BR_TRANSACTION_SEC_CTX = _IOR('r', 2,
+				      struct binder_transaction_data_secctx),
+	/*
+	 * binder_transaction_data_secctx: the received command.
+	 */
 	BR_TRANSACTION = _IOR('r', 2, struct binder_transaction_data),
 	BR_REPLY = _IOR('r', 3, struct binder_transaction_data),
 	/*
@@ -346,6 +520,12 @@
 	/*
 	 * void *: cookie
 	 */
+
+	BC_TRANSACTION_SG = _IOW('c', 17, struct binder_transaction_data_sg),
+	BC_REPLY_SG = _IOW('c', 18, struct binder_transaction_data_sg),
+	/*
+	 * binder_transaction_data_sg: the sent command.
+	 */
 };
 
 #endif /* _UAPI_LINUX_BINDER_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f09c70b..b2d5be9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -73,6 +73,8 @@
 	BPF_PROG_LOAD,
 	BPF_OBJ_PIN,
 	BPF_OBJ_GET,
+	BPF_PROG_ATTACH,
+	BPF_PROG_DETACH,
 };
 
 enum bpf_map_type {
@@ -96,8 +98,23 @@
 	BPF_PROG_TYPE_TRACEPOINT,
 	BPF_PROG_TYPE_XDP,
 	BPF_PROG_TYPE_PERF_EVENT,
+	BPF_PROG_TYPE_CGROUP_SKB,
 };
 
+enum bpf_attach_type {
+	BPF_CGROUP_INET_INGRESS,
+	BPF_CGROUP_INET_EGRESS,
+	__MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
+ * to the given target_fd cgroup the descendent cgroup will be able to
+ * override effective bpf program that was inherited from this cgroup
+ */
+#define BPF_F_ALLOW_OVERRIDE	(1U << 0)
+
 #define BPF_PSEUDO_MAP_FD	1
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -141,6 +158,13 @@
 		__aligned_u64	pathname;
 		__u32		bpf_fd;
 	};
+
+	struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+		__u32		target_fd;	/* container object to attach to */
+		__u32		attach_bpf_fd;	/* eBPF program to attach */
+		__u32		attach_type;
+		__u32		attach_flags;
+	};
 } __attribute__((aligned(8)));
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -426,6 +450,67 @@
 	 */
 	BPF_FUNC_set_hash_invalid,
 
+	/**
+	 * int bpf_get_numa_node_id()
+	 *     Return: Id of current NUMA node.
+	 */
+	BPF_FUNC_get_numa_node_id,
+
+	/**
+	 * int bpf_skb_change_head()
+	 *     Grows headroom of skb and adjusts MAC header offset accordingly.
+	 *     Will extends/reallocae as required automatically.
+	 *     May change skb data pointer and will thus invalidate any check
+	 *     performed for direct packet access.
+	 *     @skb: pointer to skb
+	 *     @len: length of header to be pushed in front
+	 *     @flags: Flags (unused for now)
+	 *     Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_head,
+
+	/**
+	 * int bpf_xdp_adjust_head(xdp_md, delta)
+	 *     Adjust the xdp_md.data by delta
+	 *     @xdp_md: pointer to xdp_md
+	 *     @delta: An positive/negative integer to be added to xdp_md.data
+	 *     Return: 0 on success or negative on error
+	 */
+	BPF_FUNC_xdp_adjust_head,
+
+	/**
+	 * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+	 *     Copy a NUL terminated string from unsafe address. In case the string
+	 *     length is smaller than size, the target is not padded with further NUL
+	 *     bytes. In case the string length is larger than size, just count-1
+	 *     bytes are copied and the last byte is set to NUL.
+	 *     @dst: destination address
+	 *     @size: maximum number of bytes to copy, including the trailing NUL
+	 *     @unsafe_ptr: unsafe address
+	 *     Return:
+	 *       > 0 length of the string including the trailing NUL on success
+	 *       < 0 error
+	 */
+	BPF_FUNC_probe_read_str,
+
+	/**
+	 * u64 bpf_bpf_get_socket_cookie(skb)
+	 *     Get the cookie for the socket stored inside sk_buff.
+	 *     @skb: pointer to skb
+	 *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
+	 *     field is missing inside sk_buff
+	 */
+	BPF_FUNC_get_socket_cookie,
+
+	/**
+	 * u32 bpf_get_socket_uid(skb)
+	 *     Get the owner uid of the socket stored inside sk_buff.
+	 *     @skb: pointer to skb
+	 *     Return: uid of the socket owner on success or 0 if the socket pointer
+	 *     inside sk_buff is NULL
+	 */
+	BPF_FUNC_get_socket_uid,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 14404b3..bbf02a6 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -29,6 +29,11 @@
 	__u32		flags;
 };
 
+struct fib_rule_uid_range {
+	__u32		start;
+	__u32		end;
+};
+
 enum {
 	FRA_UNSPEC,
 	FRA_DST,	/* destination address */
@@ -51,6 +56,7 @@
 	FRA_OIFNAME,
 	FRA_PAD,
 	FRA_L3MDEV,	/* iif or oif is l3mdev goto its table */
+	FRA_UID_RANGE,	/* UID range */
 	__FRA_MAX
 };
 
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 4749955..11c0307 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -237,6 +237,8 @@
 #define FICLONERANGE	_IOW(0x94, 13, struct file_clone_range)
 #define FIDEDUPERANGE	_IOWR(0x94, 54, struct file_dedupe_range)
 
+#define FIDTRIM	_IOWR('f', 128, struct fstrim_range)	/* Deep discard trim */
+
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
 #define	FS_IOC_GETVERSION		_IOR('v', 1, long)
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 42fa977..0932378 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -375,6 +375,7 @@
 	FUSE_READDIRPLUS   = 44,
 	FUSE_RENAME2       = 45,
 	FUSE_LSEEK         = 46,
+	FUSE_CANONICAL_PATH= 2016,
 
 	/* CUSE specific operations */
 	CUSE_INIT          = 4096,
diff --git a/include/uapi/linux/hw_breakpoint.h b/include/uapi/linux/hw_breakpoint.h
index b04000a..2b65efd 100644
--- a/include/uapi/linux/hw_breakpoint.h
+++ b/include/uapi/linux/hw_breakpoint.h
@@ -4,7 +4,11 @@
 enum {
 	HW_BREAKPOINT_LEN_1 = 1,
 	HW_BREAKPOINT_LEN_2 = 2,
+	HW_BREAKPOINT_LEN_3 = 3,
 	HW_BREAKPOINT_LEN_4 = 4,
+	HW_BREAKPOINT_LEN_5 = 5,
+	HW_BREAKPOINT_LEN_6 = 6,
+	HW_BREAKPOINT_LEN_7 = 7,
 	HW_BREAKPOINT_LEN_8 = 8,
 };
 
diff --git a/include/uapi/linux/if_pppolac.h b/include/uapi/linux/if_pppolac.h
new file mode 100644
index 0000000..b7eb8153
--- /dev/null
+++ b/include/uapi/linux/if_pppolac.h
@@ -0,0 +1,33 @@
+/* include/uapi/linux/if_pppolac.h
+ *
+ * Header for PPP on L2TP Access Concentrator / PPPoLAC Socket (RFC 2661)
+ *
+ * Copyright (C) 2009 Google, Inc.
+ * Author: Chia-chi Yeh <chiachi@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UAPI_LINUX_IF_PPPOLAC_H
+#define _UAPI_LINUX_IF_PPPOLAC_H
+
+#include <linux/socket.h>
+#include <linux/types.h>
+
+struct sockaddr_pppolac {
+	sa_family_t	sa_family;	/* AF_PPPOX */
+	unsigned int	sa_protocol;	/* PX_PROTO_OLAC */
+	int		udp_socket;
+	struct __attribute__((packed)) {
+		__u16	tunnel, session;
+	} local, remote;
+} __attribute__((packed));
+
+#endif /* _UAPI_LINUX_IF_PPPOLAC_H */
diff --git a/include/uapi/linux/if_pppopns.h b/include/uapi/linux/if_pppopns.h
new file mode 100644
index 0000000..a392b52
--- /dev/null
+++ b/include/uapi/linux/if_pppopns.h
@@ -0,0 +1,32 @@
+/* include/uapi/linux/if_pppopns.h
+ *
+ * Header for PPP on PPTP Network Server / PPPoPNS Socket (RFC 2637)
+ *
+ * Copyright (C) 2009 Google, Inc.
+ * Author: Chia-chi Yeh <chiachi@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UAPI_LINUX_IF_PPPOPNS_H
+#define _UAPI_LINUX_IF_PPPOPNS_H
+
+#include <linux/socket.h>
+#include <linux/types.h>
+
+struct sockaddr_pppopns {
+	sa_family_t	sa_family;	/* AF_PPPOX */
+	unsigned int	sa_protocol;	/* PX_PROTO_OPNS */
+	int		tcp_socket;
+	__u16		local;
+	__u16		remote;
+} __attribute__((packed));
+
+#endif /* _UAPI_LINUX_IF_PPPOPNS_H */
diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h
index d37bbb1..6aad18a 100644
--- a/include/uapi/linux/if_pppox.h
+++ b/include/uapi/linux/if_pppox.h
@@ -24,6 +24,8 @@
 #include <linux/if.h>
 #include <linux/if_ether.h>
 #include <linux/if_pppol2tp.h>
+#include <linux/if_pppolac.h>
+#include <linux/if_pppopns.h>
 #include <linux/in.h>
 #include <linux/in6.h>
 
@@ -59,7 +61,9 @@
 #define PX_PROTO_OE    0 /* Currently just PPPoE */
 #define PX_PROTO_OL2TP 1 /* Now L2TP also */
 #define PX_PROTO_PPTP  2
-#define PX_MAX_PROTO   3
+#define PX_PROTO_OLAC  3
+#define PX_PROTO_OPNS  4
+#define PX_MAX_PROTO   5
 
 struct sockaddr_pppox {
 	__kernel_sa_family_t sa_family;       /* address family, AF_PPPOX */
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 8c27723..c462f1d 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -164,6 +164,7 @@
 	DEVCONF_ACCEPT_DAD,
 	DEVCONF_FORCE_TLLAO,
 	DEVCONF_NDISC_NOTIFY,
+	DEVCONF_ACCEPT_RA_RT_TABLE,
 	DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL,
 	DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL,
 	DEVCONF_SUPPRESS_FRAG_NDISC,
@@ -178,6 +179,12 @@
 	DEVCONF_DROP_UNSOLICITED_NA,
 	DEVCONF_KEEP_ADDR_ON_DOWN,
 	DEVCONF_RTR_SOLICIT_MAX_INTERVAL,
+	DEVCONF_SEG6_ENABLED,
+	DEVCONF_SEG6_REQUIRE_HMAC,
+	DEVCONF_ENHANCED_DAD,
+	DEVCONF_ADDR_GEN_MODE,
+	DEVCONF_DISABLE_POLICY,
+	DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN,
 	DEVCONF_MAX
 };
 
diff --git a/include/uapi/linux/keychord.h b/include/uapi/linux/keychord.h
new file mode 100644
index 0000000..ea7cf4d
--- /dev/null
+++ b/include/uapi/linux/keychord.h
@@ -0,0 +1,52 @@
+/*
+ *  Key chord input driver
+ *
+ * Copyright (C) 2008 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+*/
+
+#ifndef _UAPI_LINUX_KEYCHORD_H_
+#define _UAPI_LINUX_KEYCHORD_H_
+
+#include <linux/input.h>
+
+#define KEYCHORD_VERSION		1
+
+/*
+ * One or more input_keychord structs are written to /dev/keychord
+ * at once to specify the list of keychords to monitor.
+ * Reading /dev/keychord returns the id of a keychord when the
+ * keychord combination is pressed.  A keychord is signalled when
+ * all of the keys in the keycode list are in the pressed state.
+ * The order in which the keys are pressed does not matter.
+ * The keychord will not be signalled if keys not in the keycode
+ * list are pressed.
+ * Keychords will not be signalled on key release events.
+ */
+struct input_keychord {
+	/* should be KEYCHORD_VERSION */
+	__u16 version;
+	/*
+	 * client specified ID, returned from read()
+	 * when this keychord is pressed.
+	 */
+	__u16 id;
+
+	/* number of keycodes in this keychord */
+	__u16 count;
+
+	/* variable length array of keycodes */
+	__u16 keycodes[];
+};
+
+#endif	/* _UAPI_LINUX_KEYCHORD_H_ */
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 9bd5594..bd01769 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -53,6 +53,8 @@
 #define REISER2FS_SUPER_MAGIC_STRING	"ReIsEr2Fs"
 #define REISER2FS_JR_SUPER_MAGIC_STRING	"ReIsEr3Fs"
 
+#define SDCARDFS_SUPER_MAGIC	0x5dca2df5
+
 #define SMB_SUPER_MAGIC		0x517B
 #define CGROUP_SUPER_MAGIC	0x27e0eb
 #define CGROUP2_SUPER_MAGIC	0x63677270
diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h
index 208ae93..faaa28b 100644
--- a/include/uapi/linux/netfilter/xt_IDLETIMER.h
+++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h
@@ -4,6 +4,7 @@
  * Header file for Xtables timer target module.
  *
  * Copyright (C) 2004, 2010 Nokia Corporation
+ *
  * Written by Timo Teras <ext-timo.teras@nokia.com>
  *
  * Converted to x_tables and forward-ported to 2.6.34
@@ -32,12 +33,19 @@
 #include <linux/types.h>
 
 #define MAX_IDLETIMER_LABEL_SIZE 28
+#define NLMSG_MAX_SIZE 64
+
+#define NL_EVENT_TYPE_INACTIVE 0
+#define NL_EVENT_TYPE_ACTIVE 1
 
 struct idletimer_tg_info {
 	__u32 timeout;
 
 	char label[MAX_IDLETIMER_LABEL_SIZE];
 
+	/* Use netlink messages for notification in addition to sysfs */
+	__u8 send_nl_msg;
+
 	/* for kernel module internal use only */
 	struct idletimer_tg *timer __attribute__((aligned(8)));
 };
diff --git a/include/uapi/linux/netfilter/xt_socket.h b/include/uapi/linux/netfilter/xt_socket.h
index 87644f8..7f00df6 100644
--- a/include/uapi/linux/netfilter/xt_socket.h
+++ b/include/uapi/linux/netfilter/xt_socket.h
@@ -26,4 +26,11 @@
 			   | XT_SOCKET_NOWILDCARD \
 			   | XT_SOCKET_RESTORESKMARK)
 
+struct sock *xt_socket_lookup_slow_v4(struct net *net,
+				      const struct sk_buff *skb,
+				      const struct net_device *indev);
+struct sock *xt_socket_lookup_slow_v6(struct net *net,
+				      const struct sk_buff *skb,
+				      const struct net_device *indev);
+
 #endif /* _XT_SOCKET_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 64776b7..8b8a5e9 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -209,4 +209,7 @@
 # define PR_SPEC_DISABLE		(1UL << 2)
 # define PR_SPEC_FORCE_DISABLE		(1UL << 3)
 
+#define PR_SET_VMA		0x53564d41
+# define PR_SET_VMA_ANON_NAME		0
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 5a78be5..e14377f 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -318,6 +318,7 @@
 	RTA_ENCAP,
 	RTA_EXPIRES,
 	RTA_PAD,
+	RTA_UID,
 	__RTA_MAX
 };
 
diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
index d2b1215..e13d480 100644
--- a/include/uapi/linux/sysctl.h
+++ b/include/uapi/linux/sysctl.h
@@ -568,6 +568,7 @@
 	NET_IPV6_PROXY_NDP=23,
 	NET_IPV6_ACCEPT_SOURCE_ROUTE=25,
 	NET_IPV6_ACCEPT_RA_FROM_LOCAL=26,
+	NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27,
 	__NET_IPV6_MAX
 };
 
diff --git a/include/uapi/linux/usb/f_accessory.h b/include/uapi/linux/usb/f_accessory.h
new file mode 100644
index 0000000..0baeb7d
--- /dev/null
+++ b/include/uapi/linux/usb/f_accessory.h
@@ -0,0 +1,146 @@
+/*
+ * Gadget Function Driver for Android USB accessories
+ *
+ * Copyright (C) 2011 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UAPI_LINUX_USB_F_ACCESSORY_H
+#define _UAPI_LINUX_USB_F_ACCESSORY_H
+
+/* Use Google Vendor ID when in accessory mode */
+#define USB_ACCESSORY_VENDOR_ID 0x18D1
+
+
+/* Product ID to use when in accessory mode */
+#define USB_ACCESSORY_PRODUCT_ID 0x2D00
+
+/* Product ID to use when in accessory mode and adb is enabled */
+#define USB_ACCESSORY_ADB_PRODUCT_ID 0x2D01
+
+/* Indexes for strings sent by the host via ACCESSORY_SEND_STRING */
+#define ACCESSORY_STRING_MANUFACTURER   0
+#define ACCESSORY_STRING_MODEL          1
+#define ACCESSORY_STRING_DESCRIPTION    2
+#define ACCESSORY_STRING_VERSION        3
+#define ACCESSORY_STRING_URI            4
+#define ACCESSORY_STRING_SERIAL         5
+
+/* Control request for retrieving device's protocol version
+ *
+ *	requestType:    USB_DIR_IN | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_GET_PROTOCOL
+ *	value:          0
+ *	index:          0
+ *	data            version number (16 bits little endian)
+ *                     1 for original accessory support
+ *                     2 adds HID and device to host audio support
+ */
+#define ACCESSORY_GET_PROTOCOL  51
+
+/* Control request for host to send a string to the device
+ *
+ *	requestType:    USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_SEND_STRING
+ *	value:          0
+ *	index:          string ID
+ *	data            zero terminated UTF8 string
+ *
+ *  The device can later retrieve these strings via the
+ *  ACCESSORY_GET_STRING_* ioctls
+ */
+#define ACCESSORY_SEND_STRING   52
+
+/* Control request for starting device in accessory mode.
+ * The host sends this after setting all its strings to the device.
+ *
+ *	requestType:    USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_START
+ *	value:          0
+ *	index:          0
+ *	data            none
+ */
+#define ACCESSORY_START         53
+
+/* Control request for registering a HID device.
+ * Upon registering, a unique ID is sent by the accessory in the
+ * value parameter. This ID will be used for future commands for
+ * the device
+ *
+ *	requestType:    USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_REGISTER_HID_DEVICE
+ *	value:          Accessory assigned ID for the HID device
+ *	index:          total length of the HID report descriptor
+ *	data            none
+ */
+#define ACCESSORY_REGISTER_HID         54
+
+/* Control request for unregistering a HID device.
+ *
+ *	requestType:    USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_REGISTER_HID
+ *	value:          Accessory assigned ID for the HID device
+ *	index:          0
+ *	data            none
+ */
+#define ACCESSORY_UNREGISTER_HID         55
+
+/* Control request for sending the HID report descriptor.
+ * If the HID descriptor is longer than the endpoint zero max packet size,
+ * the descriptor will be sent in multiple ACCESSORY_SET_HID_REPORT_DESC
+ * commands. The data for the descriptor must be sent sequentially
+ * if multiple packets are needed.
+ *
+ *	requestType:    USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_SET_HID_REPORT_DESC
+ *	value:          Accessory assigned ID for the HID device
+ *	index:          offset of data in descriptor
+ *                      (needed when HID descriptor is too big for one packet)
+ *	data            the HID report descriptor
+ */
+#define ACCESSORY_SET_HID_REPORT_DESC         56
+
+/* Control request for sending HID events.
+ *
+ *	requestType:    USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_SEND_HID_EVENT
+ *	value:          Accessory assigned ID for the HID device
+ *	index:          0
+ *	data            the HID report for the event
+ */
+#define ACCESSORY_SEND_HID_EVENT         57
+
+/* Control request for setting the audio mode.
+ *
+ *	requestType:	USB_DIR_OUT | USB_TYPE_VENDOR
+ *	request:        ACCESSORY_SET_AUDIO_MODE
+ *	value:          0 - no audio
+ *                     1 - device to host, 44100 16-bit stereo PCM
+ *	index:          0
+ *	data            none
+ */
+#define ACCESSORY_SET_AUDIO_MODE         58
+
+/* ioctls for retrieving strings set by the host */
+#define ACCESSORY_GET_STRING_MANUFACTURER   _IOW('M', 1, char[256])
+#define ACCESSORY_GET_STRING_MODEL          _IOW('M', 2, char[256])
+#define ACCESSORY_GET_STRING_DESCRIPTION    _IOW('M', 3, char[256])
+#define ACCESSORY_GET_STRING_VERSION        _IOW('M', 4, char[256])
+#define ACCESSORY_GET_STRING_URI            _IOW('M', 5, char[256])
+#define ACCESSORY_GET_STRING_SERIAL         _IOW('M', 6, char[256])
+/* returns 1 if there is a start request pending */
+#define ACCESSORY_IS_START_REQUESTED        _IO('M', 7)
+/* returns audio mode (set via the ACCESSORY_SET_AUDIO_MODE control request) */
+#define ACCESSORY_GET_AUDIO_MODE            _IO('M', 8)
+
+#endif /* _UAPI_LINUX_USB_F_ACCESSORY_H */
diff --git a/include/uapi/linux/usb/f_mtp.h b/include/uapi/linux/usb/f_mtp.h
new file mode 100644
index 0000000..5032918
--- /dev/null
+++ b/include/uapi/linux/usb/f_mtp.h
@@ -0,0 +1,61 @@
+/*
+ * Gadget Function Driver for MTP
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UAPI_LINUX_USB_F_MTP_H
+#define _UAPI_LINUX_USB_F_MTP_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+struct mtp_file_range {
+	/* file descriptor for file to transfer */
+	int			fd;
+	/* offset in file for start of transfer */
+	loff_t		offset;
+	/* number of bytes to transfer */
+	int64_t		length;
+	/* MTP command ID for data header,
+	 * used only for MTP_SEND_FILE_WITH_HEADER
+	 */
+	uint16_t	command;
+	/* MTP transaction ID for data header,
+	 * used only for MTP_SEND_FILE_WITH_HEADER
+	 */
+	uint32_t	transaction_id;
+};
+
+struct mtp_event {
+	/* size of the event */
+	size_t		length;
+	/* event data to send */
+	void		*data;
+};
+
+/* Sends the specified file range to the host */
+#define MTP_SEND_FILE              _IOW('M', 0, struct mtp_file_range)
+/* Receives data from the host and writes it to a file.
+ * The file is created if it does not exist.
+ */
+#define MTP_RECEIVE_FILE           _IOW('M', 1, struct mtp_file_range)
+/* Sends an event to the host via the interrupt endpoint */
+#define MTP_SEND_EVENT             _IOW('M', 3, struct mtp_event)
+/* Sends the specified file range to the host,
+ * with a 12 byte MTP data packet header at the beginning.
+ */
+#define MTP_SEND_FILE_WITH_HEADER  _IOW('M', 4, struct mtp_file_range)
+
+#endif /* _UAPI_LINUX_USB_F_MTP_H */
diff --git a/init/Kconfig b/init/Kconfig
index b331fee..544d791 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -400,6 +400,15 @@
 
 	  If in doubt, say N here.
 
+config SCHED_WALT
+        bool "Support window based load tracking"
+        depends on SMP
+        help
+        This feature will allow the scheduler to maintain a tunable window
+	based set of metrics for tasks and runqueues. These metrics can be
+	used to guide task placement as well as task frequency requirements
+	for cpufreq governors.
+
 config BSD_PROCESS_ACCT
 	bool "BSD Process Accounting"
 	depends on MULTIUSER
@@ -971,6 +980,82 @@
 
 if CGROUPS
 
+config CGROUP_DEBUG
+	bool "Example debug cgroup subsystem"
+	default n
+	help
+	  This option enables a simple cgroup subsystem that
+	  exports useful debugging information about the cgroups
+	  framework.
+
+	  Say N if unsure.
+
+config CGROUP_FREEZER
+	bool "Freezer cgroup subsystem"
+	help
+	  Provides a way to freeze and unfreeze all tasks in a
+	  cgroup.
+
+config CGROUP_PIDS
+	bool "PIDs cgroup subsystem"
+	help
+	  Provides enforcement of process number limits in the scope of a
+	  cgroup. Any attempt to fork more processes than is allowed in the
+	  cgroup will fail. PIDs are fundamentally a global resource because it
+	  is fairly trivial to reach PID exhaustion before you reach even a
+	  conservative kmemcg limit. As a result, it is possible to grind a
+	  system to halt without being limited by other cgroup policies. The
+	  PIDs cgroup subsystem is designed to stop this from happening.
+
+	  It should be noted that organisational operations (such as attaching
+	  to a cgroup hierarchy will *not* be blocked by the PIDs subsystem),
+	  since the PIDs limit only affects a process's ability to fork, not to
+	  attach to a cgroup.
+
+config CGROUP_DEVICE
+	bool "Device controller for cgroups"
+	help
+	  Provides a cgroup implementing whitelists for devices which
+	  a process in the cgroup can mknod or open.
+
+config CPUSETS
+	bool "Cpuset support"
+	help
+	  This option will let you create and manage CPUSETs which
+	  allow dynamically partitioning a system into sets of CPUs and
+	  Memory Nodes and assigning tasks to run only within those sets.
+	  This is primarily useful on large SMP or NUMA systems.
+
+	  Say N if unsure.
+
+config PROC_PID_CPUSET
+	bool "Include legacy /proc/<pid>/cpuset file"
+	depends on CPUSETS
+	default y
+
+config CGROUP_CPUACCT
+	bool "Simple CPU accounting cgroup subsystem"
+	help
+	  Provides a simple Resource Controller for monitoring the
+	  total CPU consumed by the tasks in a cgroup.
+
+config CGROUP_SCHEDTUNE
+	bool "CFS tasks boosting cgroup subsystem (EXPERIMENTAL)"
+	depends on SCHED_TUNE
+	help
+	  This option provides the "schedtune" controller which improves the
+	  flexibility of the task boosting mechanism by introducing the support
+	  to define "per task" boost values.
+
+	  This new controller:
+	  1. allows only a two layers hierarchy, where the root defines the
+	     system-wide boost value and its direct childrens define each one a
+	     different "class of tasks" to be boosted with a different value
+	  2. supports up to 16 different task classes, each one which could be
+	     configured with a different boost value
+
+	  Say N if unsure.
+
 config PAGE_COUNTER
        bool
 
@@ -1154,6 +1239,19 @@
 
 	  Say N if unsure.
 
+config CGROUP_BPF
+	bool "Support for eBPF programs attached to cgroups"
+	depends on BPF_SYSCALL
+	select SOCK_CGROUP_DATA
+	help
+	  Allow attaching eBPF programs to a cgroup using the bpf(2)
+	  syscall command BPF_PROG_ATTACH.
+
+	  In which context these programs are accessed depends on the type
+	  of attachment. For instance, programs that are attached using
+	  BPF_CGROUP_INET_INGRESS will be executed on the ingress path of
+	  inet sockets.
+
 config CGROUP_DEBUG
 	bool "Example controller"
 	default n
@@ -1163,6 +1261,10 @@
 
 	  Say N.
 
+config SOCK_CGROUP_DATA
+	bool
+	default n
+
 endif # CGROUPS
 
 config CHECKPOINT_RESTORE
@@ -1248,6 +1350,43 @@
 	  desktop applications.  Task group autogeneration is currently based
 	  upon task session.
 
+config SCHED_TUNE
+	bool "Boosting for CFS tasks (EXPERIMENTAL)"
+	depends on SMP
+	help
+	  This option enables the system-wide support for task boosting.
+	  When this support is enabled a new sysctl interface is exposed to
+	  userspace via:
+	     /proc/sys/kernel/sched_cfs_boost
+	  which allows to set a system-wide boost value in range [0..100].
+
+	  The currently boosting strategy is implemented in such a way that:
+	  - a 0% boost value requires to operate in "standard" mode by
+	    scheduling all tasks at the minimum capacities required by their
+	    workload demand
+	  - a 100% boost value requires to push at maximum the task
+	    performances, "regardless" of the incurred energy consumption
+
+	  A boost value in between these two boundaries is used to bias the
+	  power/performance trade-off, the higher the boost value the more the
+	  scheduler is biased toward performance boosting instead of energy
+	  efficiency.
+
+	  Since this support exposes a single system-wide knob, the specified
+	  boost value is applied to all (CFS) tasks in the system.
+
+	  If unsure, say N.
+
+config DEFAULT_USE_ENERGY_AWARE
+	bool "Default to enabling the Energy Aware Scheduler feature"
+	default n
+	help
+	  This option defaults the ENERGY_AWARE scheduling feature to true,
+	  as without SCHED_DEBUG set this feature can't be enabled or disabled
+	  via sysctl.
+
+	  Say N if unsure.
+
 config SYSFS_DEPRECATED
 	bool "Enable deprecated sysfs features to support old userspace tools"
 	depends on SYSFS
diff --git a/init/Makefile b/init/Makefile
index c4fb455..d210b23 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -5,11 +5,8 @@
 ccflags-y := -fno-function-sections -fno-data-sections
 
 obj-y                          := main.o version.o mounts.o
-ifneq ($(CONFIG_BLK_DEV_INITRD),y)
 obj-y                          += noinitramfs.o
-else
 obj-$(CONFIG_BLK_DEV_INITRD)   += initramfs.o
-endif
 obj-$(CONFIG_GENERIC_CALIBRATE_DELAY) += calibrate.o
 
 ifneq ($(CONFIG_ARCH_INIT_TASK),y)
@@ -20,6 +17,7 @@
 mounts-$(CONFIG_BLK_DEV_RAM)	+= do_mounts_rd.o
 mounts-$(CONFIG_BLK_DEV_INITRD)	+= do_mounts_initrd.o
 mounts-$(CONFIG_BLK_DEV_MD)	+= do_mounts_md.o
+mounts-$(CONFIG_BLK_DEV_DM)	+= do_mounts_dm.o
 
 # dependencies on generated files need to be listed explicitly
 $(obj)/version.o: include/generated/compile.h
diff --git a/init/do_mounts.c b/init/do_mounts.c
index dea5de9..1902a1c8 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -566,6 +566,7 @@
 	wait_for_device_probe();
 
 	md_run_setup();
+	dm_run_setup();
 
 	if (saved_root_name[0]) {
 		root_device_name = saved_root_name;
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 067af1d9e..ecb2757 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -74,3 +74,13 @@
 static inline void md_run_setup(void) {}
 
 #endif
+
+#ifdef CONFIG_BLK_DEV_DM
+
+void dm_run_setup(void);
+
+#else
+
+static inline void dm_run_setup(void) {}
+
+#endif
diff --git a/init/do_mounts_dm.c b/init/do_mounts_dm.c
new file mode 100644
index 0000000..af84b01
--- /dev/null
+++ b/init/do_mounts_dm.c
@@ -0,0 +1,470 @@
+/* do_mounts_dm.c
+ * Copyright (C) 2010 The Chromium OS Authors <chromium-os-dev@chromium.org>
+ *                    All Rights Reserved.
+ * Based on do_mounts_md.c
+ *
+ * This file is released under the GPL.
+ */
+#include <linux/async.h>
+#include <linux/ctype.h>
+#include <linux/device-mapper.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+
+#include "do_mounts.h"
+
+#define DM_MAX_DEVICES 256
+#define DM_MAX_TARGETS 256
+#define DM_MAX_NAME 32
+#define DM_MAX_UUID 129
+#define DM_NO_UUID "none"
+
+#define DM_MSG_PREFIX "init"
+
+/* Separators used for parsing the dm= argument. */
+#define DM_FIELD_SEP " "
+#define DM_LINE_SEP ","
+#define DM_ANY_SEP DM_FIELD_SEP DM_LINE_SEP
+
+/*
+ * When the device-mapper and any targets are compiled into the kernel
+ * (not a module), one or more device-mappers may be created and used
+ * as the root device at boot time with the parameters given with the
+ * boot line dm=...
+ *
+ * Multiple device-mappers can be stacked specifing the number of
+ * devices. A device can have multiple targets if the the number of
+ * targets is specified.
+ *
+ * TODO(taysom:defect 32847)
+ * In the future, the <num> field will be mandatory.
+ *
+ * <device>        ::= [<num>] <device-mapper>+
+ * <device-mapper> ::= <head> "," <target>+
+ * <head>          ::= <name> <uuid> <mode> [<num>]
+ * <target>        ::= <start> <length> <type> <options> ","
+ * <mode>          ::= "ro" | "rw"
+ * <uuid>          ::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | "none"
+ * <type>          ::= "verity" | "bootcache" | ...
+ *
+ * Example:
+ * 2 vboot none ro 1,
+ *     0 1768000 bootcache
+ *       device=aa55b119-2a47-8c45-946a-5ac57765011f+1
+ *       signature=76e9be054b15884a9fa85973e9cb274c93afadb6
+ *       cache_start=1768000 max_blocks=100000 size_limit=23 max_trace=20000,
+ *   vroot none ro 1,
+ *     0 1740800 verity payload=254:0 hashtree=254:0 hashstart=1740800 alg=sha1
+ *       root_hexdigest=76e9be054b15884a9fa85973e9cb274c93afadb6
+ *       salt=5b3549d54d6c7a3837b9b81ed72e49463a64c03680c47835bef94d768e5646fe
+ *
+ * Notes:
+ *  1. uuid is a label for the device and we set it to "none".
+ *  2. The <num> field will be optional initially and assumed to be 1.
+ *     Once all the scripts that set these fields have been set, it will
+ *     be made mandatory.
+ */
+
+struct dm_setup_target {
+	sector_t begin;
+	sector_t length;
+	char *type;
+	char *params;
+	/* simple singly linked list */
+	struct dm_setup_target *next;
+};
+
+struct dm_device {
+	int minor;
+	int ro;
+	char name[DM_MAX_NAME];
+	char uuid[DM_MAX_UUID];
+	unsigned long num_targets;
+	struct dm_setup_target *target;
+	int target_count;
+	struct dm_device *next;
+};
+
+struct dm_option {
+	char *start;
+	char *next;
+	size_t len;
+	char delim;
+};
+
+static struct {
+	unsigned long num_devices;
+	char *str;
+} dm_setup_args __initdata;
+
+static __initdata int dm_early_setup;
+
+static int __init get_dm_option(struct dm_option *opt, const char *accept)
+{
+	char *str = opt->next;
+	char *endp;
+
+	if (!str)
+		return 0;
+
+	str = skip_spaces(str);
+	opt->start = str;
+	endp = strpbrk(str, accept);
+	if (!endp) {  /* act like strchrnul */
+		opt->len = strlen(str);
+		endp = str + opt->len;
+	} else {
+		opt->len = endp - str;
+	}
+	opt->delim = *endp;
+	if (*endp == 0) {
+		/* Don't advance past the nul. */
+		opt->next = endp;
+	} else {
+		opt->next = endp + 1;
+	}
+	return opt->len != 0;
+}
+
+static int __init dm_setup_cleanup(struct dm_device *devices)
+{
+	struct dm_device *dev = devices;
+
+	while (dev) {
+		struct dm_device *old_dev = dev;
+		struct dm_setup_target *target = dev->target;
+		while (target) {
+			struct dm_setup_target *old_target = target;
+			kfree(target->type);
+			kfree(target->params);
+			target = target->next;
+			kfree(old_target);
+			dev->target_count--;
+		}
+		BUG_ON(dev->target_count);
+		dev = dev->next;
+		kfree(old_dev);
+	}
+	return 0;
+}
+
+static char * __init dm_parse_device(struct dm_device *dev, char *str)
+{
+	struct dm_option opt;
+	size_t len;
+
+	/* Grab the logical name of the device to be exported to udev */
+	opt.next = str;
+	if (!get_dm_option(&opt, DM_FIELD_SEP)) {
+		DMERR("failed to parse device name");
+		goto parse_fail;
+	}
+	len = min(opt.len + 1, sizeof(dev->name));
+	strlcpy(dev->name, opt.start, len);  /* includes nul */
+
+	/* Grab the UUID value or "none" */
+	if (!get_dm_option(&opt, DM_FIELD_SEP)) {
+		DMERR("failed to parse device uuid");
+		goto parse_fail;
+	}
+	len = min(opt.len + 1, sizeof(dev->uuid));
+	strlcpy(dev->uuid, opt.start, len);
+
+	/* Determine if the table/device will be read only or read-write */
+	get_dm_option(&opt, DM_ANY_SEP);
+	if (!strncmp("ro", opt.start, opt.len)) {
+		dev->ro = 1;
+	} else if (!strncmp("rw", opt.start, opt.len)) {
+		dev->ro = 0;
+	} else {
+		DMERR("failed to parse table mode");
+		goto parse_fail;
+	}
+
+	/* Optional number field */
+	/* XXX: The <num> field will be mandatory in the next round */
+	if (opt.delim == DM_FIELD_SEP[0]) {
+		if (!get_dm_option(&opt, DM_LINE_SEP))
+			return NULL;
+		dev->num_targets = simple_strtoul(opt.start, NULL, 10);
+	} else {
+		dev->num_targets = 1;
+	}
+	if (dev->num_targets > DM_MAX_TARGETS) {
+		DMERR("too many targets %lu > %d",
+			dev->num_targets, DM_MAX_TARGETS);
+	}
+	return opt.next;
+
+parse_fail:
+	return NULL;
+}
+
+static char * __init dm_parse_targets(struct dm_device *dev, char *str)
+{
+	struct dm_option opt;
+	struct dm_setup_target **target = &dev->target;
+	unsigned long num_targets = dev->num_targets;
+	unsigned long i;
+
+	/* Targets are defined as per the table format but with a
+	 * comma as a newline separator. */
+	opt.next = str;
+	for (i = 0; i < num_targets; i++) {
+		*target = kzalloc(sizeof(struct dm_setup_target), GFP_KERNEL);
+		if (!*target) {
+			DMERR("failed to allocate memory for target %s<%ld>",
+				dev->name, i);
+			goto parse_fail;
+		}
+		dev->target_count++;
+
+		if (!get_dm_option(&opt, DM_FIELD_SEP)) {
+			DMERR("failed to parse starting sector"
+				" for target %s<%ld>", dev->name, i);
+			goto parse_fail;
+		}
+		(*target)->begin = simple_strtoull(opt.start, NULL, 10);
+
+		if (!get_dm_option(&opt, DM_FIELD_SEP)) {
+			DMERR("failed to parse length for target %s<%ld>",
+				dev->name, i);
+			goto parse_fail;
+		}
+		(*target)->length = simple_strtoull(opt.start, NULL, 10);
+
+		if (get_dm_option(&opt, DM_FIELD_SEP))
+			(*target)->type = kstrndup(opt.start, opt.len,
+							GFP_KERNEL);
+		if (!((*target)->type)) {
+			DMERR("failed to parse type for target %s<%ld>",
+				dev->name, i);
+			goto parse_fail;
+		}
+		if (get_dm_option(&opt, DM_LINE_SEP))
+			(*target)->params = kstrndup(opt.start, opt.len,
+							GFP_KERNEL);
+		if (!((*target)->params)) {
+			DMERR("failed to parse params for target %s<%ld>",
+				dev->name, i);
+			goto parse_fail;
+		}
+		target = &((*target)->next);
+	}
+	DMDEBUG("parsed %d targets", dev->target_count);
+
+	return opt.next;
+
+parse_fail:
+	return NULL;
+}
+
+static struct dm_device * __init dm_parse_args(void)
+{
+	struct dm_device *devices = NULL;
+	struct dm_device **tail = &devices;
+	struct dm_device *dev;
+	char *str = dm_setup_args.str;
+	unsigned long num_devices = dm_setup_args.num_devices;
+	unsigned long i;
+
+	if (!str)
+		return NULL;
+	for (i = 0; i < num_devices; i++) {
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (!dev) {
+			DMERR("failed to allocated memory for dev");
+			goto error;
+		}
+		*tail = dev;
+		tail = &dev->next;
+		/*
+		 * devices are given minor numbers 0 - n-1
+		 * in the order they are found in the arg
+		 * string.
+		 */
+		dev->minor = i;
+		str = dm_parse_device(dev, str);
+		if (!str)	/* NULL indicates error in parsing, bail */
+			goto error;
+
+		str = dm_parse_targets(dev, str);
+		if (!str)
+			goto error;
+	}
+	return devices;
+error:
+	dm_setup_cleanup(devices);
+	return NULL;
+}
+
+/*
+ * Parse the command-line parameters given our kernel, but do not
+ * actually try to invoke the DM device now; that is handled by
+ * dm_setup_drives after the low-level disk drivers have initialised.
+ * dm format is described at the top of the file.
+ *
+ * Because dm minor numbers are assigned in assending order starting with 0,
+ * You can assume the first device is /dev/dm-0, the next device is /dev/dm-1,
+ * and so forth.
+ */
+static int __init dm_setup(char *str)
+{
+	struct dm_option opt;
+	unsigned long num_devices;
+
+	if (!str) {
+		DMDEBUG("str is NULL");
+		goto parse_fail;
+	}
+	opt.next = str;
+	if (!get_dm_option(&opt, DM_FIELD_SEP))
+		goto parse_fail;
+	if (isdigit(opt.start[0])) {	/* XXX: Optional number field */
+		num_devices = simple_strtoul(opt.start, NULL, 10);
+		str = opt.next;
+	} else {
+		num_devices = 1;
+		/* Don't advance str */
+	}
+	if (num_devices > DM_MAX_DEVICES) {
+		DMDEBUG("too many devices %lu > %d",
+			num_devices, DM_MAX_DEVICES);
+	}
+	dm_setup_args.str = str;
+	dm_setup_args.num_devices = num_devices;
+	DMINFO("will configure %lu devices", num_devices);
+	dm_early_setup = 1;
+	return 1;
+
+parse_fail:
+	DMWARN("Invalid arguments supplied to dm=.");
+	return 0;
+}
+
+static void __init dm_setup_drives(void)
+{
+	struct mapped_device *md = NULL;
+	struct dm_table *table = NULL;
+	struct dm_setup_target *target;
+	struct dm_device *dev;
+	char *uuid;
+	fmode_t fmode = FMODE_READ;
+	struct dm_device *devices;
+
+	devices = dm_parse_args();
+
+	for (dev = devices; dev; dev = dev->next) {
+		if (dm_create(dev->minor, &md)) {
+			DMDEBUG("failed to create the device");
+			goto dm_create_fail;
+		}
+		DMDEBUG("created device '%s'", dm_device_name(md));
+
+		/*
+		 * In addition to flagging the table below, the disk must be
+		 * set explicitly ro/rw.
+		 */
+		set_disk_ro(dm_disk(md), dev->ro);
+
+		if (!dev->ro)
+			fmode |= FMODE_WRITE;
+		if (dm_table_create(&table, fmode, dev->target_count, md)) {
+			DMDEBUG("failed to create the table");
+			goto dm_table_create_fail;
+		}
+
+		dm_lock_md_type(md);
+
+		for (target = dev->target; target; target = target->next) {
+			DMINFO("adding target '%llu %llu %s %s'",
+			       (unsigned long long) target->begin,
+			       (unsigned long long) target->length,
+			       target->type, target->params);
+			if (dm_table_add_target(table, target->type,
+						target->begin,
+						target->length,
+						target->params)) {
+				DMDEBUG("failed to add the target"
+					" to the table");
+				goto add_target_fail;
+			}
+		}
+		if (dm_table_complete(table)) {
+			DMDEBUG("failed to complete the table");
+			goto table_complete_fail;
+		}
+
+		/* Suspend the device so that we can bind it to the table. */
+		if (dm_suspend(md, 0)) {
+			DMDEBUG("failed to suspend the device pre-bind");
+			goto suspend_fail;
+		}
+
+		/* Initial table load: acquire type of table. */
+		dm_set_md_type(md, dm_table_get_type(table));
+
+		/* Setup md->queue to reflect md's type. */
+		if (dm_setup_md_queue(md, table)) {
+			DMWARN("unable to set up device queue for new table.");
+			goto setup_md_queue_fail;
+		}
+
+		/*
+		 * Bind the table to the device. This is the only way
+		 * to associate md->map with the table and set the disk
+		 * capacity directly.
+		 */
+		if (dm_swap_table(md, table)) {  /* should return NULL. */
+			DMDEBUG("failed to bind the device to the table");
+			goto table_bind_fail;
+		}
+
+		/* Finally, resume and the device should be ready. */
+		if (dm_resume(md)) {
+			DMDEBUG("failed to resume the device");
+			goto resume_fail;
+		}
+
+		/* Export the dm device via the ioctl interface */
+		if (!strcmp(DM_NO_UUID, dev->uuid))
+			uuid = NULL;
+		if (dm_ioctl_export(md, dev->name, uuid)) {
+			DMDEBUG("failed to export device with given"
+				" name and uuid");
+			goto export_fail;
+		}
+
+		dm_unlock_md_type(md);
+
+		DMINFO("dm-%d is ready", dev->minor);
+	}
+	dm_setup_cleanup(devices);
+	return;
+
+export_fail:
+resume_fail:
+table_bind_fail:
+setup_md_queue_fail:
+suspend_fail:
+table_complete_fail:
+add_target_fail:
+	dm_unlock_md_type(md);
+dm_table_create_fail:
+	dm_put(md);
+dm_create_fail:
+	DMWARN("starting dm-%d (%s) failed",
+	       dev->minor, dev->name);
+	dm_setup_cleanup(devices);
+}
+
+__setup("dm=", dm_setup);
+
+void __init dm_run_setup(void)
+{
+	if (!dm_early_setup)
+		return;
+	DMINFO("attempting early device configuration.");
+	dm_setup_drives();
+}
diff --git a/init/initramfs.c b/init/initramfs.c
index 981f286..d0b53f49 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -18,6 +18,7 @@
 #include <linux/dirent.h>
 #include <linux/syscalls.h>
 #include <linux/utime.h>
+#include <linux/initramfs.h>
 #include <linux/file.h>
 
 static ssize_t __init xwrite(int fd, const char *p, size_t count)
@@ -606,9 +607,25 @@
 }
 #endif
 
+static int __initdata do_skip_initramfs;
+
+static int __init skip_initramfs_param(char *str)
+{
+	if (*str)
+		return 0;
+	do_skip_initramfs = 1;
+	return 1;
+}
+__setup("skip_initramfs", skip_initramfs_param);
+
 static int __init populate_rootfs(void)
 {
-	char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
+	char *err;
+
+	if (do_skip_initramfs)
+		return default_rootfs();
+
+	err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
 	if (err)
 		panic("%s", err); /* Failed to decompress INTERNAL initramfs */
 	if (initrd_start) {
diff --git a/init/noinitramfs.c b/init/noinitramfs.c
index 267739d8..bcc8bcb0 100644
--- a/init/noinitramfs.c
+++ b/init/noinitramfs.c
@@ -21,11 +21,16 @@
 #include <linux/stat.h>
 #include <linux/kdev_t.h>
 #include <linux/syscalls.h>
+#include <linux/kconfig.h>
+#include <linux/initramfs.h>
 
 /*
  * Create a simple rootfs that is similar to the default initramfs
  */
-static int __init default_rootfs(void)
+#if !IS_BUILTIN(CONFIG_BLK_DEV_INITRD)
+static
+#endif
+int __init default_rootfs(void)
 {
 	int err;
 
@@ -49,4 +54,6 @@
 	printk(KERN_WARNING "Failed to create a rootfs\n");
 	return err;
 }
+#if !IS_BUILTIN(CONFIG_BLK_DEV_INITRD)
 rootfs_initcall(default_rootfs);
+#endif
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 28a142f..02fb438 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -745,7 +745,7 @@
 	}
 
 	mode &= ~current_umask();
-	ret = vfs_create(dir, path->dentry, mode, true);
+	ret = vfs_create2(path->mnt, dir, path->dentry, mode, true);
 	path->dentry->d_fsdata = NULL;
 	if (ret)
 		return ERR_PTR(ret);
@@ -761,7 +761,7 @@
 	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
 		return ERR_PTR(-EINVAL);
 	acc = oflag2acc[oflag & O_ACCMODE];
-	if (inode_permission(d_inode(path->dentry), acc))
+	if (inode_permission2(path->mnt, d_inode(path->dentry), acc))
 		return ERR_PTR(-EACCES);
 	return dentry_open(path, oflag, current_cred());
 }
@@ -794,7 +794,7 @@
 	ro = mnt_want_write(mnt);	/* we'll drop it in any case */
 	error = 0;
 	inode_lock(d_inode(root));
-	path.dentry = lookup_one_len(name->name, root, strlen(name->name));
+	path.dentry = lookup_one_len2(name->name, mnt, root, strlen(name->name));
 	if (IS_ERR(path.dentry)) {
 		error = PTR_ERR(path.dentry);
 		goto out_putfd;
@@ -865,7 +865,7 @@
 	if (err)
 		goto out_name;
 	inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
-	dentry = lookup_one_len(name->name, mnt->mnt_root,
+	dentry = lookup_one_len2(name->name, mnt, mnt->mnt_root,
 				strlen(name->name));
 	if (IS_ERR(dentry)) {
 		err = PTR_ERR(dentry);
@@ -877,7 +877,7 @@
 		err = -ENOENT;
 	} else {
 		ihold(inode);
-		err = vfs_unlink(d_inode(dentry->d_parent), dentry, NULL);
+		err = vfs_unlink2(mnt, d_inode(dentry->d_parent), dentry, NULL);
 	}
 	dput(dentry);
 
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index eed911d..b22256b 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -5,3 +5,4 @@
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif
+obj-$(CONFIG_CGROUP_BPF) += cgroup.o
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
new file mode 100644
index 0000000..a44a7e4
--- /dev/null
+++ b/kernel/bpf/cgroup.c
@@ -0,0 +1,205 @@
+/*
+ * Functions to manage eBPF programs attached to cgroups
+ *
+ * Copyright (c) 2016 Daniel Mack
+ *
+ * This file is subject to the terms and conditions of version 2 of the GNU
+ * General Public License.  See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#include <linux/bpf-cgroup.h>
+#include <net/sock.h>
+
+DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
+EXPORT_SYMBOL(cgroup_bpf_enabled_key);
+
+/**
+ * cgroup_bpf_put() - put references of all bpf programs
+ * @cgrp: the cgroup to modify
+ */
+void cgroup_bpf_put(struct cgroup *cgrp)
+{
+	unsigned int type;
+
+	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
+		struct bpf_prog *prog = cgrp->bpf.prog[type];
+
+		if (prog) {
+			bpf_prog_put(prog);
+			static_branch_dec(&cgroup_bpf_enabled_key);
+		}
+	}
+}
+
+/**
+ * cgroup_bpf_inherit() - inherit effective programs from parent
+ * @cgrp: the cgroup to modify
+ * @parent: the parent to inherit from
+ */
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
+{
+	unsigned int type;
+
+	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
+		struct bpf_prog *e;
+
+		e = rcu_dereference_protected(parent->bpf.effective[type],
+					      lockdep_is_held(&cgroup_mutex));
+		rcu_assign_pointer(cgrp->bpf.effective[type], e);
+		cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
+	}
+}
+
+/**
+ * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
+ *                         propagate the change to descendants
+ * @cgrp: The cgroup which descendants to traverse
+ * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
+ * @prog: A new program to pin
+ * @type: Type of pinning operation (ingress/egress)
+ *
+ * Each cgroup has a set of two pointers for bpf programs; one for eBPF
+ * programs it owns, and which is effective for execution.
+ *
+ * If @prog is not %NULL, this function attaches a new program to the cgroup
+ * and releases the one that is currently attached, if any. @prog is then made
+ * the effective program of type @type in that cgroup.
+ *
+ * If @prog is %NULL, the currently attached program of type @type is released,
+ * and the effective program of the parent cgroup (if any) is inherited to
+ * @cgrp.
+ *
+ * Then, the descendants of @cgrp are walked and the effective program for
+ * each of them is set to the effective program of @cgrp unless the
+ * descendant has its own program attached, in which case the subbranch is
+ * skipped. This ensures that delegated subcgroups with own programs are left
+ * untouched.
+ *
+ * Must be called with cgroup_mutex held.
+ */
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+			struct bpf_prog *prog, enum bpf_attach_type type,
+			bool new_overridable)
+{
+	struct bpf_prog *old_prog, *effective = NULL;
+	struct cgroup_subsys_state *pos;
+	bool overridable = true;
+
+	if (parent) {
+		overridable = !parent->bpf.disallow_override[type];
+		effective = rcu_dereference_protected(parent->bpf.effective[type],
+						      lockdep_is_held(&cgroup_mutex));
+	}
+
+	if (prog && effective && !overridable)
+		/* if parent has non-overridable prog attached, disallow
+		 * attaching new programs to descendent cgroup
+		 */
+		return -EPERM;
+
+	if (prog && effective && overridable != new_overridable)
+		/* if parent has overridable prog attached, only
+		 * allow overridable programs in descendent cgroup
+		 */
+		return -EPERM;
+
+	old_prog = cgrp->bpf.prog[type];
+
+	if (prog) {
+		overridable = new_overridable;
+		effective = prog;
+		if (old_prog &&
+		    cgrp->bpf.disallow_override[type] == new_overridable)
+			/* disallow attaching non-overridable on top
+			 * of existing overridable in this cgroup
+			 * and vice versa
+			 */
+			return -EPERM;
+	}
+
+	if (!prog && !old_prog)
+		/* report error when trying to detach and nothing is attached */
+		return -ENOENT;
+
+	cgrp->bpf.prog[type] = prog;
+
+	css_for_each_descendant_pre(pos, &cgrp->self) {
+		struct cgroup *desc = container_of(pos, struct cgroup, self);
+
+		/* skip the subtree if the descendant has its own program */
+		if (desc->bpf.prog[type] && desc != cgrp) {
+			pos = css_rightmost_descendant(pos);
+		} else {
+			rcu_assign_pointer(desc->bpf.effective[type],
+					   effective);
+			desc->bpf.disallow_override[type] = !overridable;
+		}
+	}
+
+	if (prog)
+		static_branch_inc(&cgroup_bpf_enabled_key);
+
+	if (old_prog) {
+		bpf_prog_put(old_prog);
+		static_branch_dec(&cgroup_bpf_enabled_key);
+	}
+	return 0;
+}
+
+/**
+ * __cgroup_bpf_run_filter() - Run a program for packet filtering
+ * @sk: The socket sending or receiving traffic
+ * @skb: The skb that is being sent or received
+ * @type: The type of program to be exectuted
+ *
+ * If no socket is passed, or the socket is not of type INET or INET6,
+ * this function does nothing and returns 0.
+ *
+ * The program type passed in via @type must be suitable for network
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if any if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter(struct sock *sk,
+			    struct sk_buff *skb,
+			    enum bpf_attach_type type)
+{
+	struct bpf_prog *prog;
+	struct cgroup *cgrp;
+	int ret = 0;
+
+	if (!sk || !sk_fullsock(sk))
+		return 0;
+
+	if (sk->sk_family != AF_INET &&
+	    sk->sk_family != AF_INET6)
+		return 0;
+
+	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+
+	rcu_read_lock();
+
+	prog = rcu_dereference(cgrp->bpf.effective[type]);
+	if (prog) {
+		unsigned int offset = skb->data - skb_network_header(skb);
+		struct sock *save_sk = skb->sk;
+
+		skb->sk = sk;
+		__skb_push(skb, offset);
+		ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
+		__skb_pull(skb, offset);
+		skb->sk = save_sk;
+	}
+
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ca7e277..4eb48f4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -726,7 +726,9 @@
 	    attr->kern_version != LINUX_VERSION_CODE)
 		return -EINVAL;
 
-	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
+	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
+	    type != BPF_PROG_TYPE_CGROUP_SKB &&
+	    !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/* plain bpf_prog allocation */
@@ -800,6 +802,85 @@
 	return bpf_obj_get_user(u64_to_ptr(attr->pathname));
 }
 
+#ifdef CONFIG_CGROUP_BPF
+
+#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
+
+static int bpf_prog_attach(const union bpf_attr *attr)
+{
+	struct bpf_prog *prog;
+	struct cgroup *cgrp;
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (CHECK_ATTR(BPF_PROG_ATTACH))
+		return -EINVAL;
+
+	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
+		return -EINVAL;
+
+	switch (attr->attach_type) {
+	case BPF_CGROUP_INET_INGRESS:
+	case BPF_CGROUP_INET_EGRESS:
+		prog = bpf_prog_get_type(attr->attach_bpf_fd,
+					 BPF_PROG_TYPE_CGROUP_SKB);
+		if (IS_ERR(prog))
+			return PTR_ERR(prog);
+
+		cgrp = cgroup_get_from_fd(attr->target_fd);
+		if (IS_ERR(cgrp)) {
+			bpf_prog_put(prog);
+			return PTR_ERR(cgrp);
+		}
+
+		ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
+					attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
+		if (ret)
+			bpf_prog_put(prog);
+		cgroup_put(cgrp);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+#define BPF_PROG_DETACH_LAST_FIELD attach_type
+
+static int bpf_prog_detach(const union bpf_attr *attr)
+{
+	struct cgroup *cgrp;
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (CHECK_ATTR(BPF_PROG_DETACH))
+		return -EINVAL;
+
+	switch (attr->attach_type) {
+	case BPF_CGROUP_INET_INGRESS:
+	case BPF_CGROUP_INET_EGRESS:
+		cgrp = cgroup_get_from_fd(attr->target_fd);
+		if (IS_ERR(cgrp))
+			return PTR_ERR(cgrp);
+
+		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
+		cgroup_put(cgrp);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return ret;
+}
+#endif /* CONFIG_CGROUP_BPF */
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr = {};
@@ -866,6 +947,16 @@
 	case BPF_OBJ_GET:
 		err = bpf_obj_get(&attr);
 		break;
+
+#ifdef CONFIG_CGROUP_BPF
+	case BPF_PROG_ATTACH:
+		err = bpf_prog_attach(&attr);
+		break;
+	case BPF_PROG_DETACH:
+		err = bpf_prog_detach(&attr);
+		break;
+#endif
+
 	default:
 		err = -EINVAL;
 		break;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index bb0cf1c..2ebd03d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2856,7 +2856,8 @@
 	 */
 	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
 	    !uid_eq(cred->euid, tcred->uid) &&
-	    !uid_eq(cred->euid, tcred->suid))
+	    !uid_eq(cred->euid, tcred->suid) &&
+	    !ns_capable(tcred->user_ns, CAP_SYS_NICE))
 		ret = -EACCES;
 
 	if (!ret && cgroup_on_dfl(dst_cgrp)) {
@@ -5079,6 +5080,8 @@
 		if (cgrp->kn)
 			RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
 					 NULL);
+
+		cgroup_bpf_put(cgrp);
 	}
 
 	mutex_unlock(&cgroup_mutex);
@@ -5291,6 +5294,9 @@
 	if (!cgroup_on_dfl(cgrp))
 		cgrp->subtree_control = cgroup_control(cgrp);
 
+	if (parent)
+		cgroup_bpf_inherit(cgrp, parent);
+
 	cgroup_propagate_control(cgrp);
 
 	return cgrp;
@@ -6506,6 +6512,20 @@
 }
 subsys_initcall(cgroup_namespaces_init);
 
+#ifdef CONFIG_CGROUP_BPF
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+		      enum bpf_attach_type type, bool overridable)
+{
+	struct cgroup *parent = cgroup_parent(cgrp);
+	int ret;
+
+	mutex_lock(&cgroup_mutex);
+	ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
+	mutex_unlock(&cgroup_mutex);
+	return ret;
+}
+#endif /* CONFIG_CGROUP_BPF */
+
 #ifdef CONFIG_CGROUP_DEBUG
 static struct cgroup_subsys_state *
 debug_css_alloc(struct cgroup_subsys_state *parent_css)
diff --git a/kernel/configs/README.android b/kernel/configs/README.android
new file mode 100644
index 0000000..2e2d7c0
--- /dev/null
+++ b/kernel/configs/README.android
@@ -0,0 +1,15 @@
+The android-*.config files in this directory are meant to be used as a base
+for an Android kernel config. All devices should have the options in
+android-base.config enabled. While not mandatory, the options in
+android-recommended.config enable advanced Android features.
+
+Assuming you already have a minimalist defconfig for your device, a possible
+way to enable these options would be:
+
+     ARCH=<arch> scripts/kconfig/merge_config.sh <path_to>/<device>_defconfig kernel/configs/android-base.config kernel/configs/android-recommended.config
+
+This will generate a .config that can then be used to save a new defconfig or
+compile a new kernel with Android features enabled.
+
+Because there is no tool to consistently generate these config fragments,
+lets keep them alphabetically sorted instead of random.
diff --git a/kernel/configs/android-base-arm64.cfg b/kernel/configs/android-base-arm64.cfg
new file mode 100644
index 0000000..43f23d6
--- /dev/null
+++ b/kernel/configs/android-base-arm64.cfg
@@ -0,0 +1,5 @@
+#  KEEP ALPHABETICALLY SORTED
+CONFIG_ARMV8_DEPRECATED=y
+CONFIG_CP15_BARRIER_EMULATION=y
+CONFIG_SETEND_EMULATION=y
+CONFIG_SWP_EMULATION=y
diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config
index 1a8f34f..a393f83 100644
--- a/kernel/configs/android-base.config
+++ b/kernel/configs/android-base.config
@@ -1,27 +1,32 @@
 #  KEEP ALPHABETICALLY SORTED
 # CONFIG_DEVKMEM is not set
 # CONFIG_DEVMEM is not set
+# CONFIG_FHANDLE is not set
 # CONFIG_INET_LRO is not set
-# CONFIG_MODULES is not set
+# CONFIG_NFSD is not set
+# CONFIG_NFS_FS is not set
 # CONFIG_OABI_COMPAT is not set
 # CONFIG_SYSVIPC is not set
+# CONFIG_USELIB is not set
 CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_DEVICES=binder,hwbinder,vndbinder
 CONFIG_ANDROID_BINDER_IPC=y
 CONFIG_ANDROID_LOW_MEMORY_KILLER=y
-CONFIG_ARMV8_DEPRECATED=y
 CONFIG_ASHMEM=y
 CONFIG_AUDIT=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_CGROUPS=y
 CONFIG_CGROUP_CPUACCT=y
-CONFIG_CGROUP_DEBUG=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_SCHED=y
-CONFIG_CP15_BARRIER_EMULATION=y
+CONFIG_CGROUP_BPF=y
 CONFIG_DEFAULT_SECURITY_SELINUX=y
 CONFIG_EMBEDDED=y
 CONFIG_FB=y
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
 CONFIG_INET6_AH=y
 CONFIG_INET6_ESP=y
 CONFIG_INET6_IPCOMP=y
@@ -59,10 +64,12 @@
 CONFIG_IP_NF_TARGET_NETMAP=y
 CONFIG_IP_NF_TARGET_REDIRECT=y
 CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
 CONFIG_NET=y
 CONFIG_NETDEVICES=y
 CONFIG_NETFILTER=y
-CONFIG_NETFILTER_TPROXY=y
 CONFIG_NETFILTER_XT_MATCH_COMMENT=y
 CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
 CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
@@ -76,6 +83,8 @@
 CONFIG_NETFILTER_XT_MATCH_MARK=y
 CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
 CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
 CONFIG_NETFILTER_XT_MATCH_QUOTA=y
 CONFIG_NETFILTER_XT_MATCH_SOCKET=y
 CONFIG_NETFILTER_XT_MATCH_STATE=y
@@ -124,26 +133,33 @@
 CONFIG_PM_AUTOSLEEP=y
 CONFIG_PM_WAKELOCKS=y
 CONFIG_PPP=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
 CONFIG_PPP_BSDCOMP=y
 CONFIG_PPP_DEFLATE=y
 CONFIG_PPP_MPPE=y
 CONFIG_PREEMPT=y
-CONFIG_QUOTA=y
+CONFIG_PROFILING=y
+CONFIG_RANDOMIZE_BASE=y
 CONFIG_RTC_CLASS=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_SECCOMP=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y
 CONFIG_SECURITY_SELINUX=y
-CONFIG_SETEND_EMULATION=y
 CONFIG_STAGING=y
-CONFIG_SWP_EMULATION=y
 CONFIG_SYNC=y
 CONFIG_TUN=y
+CONFIG_UID_SYS_STATS=y
 CONFIG_UNIX=y
-CONFIG_USB_GADGET=y
 CONFIG_USB_CONFIGFS=y
+CONFIG_USB_CONFIGFS_F_ACC=y
+CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y
 CONFIG_USB_CONFIGFS_F_FS=y
 CONFIG_USB_CONFIGFS_F_MIDI=y
-CONFIG_USB_OTG_WAKELOCK=y
+CONFIG_USB_CONFIGFS_F_MTP=y
+CONFIG_USB_CONFIGFS_F_PTP=y
+CONFIG_USB_CONFIGFS_UEVENT=y
+CONFIG_USB_GADGET=y
 CONFIG_XFRM_USER=y
diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config
index 297756b..f3f7c1c 100644
--- a/kernel/configs/android-recommended.config
+++ b/kernel/configs/android-recommended.config
@@ -1,16 +1,20 @@
 #  KEEP ALPHABETICALLY SORTED
+# CONFIG_AIO is not set
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_NF_CONNTRACK_SIP is not set
 # CONFIG_PM_WAKELOCKS_GC is not set
 # CONFIG_VT is not set
+CONFIG_ARM64_SW_TTBR0_PAN=y
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 CONFIG_BLK_DEV_DM=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_CC_STACKPROTECTOR_STRONG=y
 CONFIG_COMPACTION=y
+CONFIG_CPU_SW_DOMAIN_PAN=y
 CONFIG_DEBUG_RODATA=y
 CONFIG_DM_CRYPT=y
 CONFIG_DM_UEVENT=y
@@ -75,6 +79,8 @@
 CONFIG_INPUT_EVDEV=y
 CONFIG_INPUT_GPIO=y
 CONFIG_INPUT_JOYSTICK=y
+CONFIG_INPUT_KEYCHORD=y
+CONFIG_INPUT_KEYRESET=y
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_TABLET=y
 CONFIG_INPUT_UINPUT=y
@@ -89,6 +95,7 @@
 CONFIG_LOGITECH_FF=y
 CONFIG_MD=y
 CONFIG_MEDIA_SUPPORT=y
+CONFIG_MEMORY_STATE_TIME=y
 CONFIG_MSDOS_FS=y
 CONFIG_PANIC_TIMEOUT=5
 CONFIG_PANTHERLORD_FF=y
@@ -100,6 +107,11 @@
 CONFIG_PSTORE=y
 CONFIG_PSTORE_CONSOLE=y
 CONFIG_PSTORE_RAM=y
+CONFIG_QFMT_V2=y
+CONFIG_QUOTA=y
+CONFIG_QUOTACTL=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QUOTA_TREE=y
 CONFIG_SCHEDSTATS=y
 CONFIG_SMARTJOYPLUS_FF=y
 CONFIG_SND=y
diff --git a/kernel/cpu.c b/kernel/cpu.c
index bf24e840..0b1a62b3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1278,6 +1278,7 @@
 void enable_nonboot_cpus(void)
 {
 	int cpu, error;
+	struct device *cpu_device;
 
 	/* Allow everyone to use the CPU hotplug again */
 	cpu_maps_update_begin();
@@ -1295,6 +1296,12 @@
 		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
 		if (!error) {
 			pr_info("CPU%d is up\n", cpu);
+			cpu_device = get_cpu_device(cpu);
+			if (!cpu_device)
+				pr_err("%s: failed to get cpu%d device\n",
+				       __func__, cpu);
+			else
+				kobject_uevent(&cpu_device->kobj, KOBJ_ONLINE);
 			continue;
 		}
 		pr_warn("Error taking CPU%d up: %d\n", cpu, error);
@@ -2222,3 +2229,23 @@
 #endif
 	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
 }
+
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+void idle_notifier_register(struct notifier_block *n)
+{
+	atomic_notifier_chain_register(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_register);
+
+void idle_notifier_unregister(struct notifier_block *n)
+{
+	atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_unregister);
+
+void idle_notifier_call_chain(unsigned long val)
+{
+	atomic_notifier_call_chain(&idle_notifier, val, NULL);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_call_chain);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 511b1dd..194e2f2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -100,6 +100,7 @@
 
 	/* user-configured CPUs and Memory Nodes allow to tasks */
 	cpumask_var_t cpus_allowed;
+	cpumask_var_t cpus_requested;
 	nodemask_t mems_allowed;
 
 	/* effective CPUs and Memory Nodes allow to tasks */
@@ -399,7 +400,7 @@
 
 static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
 {
-	return	cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
+	return	cpumask_subset(p->cpus_requested, q->cpus_requested) &&
 		nodes_subset(p->mems_allowed, q->mems_allowed) &&
 		is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
 		is_mem_exclusive(p) <= is_mem_exclusive(q);
@@ -499,7 +500,7 @@
 	cpuset_for_each_child(c, css, par) {
 		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
 		    c != cur &&
-		    cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
+		    cpumask_intersects(trial->cpus_requested, c->cpus_requested))
 			goto out;
 		if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
 		    c != cur &&
@@ -958,17 +959,18 @@
 	if (!*buf) {
 		cpumask_clear(trialcs->cpus_allowed);
 	} else {
-		retval = cpulist_parse(buf, trialcs->cpus_allowed);
+		retval = cpulist_parse(buf, trialcs->cpus_requested);
 		if (retval < 0)
 			return retval;
 
-		if (!cpumask_subset(trialcs->cpus_allowed,
-				    top_cpuset.cpus_allowed))
+		if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
 			return -EINVAL;
+
+		cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
 	}
 
 	/* Nothing to do if the cpus didn't change */
-	if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
+	if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
 		return 0;
 
 	retval = validate_change(cs, trialcs);
@@ -977,6 +979,7 @@
 
 	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
+	cpumask_copy(cs->cpus_requested, trialcs->cpus_requested);
 	spin_unlock_irq(&callback_lock);
 
 	/* use trialcs->cpus_allowed as a temp variable */
@@ -1761,7 +1764,7 @@
 
 	switch (type) {
 	case FILE_CPULIST:
-		seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
+		seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested));
 		break;
 	case FILE_MEMLIST:
 		seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
@@ -1951,11 +1954,14 @@
 		return ERR_PTR(-ENOMEM);
 	if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL))
 		goto free_cs;
+	if (!alloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL))
+		goto free_allowed;
 	if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL))
-		goto free_cpus;
+		goto free_requested;
 
 	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 	cpumask_clear(cs->cpus_allowed);
+	cpumask_clear(cs->cpus_requested);
 	nodes_clear(cs->mems_allowed);
 	cpumask_clear(cs->effective_cpus);
 	nodes_clear(cs->effective_mems);
@@ -1964,7 +1970,9 @@
 
 	return &cs->css;
 
-free_cpus:
+free_requested:
+	free_cpumask_var(cs->cpus_requested);
+free_allowed:
 	free_cpumask_var(cs->cpus_allowed);
 free_cs:
 	kfree(cs);
@@ -2027,6 +2035,7 @@
 	cs->mems_allowed = parent->mems_allowed;
 	cs->effective_mems = parent->mems_allowed;
 	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+	cpumask_copy(cs->cpus_requested, parent->cpus_requested);
 	cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
 	spin_unlock_irq(&callback_lock);
 out_unlock:
@@ -2061,6 +2070,7 @@
 
 	free_cpumask_var(cs->effective_cpus);
 	free_cpumask_var(cs->cpus_allowed);
+	free_cpumask_var(cs->cpus_requested);
 	kfree(cs);
 }
 
@@ -2125,8 +2135,11 @@
 		BUG();
 	if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
 		BUG();
+	if (!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL))
+		BUG();
 
 	cpumask_setall(top_cpuset.cpus_allowed);
+	cpumask_setall(top_cpuset.cpus_requested);
 	nodes_setall(top_cpuset.mems_allowed);
 	cpumask_setall(top_cpuset.effective_cpus);
 	nodes_setall(top_cpuset.effective_mems);
@@ -2260,7 +2273,7 @@
 		goto retry;
 	}
 
-	cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus);
+	cpumask_and(&new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus);
 	nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems);
 
 	cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index cc892a9..d3c5b15 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -216,7 +216,7 @@
 	int i;
 	int diag, dtab_count;
 	int key, buf_size, ret;
-
+	static int last_crlf;
 
 	diag = kdbgetintenv("DTABCOUNT", &dtab_count);
 	if (diag)
@@ -237,6 +237,9 @@
 		return buffer;
 	if (key != 9)
 		tab = 0;
+	if (key != 10 && key != 13)
+		last_crlf = 0;
+
 	switch (key) {
 	case 8: /* backspace */
 		if (cp > buffer) {
@@ -254,7 +257,12 @@
 			*cp = tmp;
 		}
 		break;
-	case 13: /* enter */
+	case 10: /* new line */
+	case 13: /* carriage return */
+		/* handle \n after \r */
+		if (last_crlf && last_crlf != key)
+			break;
+		last_crlf = key;
 		*lastchar++ = '\n';
 		*lastchar++ = '\0';
 		if (!KDB_STATE(KGDB_TRANS)) {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5cbb2ed..6671ed9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -389,8 +389,13 @@
  *   0 - disallow raw tracepoint access for unpriv
  *   1 - disallow cpu events for unpriv
  *   2 - disallow kernel profiling for unpriv
+ *   3 - disallow all unpriv perf event use
  */
+#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT
+int sysctl_perf_event_paranoid __read_mostly = 3;
+#else
 int sysctl_perf_event_paranoid __read_mostly = 2;
+#endif
 
 /* Minimum for 512 kiB + 1 user control page */
 int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
@@ -9686,6 +9691,9 @@
 	if (flags & ~PERF_FLAG_ALL)
 		return -EINVAL;
 
+	if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
 	err = perf_copy_attr(attr_uptr, &attr);
 	if (err)
 		return err;
diff --git a/kernel/exit.c b/kernel/exit.c
index d9394fc..a4959643 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -55,6 +55,8 @@
 #include <linux/shm.h>
 #include <linux/kcov.h>
 
+#include "sched/tune.h"
+
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/pgtable.h>
@@ -783,6 +785,9 @@
 	}
 
 	exit_signals(tsk);  /* sets PF_EXITING */
+
+	schedtune_exit_task(tsk);
+
 	/*
 	 * Ensure that all new tsk->pi_lock acquisitions must observe
 	 * PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index dd2b5a4..826b733 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -1,6 +1,7 @@
 config SUSPEND
 	bool "Suspend to RAM and standby"
 	depends on ARCH_SUSPEND_POSSIBLE
+	select RTC_LIB
 	default y
 	---help---
 	  Allow the system to enter sleep states in which main memory is
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index eb4f717..80578f2 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -14,3 +14,5 @@
 obj-$(CONFIG_PM_WAKELOCKS)	+= wakelock.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
+
+obj-$(CONFIG_SUSPEND)	+= wakeup_reason.o
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 8ea24de..9a12c83 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -18,6 +18,7 @@
 #include <linux/workqueue.h>
 #include <linux/kmod.h>
 #include <trace/events/power.h>
+#include <linux/wakeup_reason.h>
 #include <linux/cpuset.h>
 
 /*
@@ -35,6 +36,9 @@
 	unsigned int elapsed_msecs;
 	bool wakeup = false;
 	int sleep_usecs = USEC_PER_MSEC;
+#ifdef CONFIG_PM_SLEEP
+	char suspend_abort[MAX_SUSPEND_ABORT_LEN];
+#endif
 
 	start = ktime_get_boottime();
 
@@ -64,6 +68,11 @@
 			break;
 
 		if (pm_wakeup_pending()) {
+#ifdef CONFIG_PM_SLEEP
+			pm_get_active_wakeup_sources(suspend_abort,
+				MAX_SUSPEND_ABORT_LEN);
+			log_suspend_abort_reason(suspend_abort);
+#endif
 			wakeup = true;
 			break;
 		}
@@ -82,26 +91,27 @@
 	elapsed = ktime_sub(end, start);
 	elapsed_msecs = ktime_to_ms(elapsed);
 
-	if (todo) {
+	if (wakeup) {
 		pr_cont("\n");
-		pr_err("Freezing of tasks %s after %d.%03d seconds "
-		       "(%d tasks refusing to freeze, wq_busy=%d):\n",
-		       wakeup ? "aborted" : "failed",
+		pr_err("Freezing of tasks aborted after %d.%03d seconds",
+		       elapsed_msecs / 1000, elapsed_msecs % 1000);
+	} else if (todo) {
+		pr_cont("\n");
+		pr_err("Freezing of tasks failed after %d.%03d seconds"
+		       " (%d tasks refusing to freeze, wq_busy=%d):\n",
 		       elapsed_msecs / 1000, elapsed_msecs % 1000,
 		       todo - wq_busy, wq_busy);
 
 		if (wq_busy)
 			show_workqueue_state();
 
-		if (!wakeup) {
-			read_lock(&tasklist_lock);
-			for_each_process_thread(g, p) {
-				if (p != current && !freezer_should_skip(p)
-				    && freezing(p) && !frozen(p))
-					sched_show_task(p);
-			}
-			read_unlock(&tasklist_lock);
+		read_lock(&tasklist_lock);
+		for_each_process_thread(g, p) {
+			if (p != current && !freezer_should_skip(p)
+			    && freezing(p) && !frozen(p))
+				sched_show_task(p);
 		}
+		read_unlock(&tasklist_lock);
 	} else {
 		pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000,
 			elapsed_msecs % 1000);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 6ccb08f..2d0c99b 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -26,9 +26,11 @@
 #include <linux/suspend.h>
 #include <linux/syscore_ops.h>
 #include <linux/ftrace.h>
+#include <linux/rtc.h>
 #include <trace/events/power.h>
 #include <linux/compiler.h>
 #include <linux/moduleparam.h>
+#include <linux/wakeup_reason.h>
 
 #include "power.h"
 
@@ -322,7 +324,8 @@
  */
 static int suspend_enter(suspend_state_t state, bool *wakeup)
 {
-	int error;
+	char suspend_abort[MAX_SUSPEND_ABORT_LEN];
+	int error, last_dev;
 
 	error = platform_suspend_prepare(state);
 	if (error)
@@ -330,7 +333,11 @@
 
 	error = dpm_suspend_late(PMSG_SUSPEND);
 	if (error) {
+		last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
+		last_dev %= REC_FAILED_NUM;
 		pr_err("PM: late suspend of devices failed\n");
+		log_suspend_abort_reason("%s device failed to power down",
+			suspend_stats.failed_devs[last_dev]);
 		goto Platform_finish;
 	}
 	error = platform_suspend_prepare_late(state);
@@ -339,7 +346,11 @@
 
 	error = dpm_suspend_noirq(PMSG_SUSPEND);
 	if (error) {
+		last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
+		last_dev %= REC_FAILED_NUM;
 		pr_err("PM: noirq suspend of devices failed\n");
+		log_suspend_abort_reason("noirq suspend of %s device failed",
+			suspend_stats.failed_devs[last_dev]);
 		goto Platform_early_resume;
 	}
 	error = platform_suspend_prepare_noirq(state);
@@ -363,8 +374,10 @@
 	}
 
 	error = disable_nonboot_cpus();
-	if (error || suspend_test(TEST_CPUS))
+	if (error || suspend_test(TEST_CPUS)) {
+		log_suspend_abort_reason("Disabling non-boot cpus failed");
 		goto Enable_cpus;
+	}
 
 	arch_suspend_disable_irqs();
 	BUG_ON(!irqs_disabled());
@@ -380,6 +393,9 @@
 				state, false);
 			events_check_enabled = false;
 		} else if (*wakeup) {
+			pm_get_active_wakeup_sources(suspend_abort,
+				MAX_SUSPEND_ABORT_LEN);
+			log_suspend_abort_reason(suspend_abort);
 			error = -EBUSY;
 		}
 		syscore_resume();
@@ -427,6 +443,7 @@
 	error = dpm_suspend_start(PMSG_SUSPEND);
 	if (error) {
 		pr_err("PM: Some devices failed to suspend, or early wake event detected\n");
+		log_suspend_abort_reason("Some devices failed to suspend, or early wake event detected");
 		goto Recover_platform;
 	}
 	suspend_test_finish("suspend devices");
@@ -527,6 +544,18 @@
 	return error;
 }
 
+static void pm_suspend_marker(char *annotation)
+{
+	struct timespec ts;
+	struct rtc_time tm;
+
+	getnstimeofday(&ts);
+	rtc_time_to_tm(ts.tv_sec, &tm);
+	pr_info("PM: suspend %s %d-%02d-%02d %02d:%02d:%02d.%09lu UTC\n",
+		annotation, tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+		tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec);
+}
+
 /**
  * pm_suspend - Externally visible function for suspending the system.
  * @state: System sleep state to enter.
@@ -541,6 +570,7 @@
 	if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
 		return -EINVAL;
 
+	pm_suspend_marker("entry");
 	error = enter_state(state);
 	if (error) {
 		suspend_stats.fail++;
@@ -548,6 +578,7 @@
 	} else {
 		suspend_stats.success++;
 	}
+	pm_suspend_marker("exit");
 	return error;
 }
 EXPORT_SYMBOL(pm_suspend);
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
new file mode 100644
index 0000000..252611f
--- /dev/null
+++ b/kernel/power/wakeup_reason.c
@@ -0,0 +1,225 @@
+/*
+ * kernel/power/wakeup_reason.c
+ *
+ * Logs the reasons which caused the kernel to resume from
+ * the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/wakeup_reason.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
+
+
+#define MAX_WAKEUP_REASON_IRQS 32
+static int irq_list[MAX_WAKEUP_REASON_IRQS];
+static int irqcount;
+static bool suspend_abort;
+static char abort_reason[MAX_SUSPEND_ABORT_LEN];
+static struct kobject *wakeup_reason;
+static DEFINE_SPINLOCK(resume_reason_lock);
+
+static ktime_t last_monotime; /* monotonic time before last suspend */
+static ktime_t curr_monotime; /* monotonic time after last suspend */
+static ktime_t last_stime; /* monotonic boottime offset before last suspend */
+static ktime_t curr_stime; /* monotonic boottime offset after last suspend */
+
+static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	int irq_no, buf_offset = 0;
+	struct irq_desc *desc;
+	spin_lock(&resume_reason_lock);
+	if (suspend_abort) {
+		buf_offset = sprintf(buf, "Abort: %s", abort_reason);
+	} else {
+		for (irq_no = 0; irq_no < irqcount; irq_no++) {
+			desc = irq_to_desc(irq_list[irq_no]);
+			if (desc && desc->action && desc->action->name)
+				buf_offset += sprintf(buf + buf_offset, "%d %s\n",
+						irq_list[irq_no], desc->action->name);
+			else
+				buf_offset += sprintf(buf + buf_offset, "%d\n",
+						irq_list[irq_no]);
+		}
+	}
+	spin_unlock(&resume_reason_lock);
+	return buf_offset;
+}
+
+static ssize_t last_suspend_time_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	struct timespec sleep_time;
+	struct timespec total_time;
+	struct timespec suspend_resume_time;
+
+	/*
+	 * total_time is calculated from monotonic bootoffsets because
+	 * unlike CLOCK_MONOTONIC it include the time spent in suspend state.
+	 */
+	total_time = ktime_to_timespec(ktime_sub(curr_stime, last_stime));
+
+	/*
+	 * suspend_resume_time is calculated as monotonic (CLOCK_MONOTONIC)
+	 * time interval before entering suspend and post suspend.
+	 */
+	suspend_resume_time = ktime_to_timespec(ktime_sub(curr_monotime, last_monotime));
+
+	/* sleep_time = total_time - suspend_resume_time */
+	sleep_time = timespec_sub(total_time, suspend_resume_time);
+
+	/* Export suspend_resume_time and sleep_time in pair here. */
+	return sprintf(buf, "%lu.%09lu %lu.%09lu\n",
+				suspend_resume_time.tv_sec, suspend_resume_time.tv_nsec,
+				sleep_time.tv_sec, sleep_time.tv_nsec);
+}
+
+static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason);
+static struct kobj_attribute suspend_time = __ATTR_RO(last_suspend_time);
+
+static struct attribute *attrs[] = {
+	&resume_reason.attr,
+	&suspend_time.attr,
+	NULL,
+};
+static struct attribute_group attr_group = {
+	.attrs = attrs,
+};
+
+/*
+ * logs all the wake up reasons to the kernel
+ * stores the irqs to expose them to the userspace via sysfs
+ */
+void log_wakeup_reason(int irq)
+{
+	struct irq_desc *desc;
+	desc = irq_to_desc(irq);
+	if (desc && desc->action && desc->action->name)
+		printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq,
+				desc->action->name);
+	else
+		printk(KERN_INFO "Resume caused by IRQ %d\n", irq);
+
+	spin_lock(&resume_reason_lock);
+	if (irqcount == MAX_WAKEUP_REASON_IRQS) {
+		spin_unlock(&resume_reason_lock);
+		printk(KERN_WARNING "Resume caused by more than %d IRQs\n",
+				MAX_WAKEUP_REASON_IRQS);
+		return;
+	}
+
+	irq_list[irqcount++] = irq;
+	spin_unlock(&resume_reason_lock);
+}
+
+int check_wakeup_reason(int irq)
+{
+	int irq_no;
+	int ret = false;
+
+	spin_lock(&resume_reason_lock);
+	for (irq_no = 0; irq_no < irqcount; irq_no++)
+		if (irq_list[irq_no] == irq) {
+			ret = true;
+			break;
+	}
+	spin_unlock(&resume_reason_lock);
+	return ret;
+}
+
+void log_suspend_abort_reason(const char *fmt, ...)
+{
+	va_list args;
+
+	spin_lock(&resume_reason_lock);
+
+	//Suspend abort reason has already been logged.
+	if (suspend_abort) {
+		spin_unlock(&resume_reason_lock);
+		return;
+	}
+
+	suspend_abort = true;
+	va_start(args, fmt);
+	vsnprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args);
+	va_end(args);
+	spin_unlock(&resume_reason_lock);
+}
+
+/* Detects a suspend and clears all the previous wake up reasons*/
+static int wakeup_reason_pm_event(struct notifier_block *notifier,
+		unsigned long pm_event, void *unused)
+{
+	switch (pm_event) {
+	case PM_SUSPEND_PREPARE:
+		spin_lock(&resume_reason_lock);
+		irqcount = 0;
+		suspend_abort = false;
+		spin_unlock(&resume_reason_lock);
+		/* monotonic time since boot */
+		last_monotime = ktime_get();
+		/* monotonic time since boot including the time spent in suspend */
+		last_stime = ktime_get_boottime();
+		break;
+	case PM_POST_SUSPEND:
+		/* monotonic time since boot */
+		curr_monotime = ktime_get();
+		/* monotonic time since boot including the time spent in suspend */
+		curr_stime = ktime_get_boottime();
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block wakeup_reason_pm_notifier_block = {
+	.notifier_call = wakeup_reason_pm_event,
+};
+
+/* Initializes the sysfs parameter
+ * registers the pm_event notifier
+ */
+int __init wakeup_reason_init(void)
+{
+	int retval;
+
+	retval = register_pm_notifier(&wakeup_reason_pm_notifier_block);
+	if (retval)
+		printk(KERN_WARNING "[%s] failed to register PM notifier %d\n",
+				__func__, retval);
+
+	wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj);
+	if (!wakeup_reason) {
+		printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n",
+				__func__);
+		return 1;
+	}
+	retval = sysfs_create_group(wakeup_reason, &attr_group);
+	if (retval) {
+		kobject_put(wakeup_reason);
+		printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n",
+				__func__, retval);
+	}
+	return 0;
+}
+
+late_initcall(wakeup_reason_init);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 27adaaa..b6e193d 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -56,6 +56,10 @@
 #include "braille.h"
 #include "internal.h"
 
+#ifdef CONFIG_EARLY_PRINTK_DIRECT
+extern void printascii(char *);
+#endif
+
 int console_printk[4] = {
 	CONSOLE_LOGLEVEL_DEFAULT,	/* console_loglevel */
 	MESSAGE_LOGLEVEL_DEFAULT,	/* default_message_loglevel */
@@ -1875,6 +1879,10 @@
 		}
 	}
 
+#ifdef CONFIG_EARLY_PRINTK_DIRECT
+	printascii(text);
+#endif
+
 	if (level == LOGLEVEL_DEFAULT)
 		level = default_message_loglevel;
 
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 5e59b83..b687747 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -18,10 +18,13 @@
 obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
 obj-y += wait.o swait.o completion.o idle.o
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
+obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o energy.o
+obj-$(CONFIG_SCHED_WALT) += walt.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
+obj-$(CONFIG_SCHED_TUNE) += tune.o
 obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
 obj-$(CONFIG_CPU_FREQ) += cpufreq.o
+obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o
 obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6b3fff6..0b6635d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -90,6 +90,7 @@
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
+#include "walt.h"
 
 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -156,6 +157,18 @@
 /* cpus with isolated domains */
 cpumask_var_t cpu_isolated_map;
 
+struct rq *
+lock_rq_of(struct task_struct *p, struct rq_flags *flags)
+{
+	return task_rq_lock(p, flags);
+}
+
+void
+unlock_rq_of(struct rq *rq, struct task_struct *p, struct rq_flags *flags)
+{
+	task_rq_unlock(rq, p, flags);
+}
+
 /*
  * this_rq_lock - lock this runqueue and disable interrupts.
  */
@@ -998,7 +1011,9 @@
 
 	p->on_rq = TASK_ON_RQ_MIGRATING;
 	dequeue_task(rq, p, 0);
+	double_lock_balance(rq, cpu_rq(new_cpu));
 	set_task_cpu(p, new_cpu);
+	double_unlock_balance(rq, cpu_rq(new_cpu));
 	raw_spin_unlock(&rq->lock);
 
 	rq = cpu_rq(new_cpu);
@@ -1254,6 +1269,8 @@
 			p->sched_class->migrate_task_rq(p);
 		p->se.nr_migrations++;
 		perf_event_task_migrate(p);
+
+		walt_fixup_busy_time(p, new_cpu);
 	}
 
 	__set_task_cpu(p, new_cpu);
@@ -2010,6 +2027,10 @@
 {
 	unsigned long flags;
 	int cpu, success = 0;
+#ifdef CONFIG_SMP
+	struct rq *rq;
+	u64 wallclock;
+#endif
 
 	/*
 	 * If we are going to wake up a thread waiting for CONDITION we
@@ -2083,14 +2104,24 @@
 	 */
 	smp_cond_load_acquire(&p->on_cpu, !VAL);
 
+	rq = cpu_rq(task_cpu(p));
+
+	raw_spin_lock(&rq->lock);
+	wallclock = walt_ktime_clock();
+	walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
+	walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
+	raw_spin_unlock(&rq->lock);
+
 	p->sched_contributes_to_load = !!task_contributes_to_load(p);
 	p->state = TASK_WAKING;
 
 	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
+
 	if (task_cpu(p) != cpu) {
 		wake_flags |= WF_MIGRATED;
 		set_task_cpu(p, cpu);
 	}
+
 #endif /* CONFIG_SMP */
 
 	ttwu_queue(p, cpu, wake_flags);
@@ -2140,8 +2171,13 @@
 
 	trace_sched_waking(p);
 
-	if (!task_on_rq_queued(p))
+	if (!task_on_rq_queued(p)) {
+		u64 wallclock = walt_ktime_clock();
+
+		walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
+		walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
 		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+	}
 
 	ttwu_do_wakeup(rq, p, 0, cookie);
 	ttwu_stat(p, smp_processor_id(), 0);
@@ -2207,6 +2243,7 @@
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
 	INIT_LIST_HEAD(&p->se.group_node);
+	walt_init_new_task_load(p);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	p->se.cfs_rq			= NULL;
@@ -2566,6 +2603,9 @@
 	struct rq *rq;
 
 	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
+
+	walt_init_new_task_load(p);
+
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
 	/*
@@ -2581,7 +2621,9 @@
 	rq = __task_rq_lock(p, &rf);
 	post_init_entity_util_avg(&p->se);
 
-	activate_task(rq, p, 0);
+	walt_mark_task_starting(p);
+
+	activate_task(rq, p, ENQUEUE_WAKEUP_NEW);
 	p->on_rq = TASK_ON_RQ_QUEUED;
 	trace_sched_wakeup_new(p);
 	check_preempt_curr(rq, p, WF_FORK);
@@ -2966,6 +3008,36 @@
 	return atomic_read(&this->nr_iowait);
 }
 
+#ifdef CONFIG_CPU_QUIET
+u64 nr_running_integral(unsigned int cpu)
+{
+	unsigned int seqcnt;
+	u64 integral;
+	struct rq *q;
+
+	if (cpu >= nr_cpu_ids)
+		return 0;
+
+	q = cpu_rq(cpu);
+
+	/*
+	 * Update average to avoid reading stalled value if there were
+	 * no run-queue changes for a long time. On the other hand if
+	 * the changes are happening right now, just read current value
+	 * directly.
+	 */
+
+	seqcnt = read_seqcount_begin(&q->ave_seqcnt);
+	integral = do_nr_running_integral(q);
+	if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) {
+		read_seqcount_begin(&q->ave_seqcnt);
+		integral = q->nr_running_integral;
+	}
+
+	return integral;
+}
+#endif
+
 void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
 {
 	struct rq *rq = this_rq();
@@ -3070,6 +3142,94 @@
 	return ns;
 }
 
+#ifdef CONFIG_CPU_FREQ_GOV_SCHED
+
+static inline
+unsigned long add_capacity_margin(unsigned long cpu_capacity)
+{
+	cpu_capacity  = cpu_capacity * capacity_margin;
+	cpu_capacity /= SCHED_CAPACITY_SCALE;
+	return cpu_capacity;
+}
+
+static inline
+unsigned long sum_capacity_reqs(unsigned long cfs_cap,
+				struct sched_capacity_reqs *scr)
+{
+	unsigned long total = add_capacity_margin(cfs_cap + scr->rt);
+	return total += scr->dl;
+}
+
+unsigned long boosted_cpu_util(int cpu);
+static void sched_freq_tick_pelt(int cpu)
+{
+	unsigned long cpu_utilization = boosted_cpu_util(cpu);
+	unsigned long capacity_curr = capacity_curr_of(cpu);
+	struct sched_capacity_reqs *scr;
+
+	scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
+	if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr)
+		return;
+
+	/*
+	 * To make free room for a task that is building up its "real"
+	 * utilization and to harm its performance the least, request
+	 * a jump to a higher OPP as soon as the margin of free capacity
+	 * is impacted (specified by capacity_margin).
+	 */
+	set_cfs_cpu_capacity(cpu, true, cpu_utilization);
+}
+
+#ifdef CONFIG_SCHED_WALT
+static void sched_freq_tick_walt(int cpu)
+{
+	unsigned long cpu_utilization = cpu_util(cpu);
+	unsigned long capacity_curr = capacity_curr_of(cpu);
+
+	if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
+		return sched_freq_tick_pelt(cpu);
+
+	/*
+	 * Add a margin to the WALT utilization.
+	 * NOTE: WALT tracks a single CPU signal for all the scheduling
+	 * classes, thus this margin is going to be added to the DL class as
+	 * well, which is something we do not do in sched_freq_tick_pelt case.
+	 */
+	cpu_utilization = add_capacity_margin(cpu_utilization);
+	if (cpu_utilization <= capacity_curr)
+		return;
+
+	/*
+	 * It is likely that the load is growing so we
+	 * keep the added margin in our request as an
+	 * extra boost.
+	 */
+	set_cfs_cpu_capacity(cpu, true, cpu_utilization);
+
+}
+#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu)
+#else
+#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu)
+#endif /* CONFIG_SCHED_WALT */
+
+static void sched_freq_tick(int cpu)
+{
+	unsigned long capacity_orig, capacity_curr;
+
+	if (!sched_freq())
+		return;
+
+	capacity_orig = capacity_orig_of(cpu);
+	capacity_curr = capacity_curr_of(cpu);
+	if (capacity_curr == capacity_orig)
+		return;
+
+	_sched_freq_tick(cpu);
+}
+#else
+static inline void sched_freq_tick(int cpu) { }
+#endif /* CONFIG_CPU_FREQ_GOV_SCHED */
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
@@ -3083,10 +3243,14 @@
 	sched_clock_tick();
 
 	raw_spin_lock(&rq->lock);
+	walt_set_window_start(rq);
 	update_rq_clock(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
 	cpu_load_update_active(rq);
+	walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
+			walt_ktime_clock(), 0);
 	calc_global_load_tick(rq);
+	sched_freq_tick(cpu);
 	raw_spin_unlock(&rq->lock);
 
 	perf_event_task_tick();
@@ -3339,6 +3503,7 @@
 	struct pin_cookie cookie;
 	struct rq *rq;
 	int cpu;
+	u64 wallclock;
 
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
@@ -3391,6 +3556,9 @@
 		update_rq_clock(rq);
 
 	next = pick_next_task(rq, prev, cookie);
+	wallclock = walt_ktime_clock();
+	walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
+	walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
 	rq->clock_skip_update = 0;
@@ -5666,9 +5834,6 @@
 
 	if (!(sd->flags & SD_LOAD_BALANCE)) {
 		printk("does not load-balance\n");
-		if (sd->parent)
-			printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain"
-					" has parent");
 		return -1;
 	}
 
@@ -5710,7 +5875,7 @@
 		printk(KERN_CONT " %*pbl",
 		       cpumask_pr_args(sched_group_cpus(group)));
 		if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
-			printk(KERN_CONT " (cpu_capacity = %d)",
+			printk(KERN_CONT " (cpu_capacity = %lu)",
 				group->sgc->capacity);
 		}
 
@@ -5763,8 +5928,12 @@
 
 static int sd_degenerate(struct sched_domain *sd)
 {
-	if (cpumask_weight(sched_domain_span(sd)) == 1)
-		return 1;
+	if (cpumask_weight(sched_domain_span(sd)) == 1) {
+		if (sd->groups->sge)
+			sd->flags &= ~SD_LOAD_BALANCE;
+		else
+			return 1;
+	}
 
 	/* Following flags need at least 2 groups */
 	if (sd->flags & (SD_LOAD_BALANCE |
@@ -5774,7 +5943,8 @@
 			 SD_SHARE_CPUCAPACITY |
 			 SD_ASYM_CPUCAPACITY |
 			 SD_SHARE_PKG_RESOURCES |
-			 SD_SHARE_POWERDOMAIN)) {
+			 SD_SHARE_POWERDOMAIN |
+			 SD_SHARE_CAP_STATES)) {
 		if (sd->groups != sd->groups->next)
 			return 0;
 	}
@@ -5807,7 +5977,12 @@
 				SD_SHARE_CPUCAPACITY |
 				SD_SHARE_PKG_RESOURCES |
 				SD_PREFER_SIBLING |
-				SD_SHARE_POWERDOMAIN);
+				SD_SHARE_POWERDOMAIN |
+				SD_SHARE_CAP_STATES);
+		if (parent->groups->sge) {
+			parent->flags &= ~SD_LOAD_BALANCE;
+			return 0;
+		}
 		if (nr_node_ids == 1)
 			pflags &= ~SD_SERIALIZE;
 	}
@@ -5905,6 +6080,11 @@
 
 	if (cpupri_init(&rd->cpupri) != 0)
 		goto free_rto_mask;
+
+	init_max_cpu_capacity(&rd->max_cpu_capacity);
+
+	rd->max_cap_orig_cpu = rd->min_cap_orig_cpu = -1;
+
 	return 0;
 
 free_rto_mask:
@@ -6016,11 +6196,14 @@
 DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
 DEFINE_PER_CPU(struct sched_domain *, sd_asym);
+DEFINE_PER_CPU(struct sched_domain *, sd_ea);
+DEFINE_PER_CPU(struct sched_domain *, sd_scs);
 
 static void update_top_cache_domain(int cpu)
 {
 	struct sched_domain_shared *sds = NULL;
 	struct sched_domain *sd;
+	struct sched_domain *ea_sd = NULL;
 	int id = cpu;
 	int size = 1;
 
@@ -6041,6 +6224,17 @@
 
 	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
 	rcu_assign_pointer(per_cpu(sd_asym, cpu), sd);
+
+	for_each_domain(cpu, sd) {
+		if (sd->groups->sge)
+			ea_sd = sd;
+		else
+			break;
+	}
+	rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd);
+
+	sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES);
+	rcu_assign_pointer(per_cpu(sd_scs, cpu), sd);
 }
 
 /*
@@ -6222,6 +6416,8 @@
 		 * die on a /0 trap.
 		 */
 		sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+		sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
+		sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
 
 		/*
 		 * Make sure the first group of this domain contains the
@@ -6350,6 +6546,66 @@
 }
 
 /*
+ * Check that the per-cpu provided sd energy data is consistent for all cpus
+ * within the mask.
+ */
+static inline void check_sched_energy_data(int cpu, sched_domain_energy_f fn,
+					   const struct cpumask *cpumask)
+{
+	const struct sched_group_energy * const sge = fn(cpu);
+	struct cpumask mask;
+	int i;
+
+	if (cpumask_weight(cpumask) <= 1)
+		return;
+
+	cpumask_xor(&mask, cpumask, get_cpu_mask(cpu));
+
+	for_each_cpu(i, &mask) {
+		const struct sched_group_energy * const e = fn(i);
+		int y;
+
+		BUG_ON(e->nr_idle_states != sge->nr_idle_states);
+
+		for (y = 0; y < (e->nr_idle_states); y++) {
+			BUG_ON(e->idle_states[y].power !=
+					sge->idle_states[y].power);
+		}
+
+		BUG_ON(e->nr_cap_states != sge->nr_cap_states);
+
+		for (y = 0; y < (e->nr_cap_states); y++) {
+			BUG_ON(e->cap_states[y].cap != sge->cap_states[y].cap);
+			BUG_ON(e->cap_states[y].power !=
+					sge->cap_states[y].power);
+		}
+	}
+}
+
+static void init_sched_energy(int cpu, struct sched_domain *sd,
+			      sched_domain_energy_f fn)
+{
+	if (!(fn && fn(cpu)))
+		return;
+
+	if (cpu != group_balance_cpu(sd->groups))
+		return;
+
+	if (sd->child && !sd->child->groups->sge) {
+		pr_err("BUG: EAS setup broken for CPU%d\n", cpu);
+#ifdef CONFIG_SCHED_DEBUG
+		pr_err("     energy data on %s but not on %s domain\n",
+			sd->name, sd->child->name);
+#endif
+		return;
+	}
+
+	check_sched_energy_data(cpu, fn, sched_group_cpus(sd->groups));
+
+	sd->groups->sge = fn(cpu);
+}
+
+/*
  * Initializers for schedule domains
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
  */
@@ -6465,6 +6721,7 @@
  *   SD_NUMA                - describes NUMA topologies
  *   SD_SHARE_POWERDOMAIN   - describes shared power domain
  *   SD_ASYM_CPUCAPACITY    - describes mixed capacity topologies
+ *   SD_SHARE_CAP_STATES    - describes shared capacity states
  *
  * Odd one out, which beside describing the topology has a quirk also
  * prescribes the desired behaviour that goes along with it:
@@ -6477,7 +6734,8 @@
 	 SD_NUMA |			\
 	 SD_ASYM_PACKING |		\
 	 SD_ASYM_CPUCAPACITY |		\
-	 SD_SHARE_POWERDOMAIN)
+	 SD_SHARE_POWERDOMAIN |		\
+	 SD_SHARE_CAP_STATES)
 
 static struct sched_domain *
 sd_init(struct sched_domain_topology_level *tl,
@@ -7035,7 +7293,6 @@
 	enum s_alloc alloc_state;
 	struct sched_domain *sd;
 	struct s_data d;
-	struct rq *rq = NULL;
 	int i, ret = -ENOMEM;
 
 	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
@@ -7053,8 +7310,6 @@
 				*per_cpu_ptr(d.sd, i) = sd;
 			if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
 				sd->flags |= SD_OVERLAP;
-			if (cpumask_equal(cpu_map, sched_domain_span(sd)))
-				break;
 		}
 	}
 
@@ -7074,10 +7329,13 @@
 
 	/* Calculate CPU capacity for physical packages and nodes */
 	for (i = nr_cpumask_bits-1; i >= 0; i--) {
+		struct sched_domain_topology_level *tl = sched_domain_topology;
+
 		if (!cpumask_test_cpu(i, cpu_map))
 			continue;
 
-		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) {
+			init_sched_energy(i, sd, tl->energy);
 			claim_allocations(i, sd);
 			init_sched_groups_capacity(i, sd);
 		}
@@ -7086,22 +7344,23 @@
 	/* Attach the domains */
 	rcu_read_lock();
 	for_each_cpu(i, cpu_map) {
-		rq = cpu_rq(i);
-		sd = *per_cpu_ptr(d.sd, i);
+		int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu);
+		int min_cpu = READ_ONCE(d.rd->min_cap_orig_cpu);
 
-		/* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
-		if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
-			WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
+		if ((max_cpu < 0) || (cpu_rq(i)->cpu_capacity_orig >
+		    cpu_rq(max_cpu)->cpu_capacity_orig))
+			WRITE_ONCE(d.rd->max_cap_orig_cpu, i);
+
+		if ((min_cpu < 0) || (cpu_rq(i)->cpu_capacity_orig <
+		    cpu_rq(min_cpu)->cpu_capacity_orig))
+			WRITE_ONCE(d.rd->min_cap_orig_cpu, i);
+
+		sd = *per_cpu_ptr(d.sd, i);
 
 		cpu_attach_domain(sd, d.rd, i);
 	}
 	rcu_read_unlock();
 
-	if (rq && sched_debug_enabled) {
-		pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
-			cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
-	}
-
 	ret = 0;
 error:
 	__free_domain_allocs(&d, alloc_state, cpu_map);
@@ -7444,6 +7703,9 @@
 	/* Handle pending wakeups and then migrate everything off */
 	sched_ttwu_pending();
 	raw_spin_lock_irqsave(&rq->lock, flags);
+
+	walt_migrate_sync_cpu(cpu);
+
 	if (rq->rd) {
 		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 		set_rq_offline(rq);
@@ -7463,6 +7725,7 @@
 {
 	cpumask_var_t non_isolated_cpus;
 
+	walt_init_cpu_efficiency();
 	alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
 	alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
 
@@ -7620,6 +7883,7 @@
 #ifdef CONFIG_FAIR_GROUP_SCHED
 		root_task_group.shares = ROOT_TASK_GROUP_LOAD;
 		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
+		rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
 		/*
 		 * How much cpu bandwidth does root_task_group get?
 		 *
@@ -7664,6 +7928,11 @@
 		rq->idle_stamp = 0;
 		rq->avg_idle = 2*sysctl_sched_migration_cost;
 		rq->max_idle_balance_cost = sysctl_sched_migration_cost;
+#ifdef CONFIG_SCHED_WALT
+		rq->cur_irqload = 0;
+		rq->avg_irqload = 0;
+		rq->irqload_ts = 0;
+#endif
 
 		INIT_LIST_HEAD(&rq->cfs_tasks);
 
@@ -7721,6 +7990,14 @@
 	return (nested == preempt_offset);
 }
 
+static int __might_sleep_init_called;
+int __init __might_sleep_init(void)
+{
+	__might_sleep_init_called = 1;
+	return 0;
+}
+early_initcall(__might_sleep_init);
+
 void __might_sleep(const char *file, int line, int preempt_offset)
 {
 	/*
@@ -7746,8 +8023,10 @@
 
 	rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
-	     !is_idle_task(current)) ||
-	    system_state != SYSTEM_RUNNING || oops_in_progress)
+	     !is_idle_task(current)) || oops_in_progress)
+		return;
+	if (system_state != SYSTEM_RUNNING &&
+	    (!__might_sleep_init_called || system_state != SYSTEM_BOOTING))
 		return;
 	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
 		return;
diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c
new file mode 100644
index 0000000..1b19f26
--- /dev/null
+++ b/kernel/sched/cpufreq_sched.c
@@ -0,0 +1,499 @@
+/*
+ *  Copyright (C)  2015 Michael Turquette <mturquette@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/percpu.h>
+#include <linux/irq_work.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/cpufreq_sched.h>
+
+#include "sched.h"
+
+#define THROTTLE_DOWN_NSEC	50000000 /* 50ms default */
+#define THROTTLE_UP_NSEC	500000 /* 500us default */
+
+struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
+static bool __read_mostly cpufreq_driver_slow;
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
+static struct cpufreq_governor cpufreq_gov_sched;
+#endif
+
+static DEFINE_PER_CPU(unsigned long, enabled);
+DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
+
+struct gov_tunables {
+	struct gov_attr_set attr_set;
+	unsigned int up_throttle_nsec;
+	unsigned int down_throttle_nsec;
+};
+
+/**
+ * gov_data - per-policy data internal to the governor
+ * @up_throttle: next throttling period expiry if increasing OPP
+ * @down_throttle: next throttling period expiry if decreasing OPP
+ * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP
+ * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP
+ * @task: worker thread for dvfs transition that may block/sleep
+ * @irq_work: callback used to wake up worker thread
+ * @requested_freq: last frequency requested by the sched governor
+ *
+ * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
+ * per-policy instance of it is created when the cpufreq_sched governor receives
+ * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data
+ * member of struct cpufreq_policy.
+ *
+ * Readers of this data must call down_read(policy->rwsem). Writers must
+ * call down_write(policy->rwsem).
+ */
+struct gov_data {
+	ktime_t up_throttle;
+	ktime_t down_throttle;
+	struct gov_tunables *tunables;
+	struct list_head tunables_hook;
+	struct task_struct *task;
+	struct irq_work irq_work;
+	unsigned int requested_freq;
+};
+
+static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
+					    unsigned int freq)
+{
+	struct gov_data *gd = policy->governor_data;
+
+	/* avoid race with cpufreq_sched_stop */
+	if (!down_write_trylock(&policy->rwsem))
+		return;
+
+	__cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
+
+	gd->up_throttle = ktime_add_ns(ktime_get(),
+				       gd->tunables->up_throttle_nsec);
+	gd->down_throttle = ktime_add_ns(ktime_get(),
+					 gd->tunables->down_throttle_nsec);
+	up_write(&policy->rwsem);
+}
+
+static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq)
+{
+	ktime_t now = ktime_get();
+
+	ktime_t throttle = gd->requested_freq < cur_freq ?
+		gd->down_throttle : gd->up_throttle;
+
+	if (ktime_after(now, throttle))
+		return false;
+
+	while (1) {
+		int usec_left = ktime_to_ns(ktime_sub(throttle, now));
+
+		usec_left /= NSEC_PER_USEC;
+		trace_cpufreq_sched_throttled(usec_left);
+		usleep_range(usec_left, usec_left + 100);
+		now = ktime_get();
+		if (ktime_after(now, throttle))
+			return true;
+	}
+}
+
+/*
+ * we pass in struct cpufreq_policy. This is safe because changing out the
+ * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
+ * which tears down all of the data structures and __cpufreq_governor(policy,
+ * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
+ * new policy pointer
+ */
+static int cpufreq_sched_thread(void *data)
+{
+	struct sched_param param;
+	struct cpufreq_policy *policy;
+	struct gov_data *gd;
+	unsigned int new_request = 0;
+	unsigned int last_request = 0;
+	int ret;
+
+	policy = (struct cpufreq_policy *) data;
+	gd = policy->governor_data;
+
+	param.sched_priority = 50;
+	ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
+	if (ret) {
+		pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
+		do_exit(-EINVAL);
+	} else {
+		pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
+				__func__, gd->task->pid);
+	}
+
+	do {
+		new_request = gd->requested_freq;
+		if (new_request == last_request) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (kthread_should_stop())
+				break;
+			schedule();
+		} else {
+			/*
+			 * if the frequency thread sleeps while waiting to be
+			 * unthrottled, start over to check for a newer request
+			 */
+			if (finish_last_request(gd, policy->cur))
+				continue;
+			last_request = new_request;
+			cpufreq_sched_try_driver_target(policy, new_request);
+		}
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+static void cpufreq_sched_irq_work(struct irq_work *irq_work)
+{
+	struct gov_data *gd;
+
+	gd = container_of(irq_work, struct gov_data, irq_work);
+	if (!gd)
+		return;
+
+	wake_up_process(gd->task);
+}
+
+static void update_fdomain_capacity_request(int cpu)
+{
+	unsigned int freq_new, index_new, cpu_tmp;
+	struct cpufreq_policy *policy;
+	struct gov_data *gd;
+	unsigned long capacity = 0;
+
+	/*
+	 * Avoid grabbing the policy if possible. A test is still
+	 * required after locking the CPU's policy to avoid racing
+	 * with the governor changing.
+	 */
+	if (!per_cpu(enabled, cpu))
+		return;
+
+	policy = cpufreq_cpu_get(cpu);
+	if (IS_ERR_OR_NULL(policy))
+		return;
+
+	if (policy->governor != &cpufreq_gov_sched ||
+	    !policy->governor_data)
+		goto out;
+
+	gd = policy->governor_data;
+
+	/* find max capacity requested by cpus in this policy */
+	for_each_cpu(cpu_tmp, policy->cpus) {
+		struct sched_capacity_reqs *scr;
+
+		scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
+		capacity = max(capacity, scr->total);
+	}
+
+	/* Convert the new maximum capacity request into a cpu frequency */
+	freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
+	index_new = cpufreq_frequency_table_target(policy, freq_new, CPUFREQ_RELATION_L);
+	freq_new = policy->freq_table[index_new].frequency;
+
+	if (freq_new > policy->max)
+		freq_new = policy->max;
+
+	if (freq_new < policy->min)
+		freq_new = policy->min;
+
+	trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
+					gd->requested_freq);
+	if (freq_new == gd->requested_freq)
+		goto out;
+
+	gd->requested_freq = freq_new;
+
+	/*
+	 * Throttling is not yet supported on platforms with fast cpufreq
+	 * drivers.
+	 */
+	if (cpufreq_driver_slow)
+		irq_work_queue_on(&gd->irq_work, cpu);
+	else
+		cpufreq_sched_try_driver_target(policy, freq_new);
+
+out:
+	cpufreq_cpu_put(policy);
+}
+
+void update_cpu_capacity_request(int cpu, bool request)
+{
+	unsigned long new_capacity;
+	struct sched_capacity_reqs *scr;
+
+	/* The rq lock serializes access to the CPU's sched_capacity_reqs. */
+	lockdep_assert_held(&cpu_rq(cpu)->lock);
+
+	scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
+
+	new_capacity = scr->cfs + scr->rt;
+	new_capacity = new_capacity * capacity_margin
+		/ SCHED_CAPACITY_SCALE;
+	new_capacity += scr->dl;
+
+	if (new_capacity == scr->total)
+		return;
+
+	trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);
+
+	scr->total = new_capacity;
+	if (request)
+		update_fdomain_capacity_request(cpu);
+}
+
+static inline void set_sched_freq(void)
+{
+	static_key_slow_inc(&__sched_freq);
+}
+
+static inline void clear_sched_freq(void)
+{
+	static_key_slow_dec(&__sched_freq);
+}
+
+/* Tunables */
+static struct gov_tunables *global_tunables;
+
+static inline struct gov_tunables *to_tunables(struct gov_attr_set *attr_set)
+{
+	return container_of(attr_set, struct gov_tunables, attr_set);
+}
+
+static ssize_t up_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf)
+{
+	struct gov_tunables *tunables = to_tunables(attr_set);
+
+	return sprintf(buf, "%u\n", tunables->up_throttle_nsec);
+}
+
+static ssize_t up_throttle_nsec_store(struct gov_attr_set *attr_set,
+				      const char *buf, size_t count)
+{
+	struct gov_tunables *tunables = to_tunables(attr_set);
+	int ret;
+	long unsigned int val;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+	tunables->up_throttle_nsec = val;
+	return count;
+}
+
+static ssize_t down_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf)
+{
+	struct gov_tunables *tunables = to_tunables(attr_set);
+
+	return sprintf(buf, "%u\n", tunables->down_throttle_nsec);
+}
+
+static ssize_t down_throttle_nsec_store(struct gov_attr_set *attr_set,
+					const char *buf, size_t count)
+{
+	struct gov_tunables *tunables = to_tunables(attr_set);
+	int ret;
+	long unsigned int val;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+	tunables->down_throttle_nsec = val;
+	return count;
+}
+
+static struct governor_attr up_throttle_nsec = __ATTR_RW(up_throttle_nsec);
+static struct governor_attr down_throttle_nsec = __ATTR_RW(down_throttle_nsec);
+
+static struct attribute *schedfreq_attributes[] = {
+	&up_throttle_nsec.attr,
+	&down_throttle_nsec.attr,
+	NULL
+};
+
+static struct kobj_type tunables_ktype = {
+	.default_attrs = schedfreq_attributes,
+	.sysfs_ops = &governor_sysfs_ops,
+};
+
+static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
+{
+	struct gov_data *gd;
+	int cpu;
+	int rc;
+
+	for_each_cpu(cpu, policy->cpus)
+		memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
+		       sizeof(struct sched_capacity_reqs));
+
+	gd = kzalloc(sizeof(*gd), GFP_KERNEL);
+	if (!gd)
+		return -ENOMEM;
+
+	policy->governor_data = gd;
+
+	if (!global_tunables) {
+		gd->tunables = kzalloc(sizeof(*gd->tunables), GFP_KERNEL);
+		if (!gd->tunables)
+			goto free_gd;
+
+		gd->tunables->up_throttle_nsec =
+			policy->cpuinfo.transition_latency ?
+			policy->cpuinfo.transition_latency :
+			THROTTLE_UP_NSEC;
+		gd->tunables->down_throttle_nsec =
+			THROTTLE_DOWN_NSEC;
+
+		rc = kobject_init_and_add(&gd->tunables->attr_set.kobj,
+					  &tunables_ktype,
+					  get_governor_parent_kobj(policy),
+					  "%s", cpufreq_gov_sched.name);
+		if (rc)
+			goto free_tunables;
+
+		gov_attr_set_init(&gd->tunables->attr_set,
+				  &gd->tunables_hook);
+
+		pr_debug("%s: throttle_threshold = %u [ns]\n",
+			 __func__, gd->tunables->up_throttle_nsec);
+
+		if (!have_governor_per_policy())
+			global_tunables = gd->tunables;
+	} else {
+		gd->tunables = global_tunables;
+		gov_attr_set_get(&global_tunables->attr_set,
+				 &gd->tunables_hook);
+	}
+
+	policy->governor_data = gd;
+	if (cpufreq_driver_is_slow()) {
+		cpufreq_driver_slow = true;
+		gd->task = kthread_create(cpufreq_sched_thread, policy,
+					  "kschedfreq:%d",
+					  cpumask_first(policy->related_cpus));
+		if (IS_ERR_OR_NULL(gd->task)) {
+			pr_err("%s: failed to create kschedfreq thread\n",
+			       __func__);
+			goto free_tunables;
+		}
+		get_task_struct(gd->task);
+		kthread_bind_mask(gd->task, policy->related_cpus);
+		wake_up_process(gd->task);
+		init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
+	}
+
+	set_sched_freq();
+
+	return 0;
+
+free_tunables:
+	kfree(gd->tunables);
+free_gd:
+	policy->governor_data = NULL;
+	kfree(gd);
+	return -ENOMEM;
+}
+
+static void cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
+{
+	unsigned int count;
+	struct gov_data *gd = policy->governor_data;
+
+	clear_sched_freq();
+	if (cpufreq_driver_slow) {
+		kthread_stop(gd->task);
+		put_task_struct(gd->task);
+	}
+
+	count = gov_attr_set_put(&gd->tunables->attr_set, &gd->tunables_hook);
+	if (!count) {
+		if (!have_governor_per_policy())
+			global_tunables = NULL;
+		kfree(gd->tunables);
+	}
+
+	policy->governor_data = NULL;
+
+	kfree(gd);
+}
+
+static int cpufreq_sched_start(struct cpufreq_policy *policy)
+{
+	int cpu;
+
+	for_each_cpu(cpu, policy->cpus)
+		per_cpu(enabled, cpu) = 1;
+
+	return 0;
+}
+
+static void cpufreq_sched_limits(struct cpufreq_policy *policy)
+{
+	unsigned int clamp_freq;
+	struct gov_data *gd = policy->governor_data;;
+
+	pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
+		policy->cpu, policy->min, policy->max,
+		policy->cur);
+
+	clamp_freq = clamp(gd->requested_freq, policy->min, policy->max);
+
+	if (policy->cur != clamp_freq)
+		__cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L);
+}
+
+static void cpufreq_sched_stop(struct cpufreq_policy *policy)
+{
+	int cpu;
+
+	for_each_cpu(cpu, policy->cpus)
+		per_cpu(enabled, cpu) = 0;
+}
+
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
+static
+#endif
+struct cpufreq_governor cpufreq_gov_sched = {
+	.name			= "sched",
+	.init                   = cpufreq_sched_policy_init,
+	.exit                   = cpufreq_sched_policy_exit,
+	.start                  = cpufreq_sched_start,
+	.stop                   = cpufreq_sched_stop,
+	.limits                 = cpufreq_sched_limits,
+	.owner			= THIS_MODULE,
+};
+
+static int __init cpufreq_sched_init(void)
+{
+	int cpu;
+
+	for_each_cpu(cpu, cpu_possible_mask)
+		per_cpu(enabled, cpu) = 0;
+	return cpufreq_register_governor(&cpufreq_gov_sched);
+}
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
+struct cpufreq_governor *cpufreq_default_governor(void)
+{
+        return &cpufreq_gov_sched;
+}
+#endif
+
+/* Try to make this the default governor */
+fs_initcall(cpufreq_sched_init);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index cb771c76..19694c2 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -12,14 +12,27 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/cpufreq.h>
+#include <linux/kthread.h>
 #include <linux/slab.h>
 #include <trace/events/power.h>
 
 #include "sched.h"
+#include "tune.h"
+
+unsigned long boosted_cpu_util(int cpu);
+
+/* Stub out fast switch routines present on mainline to reduce the backport
+ * overhead. */
+#define cpufreq_driver_fast_switch(x, y) 0
+#define cpufreq_enable_fast_switch(x)
+#define cpufreq_disable_fast_switch(x)
+#define LATENCY_MULTIPLIER			(1000)
+#define SUGOV_KTHREAD_PRIORITY	50
 
 struct sugov_tunables {
 	struct gov_attr_set attr_set;
-	unsigned int rate_limit_us;
+	unsigned int up_rate_limit_us;
+	unsigned int down_rate_limit_us;
 };
 
 struct sugov_policy {
@@ -30,14 +43,18 @@
 
 	raw_spinlock_t update_lock;  /* For shared policies */
 	u64 last_freq_update_time;
-	s64 freq_update_delay_ns;
+	s64 min_rate_limit_ns;
+	s64 up_rate_delay_ns;
+	s64 down_rate_delay_ns;
 	unsigned int next_freq;
 	unsigned int cached_raw_freq;
 
 	/* The next fields are only needed if fast switch cannot be used. */
 	struct irq_work irq_work;
-	struct work_struct work;
+	struct kthread_work work;
 	struct mutex work_lock;
+	struct kthread_worker worker;
+	struct task_struct *thread;
 	bool work_in_progress;
 
 	bool need_freq_update;
@@ -55,6 +72,11 @@
 	unsigned long util;
 	unsigned long max;
 	unsigned int flags;
+
+	/* The field below is for single-CPU policies only. */
+#ifdef CONFIG_NO_HZ_COMMON
+	unsigned long saved_idle_calls;
+#endif
 };
 
 static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
@@ -79,7 +101,27 @@
 	}
 
 	delta_ns = time - sg_policy->last_freq_update_time;
-	return delta_ns >= sg_policy->freq_update_delay_ns;
+
+	/* No need to recalculate next freq for min_rate_limit_us at least */
+	return delta_ns >= sg_policy->min_rate_limit_ns;
+}
+
+static bool sugov_up_down_rate_limit(struct sugov_policy *sg_policy, u64 time,
+				     unsigned int next_freq)
+{
+	s64 delta_ns;
+
+	delta_ns = time - sg_policy->last_freq_update_time;
+
+	if (next_freq > sg_policy->next_freq &&
+	    delta_ns < sg_policy->up_rate_delay_ns)
+			return true;
+
+	if (next_freq < sg_policy->next_freq &&
+	    delta_ns < sg_policy->down_rate_delay_ns)
+			return true;
+
+	return false;
 }
 
 static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
@@ -87,22 +129,23 @@
 {
 	struct cpufreq_policy *policy = sg_policy->policy;
 
+	if (sugov_up_down_rate_limit(sg_policy, time, next_freq))
+		return;
+
+	if (sg_policy->next_freq == next_freq)
+		return;
+
+	sg_policy->next_freq = next_freq;
 	sg_policy->last_freq_update_time = time;
 
 	if (policy->fast_switch_enabled) {
-		if (sg_policy->next_freq == next_freq) {
-			trace_cpu_frequency(policy->cur, smp_processor_id());
-			return;
-		}
-		sg_policy->next_freq = next_freq;
 		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
 		if (next_freq == CPUFREQ_ENTRY_INVALID)
 			return;
 
 		policy->cur = next_freq;
 		trace_cpu_frequency(next_freq, smp_processor_id());
-	} else if (sg_policy->next_freq != next_freq) {
-		sg_policy->next_freq = next_freq;
+	} else {
 		sg_policy->work_in_progress = true;
 		irq_work_queue(&sg_policy->irq_work);
 	}
@@ -110,7 +153,7 @@
 
 /**
  * get_next_freq - Compute a new frequency for a given cpufreq policy.
- * @sg_cpu: schedutil cpu object to compute the new frequency for.
+ * @sg_policy: schedutil policy object to compute the new frequency for.
  * @util: Current CPU utilization.
  * @max: CPU capacity.
  *
@@ -130,10 +173,9 @@
  * next_freq (as calculated above) is returned, subject to policy min/max and
  * cpufreq driver limitations.
  */
-static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
-				  unsigned long max)
+static unsigned int get_next_freq(struct sugov_policy *sg_policy,
+				  unsigned long util, unsigned long max)
 {
-	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
 	unsigned int freq = arch_scale_freq_invariant() ?
 				policy->cpuinfo.max_freq : policy->cur;
@@ -146,15 +188,36 @@
 	return cpufreq_driver_resolve_freq(policy, freq);
 }
 
-static void sugov_get_util(unsigned long *util, unsigned long *max)
+static inline bool use_pelt(void)
 {
-	struct rq *rq = this_rq();
-	unsigned long cfs_max;
+#ifdef CONFIG_SCHED_WALT
+	return (!sysctl_sched_use_walt_cpu_util || walt_disabled);
+#else
+	return true;
+#endif
+}
 
-	cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id());
+static void sugov_get_util(unsigned long *util, unsigned long *max, u64 time)
+{
+	int cpu = smp_processor_id();
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long max_cap, rt;
+	s64 delta;
 
-	*util = min(rq->cfs.avg.util_avg, cfs_max);
-	*max = cfs_max;
+	max_cap = arch_scale_cpu_capacity(NULL, cpu);
+
+	sched_avg_update(rq);
+	delta = time - rq->age_stamp;
+	if (unlikely(delta < 0))
+		delta = 0;
+	rt = div64_u64(rq->rt_avg, sched_avg_period() + delta);
+	rt = (rt * max_cap) >> SCHED_CAPACITY_SHIFT;
+
+	*util = boosted_cpu_util(cpu);
+	if (likely(use_pelt()))
+		*util = min((*util + rt), max_cap);
+
+	*max = max_cap;
 }
 
 static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
@@ -187,6 +250,19 @@
 	sg_cpu->iowait_boost >>= 1;
 }
 
+#ifdef CONFIG_NO_HZ_COMMON
+static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
+{
+	unsigned long idle_calls = tick_nohz_get_idle_calls();
+	bool ret = idle_calls == sg_cpu->saved_idle_calls;
+
+	sg_cpu->saved_idle_calls = idle_calls;
+	return ret;
+}
+#else
+static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
+#endif /* CONFIG_NO_HZ_COMMON */
+
 static void sugov_update_single(struct update_util_data *hook, u64 time,
 				unsigned int flags)
 {
@@ -195,6 +271,7 @@
 	struct cpufreq_policy *policy = sg_policy->policy;
 	unsigned long util, max;
 	unsigned int next_f;
+	bool busy;
 
 	sugov_set_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
@@ -202,40 +279,36 @@
 	if (!sugov_should_update_freq(sg_policy, time))
 		return;
 
-	if (flags & SCHED_CPUFREQ_RT_DL) {
+	busy = sugov_cpu_is_busy(sg_cpu);
+
+	if (flags & SCHED_CPUFREQ_DL) {
 		next_f = policy->cpuinfo.max_freq;
 	} else {
-		sugov_get_util(&util, &max);
+		sugov_get_util(&util, &max, time);
 		sugov_iowait_boost(sg_cpu, &util, &max);
-		next_f = get_next_freq(sg_cpu, util, max);
+		next_f = get_next_freq(sg_policy, util, max);
+		/*
+		 * Do not reduce the frequency if the CPU has not been idle
+		 * recently, as the reduction is likely to be premature then.
+		 */
+		if (busy && next_f < sg_policy->next_freq)
+			next_f = sg_policy->next_freq;
 	}
 	sugov_update_commit(sg_policy, time, next_f);
 }
 
-static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
-					   unsigned long util, unsigned long max,
-					   unsigned int flags)
+static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
-	unsigned int max_f = policy->cpuinfo.max_freq;
-	u64 last_freq_update_time = sg_policy->last_freq_update_time;
+	unsigned long util = 0, max = 1;
 	unsigned int j;
 
-	if (flags & SCHED_CPUFREQ_RT_DL)
-		return max_f;
-
-	sugov_iowait_boost(sg_cpu, &util, &max);
-
 	for_each_cpu(j, policy->cpus) {
-		struct sugov_cpu *j_sg_cpu;
+		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
 		unsigned long j_util, j_max;
 		s64 delta_ns;
 
-		if (j == smp_processor_id())
-			continue;
-
-		j_sg_cpu = &per_cpu(sugov_cpu, j);
 		/*
 		 * If the CPU utilization was last updated before the previous
 		 * frequency update and the time elapsed between the last update
@@ -243,13 +316,13 @@
 		 * enough, don't take the CPU into account as it probably is
 		 * idle now (and clear iowait_boost for it).
 		 */
-		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
+		delta_ns = time - j_sg_cpu->last_update;
 		if (delta_ns > TICK_NSEC) {
 			j_sg_cpu->iowait_boost = 0;
 			continue;
 		}
-		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
-			return max_f;
+		if (j_sg_cpu->flags & SCHED_CPUFREQ_DL)
+			return policy->cpuinfo.max_freq;
 
 		j_util = j_sg_cpu->util;
 		j_max = j_sg_cpu->max;
@@ -261,7 +334,7 @@
 		sugov_iowait_boost(j_sg_cpu, &util, &max);
 	}
 
-	return get_next_freq(sg_cpu, util, max);
+	return get_next_freq(sg_policy, util, max);
 }
 
 static void sugov_update_shared(struct update_util_data *hook, u64 time,
@@ -272,7 +345,7 @@
 	unsigned long util, max;
 	unsigned int next_f;
 
-	sugov_get_util(&util, &max);
+	sugov_get_util(&util, &max, time);
 
 	raw_spin_lock(&sg_policy->update_lock);
 
@@ -284,14 +357,18 @@
 	sg_cpu->last_update = time;
 
 	if (sugov_should_update_freq(sg_policy, time)) {
-		next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
+		if (flags & SCHED_CPUFREQ_DL)
+			next_f = sg_policy->policy->cpuinfo.max_freq;
+		else
+			next_f = sugov_next_freq_shared(sg_cpu, time);
+
 		sugov_update_commit(sg_policy, time, next_f);
 	}
 
 	raw_spin_unlock(&sg_policy->update_lock);
 }
 
-static void sugov_work(struct work_struct *work)
+static void sugov_work(struct kthread_work *work)
 {
 	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
 
@@ -308,7 +385,21 @@
 	struct sugov_policy *sg_policy;
 
 	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
-	schedule_work_on(smp_processor_id(), &sg_policy->work);
+
+	/*
+	 * For RT and deadline tasks, the schedutil governor shoots the
+	 * frequency to maximum. Special care must be taken to ensure that this
+	 * kthread doesn't result in the same behavior.
+	 *
+	 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
+	 * updated only at the end of the sugov_work() function and before that
+	 * the schedutil governor rejects all other frequency scaling requests.
+	 *
+	 * There is a very rare case though, where the RT thread yields right
+	 * after the work_in_progress flag is cleared. The effects of that are
+	 * neglected for now.
+	 */
+	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
 }
 
 /************************** sysfs interface ************************/
@@ -321,15 +412,32 @@
 	return container_of(attr_set, struct sugov_tunables, attr_set);
 }
 
-static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
+static DEFINE_MUTEX(min_rate_lock);
+
+static void update_min_rate_limit_us(struct sugov_policy *sg_policy)
+{
+	mutex_lock(&min_rate_lock);
+	sg_policy->min_rate_limit_ns = min(sg_policy->up_rate_delay_ns,
+					   sg_policy->down_rate_delay_ns);
+	mutex_unlock(&min_rate_lock);
+}
+
+static ssize_t up_rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
 {
 	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
 
-	return sprintf(buf, "%u\n", tunables->rate_limit_us);
+	return sprintf(buf, "%u\n", tunables->up_rate_limit_us);
 }
 
-static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
-				   size_t count)
+static ssize_t down_rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
+{
+	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+
+	return sprintf(buf, "%u\n", tunables->down_rate_limit_us);
+}
+
+static ssize_t up_rate_limit_us_store(struct gov_attr_set *attr_set,
+				      const char *buf, size_t count)
 {
 	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
 	struct sugov_policy *sg_policy;
@@ -338,18 +446,42 @@
 	if (kstrtouint(buf, 10, &rate_limit_us))
 		return -EINVAL;
 
-	tunables->rate_limit_us = rate_limit_us;
+	tunables->up_rate_limit_us = rate_limit_us;
 
-	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
-		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;
+	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) {
+		sg_policy->up_rate_delay_ns = rate_limit_us * NSEC_PER_USEC;
+		update_min_rate_limit_us(sg_policy);
+	}
 
 	return count;
 }
 
-static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
+static ssize_t down_rate_limit_us_store(struct gov_attr_set *attr_set,
+					const char *buf, size_t count)
+{
+	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+	struct sugov_policy *sg_policy;
+	unsigned int rate_limit_us;
+
+	if (kstrtouint(buf, 10, &rate_limit_us))
+		return -EINVAL;
+
+	tunables->down_rate_limit_us = rate_limit_us;
+
+	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) {
+		sg_policy->down_rate_delay_ns = rate_limit_us * NSEC_PER_USEC;
+		update_min_rate_limit_us(sg_policy);
+	}
+
+	return count;
+}
+
+static struct governor_attr up_rate_limit_us = __ATTR_RW(up_rate_limit_us);
+static struct governor_attr down_rate_limit_us = __ATTR_RW(down_rate_limit_us);
 
 static struct attribute *sugov_attributes[] = {
-	&rate_limit_us.attr,
+	&up_rate_limit_us.attr,
+	&down_rate_limit_us.attr,
 	NULL
 };
 
@@ -371,19 +503,64 @@
 		return NULL;
 
 	sg_policy->policy = policy;
-	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
-	INIT_WORK(&sg_policy->work, sugov_work);
-	mutex_init(&sg_policy->work_lock);
 	raw_spin_lock_init(&sg_policy->update_lock);
 	return sg_policy;
 }
 
 static void sugov_policy_free(struct sugov_policy *sg_policy)
 {
-	mutex_destroy(&sg_policy->work_lock);
 	kfree(sg_policy);
 }
 
+static int sugov_kthread_create(struct sugov_policy *sg_policy)
+{
+	struct task_struct *thread;
+	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
+	struct cpufreq_policy *policy = sg_policy->policy;
+	int ret;
+
+	/* kthread only required for slow path */
+	if (policy->fast_switch_enabled)
+		return 0;
+
+	kthread_init_work(&sg_policy->work, sugov_work);
+	kthread_init_worker(&sg_policy->worker);
+	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
+				"sugov:%d",
+				cpumask_first(policy->related_cpus));
+	if (IS_ERR(thread)) {
+		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
+		return PTR_ERR(thread);
+	}
+
+	ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, &param);
+	if (ret) {
+		kthread_stop(thread);
+		pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
+		return ret;
+	}
+
+	sg_policy->thread = thread;
+	kthread_bind_mask(thread, policy->related_cpus);
+	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
+	mutex_init(&sg_policy->work_lock);
+
+	wake_up_process(thread);
+
+	return 0;
+}
+
+static void sugov_kthread_stop(struct sugov_policy *sg_policy)
+{
+	/* kthread only required for slow path */
+	if (sg_policy->policy->fast_switch_enabled)
+		return;
+
+	kthread_flush_worker(&sg_policy->worker);
+	kthread_stop(sg_policy->thread);
+	mutex_destroy(&sg_policy->work_lock);
+}
+
 static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
 {
 	struct sugov_tunables *tunables;
@@ -416,16 +593,24 @@
 	if (policy->governor_data)
 		return -EBUSY;
 
+	cpufreq_enable_fast_switch(policy);
+
 	sg_policy = sugov_policy_alloc(policy);
-	if (!sg_policy)
-		return -ENOMEM;
+	if (!sg_policy) {
+		ret = -ENOMEM;
+		goto disable_fast_switch;
+	}
+
+	ret = sugov_kthread_create(sg_policy);
+	if (ret)
+		goto free_sg_policy;
 
 	mutex_lock(&global_tunables_lock);
 
 	if (global_tunables) {
 		if (WARN_ON(have_governor_per_policy())) {
 			ret = -EINVAL;
-			goto free_sg_policy;
+			goto stop_kthread;
 		}
 		policy->governor_data = sg_policy;
 		sg_policy->tunables = global_tunables;
@@ -437,13 +622,16 @@
 	tunables = sugov_tunables_alloc(sg_policy);
 	if (!tunables) {
 		ret = -ENOMEM;
-		goto free_sg_policy;
+		goto stop_kthread;
 	}
 
-	tunables->rate_limit_us = LATENCY_MULTIPLIER;
+	tunables->up_rate_limit_us = LATENCY_MULTIPLIER;
+	tunables->down_rate_limit_us = LATENCY_MULTIPLIER;
 	lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
-	if (lat)
-		tunables->rate_limit_us *= lat;
+	if (lat) {
+		tunables->up_rate_limit_us *= lat;
+		tunables->down_rate_limit_us *= lat;
+	}
 
 	policy->governor_data = sg_policy;
 	sg_policy->tunables = tunables;
@@ -454,20 +642,25 @@
 	if (ret)
 		goto fail;
 
- out:
+out:
 	mutex_unlock(&global_tunables_lock);
-
-	cpufreq_enable_fast_switch(policy);
 	return 0;
 
- fail:
+fail:
 	policy->governor_data = NULL;
 	sugov_tunables_free(tunables);
 
+ stop_kthread:
+	sugov_kthread_stop(sg_policy);
+
  free_sg_policy:
 	mutex_unlock(&global_tunables_lock);
 
 	sugov_policy_free(sg_policy);
+
+disable_fast_switch:
+	cpufreq_disable_fast_switch(policy);
+
 	pr_err("initialization failed (error %d)\n", ret);
 	return ret;
 }
@@ -478,8 +671,6 @@
 	struct sugov_tunables *tunables = sg_policy->tunables;
 	unsigned int count;
 
-	cpufreq_disable_fast_switch(policy);
-
 	mutex_lock(&global_tunables_lock);
 
 	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
@@ -489,7 +680,10 @@
 
 	mutex_unlock(&global_tunables_lock);
 
+	sugov_kthread_stop(sg_policy);
 	sugov_policy_free(sg_policy);
+
+	cpufreq_disable_fast_switch(policy);
 }
 
 static int sugov_start(struct cpufreq_policy *policy)
@@ -497,7 +691,11 @@
 	struct sugov_policy *sg_policy = policy->governor_data;
 	unsigned int cpu;
 
-	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
+	sg_policy->up_rate_delay_ns =
+		sg_policy->tunables->up_rate_limit_us * NSEC_PER_USEC;
+	sg_policy->down_rate_delay_ns =
+		sg_policy->tunables->down_rate_limit_us * NSEC_PER_USEC;
+	update_min_rate_limit_us(sg_policy);
 	sg_policy->last_freq_update_time = 0;
 	sg_policy->next_freq = UINT_MAX;
 	sg_policy->work_in_progress = false;
@@ -509,7 +707,7 @@
 
 		memset(sg_cpu, 0, sizeof(*sg_cpu));
 		sg_cpu->sg_policy = sg_policy;
-		sg_cpu->flags = SCHED_CPUFREQ_RT;
+		sg_cpu->flags = SCHED_CPUFREQ_DL;
 		sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
 		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
 					     policy_is_shared(policy) ?
@@ -529,8 +727,10 @@
 
 	synchronize_sched();
 
-	irq_work_sync(&sg_policy->irq_work);
-	cancel_work_sync(&sg_policy->work);
+	if (!policy->fast_switch_enabled) {
+		irq_work_sync(&sg_policy->irq_work);
+		kthread_cancel_work_sync(&sg_policy->work);
+	}
 }
 
 static void sugov_limits(struct cpufreq_policy *policy)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 448d642..84437fc 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -8,7 +8,7 @@
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #endif
-
+#include "walt.h"
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 
@@ -58,11 +58,18 @@
 	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
 	s64 delta;
 	int cpu;
+#ifdef CONFIG_SCHED_WALT
+	u64 wallclock;
+	bool account = true;
+#endif
 
 	if (!sched_clock_irqtime)
 		return;
 
 	cpu = smp_processor_id();
+#ifdef CONFIG_SCHED_WALT
+	wallclock = sched_clock_cpu(cpu);
+#endif
 	delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
 	irqtime->irq_start_time += delta;
 
@@ -76,6 +83,13 @@
 		irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
 	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
 		irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
+#ifdef CONFIG_SCHED_WALT
+	else
+		account = false;
+
+	if (account)
+		walt_account_irqtime(cpu, curr, delta, wallclock);
+#endif
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index fa178b6..59e38cd 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -261,9 +261,60 @@
 }
 
 static struct ctl_table *
+sd_alloc_ctl_energy_table(struct sched_group_energy *sge)
+{
+	struct ctl_table *table = sd_alloc_ctl_entry(5);
+
+	if (table == NULL)
+		return NULL;
+
+	set_table_entry(&table[0], "nr_idle_states", &sge->nr_idle_states,
+			sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[1], "idle_states", &sge->idle_states[0].power,
+			sge->nr_idle_states*sizeof(struct idle_state), 0644,
+			proc_doulongvec_minmax, false);
+	set_table_entry(&table[2], "nr_cap_states", &sge->nr_cap_states,
+			sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[3], "cap_states", &sge->cap_states[0].cap,
+			sge->nr_cap_states*sizeof(struct capacity_state), 0644,
+			proc_doulongvec_minmax, false);
+
+	return table;
+}
+
+static struct ctl_table *
+sd_alloc_ctl_group_table(struct sched_group *sg)
+{
+	struct ctl_table *table = sd_alloc_ctl_entry(2);
+
+	if (table == NULL)
+		return NULL;
+
+	table->procname = kstrdup("energy", GFP_KERNEL);
+	table->mode = 0555;
+	table->child = sd_alloc_ctl_energy_table((struct sched_group_energy *)sg->sge);
+
+	return table;
+}
+
+static struct ctl_table *
 sd_alloc_ctl_domain_table(struct sched_domain *sd)
 {
-	struct ctl_table *table = sd_alloc_ctl_entry(14);
+	struct ctl_table *table;
+	unsigned int nr_entries = 14;
+
+	int i = 0;
+	struct sched_group *sg = sd->groups;
+
+	if (sg->sge) {
+		int nr_sgs = 0;
+
+		do {} while (nr_sgs++, sg = sg->next, sg != sd->groups);
+
+		nr_entries += nr_sgs;
+	}
+
+	table = sd_alloc_ctl_entry(nr_entries);
 
 	if (table == NULL)
 		return NULL;
@@ -296,7 +347,19 @@
 		sizeof(long), 0644, proc_doulongvec_minmax, false);
 	set_table_entry(&table[12], "name", sd->name,
 		CORENAME_MAX_SIZE, 0444, proc_dostring, false);
-	/* &table[13] is terminator */
+	sg = sd->groups;
+	if (sg->sge) {
+		char buf[32];
+		struct ctl_table *entry = &table[13];
+
+		do {
+			snprintf(buf, 32, "group%d", i);
+			entry->procname = kstrdup(buf, GFP_KERNEL);
+			entry->mode = 0555;
+			entry->child = sd_alloc_ctl_group_table(sg);
+		} while (entry++, i++, sg = sg->next, sg != sd->groups);
+	}
+	/* &table[nr_entries-1] is terminator */
 
 	return table;
 }
@@ -918,7 +981,33 @@
 		P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
 		P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
 		P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
-
+		/* eas */
+		/* select_idle_sibling() */
+		P_SCHEDSTAT(se.statistics.nr_wakeups_sis_attempts);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_sis_idle);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_sis_cache_affine);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_sis_suff_cap);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_sis_idle_cpu);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_sis_count);
+		/* select_energy_cpu_brute() */
+		P_SCHEDSTAT(se.statistics.nr_wakeups_secb_attempts);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_secb_sync);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_secb_idle_bt);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_secb_insuff_cap);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_secb_no_nrg_sav);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_secb_nrg_sav);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_secb_count);
+		/* find_best_target() */
+		P_SCHEDSTAT(se.statistics.nr_wakeups_fbt_attempts);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_fbt_no_cpu);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_fbt_no_sd);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_fbt_pref_idle);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_fbt_count);
+		/* cas */
+		/* select_task_rq_fair() */
+		P_SCHEDSTAT(se.statistics.nr_wakeups_cas_attempts);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_cas_count);
+ 
 		avg_atom = p->se.sum_exec_runtime;
 		if (nr_switches)
 			avg_atom = div64_ul(avg_atom, nr_switches);
diff --git a/kernel/sched/energy.c b/kernel/sched/energy.c
new file mode 100644
index 0000000..b0656b7
--- /dev/null
+++ b/kernel/sched/energy.c
@@ -0,0 +1,124 @@
+/*
+ * Obtain energy cost data from DT and populate relevant scheduler data
+ * structures.
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define pr_fmt(fmt) "sched-energy: " fmt
+
+#define DEBUG
+
+#include <linux/gfp.h>
+#include <linux/of.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/sched_energy.h>
+#include <linux/stddef.h>
+
+struct sched_group_energy *sge_array[NR_CPUS][NR_SD_LEVELS];
+
+static void free_resources(void)
+{
+	int cpu, sd_level;
+	struct sched_group_energy *sge;
+
+	for_each_possible_cpu(cpu) {
+		for_each_possible_sd_level(sd_level) {
+			sge = sge_array[cpu][sd_level];
+			if (sge) {
+				kfree(sge->cap_states);
+				kfree(sge->idle_states);
+				kfree(sge);
+			}
+		}
+	}
+}
+
+void init_sched_energy_costs(void)
+{
+	struct device_node *cn, *cp;
+	struct capacity_state *cap_states;
+	struct idle_state *idle_states;
+	struct sched_group_energy *sge;
+	const struct property *prop;
+	int sd_level, i, nstates, cpu;
+	const __be32 *val;
+
+	for_each_possible_cpu(cpu) {
+		cn = of_get_cpu_node(cpu, NULL);
+		if (!cn) {
+			pr_warn("CPU device node missing for CPU %d\n", cpu);
+			return;
+		}
+
+		if (!of_find_property(cn, "sched-energy-costs", NULL)) {
+			pr_warn("CPU device node has no sched-energy-costs\n");
+			return;
+		}
+
+		for_each_possible_sd_level(sd_level) {
+			cp = of_parse_phandle(cn, "sched-energy-costs", sd_level);
+			if (!cp)
+				break;
+
+			prop = of_find_property(cp, "busy-cost-data", NULL);
+			if (!prop || !prop->value) {
+				pr_warn("No busy-cost data, skipping sched_energy init\n");
+				goto out;
+			}
+
+			sge = kcalloc(1, sizeof(struct sched_group_energy),
+				      GFP_NOWAIT);
+
+			nstates = (prop->length / sizeof(u32)) / 2;
+			cap_states = kcalloc(nstates,
+					     sizeof(struct capacity_state),
+					     GFP_NOWAIT);
+
+			for (i = 0, val = prop->value; i < nstates; i++) {
+				cap_states[i].cap = be32_to_cpup(val++);
+				cap_states[i].power = be32_to_cpup(val++);
+			}
+
+			sge->nr_cap_states = nstates;
+			sge->cap_states = cap_states;
+
+			prop = of_find_property(cp, "idle-cost-data", NULL);
+			if (!prop || !prop->value) {
+				pr_warn("No idle-cost data, skipping sched_energy init\n");
+				goto out;
+			}
+
+			nstates = (prop->length / sizeof(u32));
+			idle_states = kcalloc(nstates,
+					      sizeof(struct idle_state),
+					      GFP_NOWAIT);
+
+			for (i = 0, val = prop->value; i < nstates; i++)
+				idle_states[i].power = be32_to_cpup(val++);
+
+			sge->nr_idle_states = nstates;
+			sge->idle_states = idle_states;
+
+			sge_array[cpu][sd_level] = sge;
+		}
+	}
+
+	pr_info("Sched-energy-costs installed from DT\n");
+	return;
+
+out:
+	free_resources();
+}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0c91d72..078a5f5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -30,10 +30,13 @@
 #include <linux/mempolicy.h>
 #include <linux/migrate.h>
 #include <linux/task_work.h>
+#include <linux/module.h>
 
 #include <trace/events/sched.h>
 
 #include "sched.h"
+#include "tune.h"
+#include "walt.h"
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
@@ -50,6 +53,16 @@
 unsigned int sysctl_sched_latency = 6000000ULL;
 unsigned int normalized_sysctl_sched_latency = 6000000ULL;
 
+unsigned int sysctl_sched_sync_hint_enable = 1;
+unsigned int sysctl_sched_initial_task_util = 0;
+unsigned int sysctl_sched_cstate_aware = 1;
+
+#ifdef CONFIG_SCHED_WALT
+unsigned int sysctl_sched_use_walt_cpu_util = 1;
+unsigned int sysctl_sched_use_walt_task_util = 1;
+__read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload =
+    (10 * NSEC_PER_MSEC);
+#endif
 /*
  * The initial- and re-scaling of tunables is configurable
  * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
@@ -116,7 +129,7 @@
 
 /*
  * The margin used when comparing utilization with CPU capacity:
- * util * 1024 < capacity * margin
+ * util * margin < capacity * 1024
  */
 unsigned int capacity_margin = 1280; /* ~20% */
 
@@ -290,19 +303,59 @@
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	if (!cfs_rq->on_list) {
+		struct rq *rq = rq_of(cfs_rq);
+		int cpu = cpu_of(rq);
 		/*
 		 * Ensure we either appear before our parent (if already
 		 * enqueued) or force our parent to appear after us when it is
-		 * enqueued.  The fact that we always enqueue bottom-up
-		 * reduces this to two cases.
+		 * enqueued. The fact that we always enqueue bottom-up
+		 * reduces this to two cases and a special case for the root
+		 * cfs_rq. Furthermore, it also means that we will always reset
+		 * tmp_alone_branch either when the branch is connected
+		 * to a tree or when we reach the beg of the tree
 		 */
 		if (cfs_rq->tg->parent &&
-		    cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) {
-			list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
-				&rq_of(cfs_rq)->leaf_cfs_rq_list);
-		} else {
+		    cfs_rq->tg->parent->cfs_rq[cpu]->on_list) {
+			/*
+			 * If parent is already on the list, we add the child
+			 * just before. Thanks to circular linked property of
+			 * the list, this means to put the child at the tail
+			 * of the list that starts by parent.
+			 */
 			list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
-				&rq_of(cfs_rq)->leaf_cfs_rq_list);
+				&(cfs_rq->tg->parent->cfs_rq[cpu]->leaf_cfs_rq_list));
+			/*
+			 * The branch is now connected to its tree so we can
+			 * reset tmp_alone_branch to the beginning of the
+			 * list.
+			 */
+			rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
+		} else if (!cfs_rq->tg->parent) {
+			/*
+			 * cfs rq without parent should be put
+			 * at the tail of the list.
+			 */
+			list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
+				&rq->leaf_cfs_rq_list);
+			/*
+			 * We have reach the beg of a tree so we can reset
+			 * tmp_alone_branch to the beginning of the list.
+			 */
+			rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
+		} else {
+			/*
+			 * The parent has not already been added so we want to
+			 * make sure that it will be put after us.
+			 * tmp_alone_branch points to the beg of the branch
+			 * where we will add parent.
+			 */
+			list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
+				rq->tmp_alone_branch);
+			/*
+			 * update tmp_alone_branch to points to the new beg
+			 * of the branch
+			 */
+			rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
 		}
 
 		cfs_rq->on_list = 1;
@@ -699,18 +752,19 @@
 	if (entity_is_task(se))
 		sa->load_avg = scale_load_down(se->load.weight);
 	sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
+
 	/*
 	 * At this point, util_avg won't be used in select_task_rq_fair anyway
 	 */
-	sa->util_avg = 0;
+	sa->util_avg =  sched_freq() ?
+		sysctl_sched_initial_task_util :
+		0;
 	sa->util_sum = 0;
 	/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
 }
 
 static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
-static int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq);
-static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force);
-static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se);
+static void attach_entity_cfs_rq(struct sched_entity *se);
 
 /*
  * With new tasks being created, their initial util_avgs are extrapolated
@@ -742,7 +796,6 @@
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	struct sched_avg *sa = &se->avg;
 	long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
-	u64 now = cfs_rq_clock_task(cfs_rq);
 
 	if (cap > 0) {
 		if (cfs_rq->avg.util_avg != 0) {
@@ -770,14 +823,12 @@
 			 * such that the next switched_to_fair() has the
 			 * expected state.
 			 */
-			se->avg.last_update_time = now;
+			se->avg.last_update_time = cfs_rq_clock_task(cfs_rq);
 			return;
 		}
 	}
 
-	update_cfs_rq_load_avg(now, cfs_rq, false);
-	attach_entity_load_avg(cfs_rq, se);
-	update_tg_load_avg(cfs_rq, false);
+	attach_entity_cfs_rq(se);
 }
 
 #else /* !CONFIG_SMP */
@@ -937,6 +988,7 @@
 			}
 
 			trace_sched_stat_blocked(tsk, delta);
+			trace_sched_blocked_reason(tsk);
 
 			/*
 			 * Blocking time is in units of nanosecs, so shift by
@@ -2646,16 +2698,20 @@
 
 static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
 
-static void update_cfs_shares(struct cfs_rq *cfs_rq)
+static void update_cfs_shares(struct sched_entity *se)
 {
+	struct cfs_rq *cfs_rq = group_cfs_rq(se);
 	struct task_group *tg;
-	struct sched_entity *se;
 	long shares;
 
-	tg = cfs_rq->tg;
-	se = tg->se[cpu_of(rq_of(cfs_rq))];
-	if (!se || throttled_hierarchy(cfs_rq))
+	if (!cfs_rq)
 		return;
+
+	if (throttled_hierarchy(cfs_rq))
+		return;
+
+	tg = cfs_rq->tg;
+
 #ifndef CONFIG_SMP
 	if (likely(se->load.weight == tg->shares))
 		return;
@@ -2664,8 +2720,9 @@
 
 	reweight_entity(cfs_rq_of(se), se, shares);
 }
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
-static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
+static inline void update_cfs_shares(struct sched_entity *se)
 {
 }
 #endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -2816,6 +2873,7 @@
 
 	scale_freq = arch_scale_freq_capacity(NULL, cpu);
 	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+	trace_sched_contrib_scale_f(cpu, scale_freq, scale_cpu);
 
 	/* delta_w is the amount already accumulated against our next period */
 	delta_w = sa->period_contrib;
@@ -2891,6 +2949,26 @@
 	return decayed;
 }
 
+/*
+ * Signed add and clamp on underflow.
+ *
+ * Explicitly do a load-store to ensure the intermediate value never hits
+ * memory. This allows lockless observations without ever seeing the negative
+ * values.
+ */
+#define add_positive(_ptr, _val) do {                           \
+	typeof(_ptr) ptr = (_ptr);                              \
+	typeof(_val) val = (_val);                              \
+	typeof(*ptr) res, var = READ_ONCE(*ptr);                \
+								\
+	res = var + val;                                        \
+								\
+	if (val < 0 && res > var)                               \
+		res = 0;                                        \
+								\
+	WRITE_ONCE(*ptr, res);                                  \
+} while (0)
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /**
  * update_tg_load_avg - update the tg's load avg
@@ -2970,8 +3048,138 @@
 		se->avg.last_update_time = n_last_update_time;
 	}
 }
+
+/* Take into account change of utilization of a child task group */
+static inline void
+update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+	long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
+
+	/* Nothing to update */
+	if (!delta)
+		return;
+
+	/* Set new sched_entity's utilization */
+	se->avg.util_avg = gcfs_rq->avg.util_avg;
+	se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
+
+	/* Update parent cfs_rq utilization */
+	add_positive(&cfs_rq->avg.util_avg, delta);
+	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
+}
+
+/* Take into account change of load of a child task group */
+static inline void
+update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+	long delta, load = gcfs_rq->avg.load_avg;
+
+	/*
+	 * If the load of group cfs_rq is null, the load of the
+	 * sched_entity will also be null so we can skip the formula
+	 */
+	if (load) {
+		long tg_load;
+
+		/* Get tg's load and ensure tg_load > 0 */
+		tg_load = atomic_long_read(&gcfs_rq->tg->load_avg) + 1;
+
+		/* Ensure tg_load >= load and updated with current load*/
+		tg_load -= gcfs_rq->tg_load_avg_contrib;
+		tg_load += load;
+
+		/*
+		 * We need to compute a correction term in the case that the
+		 * task group is consuming more CPU than a task of equal
+		 * weight. A task with a weight equals to tg->shares will have
+		 * a load less or equal to scale_load_down(tg->shares).
+		 * Similarly, the sched_entities that represent the task group
+		 * at parent level, can't have a load higher than
+		 * scale_load_down(tg->shares). And the Sum of sched_entities'
+		 * load must be <= scale_load_down(tg->shares).
+		 */
+		if (tg_load > scale_load_down(gcfs_rq->tg->shares)) {
+			/* scale gcfs_rq's load into tg's shares*/
+			load *= scale_load_down(gcfs_rq->tg->shares);
+			load /= tg_load;
+		}
+	}
+
+	delta = load - se->avg.load_avg;
+
+	/* Nothing to update */
+	if (!delta)
+		return;
+
+	/* Set new sched_entity's load */
+	se->avg.load_avg = load;
+	se->avg.load_sum = se->avg.load_avg * LOAD_AVG_MAX;
+
+	/* Update parent cfs_rq load */
+	add_positive(&cfs_rq->avg.load_avg, delta);
+	cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * LOAD_AVG_MAX;
+
+	/*
+	 * If the sched_entity is already enqueued, we also have to update the
+	 * runnable load avg.
+	 */
+	if (se->on_rq) {
+		/* Update parent cfs_rq runnable_load_avg */
+		add_positive(&cfs_rq->runnable_load_avg, delta);
+		cfs_rq->runnable_load_sum = cfs_rq->runnable_load_avg * LOAD_AVG_MAX;
+	}
+}
+
+static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq)
+{
+	cfs_rq->propagate_avg = 1;
+}
+
+static inline int test_and_clear_tg_cfs_propagate(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = group_cfs_rq(se);
+
+	if (!cfs_rq->propagate_avg)
+		return 0;
+
+	cfs_rq->propagate_avg = 0;
+	return 1;
+}
+
+/* Update task and its cfs_rq load average */
+static inline int propagate_entity_load_avg(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq;
+
+	if (entity_is_task(se))
+		return 0;
+
+	if (!test_and_clear_tg_cfs_propagate(se))
+		return 0;
+
+	cfs_rq = cfs_rq_of(se);
+
+	set_tg_cfs_propagate(cfs_rq);
+
+	update_tg_cfs_util(cfs_rq, se);
+	update_tg_cfs_load(cfs_rq, se);
+
+	return 1;
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
+
 static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
+
+static inline int propagate_entity_load_avg(struct sched_entity *se)
+{
+	return 0;
+}
+
+static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {}
+
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
@@ -3042,6 +3250,7 @@
 		sub_positive(&sa->load_avg, r);
 		sub_positive(&sa->load_sum, r * LOAD_AVG_MAX);
 		removed_load = 1;
+		set_tg_cfs_propagate(cfs_rq);
 	}
 
 	if (atomic_long_read(&cfs_rq->removed_util_avg)) {
@@ -3049,6 +3258,7 @@
 		sub_positive(&sa->util_avg, r);
 		sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);
 		removed_util = 1;
+		set_tg_cfs_propagate(cfs_rq);
 	}
 
 	decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
@@ -3062,27 +3272,51 @@
 	if (update_freq && (decayed || removed_util))
 		cfs_rq_util_change(cfs_rq);
 
+	/* Trace CPU load, unless cfs_rq belongs to a non-root task_group */
+	if (cfs_rq == &rq_of(cfs_rq)->cfs)
+		trace_sched_load_avg_cpu(cpu_of(rq_of(cfs_rq)), cfs_rq);
+
 	return decayed || removed_load;
 }
 
+/*
+ * Optional action to be done while updating the load average
+ */
+#define UPDATE_TG	0x1
+#define SKIP_AGE_LOAD	0x2
+
 /* Update task and its cfs_rq load average */
-static inline void update_load_avg(struct sched_entity *se, int update_tg)
+static inline void update_load_avg(struct sched_entity *se, int flags)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 now = cfs_rq_clock_task(cfs_rq);
 	struct rq *rq = rq_of(cfs_rq);
 	int cpu = cpu_of(rq);
+	int decayed;
+	void *ptr = NULL;
 
 	/*
 	 * Track task load average for carrying it to new CPU after migrated, and
 	 * track group sched_entity load average for task_h_load calc in migration
 	 */
-	__update_load_avg(now, cpu, &se->avg,
+	if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) {
+		__update_load_avg(now, cpu, &se->avg,
 			  se->on_rq * scale_load_down(se->load.weight),
 			  cfs_rq->curr == se, NULL);
+	}
 
-	if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg)
+	decayed  = update_cfs_rq_load_avg(now, cfs_rq, true);
+	decayed |= propagate_entity_load_avg(se);
+
+	if (decayed && (flags & UPDATE_TG))
 		update_tg_load_avg(cfs_rq, 0);
+
+	if (entity_is_task(se)) {
+#ifdef CONFIG_SCHED_WALT
+		ptr = (void *)&(task_of(se)->ravg);
+#endif
+		trace_sched_load_avg_task(task_of(se), &se->avg, ptr);
+	}
 }
 
 /**
@@ -3095,31 +3329,12 @@
  */
 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	if (!sched_feat(ATTACH_AGE_LOAD))
-		goto skip_aging;
-
-	/*
-	 * If we got migrated (either between CPUs or between cgroups) we'll
-	 * have aged the average right before clearing @last_update_time.
-	 *
-	 * Or we're fresh through post_init_entity_util_avg().
-	 */
-	if (se->avg.last_update_time) {
-		__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
-				  &se->avg, 0, 0, NULL);
-
-		/*
-		 * XXX: we could have just aged the entire load away if we've been
-		 * absent from the fair class for too long.
-		 */
-	}
-
-skip_aging:
 	se->avg.last_update_time = cfs_rq->avg.last_update_time;
 	cfs_rq->avg.load_avg += se->avg.load_avg;
 	cfs_rq->avg.load_sum += se->avg.load_sum;
 	cfs_rq->avg.util_avg += se->avg.util_avg;
 	cfs_rq->avg.util_sum += se->avg.util_sum;
+	set_tg_cfs_propagate(cfs_rq);
 
 	cfs_rq_util_change(cfs_rq);
 }
@@ -3134,14 +3349,12 @@
  */
 static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
-			  &se->avg, se->on_rq * scale_load_down(se->load.weight),
-			  cfs_rq->curr == se, NULL);
 
 	sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
 	sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
 	sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
 	sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+	set_tg_cfs_propagate(cfs_rq);
 
 	cfs_rq_util_change(cfs_rq);
 }
@@ -3151,34 +3364,20 @@
 enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct sched_avg *sa = &se->avg;
-	u64 now = cfs_rq_clock_task(cfs_rq);
-	int migrated, decayed;
-
-	migrated = !sa->last_update_time;
-	if (!migrated) {
-		__update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
-			se->on_rq * scale_load_down(se->load.weight),
-			cfs_rq->curr == se, NULL);
-	}
-
-	decayed = update_cfs_rq_load_avg(now, cfs_rq, !migrated);
 
 	cfs_rq->runnable_load_avg += sa->load_avg;
 	cfs_rq->runnable_load_sum += sa->load_sum;
 
-	if (migrated)
+	if (!sa->last_update_time) {
 		attach_entity_load_avg(cfs_rq, se);
-
-	if (decayed || migrated)
 		update_tg_load_avg(cfs_rq, 0);
+	}
 }
 
 /* Remove the runnable load generated by se from cfs_rq's runnable load average */
 static inline void
 dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	update_load_avg(se, 1);
-
 	cfs_rq->runnable_load_avg =
 		max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0);
 	cfs_rq->runnable_load_sum =
@@ -3207,13 +3406,25 @@
 #endif
 
 /*
+ * Synchronize entity load avg of dequeued entity without locking
+ * the previous rq.
+ */
+void sync_entity_load_avg(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	u64 last_update_time;
+
+	last_update_time = cfs_rq_last_update_time(cfs_rq);
+	__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
+}
+
+/*
  * Task first catches up with cfs_rq, and then subtract
  * itself from the cfs_rq (task must be off the queue now).
  */
 void remove_entity_load_avg(struct sched_entity *se)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	u64 last_update_time;
 
 	/*
 	 * tasks cannot exit without having gone through wake_up_new_task() ->
@@ -3225,9 +3436,7 @@
 	 * calls this.
 	 */
 
-	last_update_time = cfs_rq_last_update_time(cfs_rq);
-
-	__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
+	sync_entity_load_avg(se);
 	atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
 	atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
 }
@@ -3252,7 +3461,10 @@
 	return 0;
 }
 
-static inline void update_load_avg(struct sched_entity *se, int not_used)
+#define UPDATE_TG	0x0
+#define SKIP_AGE_LOAD	0x0
+
+static inline void update_load_avg(struct sched_entity *se, int not_used1)
 {
 	cpufreq_update_util(rq_of(cfs_rq_of(se)), 0);
 }
@@ -3397,9 +3609,18 @@
 	if (renorm && !curr)
 		se->vruntime += cfs_rq->min_vruntime;
 
+	/*
+	 * When enqueuing a sched_entity, we must:
+	 *   - Update loads to have both entity and cfs_rq synced with now.
+	 *   - Add its load to cfs_rq->runnable_avg
+	 *   - For group_entity, update its weight to reflect the new share of
+	 *     its group cfs_rq
+	 *   - Add its new weight to cfs_rq->load.weight
+	 */
+	update_load_avg(se, UPDATE_TG);
 	enqueue_entity_load_avg(cfs_rq, se);
+	update_cfs_shares(se);
 	account_entity_enqueue(cfs_rq, se);
-	update_cfs_shares(cfs_rq);
 
 	if (flags & ENQUEUE_WAKEUP)
 		place_entity(cfs_rq, se, 0);
@@ -3471,6 +3692,16 @@
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
+
+	/*
+	 * When dequeuing a sched_entity, we must:
+	 *   - Update loads to have both entity and cfs_rq synced with now.
+	 *   - Substract its load from the cfs_rq->runnable_avg.
+	 *   - Substract its previous weight from cfs_rq->load.weight.
+	 *   - For group entity, update its weight to reflect the new share
+	 *     of its group cfs_rq.
+	 */
+	update_load_avg(se, UPDATE_TG);
 	dequeue_entity_load_avg(cfs_rq, se);
 
 	update_stats_dequeue(cfs_rq, se, flags);
@@ -3494,7 +3725,7 @@
 	/* return excess runtime on last dequeue */
 	return_cfs_rq_runtime(cfs_rq);
 
-	update_cfs_shares(cfs_rq);
+	update_cfs_shares(se);
 
 	/*
 	 * Now advance min_vruntime if @se was the entity holding it back,
@@ -3558,7 +3789,7 @@
 		 */
 		update_stats_wait_end(cfs_rq, se);
 		__dequeue_entity(cfs_rq, se);
-		update_load_avg(se, 1);
+		update_load_avg(se, UPDATE_TG);
 	}
 
 	update_stats_curr_start(cfs_rq, se);
@@ -3676,8 +3907,8 @@
 	/*
 	 * Ensure that runnable average is periodically updated.
 	 */
-	update_load_avg(curr, 1);
-	update_cfs_shares(cfs_rq);
+	update_load_avg(curr, UPDATE_TG);
+	update_cfs_shares(curr);
 
 #ifdef CONFIG_SCHED_HRTICK
 	/*
@@ -4544,6 +4775,28 @@
 }
 #endif
 
+#ifdef CONFIG_SMP
+static bool cpu_overutilized(int cpu);
+unsigned long boosted_cpu_util(int cpu);
+#else
+#define boosted_cpu_util(cpu) cpu_util(cpu)
+#endif
+
+#ifdef CONFIG_SMP
+static void update_capacity_of(int cpu)
+{
+	unsigned long req_cap;
+
+	if (!sched_freq())
+		return;
+
+	/* Convert scale-invariant capacity to cpu. */
+	req_cap = boosted_cpu_util(cpu);
+	req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
+	set_cfs_cpu_capacity(cpu, true, req_cap);
+}
+#endif
+
 /*
  * The enqueue_task method is called before nr_running is
  * increased. Here we update the fair scheduling stats and
@@ -4554,6 +4807,10 @@
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+#ifdef CONFIG_SMP
+	int task_new = flags & ENQUEUE_WAKEUP_NEW;
+	int task_wakeup = flags & ENQUEUE_WAKEUP;
+#endif
 
 	/*
 	 * If in_iowait is set, the code below may not trigger any cpufreq
@@ -4578,6 +4835,7 @@
 		if (cfs_rq_throttled(cfs_rq))
 			break;
 		cfs_rq->h_nr_running++;
+		walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
 
 		flags = ENQUEUE_WAKEUP;
 	}
@@ -4585,17 +4843,59 @@
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		cfs_rq->h_nr_running++;
+		walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
 
 		if (cfs_rq_throttled(cfs_rq))
 			break;
 
-		update_load_avg(se, 1);
-		update_cfs_shares(cfs_rq);
+		update_load_avg(se, UPDATE_TG);
+		update_cfs_shares(se);
 	}
 
 	if (!se)
 		add_nr_running(rq, 1);
 
+#ifdef CONFIG_SMP
+
+	/*
+	 * Update SchedTune accounting.
+	 *
+	 * We do it before updating the CPU capacity to ensure the
+	 * boost value of the current task is accounted for in the
+	 * selection of the OPP.
+	 *
+	 * We do it also in the case where we enqueue a throttled task;
+	 * we could argue that a throttled task should not boost a CPU,
+	 * however:
+	 * a) properly implementing CPU boosting considering throttled
+	 *    tasks will increase a lot the complexity of the solution
+	 * b) it's not easy to quantify the benefits introduced by
+	 *    such a more complex solution.
+	 * Thus, for the time being we go for the simple solution and boost
+	 * also for throttled RQs.
+	 */
+	schedtune_enqueue_task(p, cpu_of(rq));
+
+	if (!se) {
+		walt_inc_cumulative_runnable_avg(rq, p);
+		if (!task_new && !rq->rd->overutilized &&
+		    cpu_overutilized(rq->cpu)) {
+			rq->rd->overutilized = true;
+			trace_sched_overutilized(true);
+		}
+
+		/*
+		 * We want to potentially trigger a freq switch
+		 * request only for tasks that are waking up; this is
+		 * because we get here also during load balancing, but
+		 * in these cases it seems wise to trigger as single
+		 * request after load balancing is done.
+		 */
+		if (task_new || task_wakeup)
+			update_capacity_of(cpu_of(rq));
+	}
+
+#endif /* CONFIG_SMP */
 	hrtick_update(rq);
 }
 
@@ -4625,6 +4925,7 @@
 		if (cfs_rq_throttled(cfs_rq))
 			break;
 		cfs_rq->h_nr_running--;
+		walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
@@ -4644,17 +4945,50 @@
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		cfs_rq->h_nr_running--;
+		walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
 
 		if (cfs_rq_throttled(cfs_rq))
 			break;
 
-		update_load_avg(se, 1);
-		update_cfs_shares(cfs_rq);
+		update_load_avg(se, UPDATE_TG);
+		update_cfs_shares(se);
 	}
 
 	if (!se)
 		sub_nr_running(rq, 1);
 
+#ifdef CONFIG_SMP
+
+	/*
+	 * Update SchedTune accounting
+	 *
+	 * We do it before updating the CPU capacity to ensure the
+	 * boost value of the current task is accounted for in the
+	 * selection of the OPP.
+	 */
+	schedtune_dequeue_task(p, cpu_of(rq));
+
+	if (!se) {
+		walt_dec_cumulative_runnable_avg(rq, p);
+
+		/*
+		 * We want to potentially trigger a freq switch
+		 * request only for tasks that are going to sleep;
+		 * this is because we get here also during load
+		 * balancing, but in these cases it seems wise to
+		 * trigger as single request after load balancing is
+		 * done.
+		 */
+		if (task_sleep) {
+			if (rq->cfs.nr_running)
+				update_capacity_of(cpu_of(rq));
+			else if (sched_freq())
+				set_cfs_cpu_capacity(cpu_of(rq), false, 0);
+		}
+	}
+
+#endif /* CONFIG_SMP */
+
 	hrtick_update(rq);
 }
 
@@ -4961,15 +5295,6 @@
 	return max(rq->cpu_load[type-1], total);
 }
 
-static unsigned long capacity_of(int cpu)
-{
-	return cpu_rq(cpu)->cpu_capacity;
-}
-
-static unsigned long capacity_orig_of(int cpu)
-{
-	return cpu_rq(cpu)->cpu_capacity_orig;
-}
 
 static unsigned long cpu_avg_load_per_task(int cpu)
 {
@@ -5121,6 +5446,500 @@
 }
 
 /*
+ * Returns the current capacity of cpu after applying both
+ * cpu and freq scaling.
+ */
+unsigned long capacity_curr_of(int cpu)
+{
+	return cpu_rq(cpu)->cpu_capacity_orig *
+	       arch_scale_freq_capacity(NULL, cpu)
+	       >> SCHED_CAPACITY_SHIFT;
+}
+
+static inline bool energy_aware(void)
+{
+	return sched_feat(ENERGY_AWARE);
+}
+
+struct energy_env {
+	struct sched_group	*sg_top;
+	struct sched_group	*sg_cap;
+	int			cap_idx;
+	int			util_delta;
+	int			src_cpu;
+	int			dst_cpu;
+	int			energy;
+	int			payoff;
+	struct task_struct	*task;
+	struct {
+		int before;
+		int after;
+		int delta;
+		int diff;
+	} nrg;
+	struct {
+		int before;
+		int after;
+		int delta;
+	} cap;
+};
+
+/*
+ * __cpu_norm_util() returns the cpu util relative to a specific capacity,
+ * i.e. it's busy ratio, in the range [0..SCHED_LOAD_SCALE] which is useful for
+ * energy calculations. Using the scale-invariant util returned by
+ * cpu_util() and approximating scale-invariant util by:
+ *
+ *   util ~ (curr_freq/max_freq)*1024 * capacity_orig/1024 * running_time/time
+ *
+ * the normalized util can be found using the specific capacity.
+ *
+ *   capacity = capacity_orig * curr_freq/max_freq
+ *
+ *   norm_util = running_time/time ~ util/capacity
+ */
+static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta)
+{
+	int util = __cpu_util(cpu, delta);
+
+	if (util >= capacity)
+		return SCHED_CAPACITY_SCALE;
+
+	return (util << SCHED_CAPACITY_SHIFT)/capacity;
+}
+
+static int calc_util_delta(struct energy_env *eenv, int cpu)
+{
+	if (cpu == eenv->src_cpu)
+		return -eenv->util_delta;
+	if (cpu == eenv->dst_cpu)
+		return eenv->util_delta;
+	return 0;
+}
+
+static
+unsigned long group_max_util(struct energy_env *eenv)
+{
+	int i, delta;
+	unsigned long max_util = 0;
+
+	for_each_cpu(i, sched_group_cpus(eenv->sg_cap)) {
+		delta = calc_util_delta(eenv, i);
+		max_util = max(max_util, __cpu_util(i, delta));
+	}
+
+	return max_util;
+}
+
+/*
+ * group_norm_util() returns the approximated group util relative to it's
+ * current capacity (busy ratio) in the range [0..SCHED_LOAD_SCALE] for use in
+ * energy calculations. Since task executions may or may not overlap in time in
+ * the group the true normalized util is between max(cpu_norm_util(i)) and
+ * sum(cpu_norm_util(i)) when iterating over all cpus in the group, i. The
+ * latter is used as the estimate as it leads to a more pessimistic energy
+ * estimate (more busy).
+ */
+static unsigned
+long group_norm_util(struct energy_env *eenv, struct sched_group *sg)
+{
+	int i, delta;
+	unsigned long util_sum = 0;
+	unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap;
+
+	for_each_cpu(i, sched_group_cpus(sg)) {
+		delta = calc_util_delta(eenv, i);
+		util_sum += __cpu_norm_util(i, capacity, delta);
+	}
+
+	if (util_sum > SCHED_CAPACITY_SCALE)
+		return SCHED_CAPACITY_SCALE;
+	return util_sum;
+}
+
+static int find_new_capacity(struct energy_env *eenv,
+	const struct sched_group_energy * const sge)
+{
+	int idx;
+	unsigned long util = group_max_util(eenv);
+
+	for (idx = 0; idx < sge->nr_cap_states; idx++) {
+		if (sge->cap_states[idx].cap >= util)
+			break;
+	}
+
+	eenv->cap_idx = idx;
+
+	return idx;
+}
+
+static int group_idle_state(struct energy_env *eenv, struct sched_group *sg)
+{
+	int i, state = INT_MAX;
+	int src_in_grp, dst_in_grp;
+	long grp_util = 0;
+
+	/* Find the shallowest idle state in the sched group. */
+	for_each_cpu(i, sched_group_cpus(sg))
+		state = min(state, idle_get_state_idx(cpu_rq(i)));
+
+	/* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */
+	state++;
+
+	/*
+	 * Try to estimate if a deeper idle state is
+	 * achievable when we move the task.
+	 */
+	for_each_cpu(i, sched_group_cpus(sg))
+		grp_util += cpu_util(i);
+
+	src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg));
+	dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg));
+	if (src_in_grp == dst_in_grp) {
+		/* both CPUs under consideration are in the same group or not in
+		 * either group, migration should leave idle state the same.
+		 */
+		goto end;
+	}
+	/* add or remove util as appropriate to indicate what group util
+	 * will be (worst case - no concurrent execution) after moving the task
+	 */
+	grp_util += src_in_grp ? -eenv->util_delta : eenv->util_delta;
+
+	if (grp_util <=
+		((long)sg->sgc->max_capacity * (int)sg->group_weight)) {
+		/* after moving, this group is at most partly
+		 * occupied, so it should have some idle time.
+		 */
+		int max_idle_state_idx = sg->sge->nr_idle_states - 2;
+		int new_state = grp_util * max_idle_state_idx;
+		if (grp_util <= 0)
+			/* group will have no util, use lowest state */
+			new_state = max_idle_state_idx + 1;
+		else {
+			/* for partially idle, linearly map util to idle
+			 * states, excluding the lowest one. This does not
+			 * correspond to the state we expect to enter in
+			 * reality, but an indication of what might happen.
+			 */
+			new_state = min(max_idle_state_idx, (int)
+					(new_state / sg->sgc->max_capacity));
+			new_state = max_idle_state_idx - new_state;
+		}
+		state = new_state;
+	} else {
+		/* After moving, the group will be fully occupied
+		 * so assume it will not be idle at all.
+		 */
+		state = 0;
+	}
+end:
+	return state;
+}
+
+/*
+ * sched_group_energy(): Computes the absolute energy consumption of cpus
+ * belonging to the sched_group including shared resources shared only by
+ * members of the group. Iterates over all cpus in the hierarchy below the
+ * sched_group starting from the bottom working it's way up before going to
+ * the next cpu until all cpus are covered at all levels. The current
+ * implementation is likely to gather the same util statistics multiple times.
+ * This can probably be done in a faster but more complex way.
+ * Note: sched_group_energy() may fail when racing with sched_domain updates.
+ */
+static int sched_group_energy(struct energy_env *eenv)
+{
+	struct sched_domain *sd;
+	int cpu, total_energy = 0;
+	struct cpumask visit_cpus;
+	struct sched_group *sg;
+	int cpu_count;
+
+	WARN_ON(!eenv->sg_top->sge);
+
+	cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top));
+	/* If a cpu is hotplugged in while we are in this function,
+	 * it does not appear in the existing visit_cpus mask
+	 * which came from the sched_group pointer of the
+	 * sched_domain pointed at by sd_ea for either the prev
+	 * or next cpu and was dereferenced in __energy_diff.
+	 * Since we will dereference sd_scs later as we iterate
+	 * through the CPUs we expect to visit, new CPUs can
+	 * be present which are not in the visit_cpus mask.
+	 * Guard this with cpu_count.
+	 */
+	cpu_count = cpumask_weight(&visit_cpus);
+
+	while (!cpumask_empty(&visit_cpus)) {
+		struct sched_group *sg_shared_cap = NULL;
+
+		cpu = cpumask_first(&visit_cpus);
+
+		/*
+		 * Is the group utilization affected by cpus outside this
+		 * sched_group?
+		 * This sd may have groups with cpus which were not present
+		 * when we took visit_cpus.
+		 */
+		sd = rcu_dereference(per_cpu(sd_scs, cpu));
+
+		if (sd && sd->parent)
+			sg_shared_cap = sd->parent->groups;
+
+		for_each_domain(cpu, sd) {
+			sg = sd->groups;
+
+			/* Has this sched_domain already been visited? */
+			if (sd->child && group_first_cpu(sg) != cpu)
+				break;
+
+			do {
+				unsigned long group_util;
+				int sg_busy_energy, sg_idle_energy;
+				int cap_idx, idle_idx;
+
+				if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight)
+					eenv->sg_cap = sg_shared_cap;
+				else
+					eenv->sg_cap = sg;
+
+				cap_idx = find_new_capacity(eenv, sg->sge);
+
+				if (sg->group_weight == 1) {
+					/* Remove capacity of src CPU (before task move) */
+					if (eenv->util_delta == 0 &&
+					    cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) {
+						eenv->cap.before = sg->sge->cap_states[cap_idx].cap;
+						eenv->cap.delta -= eenv->cap.before;
+					}
+					/* Add capacity of dst CPU  (after task move) */
+					if (eenv->util_delta != 0 &&
+					    cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) {
+						eenv->cap.after = sg->sge->cap_states[cap_idx].cap;
+						eenv->cap.delta += eenv->cap.after;
+					}
+				}
+
+				idle_idx = group_idle_state(eenv, sg);
+				group_util = group_norm_util(eenv, sg);
+
+				sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
+								>> SCHED_CAPACITY_SHIFT;
+				sg_idle_energy = ((SCHED_CAPACITY_SCALE-group_util)
+								* sg->sge->idle_states[idle_idx].power)
+								>> SCHED_CAPACITY_SHIFT;
+
+				total_energy += sg_busy_energy + sg_idle_energy;
+
+				if (!sd->child) {
+					/*
+					 * cpu_count here is the number of
+					 * cpus we expect to visit in this
+					 * calculation. If we race against
+					 * hotplug, we can have extra cpus
+					 * added to the groups we are
+					 * iterating which do not appear in
+					 * the visit_cpus mask. In that case
+					 * we are not able to calculate energy
+					 * without restarting so we will bail
+					 * out and use prev_cpu this time.
+					 */
+					if (!cpu_count)
+						return -EINVAL;
+					cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg));
+					cpu_count--;
+				}
+
+				if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top)))
+					goto next_cpu;
+
+			} while (sg = sg->next, sg != sd->groups);
+		}
+
+		/*
+		 * If we raced with hotplug and got an sd NULL-pointer;
+		 * returning a wrong energy estimation is better than
+		 * entering an infinite loop.
+		 * Specifically: If a cpu is unplugged after we took
+		 * the visit_cpus mask, it no longer has an sd_scs
+		 * pointer, so when we dereference it, we get NULL.
+		 */
+		if (cpumask_test_cpu(cpu, &visit_cpus))
+			return -EINVAL;
+next_cpu:
+		cpumask_clear_cpu(cpu, &visit_cpus);
+		continue;
+	}
+
+	eenv->energy = total_energy;
+	return 0;
+}
+
+static inline bool cpu_in_sg(struct sched_group *sg, int cpu)
+{
+	return cpu != -1 && cpumask_test_cpu(cpu, sched_group_cpus(sg));
+}
+
+/*
+ * energy_diff(): Estimate the energy impact of changing the utilization
+ * distribution. eenv specifies the change: utilisation amount, source, and
+ * destination cpu. Source or destination cpu may be -1 in which case the
+ * utilization is removed from or added to the system (e.g. task wake-up). If
+ * both are specified, the utilization is migrated.
+ */
+static inline int __energy_diff(struct energy_env *eenv)
+{
+	struct sched_domain *sd;
+	struct sched_group *sg;
+	int sd_cpu = -1, energy_before = 0, energy_after = 0;
+	int diff, margin;
+
+	struct energy_env eenv_before = {
+		.util_delta	= 0,
+		.src_cpu	= eenv->src_cpu,
+		.dst_cpu	= eenv->dst_cpu,
+		.nrg		= { 0, 0, 0, 0},
+		.cap		= { 0, 0, 0 },
+	};
+
+	if (eenv->src_cpu == eenv->dst_cpu)
+		return 0;
+
+	sd_cpu = (eenv->src_cpu != -1) ? eenv->src_cpu : eenv->dst_cpu;
+	sd = rcu_dereference(per_cpu(sd_ea, sd_cpu));
+
+	if (!sd)
+		return 0; /* Error */
+
+	sg = sd->groups;
+
+	do {
+		if (cpu_in_sg(sg, eenv->src_cpu) || cpu_in_sg(sg, eenv->dst_cpu)) {
+			eenv_before.sg_top = eenv->sg_top = sg;
+
+			if (sched_group_energy(&eenv_before))
+				return 0; /* Invalid result abort */
+			energy_before += eenv_before.energy;
+
+			/* Keep track of SRC cpu (before) capacity */
+			eenv->cap.before = eenv_before.cap.before;
+			eenv->cap.delta = eenv_before.cap.delta;
+
+			if (sched_group_energy(eenv))
+				return 0; /* Invalid result abort */
+			energy_after += eenv->energy;
+		}
+	} while (sg = sg->next, sg != sd->groups);
+
+	eenv->nrg.before = energy_before;
+	eenv->nrg.after = energy_after;
+	eenv->nrg.diff = eenv->nrg.after - eenv->nrg.before;
+	eenv->payoff = 0;
+#ifndef CONFIG_SCHED_TUNE
+	trace_sched_energy_diff(eenv->task,
+			eenv->src_cpu, eenv->dst_cpu, eenv->util_delta,
+			eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff,
+			eenv->cap.before, eenv->cap.after, eenv->cap.delta,
+			eenv->nrg.delta, eenv->payoff);
+#endif
+	/*
+	 * Dead-zone margin preventing too many migrations.
+	 */
+
+	margin = eenv->nrg.before >> 6; /* ~1.56% */
+
+	diff = eenv->nrg.after - eenv->nrg.before;
+
+	eenv->nrg.diff = (abs(diff) < margin) ? 0 : eenv->nrg.diff;
+
+	return eenv->nrg.diff;
+}
+
+#ifdef CONFIG_SCHED_TUNE
+
+struct target_nrg schedtune_target_nrg;
+extern bool schedtune_initialized;
+/*
+ * System energy normalization
+ * Returns the normalized value, in the range [0..SCHED_CAPACITY_SCALE],
+ * corresponding to the specified energy variation.
+ */
+static inline int
+normalize_energy(int energy_diff)
+{
+	u32 normalized_nrg;
+
+	/* during early setup, we don't know the extents */
+	if (unlikely(!schedtune_initialized))
+		return energy_diff < 0 ? -1 : 1 ;
+
+#ifdef CONFIG_SCHED_DEBUG
+	{
+	int max_delta;
+
+	/* Check for boundaries */
+	max_delta  = schedtune_target_nrg.max_power;
+	max_delta -= schedtune_target_nrg.min_power;
+	WARN_ON(abs(energy_diff) >= max_delta);
+	}
+#endif
+
+	/* Do scaling using positive numbers to increase the range */
+	normalized_nrg = (energy_diff < 0) ? -energy_diff : energy_diff;
+
+	/* Scale by energy magnitude */
+	normalized_nrg <<= SCHED_CAPACITY_SHIFT;
+
+	/* Normalize on max energy for target platform */
+	normalized_nrg = reciprocal_divide(
+			normalized_nrg, schedtune_target_nrg.rdiv);
+
+	return (energy_diff < 0) ? -normalized_nrg : normalized_nrg;
+}
+
+static inline int
+energy_diff(struct energy_env *eenv)
+{
+	int boost = schedtune_task_boost(eenv->task);
+	int nrg_delta;
+
+	/* Conpute "absolute" energy diff */
+	__energy_diff(eenv);
+
+	/* Return energy diff when boost margin is 0 */
+	if (boost == 0)
+		return eenv->nrg.diff;
+
+	/* Compute normalized energy diff */
+	nrg_delta = normalize_energy(eenv->nrg.diff);
+	eenv->nrg.delta = nrg_delta;
+
+	eenv->payoff = schedtune_accept_deltas(
+			eenv->nrg.delta,
+			eenv->cap.delta,
+			eenv->task);
+
+	trace_sched_energy_diff(eenv->task,
+			eenv->src_cpu, eenv->dst_cpu, eenv->util_delta,
+			eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff,
+			eenv->cap.before, eenv->cap.after, eenv->cap.delta,
+			eenv->nrg.delta, eenv->payoff);
+
+	/*
+	 * When SchedTune is enabled, the energy_diff() function will return
+	 * the computed energy payoff value. Since the energy_diff() return
+	 * value is expected to be negative by its callers, this evaluation
+	 * function return a negative value each time the evaluation return a
+	 * positive payoff, which is the condition for the acceptance of
+	 * a scheduling decision
+	 */
+	return -eenv->payoff;
+}
+#else /* CONFIG_SCHED_TUNE */
+#define energy_diff(eenv) __energy_diff(eenv)
+#endif
+
+/*
  * Detect M:N waker/wakee relationships via a switching-frequency heuristic.
  *
  * A waker of many should wake a different task than the one last awakened
@@ -5216,6 +6035,149 @@
 	return 1;
 }
 
+static inline unsigned long task_util(struct task_struct *p)
+{
+#ifdef CONFIG_SCHED_WALT
+	if (!walt_disabled && sysctl_sched_use_walt_task_util) {
+		unsigned long demand = p->ravg.demand;
+		return (demand << 10) / walt_ravg_window;
+	}
+#endif
+	return p->se.avg.util_avg;
+}
+
+static inline unsigned long boosted_task_util(struct task_struct *task);
+
+static inline bool __task_fits(struct task_struct *p, int cpu, int util)
+{
+	unsigned long capacity = capacity_of(cpu);
+
+	util += boosted_task_util(p);
+
+	return (capacity * 1024) > (util * capacity_margin);
+}
+
+static inline bool task_fits_max(struct task_struct *p, int cpu)
+{
+	unsigned long capacity = capacity_of(cpu);
+	unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity.val;
+
+	if (capacity == max_capacity)
+		return true;
+
+	if (capacity * capacity_margin > max_capacity * 1024)
+		return true;
+
+	return __task_fits(p, cpu, 0);
+}
+
+static bool cpu_overutilized(int cpu)
+{
+	return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin);
+}
+
+#ifdef CONFIG_SCHED_TUNE
+
+struct reciprocal_value schedtune_spc_rdiv;
+
+static long
+schedtune_margin(unsigned long signal, long boost)
+{
+	long long margin = 0;
+
+	/*
+	 * Signal proportional compensation (SPC)
+	 *
+	 * The Boost (B) value is used to compute a Margin (M) which is
+	 * proportional to the complement of the original Signal (S):
+	 *   M = B * (SCHED_CAPACITY_SCALE - S)
+	 * The obtained M could be used by the caller to "boost" S.
+	 */
+	if (boost >= 0) {
+		margin  = SCHED_CAPACITY_SCALE - signal;
+		margin *= boost;
+	} else
+		margin = -signal * boost;
+
+	margin  = reciprocal_divide(margin, schedtune_spc_rdiv);
+
+	if (boost < 0)
+		margin *= -1;
+	return margin;
+}
+
+static inline int
+schedtune_cpu_margin(unsigned long util, int cpu)
+{
+	int boost = schedtune_cpu_boost(cpu);
+
+	if (boost == 0)
+		return 0;
+
+	return schedtune_margin(util, boost);
+}
+
+static inline long
+schedtune_task_margin(struct task_struct *task)
+{
+	int boost = schedtune_task_boost(task);
+	unsigned long util;
+	long margin;
+
+	if (boost == 0)
+		return 0;
+
+	util = task_util(task);
+	margin = schedtune_margin(util, boost);
+
+	return margin;
+}
+
+#else /* CONFIG_SCHED_TUNE */
+
+static inline int
+schedtune_cpu_margin(unsigned long util, int cpu)
+{
+	return 0;
+}
+
+static inline int
+schedtune_task_margin(struct task_struct *task)
+{
+	return 0;
+}
+
+#endif /* CONFIG_SCHED_TUNE */
+
+unsigned long
+boosted_cpu_util(int cpu)
+{
+	unsigned long util = cpu_util(cpu);
+	long margin = schedtune_cpu_margin(util, cpu);
+
+	trace_sched_boost_cpu(cpu, util, margin);
+
+	return util + margin;
+}
+
+static inline unsigned long
+boosted_task_util(struct task_struct *task)
+{
+	unsigned long util = task_util(task);
+	long margin = schedtune_task_margin(task);
+
+	trace_sched_boost_task(task, util, margin);
+
+	return util + margin;
+}
+
+static int cpu_util_wake(int cpu, struct task_struct *p);
+
+static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
+{
+	return capacity_orig_of(cpu) - cpu_util_wake(cpu, p);
+}
+
 /*
  * find_idlest_group finds and returns the least busy CPU group within the
  * domain.
@@ -5225,7 +6187,9 @@
 		  int this_cpu, int sd_flag)
 {
 	struct sched_group *idlest = NULL, *group = sd->groups;
+	struct sched_group *most_spare_sg = NULL;
 	unsigned long min_load = ULONG_MAX, this_load = 0;
+	unsigned long most_spare = 0, this_spare = 0;
 	int load_idx = sd->forkexec_idx;
 	int imbalance = 100 + (sd->imbalance_pct-100)/2;
 
@@ -5233,7 +6197,7 @@
 		load_idx = sd->wake_idx;
 
 	do {
-		unsigned long load, avg_load;
+		unsigned long load, avg_load, spare_cap, max_spare_cap;
 		int local_group;
 		int i;
 
@@ -5245,8 +6209,12 @@
 		local_group = cpumask_test_cpu(this_cpu,
 					       sched_group_cpus(group));
 
-		/* Tally up the load of all CPUs in the group */
+		/*
+		 * Tally up the load of all CPUs in the group and find
+		 * the group containing the CPU with most spare capacity.
+		 */
 		avg_load = 0;
+		max_spare_cap = 0;
 
 		for_each_cpu(i, sched_group_cpus(group)) {
 			/* Bias balancing toward cpus of our domain */
@@ -5256,6 +6224,11 @@
 				load = target_load(i, load_idx);
 
 			avg_load += load;
+
+			spare_cap = capacity_spare_wake(i, p);
+
+			if (spare_cap > max_spare_cap)
+				max_spare_cap = spare_cap;
 		}
 
 		/* Adjust by relative CPU capacity of the group */
@@ -5263,12 +6236,33 @@
 
 		if (local_group) {
 			this_load = avg_load;
-		} else if (avg_load < min_load) {
-			min_load = avg_load;
-			idlest = group;
+			this_spare = max_spare_cap;
+		} else {
+			if (avg_load < min_load) {
+				min_load = avg_load;
+				idlest = group;
+			}
+
+			if (most_spare < max_spare_cap) {
+				most_spare = max_spare_cap;
+				most_spare_sg = group;
+			}
 		}
 	} while (group = group->next, group != sd->groups);
 
+	/*
+	 * The cross-over point between using spare capacity or least load
+	 * is too conservative for high utilization tasks on partially
+	 * utilized systems if we require spare_capacity > task_util(p),
+	 * so we allow for some task stuffing by using
+	 * spare_capacity > task_util(p)/2.
+	 */
+	if (this_spare > task_util(p) / 2 &&
+	    imbalance*this_spare > 100*most_spare)
+		return NULL;
+	else if (most_spare > task_util(p) / 2)
+		return most_spare_sg;
+
 	if (!idlest || 100*this_load < imbalance*min_load)
 		return NULL;
 	return idlest;
@@ -5494,96 +6488,525 @@
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
 	struct sched_domain *sd;
-	int i;
+	struct sched_group *sg;
+	int i = task_cpu(p);
+	int best_idle_cpu = -1;
+	int best_idle_cstate = INT_MAX;
+	unsigned long best_idle_capacity = ULONG_MAX;
 
-	if (idle_cpu(target))
-		return target;
+	schedstat_inc(p->se.statistics.nr_wakeups_sis_attempts);
+	schedstat_inc(this_rq()->eas_stats.sis_attempts);
+
+	if (!sysctl_sched_cstate_aware) {
+		if (idle_cpu(target)) {
+			schedstat_inc(p->se.statistics.nr_wakeups_sis_idle);
+			schedstat_inc(this_rq()->eas_stats.sis_idle);
+			return target;
+		}
+
+		/*
+		 * If the prevous cpu is cache affine and idle, don't be stupid.
+		 */
+		if (i != target && cpus_share_cache(i, target) && idle_cpu(i)) {
+			schedstat_inc(p->se.statistics.nr_wakeups_sis_cache_affine);
+			schedstat_inc(this_rq()->eas_stats.sis_cache_affine);
+			return i;
+		}
+
+		sd = rcu_dereference(per_cpu(sd_llc, target));
+		if (!sd)
+			return target;
+
+		i = select_idle_core(p, sd, target);
+		if ((unsigned)i < nr_cpumask_bits)
+			return i;
+
+		i = select_idle_cpu(p, sd, target);
+		if ((unsigned)i < nr_cpumask_bits)
+			return i;
+
+		i = select_idle_smt(p, sd, target);
+		if ((unsigned)i < nr_cpumask_bits)
+			return i;
+	}
 
 	/*
-	 * If the previous cpu is cache affine and idle, don't be stupid.
+	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
-	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
-		return prev;
-
 	sd = rcu_dereference(per_cpu(sd_llc, target));
-	if (!sd)
-		return target;
+	for_each_lower_domain(sd) {
+		sg = sd->groups;
+		do {
+			if (!cpumask_intersects(sched_group_cpus(sg),
+                                        tsk_cpus_allowed(p)))
+				goto next;
 
-	i = select_idle_core(p, sd, target);
-	if ((unsigned)i < nr_cpumask_bits)
-		return i;
 
-	i = select_idle_cpu(p, sd, target);
-	if ((unsigned)i < nr_cpumask_bits)
-		return i;
+			if (sysctl_sched_cstate_aware) {
+				for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) {
+					int idle_idx = idle_get_state_idx(cpu_rq(i));
+					unsigned long new_usage = boosted_task_util(p);
+					unsigned long capacity_orig = capacity_orig_of(i);
 
-	i = select_idle_smt(p, sd, target);
-	if ((unsigned)i < nr_cpumask_bits)
-		return i;
+					if (new_usage > capacity_orig || !idle_cpu(i))
+						goto next;
+
+					if (i == target && new_usage <= capacity_curr_of(target)) {
+						schedstat_inc(p->se.statistics.nr_wakeups_sis_suff_cap);
+						schedstat_inc(this_rq()->eas_stats.sis_suff_cap);
+						schedstat_inc(sd->eas_stats.sis_suff_cap);
+						return target;
+					}
+
+					if (idle_idx < best_idle_cstate &&
+					    capacity_orig <= best_idle_capacity) {
+						best_idle_cpu = i;
+						best_idle_cstate = idle_idx;
+						best_idle_capacity = capacity_orig;
+					}
+				}
+			} else {
+				for_each_cpu(i, sched_group_cpus(sg)) {
+					if (i == target || !idle_cpu(i))
+						goto next;
+				}
+
+				target = cpumask_first_and(sched_group_cpus(sg),
+					tsk_cpus_allowed(p));
+				schedstat_inc(p->se.statistics.nr_wakeups_sis_idle_cpu);
+				schedstat_inc(this_rq()->eas_stats.sis_idle_cpu);
+				schedstat_inc(sd->eas_stats.sis_idle_cpu);
+				goto done;
+			}
+next:
+			sg = sg->next;
+		} while (sg != sd->groups);
+	}
+
+	if (best_idle_cpu >= 0)
+		target = best_idle_cpu;
+
+done:
+	schedstat_inc(p->se.statistics.nr_wakeups_sis_count);
+	schedstat_inc(this_rq()->eas_stats.sis_count);
 
 	return target;
 }
-
+ 
 /*
- * cpu_util returns the amount of capacity of a CPU that is used by CFS
- * tasks. The unit of the return value must be the one of capacity so we can
- * compare the utilization with the capacity of the CPU that is available for
- * CFS task (ie cpu_capacity).
- *
- * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
- * recent utilization of currently non-runnable tasks on a CPU. It represents
- * the amount of utilization of a CPU in the range [0..capacity_orig] where
- * capacity_orig is the cpu_capacity available at the highest frequency
- * (arch_scale_freq_capacity()).
- * The utilization of a CPU converges towards a sum equal to or less than the
- * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
- * the running time on this CPU scaled by capacity_curr.
- *
- * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
- * higher than capacity_orig because of unfortunate rounding in
- * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
- * the average stabilizes with the new running time. We need to check that the
- * utilization stays within the range of [0..capacity_orig] and cap it if
- * necessary. Without utilization capping, a group could be seen as overloaded
- * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
- * available capacity. We allow utilization to overshoot capacity_curr (but not
- * capacity_orig) as it useful for predicting the capacity required after task
- * migrations (scheduler-driven DVFS).
+ * cpu_util_wake: Compute cpu utilization with any contributions from
+ * the waking task p removed.
  */
-static int cpu_util(int cpu)
+static int cpu_util_wake(int cpu, struct task_struct *p)
 {
-	unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
-	unsigned long capacity = capacity_orig_of(cpu);
+	unsigned long util, capacity;
+
+#ifdef CONFIG_SCHED_WALT
+	/*
+	 * WALT does not decay idle tasks in the same manner
+	 * as PELT, so it makes little sense to subtract task
+	 * utilization from cpu utilization. Instead just use
+	 * cpu_util for this case.
+	 */
+	if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+		return cpu_util(cpu);
+#endif
+	/* Task has no contribution or is new */
+	if (cpu != task_cpu(p) || !p->se.avg.last_update_time)
+		return cpu_util(cpu);
+
+	capacity = capacity_orig_of(cpu);
+	util = max_t(long, cpu_util(cpu) - task_util(p), 0);
 
 	return (util >= capacity) ? capacity : util;
 }
 
-static inline int task_util(struct task_struct *p)
+static int start_cpu(bool boosted)
 {
-	return p->se.avg.util_avg;
+	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+
+	RCU_LOCKDEP_WARN(rcu_read_lock_sched_held(),
+			   "sched RCU must be held");
+
+	return boosted ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu;
+}
+
+static inline int find_best_target(struct task_struct *p, int *backup_cpu,
+				   bool boosted, bool prefer_idle)
+{
+	unsigned long best_idle_min_cap_orig = ULONG_MAX;
+	unsigned long min_util = boosted_task_util(p);
+	unsigned long target_capacity = ULONG_MAX;
+	unsigned long min_wake_util = ULONG_MAX;
+	unsigned long target_max_spare_cap = 0;
+	unsigned long target_util = ULONG_MAX;
+	unsigned long best_active_util = ULONG_MAX;
+	int best_idle_cstate = INT_MAX;
+	struct sched_domain *sd;
+	struct sched_group *sg;
+	int best_active_cpu = -1;
+	int best_idle_cpu = -1;
+	int target_cpu = -1;
+	int cpu, i;
+
+	*backup_cpu = -1;
+
+	schedstat_inc(p->se.statistics.nr_wakeups_fbt_attempts);
+	schedstat_inc(this_rq()->eas_stats.fbt_attempts);
+
+	/* Find start CPU based on boost value */
+	cpu = start_cpu(boosted);
+	if (cpu < 0) {
+		schedstat_inc(p->se.statistics.nr_wakeups_fbt_no_cpu);
+		schedstat_inc(this_rq()->eas_stats.fbt_no_cpu);
+		return -1;
+	}
+
+	/* Find SD for the start CPU */
+	sd = rcu_dereference(per_cpu(sd_ea, cpu));
+	if (!sd) {
+		schedstat_inc(p->se.statistics.nr_wakeups_fbt_no_sd);
+		schedstat_inc(this_rq()->eas_stats.fbt_no_sd);
+		return -1;
+	}
+
+	/* Scan CPUs in all SDs */
+	sg = sd->groups;
+	do {
+		for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) {
+			unsigned long capacity_curr = capacity_curr_of(i);
+			unsigned long capacity_orig = capacity_orig_of(i);
+			unsigned long wake_util, new_util;
+
+			if (!cpu_online(i))
+				continue;
+
+			if (walt_cpu_high_irqload(i))
+				continue;
+
+			/*
+			 * p's blocked utilization is still accounted for on prev_cpu
+			 * so prev_cpu will receive a negative bias due to the double
+			 * accounting. However, the blocked utilization may be zero.
+			 */
+			wake_util = cpu_util_wake(i, p);
+			new_util = wake_util + task_util(p);
+
+			/*
+			 * Ensure minimum capacity to grant the required boost.
+			 * The target CPU can be already at a capacity level higher
+			 * than the one required to boost the task.
+			 */
+			new_util = max(min_util, new_util);
+			if (new_util > capacity_orig)
+				continue;
+
+			/*
+			 * Case A) Latency sensitive tasks
+			 *
+			 * Unconditionally favoring tasks that prefer idle CPU to
+			 * improve latency.
+			 *
+			 * Looking for:
+			 * - an idle CPU, whatever its idle_state is, since
+			 *   the first CPUs we explore are more likely to be
+			 *   reserved for latency sensitive tasks.
+			 * - a non idle CPU where the task fits in its current
+			 *   capacity and has the maximum spare capacity.
+			 * - a non idle CPU with lower contention from other
+			 *   tasks and running at the lowest possible OPP.
+			 *
+			 * The last two goals tries to favor a non idle CPU
+			 * where the task can run as if it is "almost alone".
+			 * A maximum spare capacity CPU is favoured since
+			 * the task already fits into that CPU's capacity
+			 * without waiting for an OPP chance.
+			 *
+			 * The following code path is the only one in the CPUs
+			 * exploration loop which is always used by
+			 * prefer_idle tasks. It exits the loop with wither a
+			 * best_active_cpu or a target_cpu which should
+			 * represent an optimal choice for latency sensitive
+			 * tasks.
+			 */
+			if (prefer_idle) {
+
+				/*
+				 * Case A.1: IDLE CPU
+				 * Return the first IDLE CPU we find.
+				 */
+				if (idle_cpu(i)) {
+					schedstat_inc(p->se.statistics.nr_wakeups_fbt_pref_idle);
+					schedstat_inc(this_rq()->eas_stats.fbt_pref_idle);
+
+					trace_sched_find_best_target(p,
+							prefer_idle, min_util,
+							cpu, best_idle_cpu,
+							best_active_cpu, i);
+
+					return i;
+				}
+
+				/*
+				 * Case A.2: Target ACTIVE CPU
+				 * Favor CPUs with max spare capacity.
+				 */
+				if ((capacity_curr > new_util) &&
+					(capacity_orig - new_util > target_max_spare_cap)) {
+					target_max_spare_cap = capacity_orig - new_util;
+					target_cpu = i;
+					continue;
+				}
+				if (target_cpu != -1)
+					continue;
+
+
+				/*
+				 * Case A.3: Backup ACTIVE CPU
+				 * Favor CPUs with:
+				 * - lower utilization due to other tasks
+				 * - lower utilization with the task in
+				 */
+				if (wake_util > min_wake_util)
+					continue;
+				if (new_util > best_active_util)
+					continue;
+				min_wake_util = wake_util;
+				best_active_util = new_util;
+				best_active_cpu = i;
+				continue;
+			}
+
+			/*
+			 * Case B) Non latency sensitive tasks on IDLE CPUs.
+			 *
+			 * Find an optimal backup IDLE CPU for non latency
+			 * sensitive tasks.
+			 *
+			 * Looking for:
+			 * - minimizing the capacity_orig,
+			 *   i.e. preferring LITTLE CPUs
+			 * - favoring shallowest idle states
+			 *   i.e. avoid to wakeup deep-idle CPUs
+			 *
+			 * The following code path is used by non latency
+			 * sensitive tasks if IDLE CPUs are available. If at
+			 * least one of such CPUs are available it sets the
+			 * best_idle_cpu to the most suitable idle CPU to be
+			 * selected.
+			 *
+			 * If idle CPUs are available, favour these CPUs to
+			 * improve performances by spreading tasks.
+			 * Indeed, the energy_diff() computed by the caller
+			 * will take care to ensure the minimization of energy
+			 * consumptions without affecting performance.
+			 */
+			if (idle_cpu(i)) {
+				int idle_idx = idle_get_state_idx(cpu_rq(i));
+
+				/* Select idle CPU with lower cap_orig */
+				if (capacity_orig > best_idle_min_cap_orig)
+					continue;
+
+				/*
+				 * Skip CPUs in deeper idle state, but only
+				 * if they are also less energy efficient.
+				 * IOW, prefer a deep IDLE LITTLE CPU vs a
+				 * shallow idle big CPU.
+				 */
+				if (sysctl_sched_cstate_aware &&
+				    best_idle_cstate <= idle_idx)
+					continue;
+
+				/* Keep track of best idle CPU */
+				best_idle_min_cap_orig = capacity_orig;
+				best_idle_cstate = idle_idx;
+				best_idle_cpu = i;
+				continue;
+			}
+
+			/*
+			 * Case C) Non latency sensitive tasks on ACTIVE CPUs.
+			 *
+			 * Pack tasks in the most energy efficient capacities.
+			 *
+			 * This task packing strategy prefers more energy
+			 * efficient CPUs (i.e. pack on smaller maximum
+			 * capacity CPUs) while also trying to spread tasks to
+			 * run them all at the lower OPP.
+			 *
+			 * This assumes for example that it's more energy
+			 * efficient to run two tasks on two CPUs at a lower
+			 * OPP than packing both on a single CPU but running
+			 * that CPU at an higher OPP.
+			 *
+			 * Thus, this case keep track of the CPU with the
+			 * smallest maximum capacity and highest spare maximum
+			 * capacity.
+			 */
+
+			/* Favor CPUs with smaller capacity */
+			if (capacity_orig > target_capacity)
+				continue;
+
+			/* Favor CPUs with maximum spare capacity */
+			if ((capacity_orig - new_util) < target_max_spare_cap)
+				continue;
+
+			target_max_spare_cap = capacity_orig - new_util;
+			target_capacity = capacity_orig;
+			target_util = new_util;
+			target_cpu = i;
+		}
+
+	} while (sg = sg->next, sg != sd->groups);
+
+	/*
+	 * For non latency sensitive tasks, cases B and C in the previous loop,
+	 * we pick the best IDLE CPU only if we was not able to find a target
+	 * ACTIVE CPU.
+	 *
+	 * Policies priorities:
+	 *
+	 * - prefer_idle tasks:
+	 *
+	 *   a) IDLE CPU available, we return immediately
+	 *   b) ACTIVE CPU where task fits and has the bigger maximum spare
+	 *      capacity (i.e. target_cpu)
+	 *   c) ACTIVE CPU with less contention due to other tasks
+	 *      (i.e. best_active_cpu)
+	 *
+	 * - NON prefer_idle tasks:
+	 *
+	 *   a) ACTIVE CPU: target_cpu
+	 *   b) IDLE CPU: best_idle_cpu
+	 */
+	if (target_cpu == -1)
+		target_cpu = prefer_idle
+			? best_active_cpu
+			: best_idle_cpu;
+	else
+		*backup_cpu = prefer_idle
+		? best_active_cpu
+		: best_idle_cpu;
+
+	trace_sched_find_best_target(p, prefer_idle, min_util, cpu,
+				     best_idle_cpu, best_active_cpu,
+				     target_cpu);
+
+	schedstat_inc(p->se.statistics.nr_wakeups_fbt_count);
+	schedstat_inc(this_rq()->eas_stats.fbt_count);
+
+	return target_cpu;
 }
 
 /*
  * Disable WAKE_AFFINE in the case where task @p doesn't fit in the
  * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu.
- *
+ * 
  * In that case WAKE_AFFINE doesn't make sense and we'll let
  * BALANCE_WAKE sort things out.
  */
 static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 {
 	long min_cap, max_cap;
-
 	min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
-	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
-
+	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val;
 	/* Minimum capacity is close to max, no need to abort wake_affine */
 	if (max_cap - min_cap < max_cap >> 3)
 		return 0;
 
+	/* Bring task utilization in sync with prev_cpu */
+	sync_entity_load_avg(&p->se);
+
 	return min_cap * 1024 < task_util(p) * capacity_margin;
 }
 
+static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync)
+{
+	struct sched_domain *sd;
+	int target_cpu = prev_cpu, tmp_target, tmp_backup;
+	bool boosted, prefer_idle;
+
+	schedstat_inc(p->se.statistics.nr_wakeups_secb_attempts);
+	schedstat_inc(this_rq()->eas_stats.secb_attempts);
+
+	if (sysctl_sched_sync_hint_enable && sync) {
+		int cpu = smp_processor_id();
+
+		if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) {
+			schedstat_inc(p->se.statistics.nr_wakeups_secb_sync);
+			schedstat_inc(this_rq()->eas_stats.secb_sync);
+			return cpu;
+		}
+	}
+
+	rcu_read_lock();
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+	boosted = schedtune_task_boost(p) > 0;
+	prefer_idle = schedtune_prefer_idle(p) > 0;
+#else
+	boosted = get_sysctl_sched_cfs_boost() > 0;
+	prefer_idle = 0;
+#endif
+
+	sync_entity_load_avg(&p->se);
+
+	sd = rcu_dereference(per_cpu(sd_ea, prev_cpu));
+	/* Find a cpu with sufficient capacity */
+	tmp_target = find_best_target(p, &tmp_backup, boosted, prefer_idle);
+
+	if (!sd)
+		goto unlock;
+	if (tmp_target >= 0) {
+		target_cpu = tmp_target;
+		if ((boosted || prefer_idle) && idle_cpu(target_cpu)) {
+			schedstat_inc(p->se.statistics.nr_wakeups_secb_idle_bt);
+			schedstat_inc(this_rq()->eas_stats.secb_idle_bt);
+			goto unlock;
+		}
+	}
+
+	if (target_cpu != prev_cpu) {
+		struct energy_env eenv = {
+			.util_delta     = task_util(p),
+			.src_cpu        = prev_cpu,
+			.dst_cpu        = target_cpu,
+			.task           = p,
+		};
+
+		/* Not enough spare capacity on previous cpu */
+		if (cpu_overutilized(prev_cpu)) {
+			schedstat_inc(p->se.statistics.nr_wakeups_secb_insuff_cap);
+			schedstat_inc(this_rq()->eas_stats.secb_insuff_cap);
+			goto unlock;
+		}
+
+		if (energy_diff(&eenv) >= 0) {
+			/* No energy saving for target_cpu, try backup */
+			target_cpu = tmp_backup;
+			eenv.dst_cpu = target_cpu;
+			if (tmp_backup < 0 || energy_diff(&eenv) >= 0) {
+				schedstat_inc(p->se.statistics.nr_wakeups_secb_no_nrg_sav);
+				schedstat_inc(this_rq()->eas_stats.secb_no_nrg_sav);
+				target_cpu = prev_cpu;
+				goto unlock;
+			}
+		}
+
+		schedstat_inc(p->se.statistics.nr_wakeups_secb_nrg_sav);
+		schedstat_inc(this_rq()->eas_stats.secb_nrg_sav);
+		goto unlock;
+	}
+
+	schedstat_inc(p->se.statistics.nr_wakeups_secb_count);
+	schedstat_inc(this_rq()->eas_stats.secb_count);
+
+unlock:
+	rcu_read_unlock();
+	return target_cpu;
+}
+
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
  * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -5607,10 +7030,13 @@
 
 	if (sd_flag & SD_BALANCE_WAKE) {
 		record_wakee(p);
-		want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
-			      && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
+		want_affine = (!wake_wide(p) && !wake_cap(p, cpu, prev_cpu) &&
+			cpumask_test_cpu(cpu, tsk_cpus_allowed(p)));
 	}
 
+	if (energy_aware() && !(cpu_rq(prev_cpu)->rd->overutilized))
+		return select_energy_cpu_brute(p, prev_cpu, sync);
+
 	rcu_read_lock();
 	for_each_domain(cpu, tmp) {
 		if (!(tmp->flags & SD_LOAD_BALANCE))
@@ -5642,39 +7068,58 @@
 		if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
 			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
-	} else while (sd) {
-		struct sched_group *group;
-		int weight;
+	} else {
+		int wu = sd_flag & SD_BALANCE_WAKE;
+		int cas_cpu = -1;
 
-		if (!(sd->flags & sd_flag)) {
-			sd = sd->child;
-			continue;
+		if (wu) {
+			schedstat_inc(p->se.statistics.nr_wakeups_cas_attempts);
+			schedstat_inc(this_rq()->eas_stats.cas_attempts);
 		}
 
-		group = find_idlest_group(sd, p, cpu, sd_flag);
-		if (!group) {
-			sd = sd->child;
-			continue;
+
+		while (sd) {
+			struct sched_group *group;
+			int weight;
+
+			if (wu)
+				schedstat_inc(sd->eas_stats.cas_attempts);
+
+			if (!(sd->flags & sd_flag)) {
+				sd = sd->child;
+				continue;
+			}
+
+			group = find_idlest_group(sd, p, cpu, sd_flag);
+			if (!group) {
+				sd = sd->child;
+				continue;
+			}
+
+			new_cpu = find_idlest_cpu(group, p, cpu);
+			if (new_cpu == -1 || new_cpu == cpu) {
+				/* Now try balancing at a lower domain level of cpu */
+				sd = sd->child;
+				continue;
+			}
+
+			/* Now try balancing at a lower domain level of new_cpu */
+			cpu = cas_cpu = new_cpu;
+			weight = sd->span_weight;
+			sd = NULL;
+			for_each_domain(cpu, tmp) {
+				if (weight <= tmp->span_weight)
+					break;
+				if (tmp->flags & sd_flag)
+					sd = tmp;
+			}
+			/* while loop will break here if sd == NULL */
 		}
 
-		new_cpu = find_idlest_cpu(group, p, cpu);
-		if (new_cpu == -1 || new_cpu == cpu) {
-			/* Now try balancing at a lower domain level of cpu */
-			sd = sd->child;
-			continue;
+		if (wu && (cas_cpu >= 0)) {
+			schedstat_inc(p->se.statistics.nr_wakeups_cas_count);
+			schedstat_inc(this_rq()->eas_stats.cas_count);
 		}
-
-		/* Now try balancing at a lower domain level of new_cpu */
-		cpu = new_cpu;
-		weight = sd->span_weight;
-		sd = NULL;
-		for_each_domain(cpu, tmp) {
-			if (weight <= tmp->span_weight)
-				break;
-			if (tmp->flags & sd_flag)
-				sd = tmp;
-		}
-		/* while loop will break here if sd == NULL */
 	}
 	rcu_read_unlock();
 
@@ -5734,6 +7179,8 @@
 {
 	remove_entity_load_avg(&p->se);
 }
+#else
+#define task_fits_max(p, cpu) true
 #endif /* CONFIG_SMP */
 
 static unsigned long
@@ -5980,6 +7427,8 @@
 	if (hrtick_enabled(rq))
 		hrtick_start_fair(rq, p);
 
+	rq->misfit_task = !task_fits_max(p, rq->cpu);
+
 	return p;
 simple:
 	cfs_rq = &rq->cfs;
@@ -6001,9 +7450,12 @@
 	if (hrtick_enabled(rq))
 		hrtick_start_fair(rq, p);
 
+	rq->misfit_task = !task_fits_max(p, rq->cpu);
+
 	return p;
 
 idle:
+	rq->misfit_task = 0;
 	/*
 	 * This is OK, because current is on_cpu, which avoids it being picked
 	 * for load-balance and preemption/IRQs are still disabled avoiding
@@ -6216,6 +7668,13 @@
 
 enum fbq_type { regular, remote, all };
 
+enum group_type {
+	group_other = 0,
+	group_misfit_task,
+	group_imbalanced,
+	group_overloaded,
+};
+
 #define LBF_ALL_PINNED	0x01
 #define LBF_NEED_BREAK	0x02
 #define LBF_DST_PINNED  0x04
@@ -6234,6 +7693,7 @@
 	int			new_dst_cpu;
 	enum cpu_idle_type	idle;
 	long			imbalance;
+	unsigned int		src_grp_nr_running;
 	/* The set of CPUs under consideration for load-balancing */
 	struct cpumask		*cpus;
 
@@ -6244,6 +7704,7 @@
 	unsigned int		loop_max;
 
 	enum fbq_type		fbq_type;
+	enum group_type		busiest_group_type;
 	struct list_head	tasks;
 };
 
@@ -6425,7 +7886,9 @@
 
 	p->on_rq = TASK_ON_RQ_MIGRATING;
 	deactivate_task(env->src_rq, p, 0);
+	double_lock_balance(env->src_rq, env->dst_rq);
 	set_task_cpu(p, env->dst_cpu);
+	double_unlock_balance(env->src_rq, env->dst_rq);
 }
 
 /*
@@ -6570,6 +8033,10 @@
 {
 	raw_spin_lock(&rq->lock);
 	attach_task(rq, p);
+	/*
+	 * We want to potentially raise target_cpu's OPP.
+	 */
+	update_capacity_of(cpu_of(rq));
 	raw_spin_unlock(&rq->lock);
 }
 
@@ -6591,6 +8058,11 @@
 		attach_task(env->dst_rq, p);
 	}
 
+	/*
+	 * We want to potentially raise env.dst_cpu's OPP.
+	 */
+	update_capacity_of(env->dst_cpu);
+
 	raw_spin_unlock(&env->dst_rq->lock);
 }
 
@@ -6615,6 +8087,10 @@
 
 		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
 			update_tg_load_avg(cfs_rq, 0);
+
+		/* Propagate pending load changes to the parent */
+		if (cfs_rq->tg->se[cpu])
+			update_load_avg(cfs_rq->tg->se[cpu], 0);
 	}
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
@@ -6686,12 +8162,6 @@
 
 /********** Helpers for find_busiest_group ************************/
 
-enum group_type {
-	group_other = 0,
-	group_imbalanced,
-	group_overloaded,
-};
-
 /*
  * sg_lb_stats - stats of a sched_group required for load_balancing
  */
@@ -6707,6 +8177,7 @@
 	unsigned int group_weight;
 	enum group_type group_type;
 	int group_no_capacity;
+	int group_misfit_task; /* A cpu has a task too big for its capacity */
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int nr_numa_running;
 	unsigned int nr_preferred_running;
@@ -6804,13 +8275,43 @@
 	return 1;
 }
 
+void init_max_cpu_capacity(struct max_cpu_capacity *mcc)
+{
+	raw_spin_lock_init(&mcc->lock);
+	mcc->val = 0;
+	mcc->cpu = -1;
+}
+
 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 {
 	unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
 	struct sched_group *sdg = sd->groups;
+	struct max_cpu_capacity *mcc;
+	unsigned long max_capacity;
+	int max_cap_cpu;
+	unsigned long flags;
 
 	cpu_rq(cpu)->cpu_capacity_orig = capacity;
 
+	mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;
+
+	raw_spin_lock_irqsave(&mcc->lock, flags);
+	max_capacity = mcc->val;
+	max_cap_cpu = mcc->cpu;
+
+	if ((max_capacity > capacity && max_cap_cpu == cpu) ||
+	    (max_capacity < capacity)) {
+		mcc->val = capacity;
+		mcc->cpu = cpu;
+#ifdef CONFIG_SCHED_DEBUG
+		raw_spin_unlock_irqrestore(&mcc->lock, flags);
+		pr_info("CPU%d: update max cpu_capacity %lu\n", cpu, capacity);
+		goto skip_unlock;
+#endif
+	}
+	raw_spin_unlock_irqrestore(&mcc->lock, flags);
+
+skip_unlock: __attribute__ ((unused));
 	capacity *= scale_rt_capacity(cpu);
 	capacity >>= SCHED_CAPACITY_SHIFT;
 
@@ -6819,13 +8320,15 @@
 
 	cpu_rq(cpu)->cpu_capacity = capacity;
 	sdg->sgc->capacity = capacity;
+	sdg->sgc->max_capacity = capacity;
+	sdg->sgc->min_capacity = capacity;
 }
 
 void update_group_capacity(struct sched_domain *sd, int cpu)
 {
 	struct sched_domain *child = sd->child;
 	struct sched_group *group, *sdg = sd->groups;
-	unsigned long capacity;
+	unsigned long capacity, max_capacity, min_capacity;
 	unsigned long interval;
 
 	interval = msecs_to_jiffies(sd->balance_interval);
@@ -6838,6 +8341,8 @@
 	}
 
 	capacity = 0;
+	max_capacity = 0;
+	min_capacity = ULONG_MAX;
 
 	if (child->flags & SD_OVERLAP) {
 		/*
@@ -6862,11 +8367,13 @@
 			 */
 			if (unlikely(!rq->sd)) {
 				capacity += capacity_of(cpu);
-				continue;
+			} else {
+				sgc = rq->sd->groups->sgc;
+				capacity += sgc->capacity;
 			}
 
-			sgc = rq->sd->groups->sgc;
-			capacity += sgc->capacity;
+			max_capacity = max(capacity, max_capacity);
+			min_capacity = min(capacity, min_capacity);
 		}
 	} else  {
 		/*
@@ -6876,12 +8383,18 @@
 
 		group = child->groups;
 		do {
-			capacity += group->sgc->capacity;
+			struct sched_group_capacity *sgc = group->sgc;
+
+			capacity += sgc->capacity;
+			max_capacity = max(sgc->max_capacity, max_capacity);
+			min_capacity = min(sgc->min_capacity, min_capacity);
 			group = group->next;
 		} while (group != child->groups);
 	}
 
 	sdg->sgc->capacity = capacity;
+	sdg->sgc->max_capacity = max_capacity;
+	sdg->sgc->min_capacity = min_capacity;
 }
 
 /*
@@ -6976,6 +8489,17 @@
 	return false;
 }
 
+/*
+ * group_smaller_cpu_capacity: Returns true if sched_group sg has smaller
+ * per-cpu capacity than sched_group ref.
+ */
+static inline bool
+group_smaller_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
+{
+	return sg->sgc->max_capacity + capacity_margin - SCHED_CAPACITY_SCALE <
+							ref->sgc->max_capacity;
+}
+
 static inline enum
 group_type group_classify(struct sched_group *group,
 			  struct sg_lb_stats *sgs)
@@ -6986,9 +8510,44 @@
 	if (sg_imbalanced(group))
 		return group_imbalanced;
 
+	if (sgs->group_misfit_task)
+		return group_misfit_task;
+
 	return group_other;
 }
 
+#ifdef CONFIG_NO_HZ_COMMON
+/*
+ * idle load balancing data
+ *  - used by the nohz balance, but we want it available here
+ *    so that we can see which CPUs have no tick.
+ */
+static struct {
+	cpumask_var_t idle_cpus_mask;
+	atomic_t nr_cpus;
+	unsigned long next_balance;     /* in jiffy units */
+} nohz ____cacheline_aligned;
+
+static inline void update_cpu_stats_if_tickless(struct rq *rq)
+{
+	/* only called from update_sg_lb_stats when irqs are disabled */
+	if (cpumask_test_cpu(rq->cpu, nohz.idle_cpus_mask)) {
+		/* rate limit updates to once-per-jiffie at most */
+		if (READ_ONCE(jiffies) <= rq->last_load_update_tick)
+			return;
+
+		raw_spin_lock(&rq->lock);
+		update_rq_clock(rq);
+		cpu_load_update_idle(rq);
+		update_cfs_rq_load_avg(rq->clock_task, &rq->cfs, false);
+		raw_spin_unlock(&rq->lock);
+	}
+}
+
+#else
+static inline void update_cpu_stats_if_tickless(struct rq *rq) { }
+#endif
+
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @env: The load balancing environment.
@@ -6997,11 +8556,12 @@
  * @local_group: Does group contain this_cpu.
  * @sgs: variable to hold the statistics for this group.
  * @overload: Indicate more than one runnable task for any CPU.
+ * @overutilized: Indicate overutilization for any CPU.
  */
 static inline void update_sg_lb_stats(struct lb_env *env,
 			struct sched_group *group, int load_idx,
 			int local_group, struct sg_lb_stats *sgs,
-			bool *overload)
+			bool *overload, bool *overutilized)
 {
 	unsigned long load;
 	int i, nr_running;
@@ -7011,6 +8571,12 @@
 	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
 		struct rq *rq = cpu_rq(i);
 
+		/* if we are entering idle and there are CPUs with
+		 * their tick stopped, do an update for them
+		 */
+		if (env->idle == CPU_NEWLY_IDLE)
+			update_cpu_stats_if_tickless(rq);
+
 		/* Bias balancing toward cpus of our domain */
 		if (local_group)
 			load = target_load(i, load_idx);
@@ -7035,6 +8601,12 @@
 		 */
 		if (!nr_running && idle_cpu(i))
 			sgs->idle_cpus++;
+
+		if (cpu_overutilized(i)) {
+			*overutilized = true;
+			if (!sgs->group_misfit_task && rq->misfit_task)
+				sgs->group_misfit_task = capacity_of(i);
+		}
 	}
 
 	/* Adjust by relative CPU capacity of the group */
@@ -7076,9 +8648,31 @@
 	if (sgs->group_type < busiest->group_type)
 		return false;
 
+	/*
+	 * Candidate sg doesn't face any serious load-balance problems
+	 * so don't pick it if the local sg is already filled up.
+	 */
+	if (sgs->group_type == group_other &&
+	    !group_has_capacity(env, &sds->local_stat))
+		return false;
+
 	if (sgs->avg_load <= busiest->avg_load)
 		return false;
 
+	if (!(env->sd->flags & SD_ASYM_CPUCAPACITY))
+		goto asym_packing;
+
+	/*
+	 * Candidate sg has no more than one task per CPU and
+	 * has higher per-CPU capacity. Migrating tasks to less
+	 * capable CPUs may harm throughput. Maximize throughput,
+	 * power/energy consequences are not considered.
+	 */
+	if (sgs->sum_nr_running <= sgs->group_weight &&
+	    group_smaller_cpu_capacity(sds->local, sg))
+		return false;
+
+asym_packing:
 	/* This is the busiest node in its class. */
 	if (!(env->sd->flags & SD_ASYM_PACKING))
 		return true;
@@ -7133,6 +8727,9 @@
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+#define lb_sd_parent(sd) \
+	(sd->parent && sd->parent->groups != sd->parent->groups->next)
+
 /**
  * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
  * @env: The load balancing environment.
@@ -7144,7 +8741,7 @@
 	struct sched_group *sg = env->sd->groups;
 	struct sg_lb_stats tmp_sgs;
 	int load_idx, prefer_sibling = 0;
-	bool overload = false;
+	bool overload = false, overutilized = false;
 
 	if (child && child->flags & SD_PREFER_SIBLING)
 		prefer_sibling = 1;
@@ -7166,7 +8763,7 @@
 		}
 
 		update_sg_lb_stats(env, sg, load_idx, local_group, sgs,
-						&overload);
+						&overload, &overutilized);
 
 		if (local_group)
 			goto next_group;
@@ -7188,6 +8785,15 @@
 			sgs->group_type = group_classify(sg, sgs);
 		}
 
+		/*
+		 * Ignore task groups with misfit tasks if local group has no
+		 * capacity or if per-cpu capacity isn't higher.
+		 */
+		if (sgs->group_type == group_misfit_task &&
+		    (!group_has_capacity(env, &sds->local_stat) ||
+		     !group_smaller_cpu_capacity(sg, sds->local)))
+			sgs->group_type = group_other;
+
 		if (update_sd_pick_busiest(env, sds, sg, sgs)) {
 			sds->busiest = sg;
 			sds->busiest_stat = *sgs;
@@ -7204,10 +8810,23 @@
 	if (env->sd->flags & SD_NUMA)
 		env->fbq_type = fbq_classify_group(&sds->busiest_stat);
 
-	if (!env->sd->parent) {
+	env->src_grp_nr_running = sds->busiest_stat.sum_nr_running;
+
+	if (!lb_sd_parent(env->sd)) {
 		/* update overload indicator if we are at root domain */
 		if (env->dst_rq->rd->overload != overload)
 			env->dst_rq->rd->overload = overload;
+
+		/* Update over-utilization (tipping point, U >= 0) indicator */
+		if (env->dst_rq->rd->overutilized != overutilized) {
+			env->dst_rq->rd->overutilized = overutilized;
+			trace_sched_overutilized(overutilized);
+		}
+	} else {
+		if (!env->dst_rq->rd->overutilized && overutilized) {
+			env->dst_rq->rd->overutilized = true;
+			trace_sched_overutilized(true);
+		}
 	}
 
 }
@@ -7360,6 +8979,22 @@
 	 */
 	if (busiest->avg_load <= sds->avg_load ||
 	    local->avg_load >= sds->avg_load) {
+		/* Misfitting tasks should be migrated in any case */
+		if (busiest->group_type == group_misfit_task) {
+			env->imbalance = busiest->group_misfit_task;
+			return;
+		}
+
+		/*
+		 * Busiest group is overloaded, local is not, use the spare
+		 * cycles to maximize throughput
+		 */
+		if (busiest->group_type == group_overloaded &&
+		    local->group_type <= group_misfit_task) {
+			env->imbalance = busiest->load_per_task;
+			return;
+		}
+
 		env->imbalance = 0;
 		return fix_small_imbalance(env, sds);
 	}
@@ -7393,6 +9028,11 @@
 		(sds->avg_load - local->avg_load) * local->group_capacity
 	) / SCHED_CAPACITY_SCALE;
 
+	/* Boost imbalance to allow misfit task to be balanced. */
+	if (busiest->group_type == group_misfit_task)
+		env->imbalance = max_t(long, env->imbalance,
+				     busiest->group_misfit_task);
+
 	/*
 	 * if *imbalance is less than the average load per runnable task
 	 * there is no guarantee that any tasks will be moved so we'll have
@@ -7428,6 +9068,10 @@
 	 * this level.
 	 */
 	update_sd_lb_stats(env, &sds);
+
+	if (energy_aware() && !env->dst_rq->rd->overutilized)
+		goto out_balanced;
+
 	local = &sds.local_stat;
 	busiest = &sds.busiest_stat;
 
@@ -7455,6 +9099,11 @@
 	    busiest->group_no_capacity)
 		goto force_balance;
 
+	/* Misfitting tasks should be dealt with regardless of the avg load */
+	if (busiest->group_type == group_misfit_task) {
+		goto force_balance;
+	}
+
 	/*
 	 * If the local group is busier than the selected busiest group
 	 * don't try and pull any tasks.
@@ -7478,7 +9127,8 @@
 		 * might end up to just move the imbalance on another group
 		 */
 		if ((busiest->group_type != group_overloaded) &&
-				(local->idle_cpus <= (busiest->idle_cpus + 1)))
+		    (local->idle_cpus <= (busiest->idle_cpus + 1)) &&
+		    !group_smaller_cpu_capacity(sds.busiest, sds.local))
 			goto out_balanced;
 	} else {
 		/*
@@ -7491,6 +9141,7 @@
 	}
 
 force_balance:
+	env->busiest_group_type = busiest->group_type;
 	/* Looks like there is an imbalance. Compute it */
 	calculate_imbalance(env, &sds);
 	return sds.busiest;
@@ -7549,7 +9200,8 @@
 		 */
 
 		if (rq->nr_running == 1 && wl > env->imbalance &&
-		    !check_cpu_capacity(rq, env->sd))
+		    !check_cpu_capacity(rq, env->sd) &&
+		    env->busiest_group_type != group_misfit_task)
 			continue;
 
 		/*
@@ -7607,6 +9259,13 @@
 			return 1;
 	}
 
+	if ((capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) &&
+				env->src_rq->cfs.h_nr_running == 1 &&
+				cpu_overutilized(env->src_cpu) &&
+				!cpu_overutilized(env->dst_cpu)) {
+			return 1;
+	}
+
 	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
 }
 
@@ -7655,7 +9314,7 @@
 			int *continue_balancing)
 {
 	int ld_moved, cur_ld_moved, active_balance = 0;
-	struct sched_domain *sd_parent = sd->parent;
+	struct sched_domain *sd_parent = lb_sd_parent(sd) ? sd->parent : NULL;
 	struct sched_group *group;
 	struct rq *busiest;
 	unsigned long flags;
@@ -7728,6 +9387,11 @@
 		 * ld_moved     - cumulative load moved across iterations
 		 */
 		cur_ld_moved = detach_tasks(&env);
+		/*
+		 * We want to potentially lower env.src_cpu's OPP.
+		 */
+		if (cur_ld_moved)
+			update_capacity_of(env.src_cpu);
 
 		/*
 		 * We've detached some tasks from busiest_rq. Every
@@ -7819,7 +9483,8 @@
 		 * excessive cache_hot migrations and active balances.
 		 */
 		if (idle != CPU_NEWLY_IDLE)
-			sd->nr_balance_failed++;
+			if (env.src_grp_nr_running > 1)
+				sd->nr_balance_failed++;
 
 		if (need_active_balance(&env)) {
 			raw_spin_lock_irqsave(&busiest->lock, flags);
@@ -7949,6 +9614,7 @@
 	struct sched_domain *sd;
 	int pulled_task = 0;
 	u64 curr_cost = 0;
+	long removed_util=0;
 
 	/*
 	 * We must set idle_stamp _before_ calling idle_balance(), such that we
@@ -7956,8 +9622,9 @@
 	 */
 	this_rq->idle_stamp = rq_clock(this_rq);
 
-	if (this_rq->avg_idle < sysctl_sched_migration_cost ||
-	    !this_rq->rd->overload) {
+	if (!energy_aware() &&
+	    (this_rq->avg_idle < sysctl_sched_migration_cost ||
+	     !this_rq->rd->overload)) {
 		rcu_read_lock();
 		sd = rcu_dereference_check_sched_domain(this_rq->sd);
 		if (sd)
@@ -7969,6 +9636,17 @@
 
 	raw_spin_unlock(&this_rq->lock);
 
+	/*
+	 * If removed_util_avg is !0 we most probably migrated some task away
+	 * from this_cpu. In this case we might be willing to trigger an OPP
+	 * update, but we want to do so if we don't find anybody else to pull
+	 * here (we will trigger an OPP update with the pulled task's enqueue
+	 * anyway).
+	 *
+	 * Record removed_util before calling update_blocked_averages, and use
+	 * it below (before returning) to see if an OPP update is required.
+	 */
+	removed_util = atomic_long_read(&(this_rq->cfs).removed_util_avg);
 	update_blocked_averages(this_cpu);
 	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
@@ -8032,6 +9710,13 @@
 
 	if (pulled_task)
 		this_rq->idle_stamp = 0;
+	else if (removed_util) {
+		/*
+		 * No task pulled and someone has been migrated away.
+		 * Good case to trigger an OPP update.
+		 */
+		update_capacity_of(this_cpu);
+	}
 
 	return pulled_task;
 }
@@ -8092,6 +9777,10 @@
 		p = detach_one_task(&env);
 		if (p) {
 			schedstat_inc(sd->alb_pushed);
+			/*
+			 * We want to potentially lower env.src_cpu's OPP.
+			 */
+			update_capacity_of(env.src_cpu);
 			/* Active balancing done, reset the failure counter. */
 			sd->nr_balance_failed = 0;
 		} else {
@@ -8123,12 +9812,6 @@
  *   needed, they will kick the idle load balancer, which then does idle
  *   load balancing for all the idle CPUs.
  */
-static struct {
-	cpumask_var_t idle_cpus_mask;
-	atomic_t nr_cpus;
-	unsigned long next_balance;     /* in jiffy units */
-} nohz ____cacheline_aligned;
-
 static inline int find_new_ilb(void)
 {
 	int ilb = cpumask_first(nohz.idle_cpus_mask);
@@ -8462,12 +10145,17 @@
 	if (time_before(now, nohz.next_balance))
 		return false;
 
-	if (rq->nr_running >= 2)
+	if (rq->nr_running >= 2 &&
+	    (!energy_aware() || cpu_overutilized(cpu)))
+		return true;
+
+	/* Do idle load balance if there have misfit task */
+	if (energy_aware() && rq->misfit_task)
 		return true;
 
 	rcu_read_lock();
 	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-	if (sds) {
+	if (sds && !energy_aware()) {
 		/*
 		 * XXX: write a coherent comment on why we do this.
 		 * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
@@ -8575,6 +10263,16 @@
 
 	if (static_branch_unlikely(&sched_numa_balancing))
 		task_tick_numa(rq, curr);
+
+#ifdef CONFIG_SMP
+	if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr))) {
+		rq->rd->overutilized = true;
+		trace_sched_overutilized(true);
+	}
+
+	rq->misfit_task = !task_fits_max(curr, rq->cpu);
+#endif
+
 }
 
 /*
@@ -8662,11 +10360,65 @@
 	return false;
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * Propagate the changes of the sched_entity across the tg tree to make it
+ * visible to the root
+ */
+static void propagate_entity_cfs_rq(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq;
+
+	/* Start to propagate at parent */
+	se = se->parent;
+
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+
+		if (cfs_rq_throttled(cfs_rq))
+			break;
+
+		update_load_avg(se, UPDATE_TG);
+	}
+}
+#else
+static void propagate_entity_cfs_rq(struct sched_entity *se) { }
+#endif
+
+static void detach_entity_cfs_rq(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	/* Catch up with the cfs_rq and remove our load when we leave */
+	update_load_avg(se, 0);
+	detach_entity_load_avg(cfs_rq, se);
+	update_tg_load_avg(cfs_rq, false);
+	propagate_entity_cfs_rq(se);
+}
+
+static void attach_entity_cfs_rq(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	/*
+	 * Since the real-depth could have been changed (only FAIR
+	 * class maintain depth value), reset depth properly.
+	 */
+	se->depth = se->parent ? se->parent->depth + 1 : 0;
+#endif
+
+	/* Synchronize entity with its cfs_rq */
+	update_load_avg(se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
+	attach_entity_load_avg(cfs_rq, se);
+	update_tg_load_avg(cfs_rq, false);
+	propagate_entity_cfs_rq(se);
+}
+
 static void detach_task_cfs_rq(struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	u64 now = cfs_rq_clock_task(cfs_rq);
 
 	if (!vruntime_normalized(p)) {
 		/*
@@ -8677,30 +10429,15 @@
 		se->vruntime -= cfs_rq->min_vruntime;
 	}
 
-	/* Catch up with the cfs_rq and remove our load when we leave */
-	update_cfs_rq_load_avg(now, cfs_rq, false);
-	detach_entity_load_avg(cfs_rq, se);
-	update_tg_load_avg(cfs_rq, false);
+	detach_entity_cfs_rq(se);
 }
 
 static void attach_task_cfs_rq(struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	u64 now = cfs_rq_clock_task(cfs_rq);
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	/*
-	 * Since the real-depth could have been changed (only FAIR
-	 * class maintain depth value), reset depth properly.
-	 */
-	se->depth = se->parent ? se->parent->depth + 1 : 0;
-#endif
-
-	/* Synchronize task with its cfs_rq */
-	update_cfs_rq_load_avg(now, cfs_rq, false);
-	attach_entity_load_avg(cfs_rq, se);
-	update_tg_load_avg(cfs_rq, false);
+	attach_entity_cfs_rq(se);
 
 	if (!vruntime_normalized(p))
 		se->vruntime += cfs_rq->min_vruntime;
@@ -8754,6 +10491,9 @@
 	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
 #endif
 #ifdef CONFIG_SMP
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	cfs_rq->propagate_avg = 0;
+#endif
 	atomic_long_set(&cfs_rq->removed_load_avg, 0);
 	atomic_long_set(&cfs_rq->removed_util_avg, 0);
 #endif
@@ -8954,8 +10694,10 @@
 
 		/* Possible calls to update_curr() need rq clock */
 		update_rq_clock(rq);
-		for_each_sched_entity(se)
-			update_cfs_shares(group_cfs_rq(se));
+		for_each_sched_entity(se) {
+			update_load_avg(se, UPDATE_TG);
+			update_cfs_shares(se);
+		}
 		raw_spin_unlock_irqrestore(&rq->lock, flags);
 	}
 
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 1b3c818..2a453df 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -74,3 +74,12 @@
 SCHED_FEAT(LB_MIN, false)
 SCHED_FEAT(ATTACH_AGE_LOAD, true)
 
+/*
+ * Energy aware scheduling. Use platform energy model to guide scheduling
+ * decisions optimizing for energy efficiency.
+ */
+#ifdef CONFIG_DEFAULT_USE_ENERGY_AWARE
+SCHED_FEAT(ENERGY_AWARE, true)
+#else
+SCHED_FEAT(ENERGY_AWARE, false)
+#endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 1d8718d..cf75f00f7 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -23,9 +23,10 @@
  * sched_idle_set_state - Record idle state for the current CPU.
  * @idle_state: State to record.
  */
-void sched_idle_set_state(struct cpuidle_state *idle_state)
+void sched_idle_set_state(struct cpuidle_state *idle_state, int index)
 {
 	idle_set_state(this_rq(), idle_state);
+	idle_set_state_idx(this_rq(), index);
 }
 
 static int __read_mostly cpu_idle_force_poll;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 9ab4d73..d7f46a0 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -8,6 +8,8 @@
 #include <linux/slab.h>
 #include <linux/irq_work.h>
 
+#include "walt.h"
+
 int sched_rr_timeslice = RR_TIMESLICE;
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
@@ -888,6 +890,51 @@
 	return rt_task_of(rt_se)->prio;
 }
 
+static void dump_throttled_rt_tasks(struct rt_rq *rt_rq)
+{
+	struct rt_prio_array *array = &rt_rq->active;
+	struct sched_rt_entity *rt_se;
+	char buf[500];
+	char *pos = buf;
+	char *end = buf + sizeof(buf);
+	int idx;
+
+	pos += snprintf(pos, sizeof(buf),
+		"sched: RT throttling activated for rt_rq %p (cpu %d)\n",
+		rt_rq, cpu_of(rq_of_rt_rq(rt_rq)));
+
+	if (bitmap_empty(array->bitmap, MAX_RT_PRIO))
+		goto out;
+
+	pos += snprintf(pos, end - pos, "potential CPU hogs:\n");
+	idx = sched_find_first_bit(array->bitmap);
+	while (idx < MAX_RT_PRIO) {
+		list_for_each_entry(rt_se, array->queue + idx, run_list) {
+			struct task_struct *p;
+
+			if (!rt_entity_is_task(rt_se))
+				continue;
+
+			p = rt_task_of(rt_se);
+			if (pos < end)
+				pos += snprintf(pos, end - pos, "\t%s (%d)\n",
+					p->comm, p->pid);
+		}
+		idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx + 1);
+	}
+out:
+#ifdef CONFIG_PANIC_ON_RT_THROTTLING
+	/*
+	 * Use pr_err() in the BUG() case since printk_sched() will
+	 * not get flushed and deadlock is not a concern.
+	 */
+	pr_err("%s", buf);
+	BUG();
+#else
+	printk_deferred("%s", buf);
+#endif
+}
+
 static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 {
 	u64 runtime = sched_rt_runtime(rt_rq);
@@ -911,8 +958,14 @@
 		 * but accrue some time due to boosting.
 		 */
 		if (likely(rt_b->rt_runtime)) {
+			static bool once = false;
+
 			rt_rq->rt_throttled = 1;
-			printk_deferred_once("sched: RT throttling activated\n");
+
+			if (!once) {
+				once = true;
+				dump_throttled_rt_tasks(rt_rq);
+			}
 		} else {
 			/*
 			 * In case we did anyway, make it go away,
@@ -1313,6 +1366,7 @@
 		rt_se->timeout = 0;
 
 	enqueue_rt_entity(rt_se, flags);
+	walt_inc_cumulative_runnable_avg(rq, p);
 
 	if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
 		enqueue_pushable_task(rq, p);
@@ -1324,6 +1378,7 @@
 
 	update_curr_rt(rq);
 	dequeue_rt_entity(rt_se, flags);
+	walt_dec_cumulative_runnable_avg(rq, p);
 
 	dequeue_pushable_task(rq, p);
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ec6e838..99293bb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -413,6 +413,7 @@
 	unsigned long runnable_load_avg;
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	unsigned long tg_load_avg_contrib;
+	unsigned long propagate_avg;
 #endif
 	atomic_long_t removed_load_avg, removed_util_avg;
 #ifndef CONFIG_64BIT
@@ -447,6 +448,10 @@
 	struct list_head leaf_cfs_rq_list;
 	struct task_group *tg;	/* group that "owns" this runqueue */
 
+#ifdef CONFIG_SCHED_WALT
+	u64 cumulative_runnable_avg;
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 	int runtime_enabled;
 	u64 runtime_expires;
@@ -542,6 +547,12 @@
 
 #ifdef CONFIG_SMP
 
+struct max_cpu_capacity {
+	raw_spinlock_t lock;
+	unsigned long val;
+	int cpu;
+};
+
 /*
  * We add the notion of a root-domain which will be used to define per-domain
  * variables. Each exclusive cpuset essentially defines an island domain by
@@ -560,6 +571,9 @@
 	/* Indicate more than one runnable task for any CPU */
 	bool overload;
 
+	/* Indicate one or more cpus over-utilized (tipping point) */
+	bool overutilized;
+
 	/*
 	 * The bit corresponding to a CPU gets set here if such CPU has more
 	 * than one runnable -deadline task (as it is below for RT tasks).
@@ -589,7 +603,11 @@
 	cpumask_var_t rto_mask;
 	struct cpupri cpupri;
 
-	unsigned long max_cpu_capacity;
+	/* Maximum cpu capacity in the system. */
+	struct max_cpu_capacity max_cpu_capacity;
+
+	/* First cpu with maximum and minimum original capacity */
+	int max_cap_orig_cpu, min_cap_orig_cpu;
 };
 
 extern struct root_domain def_root_domain;
@@ -623,6 +641,7 @@
 #endif
 	#define CPU_LOAD_IDX_MAX 5
 	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+	unsigned int misfit_task;
 #ifdef CONFIG_NO_HZ_COMMON
 #ifdef CONFIG_SMP
 	unsigned long last_load_update_tick;
@@ -632,6 +651,14 @@
 #ifdef CONFIG_NO_HZ_FULL
 	unsigned long last_sched_tick;
 #endif
+
+#ifdef CONFIG_CPU_QUIET
+	/* time-based average load */
+	u64 nr_last_stamp;
+	u64 nr_running_integral;
+	seqcount_t ave_seqcnt;
+#endif
+
 	/* capture load from *all* tasks on this cpu: */
 	struct load_weight load;
 	unsigned long nr_load_updates;
@@ -644,6 +671,7 @@
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
+	struct list_head *tmp_alone_branch;
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 	/*
@@ -693,6 +721,30 @@
 	u64 max_idle_balance_cost;
 #endif
 
+#ifdef CONFIG_SCHED_WALT
+	/*
+	 * max_freq = user or thermal defined maximum
+	 * max_possible_freq = maximum supported by hardware
+	 */
+	unsigned int cur_freq, max_freq, min_freq, max_possible_freq;
+	struct cpumask freq_domain_cpumask;
+
+	u64 cumulative_runnable_avg;
+	int efficiency; /* Differentiate cpus with different IPC capability */
+	int load_scale_factor;
+	int capacity;
+	int max_possible_capacity;
+	u64 window_start;
+	u64 curr_runnable_sum;
+	u64 prev_runnable_sum;
+	u64 nt_curr_runnable_sum;
+	u64 nt_prev_runnable_sum;
+	u64 cur_irqload;
+	u64 avg_irqload;
+	u64 irqload_ts;
+#endif /* CONFIG_SCHED_WALT */
+
+
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 	u64 prev_irq_time;
 #endif
@@ -731,6 +783,9 @@
 	/* try_to_wake_up() stats */
 	unsigned int ttwu_count;
 	unsigned int ttwu_local;
+#ifdef CONFIG_SMP
+	struct eas_stats eas_stats;
+#endif
 #endif
 
 #ifdef CONFIG_SMP
@@ -740,6 +795,7 @@
 #ifdef CONFIG_CPU_IDLE
 	/* Must be inspected within a rcu lock section */
 	struct cpuidle_state *idle_state;
+	int idle_state_idx;
 #endif
 };
 
@@ -889,6 +945,8 @@
 DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
 DECLARE_PER_CPU(struct sched_domain *, sd_numa);
 DECLARE_PER_CPU(struct sched_domain *, sd_asym);
+DECLARE_PER_CPU(struct sched_domain *, sd_ea);
+DECLARE_PER_CPU(struct sched_domain *, sd_scs);
 
 struct sched_group_capacity {
 	atomic_t ref;
@@ -896,7 +954,9 @@
 	 * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
 	 * for a single CPU.
 	 */
-	unsigned int capacity;
+	unsigned long capacity;
+	unsigned long max_capacity; /* Max per-cpu capacity in group */
+	unsigned long min_capacity; /* Min per-CPU capacity in group */
 	unsigned long next_update;
 	int imbalance; /* XXX unrelated to capacity but shared group state */
 
@@ -909,6 +969,7 @@
 
 	unsigned int group_weight;
 	struct sched_group_capacity *sgc;
+	const struct sched_group_energy *sge;
 
 	/*
 	 * The CPUs this group covers.
@@ -1217,6 +1278,7 @@
 #else
 #define ENQUEUE_MIGRATED	0x00
 #endif
+#define ENQUEUE_WAKEUP_NEW	0x40
 
 #define RETRY_TASK		((void *)-1UL)
 
@@ -1307,6 +1369,7 @@
 
 #ifdef CONFIG_SMP
 
+extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
 extern void update_group_capacity(struct sched_domain *sd, int cpu);
 
 extern void trigger_load_balance(struct rq *rq);
@@ -1327,6 +1390,17 @@
 	SCHED_WARN_ON(!rcu_read_lock_held());
 	return rq->idle_state;
 }
+
+static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
+{
+	rq->idle_state_idx = idle_state_idx;
+}
+
+static inline int idle_get_state_idx(struct rq *rq)
+{
+	WARN_ON(!rcu_read_lock_held());
+	return rq->idle_state_idx;
+}
 #else
 static inline void idle_set_state(struct rq *rq,
 				  struct cpuidle_state *idle_state)
@@ -1337,6 +1411,15 @@
 {
 	return NULL;
 }
+
+static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
+{
+}
+
+static inline int idle_get_state_idx(struct rq *rq)
+{
+	return -1;
+}
 #endif
 
 extern void sysrq_sched_debug_show(void);
@@ -1391,7 +1474,7 @@
 static inline void sched_update_tick_dependency(struct rq *rq) { }
 #endif
 
-static inline void add_nr_running(struct rq *rq, unsigned count)
+static inline void __add_nr_running(struct rq *rq, unsigned count)
 {
 	unsigned prev_nr = rq->nr_running;
 
@@ -1407,13 +1490,50 @@
 	sched_update_tick_dependency(rq);
 }
 
-static inline void sub_nr_running(struct rq *rq, unsigned count)
+static inline void __sub_nr_running(struct rq *rq, unsigned count)
 {
 	rq->nr_running -= count;
 	/* Check if we still need preemption */
 	sched_update_tick_dependency(rq);
 }
 
+#ifdef CONFIG_CPU_QUIET
+#define NR_AVE_SCALE(x)		((x) << FSHIFT)
+static inline u64 do_nr_running_integral(struct rq *rq)
+{
+	s64 nr, deltax;
+	u64 nr_running_integral = rq->nr_running_integral;
+
+	deltax = rq->clock_task - rq->nr_last_stamp;
+	nr = NR_AVE_SCALE(rq->nr_running);
+
+	nr_running_integral += nr * deltax;
+
+	return nr_running_integral;
+}
+
+static inline void add_nr_running(struct rq *rq, unsigned count)
+{
+	write_seqcount_begin(&rq->ave_seqcnt);
+	rq->nr_running_integral = do_nr_running_integral(rq);
+	rq->nr_last_stamp = rq->clock_task;
+	__add_nr_running(rq, count);
+	write_seqcount_end(&rq->ave_seqcnt);
+}
+
+static inline void sub_nr_running(struct rq *rq, unsigned count)
+{
+	write_seqcount_begin(&rq->ave_seqcnt);
+	rq->nr_running_integral = do_nr_running_integral(rq);
+	rq->nr_last_stamp = rq->clock_task;
+	__sub_nr_running(rq, count);
+	write_seqcount_end(&rq->ave_seqcnt);
+}
+#else
+#define add_nr_running __add_nr_running
+#define sub_nr_running __sub_nr_running
+#endif
+
 static inline void rq_last_tick_reset(struct rq *rq)
 {
 #ifdef CONFIG_NO_HZ_FULL
@@ -1486,10 +1606,146 @@
 }
 #endif
 
+#ifdef CONFIG_SMP
+static inline unsigned long capacity_of(int cpu)
+{
+	return cpu_rq(cpu)->cpu_capacity;
+}
+
+static inline unsigned long capacity_orig_of(int cpu)
+{
+	return cpu_rq(cpu)->cpu_capacity_orig;
+}
+
+extern unsigned int sysctl_sched_use_walt_cpu_util;
+extern unsigned int walt_ravg_window;
+extern unsigned int walt_disabled;
+
+/*
+ * cpu_util returns the amount of capacity of a CPU that is used by CFS
+ * tasks. The unit of the return value must be the one of capacity so we can
+ * compare the utilization with the capacity of the CPU that is available for
+ * CFS task (ie cpu_capacity).
+ *
+ * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
+ * recent utilization of currently non-runnable tasks on a CPU. It represents
+ * the amount of utilization of a CPU in the range [0..capacity_orig] where
+ * capacity_orig is the cpu_capacity available at the highest frequency
+ * (arch_scale_freq_capacity()).
+ * The utilization of a CPU converges towards a sum equal to or less than the
+ * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
+ * the running time on this CPU scaled by capacity_curr.
+ *
+ * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
+ * higher than capacity_orig because of unfortunate rounding in
+ * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
+ * the average stabilizes with the new running time. We need to check that the
+ * utilization stays within the range of [0..capacity_orig] and cap it if
+ * necessary. Without utilization capping, a group could be seen as overloaded
+ * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
+ * available capacity. We allow utilization to overshoot capacity_curr (but not
+ * capacity_orig) as it useful for predicting the capacity required after task
+ * migrations (scheduler-driven DVFS).
+ */
+static inline unsigned long __cpu_util(int cpu, int delta)
+{
+	unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
+	unsigned long capacity = capacity_orig_of(cpu);
+
+#ifdef CONFIG_SCHED_WALT
+	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
+		util = cpu_rq(cpu)->prev_runnable_sum << SCHED_CAPACITY_SHIFT;
+		util = div_u64(util, walt_ravg_window);
+	}
+#endif
+	delta += util;
+	if (delta < 0)
+		return 0;
+
+	return (delta >= capacity) ? capacity : delta;
+}
+
+static inline unsigned long cpu_util(int cpu)
+{
+	return __cpu_util(cpu, 0);
+}
+
+#endif
+
+#ifdef CONFIG_CPU_FREQ_GOV_SCHED
+#define capacity_max SCHED_CAPACITY_SCALE
+extern unsigned int capacity_margin;
+extern struct static_key __sched_freq;
+
+static inline bool sched_freq(void)
+{
+	return static_key_false(&__sched_freq);
+}
+
+DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
+void update_cpu_capacity_request(int cpu, bool request);
+
+static inline void set_cfs_cpu_capacity(int cpu, bool request,
+					unsigned long capacity)
+{
+	struct sched_capacity_reqs *scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
+
+#ifdef CONFIG_SCHED_WALT
+       if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
+		int rtdl = scr->rt + scr->dl;
+		/*
+		 * WALT tracks the utilization of a CPU considering the load
+		 * generated by all the scheduling classes.
+		 * Since the following call to:
+		 *    update_cpu_capacity
+		 * is already adding the RT and DL utilizations let's remove
+		 * these contributions from the WALT signal.
+		 */
+		if (capacity > rtdl)
+			capacity -= rtdl;
+		else
+			capacity = 0;
+	}
+#endif
+	if (scr->cfs != capacity) {
+		scr->cfs = capacity;
+		update_cpu_capacity_request(cpu, request);
+	}
+}
+
+static inline void set_rt_cpu_capacity(int cpu, bool request,
+				       unsigned long capacity)
+{
+	if (per_cpu(cpu_sched_capacity_reqs, cpu).rt != capacity) {
+		per_cpu(cpu_sched_capacity_reqs, cpu).rt = capacity;
+		update_cpu_capacity_request(cpu, request);
+	}
+}
+
+static inline void set_dl_cpu_capacity(int cpu, bool request,
+				       unsigned long capacity)
+{
+	if (per_cpu(cpu_sched_capacity_reqs, cpu).dl != capacity) {
+		per_cpu(cpu_sched_capacity_reqs, cpu).dl = capacity;
+		update_cpu_capacity_request(cpu, request);
+	}
+}
+#else
+static inline bool sched_freq(void) { return false; }
+static inline void set_cfs_cpu_capacity(int cpu, bool request,
+					unsigned long capacity)
+{ }
+static inline void set_rt_cpu_capacity(int cpu, bool request,
+				       unsigned long capacity)
+{ }
+static inline void set_dl_cpu_capacity(int cpu, bool request,
+				       unsigned long capacity)
+{ }
+#endif
+
 static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
 {
 	rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
-	sched_avg_update(rq);
 }
 #else
 static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
@@ -1524,6 +1780,9 @@
 	raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
 }
 
+extern struct rq *lock_rq_of(struct task_struct *p, struct rq_flags *flags);
+extern void unlock_rq_of(struct rq *rq, struct task_struct *p, struct rq_flags *flags);
+
 #ifdef CONFIG_SMP
 #ifdef CONFIG_PREEMPT
 
@@ -1596,7 +1855,8 @@
 static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(busiest->lock)
 {
-	raw_spin_unlock(&busiest->lock);
+	if (this_rq != busiest)
+		raw_spin_unlock(&busiest->lock);
 	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
 }
 
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index 87e2c9f..6d74a7c 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -12,6 +12,28 @@
  */
 #define SCHEDSTAT_VERSION 15
 
+#ifdef CONFIG_SMP
+static inline void show_easstat(struct seq_file *seq, struct eas_stats *stats)
+{
+	/* eas-specific runqueue stats */
+	seq_printf(seq, "eas %llu %llu %llu %llu %llu %llu ",
+	    stats->sis_attempts, stats->sis_idle, stats->sis_cache_affine,
+	    stats->sis_suff_cap, stats->sis_idle_cpu, stats->sis_count);
+
+	seq_printf(seq, "%llu %llu %llu %llu %llu %llu %llu ",
+	    stats->secb_attempts, stats->secb_sync, stats->secb_idle_bt,
+	    stats->secb_insuff_cap, stats->secb_no_nrg_sav,
+	    stats->secb_nrg_sav, stats->secb_count);
+
+	seq_printf(seq, "%llu %llu %llu %llu %llu ",
+	    stats->fbt_attempts, stats->fbt_no_cpu, stats->fbt_no_sd,
+	    stats->fbt_pref_idle, stats->fbt_count);
+
+	seq_printf(seq, "%llu %llu\n",
+	    stats->cas_attempts, stats->cas_count);
+}
+#endif
+
 static int show_schedstat(struct seq_file *seq, void *v)
 {
 	int cpu;
@@ -40,6 +62,8 @@
 		seq_printf(seq, "\n");
 
 #ifdef CONFIG_SMP
+		show_easstat(seq, &rq->eas_stats);
+
 		/* domain-specific stats */
 		rcu_read_lock();
 		for_each_domain(cpu, sd) {
@@ -66,6 +90,8 @@
 			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
 			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
 			    sd->ttwu_move_balance);
+
+			show_easstat(seq, &sd->eas_stats);
 		}
 		rcu_read_unlock();
 #endif
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 604297a..836a389 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -1,4 +1,5 @@
 #include "sched.h"
+#include "walt.h"
 
 /*
  * stop-task scheduling class.
@@ -42,12 +43,14 @@
 enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
 {
 	add_nr_running(rq, 1);
+	walt_inc_cumulative_runnable_avg(rq, p);
 }
 
 static void
 dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
 {
 	sub_nr_running(rq, 1);
+	walt_dec_cumulative_runnable_avg(rq, p);
 }
 
 static void yield_task_stop(struct rq *rq)
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
new file mode 100644
index 0000000..654934f
--- /dev/null
+++ b/kernel/sched/tune.c
@@ -0,0 +1,956 @@
+#include <linux/cgroup.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+
+#include <trace/events/sched.h>
+
+#include "sched.h"
+#include "tune.h"
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+bool schedtune_initialized = false;
+#endif
+
+unsigned int sysctl_sched_cfs_boost __read_mostly;
+
+extern struct reciprocal_value schedtune_spc_rdiv;
+extern struct target_nrg schedtune_target_nrg;
+
+/* Performance Boost region (B) threshold params */
+static int perf_boost_idx;
+
+/* Performance Constraint region (C) threshold params */
+static int perf_constrain_idx;
+
+/**
+ * Performance-Energy (P-E) Space thresholds constants
+ */
+struct threshold_params {
+	int nrg_gain;
+	int cap_gain;
+};
+
+/*
+ * System specific P-E space thresholds constants
+ */
+static struct threshold_params
+threshold_gains[] = {
+	{ 0, 5 }, /*   < 10% */
+	{ 1, 5 }, /*   < 20% */
+	{ 2, 5 }, /*   < 30% */
+	{ 3, 5 }, /*   < 40% */
+	{ 4, 5 }, /*   < 50% */
+	{ 5, 4 }, /*   < 60% */
+	{ 5, 3 }, /*   < 70% */
+	{ 5, 2 }, /*   < 80% */
+	{ 5, 1 }, /*   < 90% */
+	{ 5, 0 }  /* <= 100% */
+};
+
+static int
+__schedtune_accept_deltas(int nrg_delta, int cap_delta,
+			  int perf_boost_idx, int perf_constrain_idx)
+{
+	int payoff = -INT_MAX;
+	int gain_idx = -1;
+
+	/* Performance Boost (B) region */
+	if (nrg_delta >= 0 && cap_delta > 0)
+		gain_idx = perf_boost_idx;
+	/* Performance Constraint (C) region */
+	else if (nrg_delta < 0 && cap_delta <= 0)
+		gain_idx = perf_constrain_idx;
+
+	/* Default: reject schedule candidate */
+	if (gain_idx == -1)
+		return payoff;
+
+	/*
+	 * Evaluate "Performance Boost" vs "Energy Increase"
+	 *
+	 * - Performance Boost (B) region
+	 *
+	 *   Condition: nrg_delta > 0 && cap_delta > 0
+	 *   Payoff criteria:
+	 *     cap_gain / nrg_gain  < cap_delta / nrg_delta =
+	 *     cap_gain * nrg_delta < cap_delta * nrg_gain
+	 *   Note that since both nrg_gain and nrg_delta are positive, the
+	 *   inequality does not change. Thus:
+	 *
+	 *     payoff = (cap_delta * nrg_gain) - (cap_gain * nrg_delta)
+	 *
+	 * - Performance Constraint (C) region
+	 *
+	 *   Condition: nrg_delta < 0 && cap_delta < 0
+	 *   payoff criteria:
+	 *     cap_gain / nrg_gain  > cap_delta / nrg_delta =
+	 *     cap_gain * nrg_delta < cap_delta * nrg_gain
+	 *   Note that since nrg_gain > 0 while nrg_delta < 0, the
+	 *   inequality change. Thus:
+	 *
+	 *     payoff = (cap_delta * nrg_gain) - (cap_gain * nrg_delta)
+	 *
+	 * This means that, in case of same positive defined {cap,nrg}_gain
+	 * for both the B and C regions, we can use the same payoff formula
+	 * where a positive value represents the accept condition.
+	 */
+	payoff  = cap_delta * threshold_gains[gain_idx].nrg_gain;
+	payoff -= nrg_delta * threshold_gains[gain_idx].cap_gain;
+
+	return payoff;
+}
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+
+/*
+ * EAS scheduler tunables for task groups.
+ */
+
+/* SchdTune tunables for a group of tasks */
+struct schedtune {
+	/* SchedTune CGroup subsystem */
+	struct cgroup_subsys_state css;
+
+	/* Boost group allocated ID */
+	int idx;
+
+	/* Boost value for tasks on that SchedTune CGroup */
+	int boost;
+
+	/* Performance Boost (B) region threshold params */
+	int perf_boost_idx;
+
+	/* Performance Constraint (C) region threshold params */
+	int perf_constrain_idx;
+
+	/* Hint to bias scheduling of tasks on that SchedTune CGroup
+	 * towards idle CPUs */
+	int prefer_idle;
+};
+
+static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct schedtune, css) : NULL;
+}
+
+static inline struct schedtune *task_schedtune(struct task_struct *tsk)
+{
+	return css_st(task_css(tsk, schedtune_cgrp_id));
+}
+
+static inline struct schedtune *parent_st(struct schedtune *st)
+{
+	return css_st(st->css.parent);
+}
+
+/*
+ * SchedTune root control group
+ * The root control group is used to defined a system-wide boosting tuning,
+ * which is applied to all tasks in the system.
+ * Task specific boost tuning could be specified by creating and
+ * configuring a child control group under the root one.
+ * By default, system-wide boosting is disabled, i.e. no boosting is applied
+ * to tasks which are not into a child control group.
+ */
+static struct schedtune
+root_schedtune = {
+	.boost	= 0,
+	.perf_boost_idx = 0,
+	.perf_constrain_idx = 0,
+	.prefer_idle = 0,
+};
+
+int
+schedtune_accept_deltas(int nrg_delta, int cap_delta,
+			struct task_struct *task)
+{
+	struct schedtune *ct;
+	int perf_boost_idx;
+	int perf_constrain_idx;
+
+	/* Optimal (O) region */
+	if (nrg_delta < 0 && cap_delta > 0) {
+		trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0);
+		return INT_MAX;
+	}
+
+	/* Suboptimal (S) region */
+	if (nrg_delta > 0 && cap_delta < 0) {
+		trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5);
+		return -INT_MAX;
+	}
+
+	/* Get task specific perf Boost/Constraints indexes */
+	rcu_read_lock();
+	ct = task_schedtune(task);
+	perf_boost_idx = ct->perf_boost_idx;
+	perf_constrain_idx = ct->perf_constrain_idx;
+	rcu_read_unlock();
+
+	return __schedtune_accept_deltas(nrg_delta, cap_delta,
+			perf_boost_idx, perf_constrain_idx);
+}
+
+/*
+ * Maximum number of boost groups to support
+ * When per-task boosting is used we still allow only limited number of
+ * boost groups for two main reasons:
+ * 1. on a real system we usually have only few classes of workloads which
+ *    make sense to boost with different values (e.g. background vs foreground
+ *    tasks, interactive vs low-priority tasks)
+ * 2. a limited number allows for a simpler and more memory/time efficient
+ *    implementation especially for the computation of the per-CPU boost
+ *    value
+ */
+#define BOOSTGROUPS_COUNT 5
+
+/* Array of configured boostgroups */
+static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
+	&root_schedtune,
+	NULL,
+};
+
+/* SchedTune boost groups
+ * Keep track of all the boost groups which impact on CPU, for example when a
+ * CPU has two RUNNABLE tasks belonging to two different boost groups and thus
+ * likely with different boost values.
+ * Since on each system we expect only a limited number of boost groups, here
+ * we use a simple array to keep track of the metrics required to compute the
+ * maximum per-CPU boosting value.
+ */
+struct boost_groups {
+	/* Maximum boost value for all RUNNABLE tasks on a CPU */
+	bool idle;
+	int boost_max;
+	struct {
+		/* The boost for tasks on that boost group */
+		int boost;
+		/* Count of RUNNABLE tasks on that boost group */
+		unsigned tasks;
+	} group[BOOSTGROUPS_COUNT];
+	/* CPU's boost group locking */
+	raw_spinlock_t lock;
+};
+
+/* Boost groups affecting each CPU in the system */
+DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);
+
+static void
+schedtune_cpu_update(int cpu)
+{
+	struct boost_groups *bg;
+	int boost_max;
+	int idx;
+
+	bg = &per_cpu(cpu_boost_groups, cpu);
+
+	/* The root boost group is always active */
+	boost_max = bg->group[0].boost;
+	for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) {
+		/*
+		 * A boost group affects a CPU only if it has
+		 * RUNNABLE tasks on that CPU
+		 */
+		if (bg->group[idx].tasks == 0)
+			continue;
+
+		boost_max = max(boost_max, bg->group[idx].boost);
+	}
+	/* Ensures boost_max is non-negative when all cgroup boost values
+	 * are neagtive. Avoids under-accounting of cpu capacity which may cause
+	 * task stacking and frequency spikes.*/
+	boost_max = max(boost_max, 0);
+	bg->boost_max = boost_max;
+}
+
+static int
+schedtune_boostgroup_update(int idx, int boost)
+{
+	struct boost_groups *bg;
+	int cur_boost_max;
+	int old_boost;
+	int cpu;
+
+	/* Update per CPU boost groups */
+	for_each_possible_cpu(cpu) {
+		bg = &per_cpu(cpu_boost_groups, cpu);
+
+		/*
+		 * Keep track of current boost values to compute the per CPU
+		 * maximum only when it has been affected by the new value of
+		 * the updated boost group
+		 */
+		cur_boost_max = bg->boost_max;
+		old_boost = bg->group[idx].boost;
+
+		/* Update the boost value of this boost group */
+		bg->group[idx].boost = boost;
+
+		/* Check if this update increase current max */
+		if (boost > cur_boost_max && bg->group[idx].tasks) {
+			bg->boost_max = boost;
+			trace_sched_tune_boostgroup_update(cpu, 1, bg->boost_max);
+			continue;
+		}
+
+		/* Check if this update has decreased current max */
+		if (cur_boost_max == old_boost && old_boost > boost) {
+			schedtune_cpu_update(cpu);
+			trace_sched_tune_boostgroup_update(cpu, -1, bg->boost_max);
+			continue;
+		}
+
+		trace_sched_tune_boostgroup_update(cpu, 0, bg->boost_max);
+	}
+
+	return 0;
+}
+
+#define ENQUEUE_TASK  1
+#define DEQUEUE_TASK -1
+
+static inline void
+schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count)
+{
+	struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+	int tasks = bg->group[idx].tasks + task_count;
+
+	/* Update boosted tasks count while avoiding to make it negative */
+	bg->group[idx].tasks = max(0, tasks);
+
+	trace_sched_tune_tasks_update(p, cpu, tasks, idx,
+			bg->group[idx].boost, bg->boost_max);
+
+	/* Boost group activation or deactivation on that RQ */
+	if (tasks == 1 || tasks == 0)
+		schedtune_cpu_update(cpu);
+}
+
+/*
+ * NOTE: This function must be called while holding the lock on the CPU RQ
+ */
+void schedtune_enqueue_task(struct task_struct *p, int cpu)
+{
+	struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+	unsigned long irq_flags;
+	struct schedtune *st;
+	int idx;
+
+	if (!unlikely(schedtune_initialized))
+		return;
+
+	/*
+	 * When a task is marked PF_EXITING by do_exit() it's going to be
+	 * dequeued and enqueued multiple times in the exit path.
+	 * Thus we avoid any further update, since we do not want to change
+	 * CPU boosting while the task is exiting.
+	 */
+	if (p->flags & PF_EXITING)
+		return;
+
+	/*
+	 * Boost group accouting is protected by a per-cpu lock and requires
+	 * interrupt to be disabled to avoid race conditions for example on
+	 * do_exit()::cgroup_exit() and task migration.
+	 */
+	raw_spin_lock_irqsave(&bg->lock, irq_flags);
+	rcu_read_lock();
+
+	st = task_schedtune(p);
+	idx = st->idx;
+
+	schedtune_tasks_update(p, cpu, idx, ENQUEUE_TASK);
+
+	rcu_read_unlock();
+	raw_spin_unlock_irqrestore(&bg->lock, irq_flags);
+}
+
+int schedtune_can_attach(struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct cgroup_subsys_state *css;
+	struct boost_groups *bg;
+	struct rq_flags irq_flags;
+	unsigned int cpu;
+	struct rq *rq;
+	int src_bg; /* Source boost group index */
+	int dst_bg; /* Destination boost group index */
+	int tasks;
+
+	if (!unlikely(schedtune_initialized))
+		return 0;
+
+
+	cgroup_taskset_for_each(task, css, tset) {
+
+		/*
+		 * Lock the CPU's RQ the task is enqueued to avoid race
+		 * conditions with migration code while the task is being
+		 * accounted
+		 */
+		rq = lock_rq_of(task, &irq_flags);
+
+		if (!task->on_rq) {
+			unlock_rq_of(rq, task, &irq_flags);
+			continue;
+		}
+
+		/*
+		 * Boost group accouting is protected by a per-cpu lock and requires
+		 * interrupt to be disabled to avoid race conditions on...
+		 */
+		cpu = cpu_of(rq);
+		bg = &per_cpu(cpu_boost_groups, cpu);
+		raw_spin_lock(&bg->lock);
+
+		dst_bg = css_st(css)->idx;
+		src_bg = task_schedtune(task)->idx;
+
+		/*
+		 * Current task is not changing boostgroup, which can
+		 * happen when the new hierarchy is in use.
+		 */
+		if (unlikely(dst_bg == src_bg)) {
+			raw_spin_unlock(&bg->lock);
+			unlock_rq_of(rq, task, &irq_flags);
+			continue;
+		}
+
+		/*
+		 * This is the case of a RUNNABLE task which is switching its
+		 * current boost group.
+		 */
+
+		/* Move task from src to dst boost group */
+		tasks = bg->group[src_bg].tasks - 1;
+		bg->group[src_bg].tasks = max(0, tasks);
+		bg->group[dst_bg].tasks += 1;
+
+		raw_spin_unlock(&bg->lock);
+		unlock_rq_of(rq, task, &irq_flags);
+
+		/* Update CPU boost group */
+		if (bg->group[src_bg].tasks == 0 || bg->group[dst_bg].tasks == 1)
+			schedtune_cpu_update(task_cpu(task));
+
+	}
+
+	return 0;
+}
+
+void schedtune_cancel_attach(struct cgroup_taskset *tset)
+{
+	/* This can happen only if SchedTune controller is mounted with
+	 * other hierarchies ane one of them fails. Since usually SchedTune is
+	 * mouted on its own hierarcy, for the time being we do not implement
+	 * a proper rollback mechanism */
+	WARN(1, "SchedTune cancel attach not implemented");
+}
+
+/*
+ * NOTE: This function must be called while holding the lock on the CPU RQ
+ */
+void schedtune_dequeue_task(struct task_struct *p, int cpu)
+{
+	struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+	unsigned long irq_flags;
+	struct schedtune *st;
+	int idx;
+
+	if (!unlikely(schedtune_initialized))
+		return;
+
+	/*
+	 * When a task is marked PF_EXITING by do_exit() it's going to be
+	 * dequeued and enqueued multiple times in the exit path.
+	 * Thus we avoid any further update, since we do not want to change
+	 * CPU boosting while the task is exiting.
+	 * The last dequeue is already enforce by the do_exit() code path
+	 * via schedtune_exit_task().
+	 */
+	if (p->flags & PF_EXITING)
+		return;
+
+	/*
+	 * Boost group accouting is protected by a per-cpu lock and requires
+	 * interrupt to be disabled to avoid race conditions on...
+	 */
+	raw_spin_lock_irqsave(&bg->lock, irq_flags);
+	rcu_read_lock();
+
+	st = task_schedtune(p);
+	idx = st->idx;
+
+	schedtune_tasks_update(p, cpu, idx, DEQUEUE_TASK);
+
+	rcu_read_unlock();
+	raw_spin_unlock_irqrestore(&bg->lock, irq_flags);
+}
+
+void schedtune_exit_task(struct task_struct *tsk)
+{
+	struct schedtune *st;
+	struct rq_flags irq_flags;
+	unsigned int cpu;
+	struct rq *rq;
+	int idx;
+
+	if (!unlikely(schedtune_initialized))
+		return;
+
+	rq = lock_rq_of(tsk, &irq_flags);
+	rcu_read_lock();
+
+	cpu = cpu_of(rq);
+	st = task_schedtune(tsk);
+	idx = st->idx;
+	schedtune_tasks_update(tsk, cpu, idx, DEQUEUE_TASK);
+
+	rcu_read_unlock();
+	unlock_rq_of(rq, tsk, &irq_flags);
+}
+
+int schedtune_cpu_boost(int cpu)
+{
+	struct boost_groups *bg;
+
+	bg = &per_cpu(cpu_boost_groups, cpu);
+	return bg->boost_max;
+}
+
+int schedtune_task_boost(struct task_struct *p)
+{
+	struct schedtune *st;
+	int task_boost;
+
+	if (!unlikely(schedtune_initialized))
+		return 0;
+
+	/* Get task boost value */
+	rcu_read_lock();
+	st = task_schedtune(p);
+	task_boost = st->boost;
+	rcu_read_unlock();
+
+	return task_boost;
+}
+
+int schedtune_prefer_idle(struct task_struct *p)
+{
+	struct schedtune *st;
+	int prefer_idle;
+
+	if (!unlikely(schedtune_initialized))
+		return 0;
+
+	/* Get prefer_idle value */
+	rcu_read_lock();
+	st = task_schedtune(p);
+	prefer_idle = st->prefer_idle;
+	rcu_read_unlock();
+
+	return prefer_idle;
+}
+
+static u64
+prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	struct schedtune *st = css_st(css);
+
+	return st->prefer_idle;
+}
+
+static int
+prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
+	    u64 prefer_idle)
+{
+	struct schedtune *st = css_st(css);
+	st->prefer_idle = prefer_idle;
+
+	return 0;
+}
+
+static s64
+boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	struct schedtune *st = css_st(css);
+
+	return st->boost;
+}
+
+static int
+boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
+	    s64 boost)
+{
+	struct schedtune *st = css_st(css);
+	unsigned threshold_idx;
+	int boost_pct;
+
+	if (boost < -100 || boost > 100)
+		return -EINVAL;
+	boost_pct = boost;
+
+	/*
+	 * Update threshold params for Performance Boost (B)
+	 * and Performance Constraint (C) regions.
+	 * The current implementatio uses the same cuts for both
+	 * B and C regions.
+	 */
+	threshold_idx = clamp(boost_pct, 0, 99) / 10;
+	st->perf_boost_idx = threshold_idx;
+	st->perf_constrain_idx = threshold_idx;
+
+	st->boost = boost;
+	if (css == &root_schedtune.css) {
+		sysctl_sched_cfs_boost = boost;
+		perf_boost_idx  = threshold_idx;
+		perf_constrain_idx  = threshold_idx;
+	}
+
+	/* Update CPU boost */
+	schedtune_boostgroup_update(st->idx, st->boost);
+
+	trace_sched_tune_config(st->boost);
+
+	return 0;
+}
+
+static struct cftype files[] = {
+	{
+		.name = "boost",
+		.read_s64 = boost_read,
+		.write_s64 = boost_write,
+	},
+	{
+		.name = "prefer_idle",
+		.read_u64 = prefer_idle_read,
+		.write_u64 = prefer_idle_write,
+	},
+	{ }	/* terminate */
+};
+
+static int
+schedtune_boostgroup_init(struct schedtune *st)
+{
+	struct boost_groups *bg;
+	int cpu;
+
+	/* Keep track of allocated boost groups */
+	allocated_group[st->idx] = st;
+
+	/* Initialize the per CPU boost groups */
+	for_each_possible_cpu(cpu) {
+		bg = &per_cpu(cpu_boost_groups, cpu);
+		bg->group[st->idx].boost = 0;
+		bg->group[st->idx].tasks = 0;
+		raw_spin_lock_init(&bg->lock);
+	}
+
+	return 0;
+}
+
+static struct cgroup_subsys_state *
+schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+	struct schedtune *st;
+	int idx;
+
+	if (!parent_css)
+		return &root_schedtune.css;
+
+	/* Allow only single level hierachies */
+	if (parent_css != &root_schedtune.css) {
+		pr_err("Nested SchedTune boosting groups not allowed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Allow only a limited number of boosting groups */
+	for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx)
+		if (!allocated_group[idx])
+			break;
+	if (idx == BOOSTGROUPS_COUNT) {
+		pr_err("Trying to create more than %d SchedTune boosting groups\n",
+		       BOOSTGROUPS_COUNT);
+		return ERR_PTR(-ENOSPC);
+	}
+
+	st = kzalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		goto out;
+
+	/* Initialize per CPUs boost group support */
+	st->idx = idx;
+	if (schedtune_boostgroup_init(st))
+		goto release;
+
+	return &st->css;
+
+release:
+	kfree(st);
+out:
+	return ERR_PTR(-ENOMEM);
+}
+
+static void
+schedtune_boostgroup_release(struct schedtune *st)
+{
+	/* Reset this boost group */
+	schedtune_boostgroup_update(st->idx, 0);
+
+	/* Keep track of allocated boost groups */
+	allocated_group[st->idx] = NULL;
+}
+
+static void
+schedtune_css_free(struct cgroup_subsys_state *css)
+{
+	struct schedtune *st = css_st(css);
+
+	schedtune_boostgroup_release(st);
+	kfree(st);
+}
+
+struct cgroup_subsys schedtune_cgrp_subsys = {
+	.css_alloc	= schedtune_css_alloc,
+	.css_free	= schedtune_css_free,
+	.can_attach     = schedtune_can_attach,
+	.cancel_attach  = schedtune_cancel_attach,
+	.legacy_cftypes	= files,
+	.early_init	= 1,
+};
+
+static inline void
+schedtune_init_cgroups(void)
+{
+	struct boost_groups *bg;
+	int cpu;
+
+	/* Initialize the per CPU boost groups */
+	for_each_possible_cpu(cpu) {
+		bg = &per_cpu(cpu_boost_groups, cpu);
+		memset(bg, 0, sizeof(struct boost_groups));
+		raw_spin_lock_init(&bg->lock);
+	}
+
+	pr_info("schedtune: configured to support %d boost groups\n",
+		BOOSTGROUPS_COUNT);
+
+	schedtune_initialized = true;
+}
+
+#else /* CONFIG_CGROUP_SCHEDTUNE */
+
+int
+schedtune_accept_deltas(int nrg_delta, int cap_delta,
+			struct task_struct *task)
+{
+	/* Optimal (O) region */
+	if (nrg_delta < 0 && cap_delta > 0) {
+		trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0);
+		return INT_MAX;
+	}
+
+	/* Suboptimal (S) region */
+	if (nrg_delta > 0 && cap_delta < 0) {
+		trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5);
+		return -INT_MAX;
+	}
+
+	return __schedtune_accept_deltas(nrg_delta, cap_delta,
+			perf_boost_idx, perf_constrain_idx);
+}
+
+#endif /* CONFIG_CGROUP_SCHEDTUNE */
+
+int
+sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write,
+			       void __user *buffer, size_t *lenp,
+			       loff_t *ppos)
+{
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	unsigned threshold_idx;
+	int boost_pct;
+
+	if (ret || !write)
+		return ret;
+
+	if (sysctl_sched_cfs_boost < -100 || sysctl_sched_cfs_boost > 100)
+		return -EINVAL;
+	boost_pct = sysctl_sched_cfs_boost;
+
+	/*
+	 * Update threshold params for Performance Boost (B)
+	 * and Performance Constraint (C) regions.
+	 * The current implementatio uses the same cuts for both
+	 * B and C regions.
+	 */
+	threshold_idx = clamp(boost_pct, 0, 99) / 10;
+	perf_boost_idx = threshold_idx;
+	perf_constrain_idx = threshold_idx;
+
+	return 0;
+}
+
+#ifdef CONFIG_SCHED_DEBUG
+static void
+schedtune_test_nrg(unsigned long delta_pwr)
+{
+	unsigned long test_delta_pwr;
+	unsigned long test_norm_pwr;
+	int idx;
+
+	/*
+	 * Check normalization constants using some constant system
+	 * energy values
+	 */
+	pr_info("schedtune: verify normalization constants...\n");
+	for (idx = 0; idx < 6; ++idx) {
+		test_delta_pwr = delta_pwr >> idx;
+
+		/* Normalize on max energy for target platform */
+		test_norm_pwr = reciprocal_divide(
+					test_delta_pwr << SCHED_CAPACITY_SHIFT,
+					schedtune_target_nrg.rdiv);
+
+		pr_info("schedtune: max_pwr/2^%d: %4lu => norm_pwr: %5lu\n",
+			idx, test_delta_pwr, test_norm_pwr);
+	}
+}
+#else
+#define schedtune_test_nrg(delta_pwr)
+#endif
+
+/*
+ * Compute the min/max power consumption of a cluster and all its CPUs
+ */
+static void
+schedtune_add_cluster_nrg(
+		struct sched_domain *sd,
+		struct sched_group *sg,
+		struct target_nrg *ste)
+{
+	struct sched_domain *sd2;
+	struct sched_group *sg2;
+
+	struct cpumask *cluster_cpus;
+	char str[32];
+
+	unsigned long min_pwr;
+	unsigned long max_pwr;
+	int cpu;
+
+	/* Get Cluster energy using EM data for the first CPU */
+	cluster_cpus = sched_group_cpus(sg);
+	snprintf(str, 32, "CLUSTER[%*pbl]",
+		 cpumask_pr_args(cluster_cpus));
+
+	min_pwr = sg->sge->idle_states[sg->sge->nr_idle_states - 1].power;
+	max_pwr = sg->sge->cap_states[sg->sge->nr_cap_states - 1].power;
+	pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
+		str, min_pwr, max_pwr);
+
+	/*
+	 * Keep track of this cluster's energy in the computation of the
+	 * overall system energy
+	 */
+	ste->min_power += min_pwr;
+	ste->max_power += max_pwr;
+
+	/* Get CPU energy using EM data for each CPU in the group */
+	for_each_cpu(cpu, cluster_cpus) {
+		/* Get a SD view for the specific CPU */
+		for_each_domain(cpu, sd2) {
+			/* Get the CPU group */
+			sg2 = sd2->groups;
+			min_pwr = sg2->sge->idle_states[sg2->sge->nr_idle_states - 1].power;
+			max_pwr = sg2->sge->cap_states[sg2->sge->nr_cap_states - 1].power;
+
+			ste->min_power += min_pwr;
+			ste->max_power += max_pwr;
+
+			snprintf(str, 32, "CPU[%d]", cpu);
+			pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
+				str, min_pwr, max_pwr);
+
+			/*
+			 * Assume we have EM data only at the CPU and
+			 * the upper CLUSTER level
+			 */
+			BUG_ON(!cpumask_equal(
+				sched_group_cpus(sg),
+				sched_group_cpus(sd2->parent->groups)
+				));
+			break;
+		}
+	}
+}
+
+/*
+ * Initialize the constants required to compute normalized energy.
+ * The values of these constants depends on the EM data for the specific
+ * target system and topology.
+ * Thus, this function is expected to be called by the code
+ * that bind the EM to the topology information.
+ */
+static int
+schedtune_init(void)
+{
+	struct target_nrg *ste = &schedtune_target_nrg;
+	unsigned long delta_pwr = 0;
+	struct sched_domain *sd;
+	struct sched_group *sg;
+
+	pr_info("schedtune: init normalization constants...\n");
+	ste->max_power = 0;
+	ste->min_power = 0;
+
+	rcu_read_lock();
+
+	/*
+	 * When EAS is in use, we always have a pointer to the highest SD
+	 * which provides EM data.
+	 */
+	sd = rcu_dereference(per_cpu(sd_ea, cpumask_first(cpu_online_mask)));
+	if (!sd) {
+		pr_info("schedtune: no energy model data\n");
+		goto nodata;
+	}
+
+	sg = sd->groups;
+	do {
+		schedtune_add_cluster_nrg(sd, sg, ste);
+	} while (sg = sg->next, sg != sd->groups);
+
+	rcu_read_unlock();
+
+	pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
+		"SYSTEM", ste->min_power, ste->max_power);
+
+	/* Compute normalization constants */
+	delta_pwr = ste->max_power - ste->min_power;
+	ste->rdiv = reciprocal_value(delta_pwr);
+	pr_info("schedtune: using normalization constants mul: %u sh1: %u sh2: %u\n",
+		ste->rdiv.m, ste->rdiv.sh1, ste->rdiv.sh2);
+
+	schedtune_test_nrg(delta_pwr);
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+	schedtune_init_cgroups();
+#else
+	pr_info("schedtune: configured to support global boosting only\n");
+#endif
+
+	schedtune_spc_rdiv = reciprocal_value(100);
+
+	return 0;
+
+nodata:
+	pr_warning("schedtune: disabled!\n");
+	rcu_read_unlock();
+	return -EINVAL;
+}
+postcore_initcall(schedtune_init);
diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h
new file mode 100644
index 0000000..4f64417
--- /dev/null
+++ b/kernel/sched/tune.h
@@ -0,0 +1,55 @@
+
+#ifdef CONFIG_SCHED_TUNE
+
+#include <linux/reciprocal_div.h>
+
+/*
+ * System energy normalization constants
+ */
+struct target_nrg {
+	unsigned long min_power;
+	unsigned long max_power;
+	struct reciprocal_value rdiv;
+};
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+
+int schedtune_cpu_boost(int cpu);
+int schedtune_task_boost(struct task_struct *tsk);
+
+int schedtune_prefer_idle(struct task_struct *tsk);
+
+void schedtune_exit_task(struct task_struct *tsk);
+
+void schedtune_enqueue_task(struct task_struct *p, int cpu);
+void schedtune_dequeue_task(struct task_struct *p, int cpu);
+
+#else /* CONFIG_CGROUP_SCHEDTUNE */
+
+#define schedtune_cpu_boost(cpu)  get_sysctl_sched_cfs_boost()
+#define schedtune_task_boost(tsk) get_sysctl_sched_cfs_boost()
+
+#define schedtune_exit_task(task) do { } while (0)
+
+#define schedtune_enqueue_task(task, cpu) do { } while (0)
+#define schedtune_dequeue_task(task, cpu) do { } while (0)
+
+#endif /* CONFIG_CGROUP_SCHEDTUNE */
+
+int schedtune_normalize_energy(int energy);
+int schedtune_accept_deltas(int nrg_delta, int cap_delta,
+			    struct task_struct *task);
+
+#else /* CONFIG_SCHED_TUNE */
+
+#define schedtune_cpu_boost(cpu)  0
+#define schedtune_task_boost(tsk) 0
+
+#define schedtune_exit_task(task) do { } while (0)
+
+#define schedtune_enqueue_task(task, cpu) do { } while (0)
+#define schedtune_dequeue_task(task, cpu) do { } while (0)
+
+#define schedtune_accept_deltas(nrg_delta, cap_delta, task) nrg_delta
+
+#endif /* CONFIG_SCHED_TUNE */
diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
new file mode 100644
index 0000000..522f723
--- /dev/null
+++ b/kernel/sched/walt.c
@@ -0,0 +1,1133 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ *
+ * Window Assisted Load Tracking (WALT) implementation credits:
+ * Srivatsa Vaddagiri, Steve Muckle, Syed Rameez Mustafa, Joonwoo Park,
+ * Pavan Kumar Kondeti, Olav Haugan
+ *
+ * 2016-03-06: Integration with EAS/refactoring by Vikram Mulukutla
+ *             and Todd Kjos
+ */
+
+#include <linux/syscore_ops.h>
+#include <linux/cpufreq.h>
+#include <trace/events/sched.h>
+#include "sched.h"
+#include "walt.h"
+
+#define WINDOW_STATS_RECENT		0
+#define WINDOW_STATS_MAX		1
+#define WINDOW_STATS_MAX_RECENT_AVG	2
+#define WINDOW_STATS_AVG		3
+#define WINDOW_STATS_INVALID_POLICY	4
+
+#define EXITING_TASK_MARKER	0xdeaddead
+
+static __read_mostly unsigned int walt_ravg_hist_size = 5;
+static __read_mostly unsigned int walt_window_stats_policy =
+	WINDOW_STATS_MAX_RECENT_AVG;
+static __read_mostly unsigned int walt_account_wait_time = 1;
+static __read_mostly unsigned int walt_freq_account_wait_time = 0;
+static __read_mostly unsigned int walt_io_is_busy = 0;
+
+unsigned int sysctl_sched_walt_init_task_load_pct = 15;
+
+/* 1 -> use PELT based load stats, 0 -> use window-based load stats */
+unsigned int __read_mostly walt_disabled = 0;
+
+static unsigned int max_possible_efficiency = 1024;
+static unsigned int min_possible_efficiency = 1024;
+
+/*
+ * Maximum possible frequency across all cpus. Task demand and cpu
+ * capacity (cpu_power) metrics are scaled in reference to it.
+ */
+static unsigned int max_possible_freq = 1;
+
+/*
+ * Minimum possible max_freq across all cpus. This will be same as
+ * max_possible_freq on homogeneous systems and could be different from
+ * max_possible_freq on heterogenous systems. min_max_freq is used to derive
+ * capacity (cpu_power) of cpus.
+ */
+static unsigned int min_max_freq = 1;
+
+static unsigned int max_load_scale_factor = 1024;
+static unsigned int max_possible_capacity = 1024;
+
+/* Mask of all CPUs that have  max_possible_capacity */
+static cpumask_t mpc_mask = CPU_MASK_ALL;
+
+/* Window size (in ns) */
+__read_mostly unsigned int walt_ravg_window = 20000000;
+
+/* Min window size (in ns) = 10ms */
+#ifdef CONFIG_HZ_300
+/*
+ * Tick interval becomes to 3333333 due to
+ * rounding error when HZ=300.
+ */
+#define MIN_SCHED_RAVG_WINDOW (3333333 * 6)
+#else
+#define MIN_SCHED_RAVG_WINDOW 10000000
+#endif
+
+/* Max window size (in ns) = 1s */
+#define MAX_SCHED_RAVG_WINDOW 1000000000
+
+static unsigned int sync_cpu;
+static ktime_t ktime_last;
+static __read_mostly bool walt_ktime_suspended;
+
+static unsigned int task_load(struct task_struct *p)
+{
+	return p->ravg.demand;
+}
+
+void
+walt_inc_cumulative_runnable_avg(struct rq *rq,
+				 struct task_struct *p)
+{
+	rq->cumulative_runnable_avg += p->ravg.demand;
+}
+
+void
+walt_dec_cumulative_runnable_avg(struct rq *rq,
+				 struct task_struct *p)
+{
+	rq->cumulative_runnable_avg -= p->ravg.demand;
+	BUG_ON((s64)rq->cumulative_runnable_avg < 0);
+}
+
+static void
+fixup_cumulative_runnable_avg(struct rq *rq,
+			      struct task_struct *p, s64 task_load_delta)
+{
+	rq->cumulative_runnable_avg += task_load_delta;
+	if ((s64)rq->cumulative_runnable_avg < 0)
+		panic("cra less than zero: tld: %lld, task_load(p) = %u\n",
+			task_load_delta, task_load(p));
+}
+
+u64 walt_ktime_clock(void)
+{
+	if (unlikely(walt_ktime_suspended))
+		return ktime_to_ns(ktime_last);
+	return ktime_get_ns();
+}
+
+static void walt_resume(void)
+{
+	walt_ktime_suspended = false;
+}
+
+static int walt_suspend(void)
+{
+	ktime_last = ktime_get();
+	walt_ktime_suspended = true;
+	return 0;
+}
+
+static struct syscore_ops walt_syscore_ops = {
+	.resume	= walt_resume,
+	.suspend = walt_suspend
+};
+
+static int __init walt_init_ops(void)
+{
+	register_syscore_ops(&walt_syscore_ops);
+	return 0;
+}
+late_initcall(walt_init_ops);
+
+void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *cfs_rq,
+		struct task_struct *p)
+{
+	cfs_rq->cumulative_runnable_avg += p->ravg.demand;
+}
+
+void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *cfs_rq,
+		struct task_struct *p)
+{
+	cfs_rq->cumulative_runnable_avg -= p->ravg.demand;
+}
+
+static int exiting_task(struct task_struct *p)
+{
+	if (p->flags & PF_EXITING) {
+		if (p->ravg.sum_history[0] != EXITING_TASK_MARKER) {
+			p->ravg.sum_history[0] = EXITING_TASK_MARKER;
+		}
+		return 1;
+	}
+	return 0;
+}
+
+static int __init set_walt_ravg_window(char *str)
+{
+	get_option(&str, &walt_ravg_window);
+
+	walt_disabled = (walt_ravg_window < MIN_SCHED_RAVG_WINDOW ||
+				walt_ravg_window > MAX_SCHED_RAVG_WINDOW);
+	return 0;
+}
+
+early_param("walt_ravg_window", set_walt_ravg_window);
+
+static void
+update_window_start(struct rq *rq, u64 wallclock)
+{
+	s64 delta;
+	int nr_windows;
+
+	delta = wallclock - rq->window_start;
+	/* If the MPM global timer is cleared, set delta as 0 to avoid kernel BUG happening */
+	if (delta < 0) {
+		delta = 0;
+		WARN_ONCE(1, "WALT wallclock appears to have gone backwards or reset\n");
+	}
+
+	if (delta < walt_ravg_window)
+		return;
+
+	nr_windows = div64_u64(delta, walt_ravg_window);
+	rq->window_start += (u64)nr_windows * (u64)walt_ravg_window;
+}
+
+static u64 scale_exec_time(u64 delta, struct rq *rq)
+{
+	unsigned int cur_freq = rq->cur_freq;
+	int sf;
+
+	if (unlikely(cur_freq > max_possible_freq))
+		cur_freq = rq->max_possible_freq;
+
+	/* round up div64 */
+	delta = div64_u64(delta * cur_freq + max_possible_freq - 1,
+			  max_possible_freq);
+
+	sf = DIV_ROUND_UP(rq->efficiency * 1024, max_possible_efficiency);
+
+	delta *= sf;
+	delta >>= 10;
+
+	return delta;
+}
+
+static int cpu_is_waiting_on_io(struct rq *rq)
+{
+	if (!walt_io_is_busy)
+		return 0;
+
+	return atomic_read(&rq->nr_iowait);
+}
+
+void walt_account_irqtime(int cpu, struct task_struct *curr,
+				 u64 delta, u64 wallclock)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags, nr_windows;
+	u64 cur_jiffies_ts;
+
+	raw_spin_lock_irqsave(&rq->lock, flags);
+
+	/*
+	 * cputime (wallclock) uses sched_clock so use the same here for
+	 * consistency.
+	 */
+	delta += sched_clock() - wallclock;
+	cur_jiffies_ts = get_jiffies_64();
+
+	if (is_idle_task(curr))
+		walt_update_task_ravg(curr, rq, IRQ_UPDATE, walt_ktime_clock(),
+				 delta);
+
+	nr_windows = cur_jiffies_ts - rq->irqload_ts;
+
+	if (nr_windows) {
+		if (nr_windows < 10) {
+			/* Decay CPU's irqload by 3/4 for each window. */
+			rq->avg_irqload *= (3 * nr_windows);
+			rq->avg_irqload = div64_u64(rq->avg_irqload,
+						    4 * nr_windows);
+		} else {
+			rq->avg_irqload = 0;
+		}
+		rq->avg_irqload += rq->cur_irqload;
+		rq->cur_irqload = 0;
+	}
+
+	rq->cur_irqload += delta;
+	rq->irqload_ts = cur_jiffies_ts;
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+
+#define WALT_HIGH_IRQ_TIMEOUT 3
+
+u64 walt_irqload(int cpu) {
+	struct rq *rq = cpu_rq(cpu);
+	s64 delta;
+	delta = get_jiffies_64() - rq->irqload_ts;
+
+        /*
+	 * Current context can be preempted by irq and rq->irqload_ts can be
+	 * updated by irq context so that delta can be negative.
+	 * But this is okay and we can safely return as this means there
+	 * was recent irq occurrence.
+	 */
+
+        if (delta < WALT_HIGH_IRQ_TIMEOUT)
+		return rq->avg_irqload;
+        else
+		return 0;
+}
+
+int walt_cpu_high_irqload(int cpu) {
+	return walt_irqload(cpu) >= sysctl_sched_walt_cpu_high_irqload;
+}
+
+static int account_busy_for_cpu_time(struct rq *rq, struct task_struct *p,
+				     u64 irqtime, int event)
+{
+	if (is_idle_task(p)) {
+		/* TASK_WAKE && TASK_MIGRATE is not possible on idle task! */
+		if (event == PICK_NEXT_TASK)
+			return 0;
+
+		/* PUT_PREV_TASK, TASK_UPDATE && IRQ_UPDATE are left */
+		return irqtime || cpu_is_waiting_on_io(rq);
+	}
+
+	if (event == TASK_WAKE)
+		return 0;
+
+	if (event == PUT_PREV_TASK || event == IRQ_UPDATE ||
+					 event == TASK_UPDATE)
+		return 1;
+
+	/* Only TASK_MIGRATE && PICK_NEXT_TASK left */
+	return walt_freq_account_wait_time;
+}
+
+/*
+ * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
+ */
+static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
+	     int event, u64 wallclock, u64 irqtime)
+{
+	int new_window, nr_full_windows = 0;
+	int p_is_curr_task = (p == rq->curr);
+	u64 mark_start = p->ravg.mark_start;
+	u64 window_start = rq->window_start;
+	u32 window_size = walt_ravg_window;
+	u64 delta;
+
+	new_window = mark_start < window_start;
+	if (new_window) {
+		nr_full_windows = div64_u64((window_start - mark_start),
+						window_size);
+		if (p->ravg.active_windows < USHRT_MAX)
+			p->ravg.active_windows++;
+	}
+
+	/* Handle per-task window rollover. We don't care about the idle
+	 * task or exiting tasks. */
+	if (new_window && !is_idle_task(p) && !exiting_task(p)) {
+		u32 curr_window = 0;
+
+		if (!nr_full_windows)
+			curr_window = p->ravg.curr_window;
+
+		p->ravg.prev_window = curr_window;
+		p->ravg.curr_window = 0;
+	}
+
+	if (!account_busy_for_cpu_time(rq, p, irqtime, event)) {
+		/* account_busy_for_cpu_time() = 0, so no update to the
+		 * task's current window needs to be made. This could be
+		 * for example
+		 *
+		 *   - a wakeup event on a task within the current
+		 *     window (!new_window below, no action required),
+		 *   - switching to a new task from idle (PICK_NEXT_TASK)
+		 *     in a new window where irqtime is 0 and we aren't
+		 *     waiting on IO */
+
+		if (!new_window)
+			return;
+
+		/* A new window has started. The RQ demand must be rolled
+		 * over if p is the current task. */
+		if (p_is_curr_task) {
+			u64 prev_sum = 0;
+
+			/* p is either idle task or an exiting task */
+			if (!nr_full_windows) {
+				prev_sum = rq->curr_runnable_sum;
+			}
+
+			rq->prev_runnable_sum = prev_sum;
+			rq->curr_runnable_sum = 0;
+		}
+
+		return;
+	}
+
+	if (!new_window) {
+		/* account_busy_for_cpu_time() = 1 so busy time needs
+		 * to be accounted to the current window. No rollover
+		 * since we didn't start a new window. An example of this is
+		 * when a task starts execution and then sleeps within the
+		 * same window. */
+
+		if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq))
+			delta = wallclock - mark_start;
+		else
+			delta = irqtime;
+		delta = scale_exec_time(delta, rq);
+		rq->curr_runnable_sum += delta;
+		if (!is_idle_task(p) && !exiting_task(p))
+			p->ravg.curr_window += delta;
+
+		return;
+	}
+
+	if (!p_is_curr_task) {
+		/* account_busy_for_cpu_time() = 1 so busy time needs
+		 * to be accounted to the current window. A new window
+		 * has also started, but p is not the current task, so the
+		 * window is not rolled over - just split up and account
+		 * as necessary into curr and prev. The window is only
+		 * rolled over when a new window is processed for the current
+		 * task.
+		 *
+		 * Irqtime can't be accounted by a task that isn't the
+		 * currently running task. */
+
+		if (!nr_full_windows) {
+			/* A full window hasn't elapsed, account partial
+			 * contribution to previous completed window. */
+			delta = scale_exec_time(window_start - mark_start, rq);
+			if (!exiting_task(p))
+				p->ravg.prev_window += delta;
+		} else {
+			/* Since at least one full window has elapsed,
+			 * the contribution to the previous window is the
+			 * full window (window_size). */
+			delta = scale_exec_time(window_size, rq);
+			if (!exiting_task(p))
+				p->ravg.prev_window = delta;
+		}
+		rq->prev_runnable_sum += delta;
+
+		/* Account piece of busy time in the current window. */
+		delta = scale_exec_time(wallclock - window_start, rq);
+		rq->curr_runnable_sum += delta;
+		if (!exiting_task(p))
+			p->ravg.curr_window = delta;
+
+		return;
+	}
+
+	if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq)) {
+		/* account_busy_for_cpu_time() = 1 so busy time needs
+		 * to be accounted to the current window. A new window
+		 * has started and p is the current task so rollover is
+		 * needed. If any of these three above conditions are true
+		 * then this busy time can't be accounted as irqtime.
+		 *
+		 * Busy time for the idle task or exiting tasks need not
+		 * be accounted.
+		 *
+		 * An example of this would be a task that starts execution
+		 * and then sleeps once a new window has begun. */
+
+		if (!nr_full_windows) {
+			/* A full window hasn't elapsed, account partial
+			 * contribution to previous completed window. */
+			delta = scale_exec_time(window_start - mark_start, rq);
+			if (!is_idle_task(p) && !exiting_task(p))
+				p->ravg.prev_window += delta;
+
+			delta += rq->curr_runnable_sum;
+		} else {
+			/* Since at least one full window has elapsed,
+			 * the contribution to the previous window is the
+			 * full window (window_size). */
+			delta = scale_exec_time(window_size, rq);
+			if (!is_idle_task(p) && !exiting_task(p))
+				p->ravg.prev_window = delta;
+
+		}
+		/*
+		 * Rollover for normal runnable sum is done here by overwriting
+		 * the values in prev_runnable_sum and curr_runnable_sum.
+		 * Rollover for new task runnable sum has completed by previous
+		 * if-else statement.
+		 */
+		rq->prev_runnable_sum = delta;
+
+		/* Account piece of busy time in the current window. */
+		delta = scale_exec_time(wallclock - window_start, rq);
+		rq->curr_runnable_sum = delta;
+		if (!is_idle_task(p) && !exiting_task(p))
+			p->ravg.curr_window = delta;
+
+		return;
+	}
+
+	if (irqtime) {
+		/* account_busy_for_cpu_time() = 1 so busy time needs
+		 * to be accounted to the current window. A new window
+		 * has started and p is the current task so rollover is
+		 * needed. The current task must be the idle task because
+		 * irqtime is not accounted for any other task.
+		 *
+		 * Irqtime will be accounted each time we process IRQ activity
+		 * after a period of idleness, so we know the IRQ busy time
+		 * started at wallclock - irqtime. */
+
+		BUG_ON(!is_idle_task(p));
+		mark_start = wallclock - irqtime;
+
+		/* Roll window over. If IRQ busy time was just in the current
+		 * window then that is all that need be accounted. */
+		rq->prev_runnable_sum = rq->curr_runnable_sum;
+		if (mark_start > window_start) {
+			rq->curr_runnable_sum = scale_exec_time(irqtime, rq);
+			return;
+		}
+
+		/* The IRQ busy time spanned multiple windows. Process the
+		 * busy time preceding the current window start first. */
+		delta = window_start - mark_start;
+		if (delta > window_size)
+			delta = window_size;
+		delta = scale_exec_time(delta, rq);
+		rq->prev_runnable_sum += delta;
+
+		/* Process the remaining IRQ busy time in the current window. */
+		delta = wallclock - window_start;
+		rq->curr_runnable_sum = scale_exec_time(delta, rq);
+
+		return;
+	}
+
+	BUG();
+}
+
+static int account_busy_for_task_demand(struct task_struct *p, int event)
+{
+	/* No need to bother updating task demand for exiting tasks
+	 * or the idle task. */
+	if (exiting_task(p) || is_idle_task(p))
+		return 0;
+
+	/* When a task is waking up it is completing a segment of non-busy
+	 * time. Likewise, if wait time is not treated as busy time, then
+	 * when a task begins to run or is migrated, it is not running and
+	 * is completing a segment of non-busy time. */
+	if (event == TASK_WAKE || (!walt_account_wait_time &&
+			 (event == PICK_NEXT_TASK || event == TASK_MIGRATE)))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Called when new window is starting for a task, to record cpu usage over
+ * recently concluded window(s). Normally 'samples' should be 1. It can be > 1
+ * when, say, a real-time task runs without preemption for several windows at a
+ * stretch.
+ */
+static void update_history(struct rq *rq, struct task_struct *p,
+			 u32 runtime, int samples, int event)
+{
+	u32 *hist = &p->ravg.sum_history[0];
+	int ridx, widx;
+	u32 max = 0, avg, demand;
+	u64 sum = 0;
+
+	/* Ignore windows where task had no activity */
+	if (!runtime || is_idle_task(p) || exiting_task(p) || !samples)
+			goto done;
+
+	/* Push new 'runtime' value onto stack */
+	widx = walt_ravg_hist_size - 1;
+	ridx = widx - samples;
+	for (; ridx >= 0; --widx, --ridx) {
+		hist[widx] = hist[ridx];
+		sum += hist[widx];
+		if (hist[widx] > max)
+			max = hist[widx];
+	}
+
+	for (widx = 0; widx < samples && widx < walt_ravg_hist_size; widx++) {
+		hist[widx] = runtime;
+		sum += hist[widx];
+		if (hist[widx] > max)
+			max = hist[widx];
+	}
+
+	p->ravg.sum = 0;
+
+	if (walt_window_stats_policy == WINDOW_STATS_RECENT) {
+		demand = runtime;
+	} else if (walt_window_stats_policy == WINDOW_STATS_MAX) {
+		demand = max;
+	} else {
+		avg = div64_u64(sum, walt_ravg_hist_size);
+		if (walt_window_stats_policy == WINDOW_STATS_AVG)
+			demand = avg;
+		else
+			demand = max(avg, runtime);
+	}
+
+	/*
+	 * A throttled deadline sched class task gets dequeued without
+	 * changing p->on_rq. Since the dequeue decrements hmp stats
+	 * avoid decrementing it here again.
+	 */
+	if (task_on_rq_queued(p) && (!task_has_dl_policy(p) ||
+						!p->dl.dl_throttled))
+		fixup_cumulative_runnable_avg(rq, p, demand);
+
+	p->ravg.demand = demand;
+
+done:
+	trace_walt_update_history(rq, p, runtime, samples, event);
+	return;
+}
+
+static void add_to_task_demand(struct rq *rq, struct task_struct *p,
+				u64 delta)
+{
+	delta = scale_exec_time(delta, rq);
+	p->ravg.sum += delta;
+	if (unlikely(p->ravg.sum > walt_ravg_window))
+		p->ravg.sum = walt_ravg_window;
+}
+
+/*
+ * Account cpu demand of task and/or update task's cpu demand history
+ *
+ * ms = p->ravg.mark_start;
+ * wc = wallclock
+ * ws = rq->window_start
+ *
+ * Three possibilities:
+ *
+ *	a) Task event is contained within one window.
+ *		window_start < mark_start < wallclock
+ *
+ *		ws   ms  wc
+ *		|    |   |
+ *		V    V   V
+ *		|---------------|
+ *
+ *	In this case, p->ravg.sum is updated *iff* event is appropriate
+ *	(ex: event == PUT_PREV_TASK)
+ *
+ *	b) Task event spans two windows.
+ *		mark_start < window_start < wallclock
+ *
+ *		ms   ws   wc
+ *		|    |    |
+ *		V    V    V
+ *		-----|-------------------
+ *
+ *	In this case, p->ravg.sum is updated with (ws - ms) *iff* event
+ *	is appropriate, then a new window sample is recorded followed
+ *	by p->ravg.sum being set to (wc - ws) *iff* event is appropriate.
+ *
+ *	c) Task event spans more than two windows.
+ *
+ *		ms ws_tmp			   ws  wc
+ *		|  |				   |   |
+ *		V  V				   V   V
+ *		---|-------|-------|-------|-------|------
+ *		   |				   |
+ *		   |<------ nr_full_windows ------>|
+ *
+ *	In this case, p->ravg.sum is updated with (ws_tmp - ms) first *iff*
+ *	event is appropriate, window sample of p->ravg.sum is recorded,
+ *	'nr_full_window' samples of window_size is also recorded *iff*
+ *	event is appropriate and finally p->ravg.sum is set to (wc - ws)
+ *	*iff* event is appropriate.
+ *
+ * IMPORTANT : Leave p->ravg.mark_start unchanged, as update_cpu_busy_time()
+ * depends on it!
+ */
+static void update_task_demand(struct task_struct *p, struct rq *rq,
+	     int event, u64 wallclock)
+{
+	u64 mark_start = p->ravg.mark_start;
+	u64 delta, window_start = rq->window_start;
+	int new_window, nr_full_windows;
+	u32 window_size = walt_ravg_window;
+
+	new_window = mark_start < window_start;
+	if (!account_busy_for_task_demand(p, event)) {
+		if (new_window)
+			/* If the time accounted isn't being accounted as
+			 * busy time, and a new window started, only the
+			 * previous window need be closed out with the
+			 * pre-existing demand. Multiple windows may have
+			 * elapsed, but since empty windows are dropped,
+			 * it is not necessary to account those. */
+			update_history(rq, p, p->ravg.sum, 1, event);
+		return;
+	}
+
+	if (!new_window) {
+		/* The simple case - busy time contained within the existing
+		 * window. */
+		add_to_task_demand(rq, p, wallclock - mark_start);
+		return;
+	}
+
+	/* Busy time spans at least two windows. Temporarily rewind
+	 * window_start to first window boundary after mark_start. */
+	delta = window_start - mark_start;
+	nr_full_windows = div64_u64(delta, window_size);
+	window_start -= (u64)nr_full_windows * (u64)window_size;
+
+	/* Process (window_start - mark_start) first */
+	add_to_task_demand(rq, p, window_start - mark_start);
+
+	/* Push new sample(s) into task's demand history */
+	update_history(rq, p, p->ravg.sum, 1, event);
+	if (nr_full_windows)
+		update_history(rq, p, scale_exec_time(window_size, rq),
+			       nr_full_windows, event);
+
+	/* Roll window_start back to current to process any remainder
+	 * in current window. */
+	window_start += (u64)nr_full_windows * (u64)window_size;
+
+	/* Process (wallclock - window_start) next */
+	mark_start = window_start;
+	add_to_task_demand(rq, p, wallclock - mark_start);
+}
+
+/* Reflect task activity on its demand and cpu's busy time statistics */
+void walt_update_task_ravg(struct task_struct *p, struct rq *rq,
+	     int event, u64 wallclock, u64 irqtime)
+{
+	if (walt_disabled || !rq->window_start)
+		return;
+
+	lockdep_assert_held(&rq->lock);
+
+	update_window_start(rq, wallclock);
+
+	if (!p->ravg.mark_start)
+		goto done;
+
+	update_task_demand(p, rq, event, wallclock);
+	update_cpu_busy_time(p, rq, event, wallclock, irqtime);
+
+done:
+	trace_walt_update_task_ravg(p, rq, event, wallclock, irqtime);
+
+	p->ravg.mark_start = wallclock;
+}
+
+unsigned long __weak arch_get_cpu_efficiency(int cpu)
+{
+	return SCHED_CAPACITY_SCALE;
+}
+
+void walt_init_cpu_efficiency(void)
+{
+	int i, efficiency;
+	unsigned int max = 0, min = UINT_MAX;
+
+	for_each_possible_cpu(i) {
+		efficiency = arch_get_cpu_efficiency(i);
+		cpu_rq(i)->efficiency = efficiency;
+
+		if (efficiency > max)
+			max = efficiency;
+		if (efficiency < min)
+			min = efficiency;
+	}
+
+	if (max)
+		max_possible_efficiency = max;
+
+	if (min)
+		min_possible_efficiency = min;
+}
+
+static void reset_task_stats(struct task_struct *p)
+{
+	u32 sum = 0;
+
+	if (exiting_task(p))
+		sum = EXITING_TASK_MARKER;
+
+	memset(&p->ravg, 0, sizeof(struct ravg));
+	/* Retain EXITING_TASK marker */
+	p->ravg.sum_history[0] = sum;
+}
+
+void walt_mark_task_starting(struct task_struct *p)
+{
+	u64 wallclock;
+	struct rq *rq = task_rq(p);
+
+	if (!rq->window_start) {
+		reset_task_stats(p);
+		return;
+	}
+
+	wallclock = walt_ktime_clock();
+	p->ravg.mark_start = wallclock;
+}
+
+void walt_set_window_start(struct rq *rq)
+{
+	int cpu = cpu_of(rq);
+	struct rq *sync_rq = cpu_rq(sync_cpu);
+
+	if (rq->window_start)
+		return;
+
+	if (cpu == sync_cpu) {
+		rq->window_start = walt_ktime_clock();
+	} else {
+		raw_spin_unlock(&rq->lock);
+		double_rq_lock(rq, sync_rq);
+		rq->window_start = cpu_rq(sync_cpu)->window_start;
+		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
+		raw_spin_unlock(&sync_rq->lock);
+	}
+
+	rq->curr->ravg.mark_start = rq->window_start;
+}
+
+void walt_migrate_sync_cpu(int cpu)
+{
+	if (cpu == sync_cpu)
+		sync_cpu = smp_processor_id();
+}
+
+void walt_fixup_busy_time(struct task_struct *p, int new_cpu)
+{
+	struct rq *src_rq = task_rq(p);
+	struct rq *dest_rq = cpu_rq(new_cpu);
+	u64 wallclock;
+
+	if (!p->on_rq && p->state != TASK_WAKING)
+		return;
+
+	if (exiting_task(p)) {
+		return;
+	}
+
+	if (p->state == TASK_WAKING)
+		double_rq_lock(src_rq, dest_rq);
+
+	wallclock = walt_ktime_clock();
+
+	walt_update_task_ravg(task_rq(p)->curr, task_rq(p),
+			TASK_UPDATE, wallclock, 0);
+	walt_update_task_ravg(dest_rq->curr, dest_rq,
+			TASK_UPDATE, wallclock, 0);
+
+	walt_update_task_ravg(p, task_rq(p), TASK_MIGRATE, wallclock, 0);
+
+	if (p->ravg.curr_window) {
+		src_rq->curr_runnable_sum -= p->ravg.curr_window;
+		dest_rq->curr_runnable_sum += p->ravg.curr_window;
+	}
+
+	if (p->ravg.prev_window) {
+		src_rq->prev_runnable_sum -= p->ravg.prev_window;
+		dest_rq->prev_runnable_sum += p->ravg.prev_window;
+	}
+
+	if ((s64)src_rq->prev_runnable_sum < 0) {
+		src_rq->prev_runnable_sum = 0;
+		WARN_ON(1);
+	}
+	if ((s64)src_rq->curr_runnable_sum < 0) {
+		src_rq->curr_runnable_sum = 0;
+		WARN_ON(1);
+	}
+
+	trace_walt_migration_update_sum(src_rq, p);
+	trace_walt_migration_update_sum(dest_rq, p);
+
+	if (p->state == TASK_WAKING)
+		double_rq_unlock(src_rq, dest_rq);
+}
+
+/*
+ * Return 'capacity' of a cpu in reference to "least" efficient cpu, such that
+ * least efficient cpu gets capacity of 1024
+ */
+static unsigned long capacity_scale_cpu_efficiency(int cpu)
+{
+	return (1024 * cpu_rq(cpu)->efficiency) / min_possible_efficiency;
+}
+
+/*
+ * Return 'capacity' of a cpu in reference to cpu with lowest max_freq
+ * (min_max_freq), such that one with lowest max_freq gets capacity of 1024.
+ */
+static unsigned long capacity_scale_cpu_freq(int cpu)
+{
+	return (1024 * cpu_rq(cpu)->max_freq) / min_max_freq;
+}
+
+/*
+ * Return load_scale_factor of a cpu in reference to "most" efficient cpu, so
+ * that "most" efficient cpu gets a load_scale_factor of 1
+ */
+static unsigned long load_scale_cpu_efficiency(int cpu)
+{
+	return DIV_ROUND_UP(1024 * max_possible_efficiency,
+			    cpu_rq(cpu)->efficiency);
+}
+
+/*
+ * Return load_scale_factor of a cpu in reference to cpu with best max_freq
+ * (max_possible_freq), so that one with best max_freq gets a load_scale_factor
+ * of 1.
+ */
+static unsigned long load_scale_cpu_freq(int cpu)
+{
+	return DIV_ROUND_UP(1024 * max_possible_freq, cpu_rq(cpu)->max_freq);
+}
+
+static int compute_capacity(int cpu)
+{
+	int capacity = 1024;
+
+	capacity *= capacity_scale_cpu_efficiency(cpu);
+	capacity >>= 10;
+
+	capacity *= capacity_scale_cpu_freq(cpu);
+	capacity >>= 10;
+
+	return capacity;
+}
+
+static int compute_load_scale_factor(int cpu)
+{
+	int load_scale = 1024;
+
+	/*
+	 * load_scale_factor accounts for the fact that task load
+	 * is in reference to "best" performing cpu. Task's load will need to be
+	 * scaled (up) by a factor to determine suitability to be placed on a
+	 * (little) cpu.
+	 */
+	load_scale *= load_scale_cpu_efficiency(cpu);
+	load_scale >>= 10;
+
+	load_scale *= load_scale_cpu_freq(cpu);
+	load_scale >>= 10;
+
+	return load_scale;
+}
+
+static int cpufreq_notifier_policy(struct notifier_block *nb,
+		unsigned long val, void *data)
+{
+	struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
+	int i, update_max = 0;
+	u64 highest_mpc = 0, highest_mplsf = 0;
+	const struct cpumask *cpus = policy->related_cpus;
+	unsigned int orig_min_max_freq = min_max_freq;
+	unsigned int orig_max_possible_freq = max_possible_freq;
+	/* Initialized to policy->max in case policy->related_cpus is empty! */
+	unsigned int orig_max_freq = policy->max;
+
+	if (val != CPUFREQ_NOTIFY)
+		return 0;
+
+	for_each_cpu(i, policy->related_cpus) {
+		cpumask_copy(&cpu_rq(i)->freq_domain_cpumask,
+			     policy->related_cpus);
+		orig_max_freq = cpu_rq(i)->max_freq;
+		cpu_rq(i)->min_freq = policy->min;
+		cpu_rq(i)->max_freq = policy->max;
+		cpu_rq(i)->cur_freq = policy->cur;
+		cpu_rq(i)->max_possible_freq = policy->cpuinfo.max_freq;
+	}
+
+	max_possible_freq = max(max_possible_freq, policy->cpuinfo.max_freq);
+	if (min_max_freq == 1)
+		min_max_freq = UINT_MAX;
+	min_max_freq = min(min_max_freq, policy->cpuinfo.max_freq);
+	BUG_ON(!min_max_freq);
+	BUG_ON(!policy->max);
+
+	/* Changes to policy other than max_freq don't require any updates */
+	if (orig_max_freq == policy->max)
+		return 0;
+
+	/*
+	 * A changed min_max_freq or max_possible_freq (possible during bootup)
+	 * needs to trigger re-computation of load_scale_factor and capacity for
+	 * all possible cpus (even those offline). It also needs to trigger
+	 * re-computation of nr_big_task count on all online cpus.
+	 *
+	 * A changed rq->max_freq otoh needs to trigger re-computation of
+	 * load_scale_factor and capacity for just the cluster of cpus involved.
+	 * Since small task definition depends on max_load_scale_factor, a
+	 * changed load_scale_factor of one cluster could influence
+	 * classification of tasks in another cluster. Hence a changed
+	 * rq->max_freq will need to trigger re-computation of nr_big_task
+	 * count on all online cpus.
+	 *
+	 * While it should be sufficient for nr_big_tasks to be
+	 * re-computed for only online cpus, we have inadequate context
+	 * information here (in policy notifier) with regard to hotplug-safety
+	 * context in which notification is issued. As a result, we can't use
+	 * get_online_cpus() here, as it can lead to deadlock. Until cpufreq is
+	 * fixed up to issue notification always in hotplug-safe context,
+	 * re-compute nr_big_task for all possible cpus.
+	 */
+
+	if (orig_min_max_freq != min_max_freq ||
+		orig_max_possible_freq != max_possible_freq) {
+			cpus = cpu_possible_mask;
+			update_max = 1;
+	}
+
+	/*
+	 * Changed load_scale_factor can trigger reclassification of tasks as
+	 * big or small. Make this change "atomic" so that tasks are accounted
+	 * properly due to changed load_scale_factor
+	 */
+	for_each_cpu(i, cpus) {
+		struct rq *rq = cpu_rq(i);
+
+		rq->capacity = compute_capacity(i);
+		rq->load_scale_factor = compute_load_scale_factor(i);
+
+		if (update_max) {
+			u64 mpc, mplsf;
+
+			mpc = div_u64(((u64) rq->capacity) *
+				rq->max_possible_freq, rq->max_freq);
+			rq->max_possible_capacity = (int) mpc;
+
+			mplsf = div_u64(((u64) rq->load_scale_factor) *
+				rq->max_possible_freq, rq->max_freq);
+
+			if (mpc > highest_mpc) {
+				highest_mpc = mpc;
+				cpumask_clear(&mpc_mask);
+				cpumask_set_cpu(i, &mpc_mask);
+			} else if (mpc == highest_mpc) {
+				cpumask_set_cpu(i, &mpc_mask);
+			}
+
+			if (mplsf > highest_mplsf)
+				highest_mplsf = mplsf;
+		}
+	}
+
+	if (update_max) {
+		max_possible_capacity = highest_mpc;
+		max_load_scale_factor = highest_mplsf;
+	}
+
+	return 0;
+}
+
+static int cpufreq_notifier_trans(struct notifier_block *nb,
+		unsigned long val, void *data)
+{
+	struct cpufreq_freqs *freq = (struct cpufreq_freqs *)data;
+	unsigned int cpu = freq->cpu, new_freq = freq->new;
+	unsigned long flags;
+	int i;
+
+	if (val != CPUFREQ_POSTCHANGE)
+		return 0;
+
+	BUG_ON(!new_freq);
+
+	if (cpu_rq(cpu)->cur_freq == new_freq)
+		return 0;
+
+	for_each_cpu(i, &cpu_rq(cpu)->freq_domain_cpumask) {
+		struct rq *rq = cpu_rq(i);
+
+		raw_spin_lock_irqsave(&rq->lock, flags);
+		walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
+				      walt_ktime_clock(), 0);
+		rq->cur_freq = new_freq;
+		raw_spin_unlock_irqrestore(&rq->lock, flags);
+	}
+
+	return 0;
+}
+
+static struct notifier_block notifier_policy_block = {
+	.notifier_call = cpufreq_notifier_policy
+};
+
+static struct notifier_block notifier_trans_block = {
+	.notifier_call = cpufreq_notifier_trans
+};
+
+static int register_sched_callback(void)
+{
+	int ret;
+
+	ret = cpufreq_register_notifier(&notifier_policy_block,
+						CPUFREQ_POLICY_NOTIFIER);
+
+	if (!ret)
+		ret = cpufreq_register_notifier(&notifier_trans_block,
+						CPUFREQ_TRANSITION_NOTIFIER);
+
+	return 0;
+}
+
+/*
+ * cpufreq callbacks can be registered at core_initcall or later time.
+ * Any registration done prior to that is "forgotten" by cpufreq. See
+ * initialization of variable init_cpufreq_transition_notifier_list_called
+ * for further information.
+ */
+core_initcall(register_sched_callback);
+
+void walt_init_new_task_load(struct task_struct *p)
+{
+	int i;
+	u32 init_load_windows =
+			div64_u64((u64)sysctl_sched_walt_init_task_load_pct *
+                          (u64)walt_ravg_window, 100);
+	u32 init_load_pct = current->init_load_pct;
+
+	p->init_load_pct = 0;
+	memset(&p->ravg, 0, sizeof(struct ravg));
+
+	if (init_load_pct) {
+		init_load_windows = div64_u64((u64)init_load_pct *
+			  (u64)walt_ravg_window, 100);
+	}
+
+	p->ravg.demand = init_load_windows;
+	for (i = 0; i < RAVG_HIST_SIZE_MAX; ++i)
+		p->ravg.sum_history[i] = init_load_windows;
+}
diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h
new file mode 100644
index 0000000..f56c4da
--- /dev/null
+++ b/kernel/sched/walt.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __WALT_H
+#define __WALT_H
+
+#ifdef CONFIG_SCHED_WALT
+
+void walt_update_task_ravg(struct task_struct *p, struct rq *rq, int event,
+		u64 wallclock, u64 irqtime);
+void walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p);
+void walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p);
+void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *rq,
+		struct task_struct *p);
+void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *rq,
+		struct task_struct *p);
+void walt_fixup_busy_time(struct task_struct *p, int new_cpu);
+void walt_init_new_task_load(struct task_struct *p);
+void walt_mark_task_starting(struct task_struct *p);
+void walt_set_window_start(struct rq *rq);
+void walt_migrate_sync_cpu(int cpu);
+void walt_init_cpu_efficiency(void);
+u64 walt_ktime_clock(void);
+void walt_account_irqtime(int cpu, struct task_struct *curr, u64 delta,
+                                  u64 wallclock);
+
+u64 walt_irqload(int cpu);
+int walt_cpu_high_irqload(int cpu);
+
+#else /* CONFIG_SCHED_WALT */
+
+static inline void walt_update_task_ravg(struct task_struct *p, struct rq *rq,
+		int event, u64 wallclock, u64 irqtime) { }
+static inline void walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) { }
+static inline void walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) { }
+static inline void walt_inc_cfs_cumulative_runnable_avg(struct cfs_rq *rq,
+		struct task_struct *p) { }
+static inline void walt_dec_cfs_cumulative_runnable_avg(struct cfs_rq *rq,
+		struct task_struct *p) { }
+static inline void walt_fixup_busy_time(struct task_struct *p, int new_cpu) { }
+static inline void walt_init_new_task_load(struct task_struct *p) { }
+static inline void walt_mark_task_starting(struct task_struct *p) { }
+static inline void walt_set_window_start(struct rq *rq) { }
+static inline void walt_migrate_sync_cpu(int cpu) { }
+static inline void walt_init_cpu_efficiency(void) { }
+static inline u64 walt_ktime_clock(void) { return 0; }
+
+#define walt_cpu_high_irqload(cpu) false
+
+#endif /* CONFIG_SCHED_WALT */
+
+extern unsigned int walt_disabled;
+
+#endif
diff --git a/kernel/sys.c b/kernel/sys.c
index 6c4e9b5..2355957 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -41,6 +41,8 @@
 #include <linux/syscore_ops.h>
 #include <linux/version.h>
 #include <linux/ctype.h>
+#include <linux/mm.h>
+#include <linux/mempolicy.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -2070,6 +2072,153 @@
 }
 #endif
 
+#ifdef CONFIG_MMU
+static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
+		struct vm_area_struct **prev,
+		unsigned long start, unsigned long end,
+		const char __user *name_addr)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int error = 0;
+	pgoff_t pgoff;
+
+	if (name_addr == vma_get_anon_name(vma)) {
+		*prev = vma;
+		goto out;
+	}
+
+	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+	*prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma,
+				vma->vm_file, pgoff, vma_policy(vma),
+				vma->vm_userfaultfd_ctx, name_addr);
+	if (*prev) {
+		vma = *prev;
+		goto success;
+	}
+
+	*prev = vma;
+
+	if (start != vma->vm_start) {
+		error = split_vma(mm, vma, start, 1);
+		if (error)
+			goto out;
+	}
+
+	if (end != vma->vm_end) {
+		error = split_vma(mm, vma, end, 0);
+		if (error)
+			goto out;
+	}
+
+success:
+	if (!vma->vm_file)
+		vma->anon_name = name_addr;
+
+out:
+	if (error == -ENOMEM)
+		error = -EAGAIN;
+	return error;
+}
+
+static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
+			unsigned long arg)
+{
+	unsigned long tmp;
+	struct vm_area_struct *vma, *prev;
+	int unmapped_error = 0;
+	int error = -EINVAL;
+
+	/*
+	 * If the interval [start,end) covers some unmapped address
+	 * ranges, just ignore them, but return -ENOMEM at the end.
+	 * - this matches the handling in madvise.
+	 */
+	vma = find_vma_prev(current->mm, start, &prev);
+	if (vma && start > vma->vm_start)
+		prev = vma;
+
+	for (;;) {
+		/* Still start < end. */
+		error = -ENOMEM;
+		if (!vma)
+			return error;
+
+		/* Here start < (end|vma->vm_end). */
+		if (start < vma->vm_start) {
+			unmapped_error = -ENOMEM;
+			start = vma->vm_start;
+			if (start >= end)
+				return error;
+		}
+
+		/* Here vma->vm_start <= start < (end|vma->vm_end) */
+		tmp = vma->vm_end;
+		if (end < tmp)
+			tmp = end;
+
+		/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
+		error = prctl_update_vma_anon_name(vma, &prev, start, tmp,
+				(const char __user *)arg);
+		if (error)
+			return error;
+		start = tmp;
+		if (prev && start < prev->vm_end)
+			start = prev->vm_end;
+		error = unmapped_error;
+		if (start >= end)
+			return error;
+		if (prev)
+			vma = prev->vm_next;
+		else	/* madvise_remove dropped mmap_sem */
+			vma = find_vma(current->mm, start);
+	}
+}
+
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+		unsigned long len_in, unsigned long arg)
+{
+	struct mm_struct *mm = current->mm;
+	int error;
+	unsigned long len;
+	unsigned long end;
+
+	if (start & ~PAGE_MASK)
+		return -EINVAL;
+	len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+
+	/* Check to see whether len was rounded up from small -ve to zero */
+	if (len_in && !len)
+		return -EINVAL;
+
+	end = start + len;
+	if (end < start)
+		return -EINVAL;
+
+	if (end == start)
+		return 0;
+
+	down_write(&mm->mmap_sem);
+
+	switch (opt) {
+	case PR_SET_VMA_ANON_NAME:
+		error = prctl_set_vma_anon_name(start, end, arg);
+		break;
+	default:
+		error = -EINVAL;
+	}
+
+	up_write(&mm->mmap_sem);
+
+	return error;
+}
+#else /* CONFIG_MMU */
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+		unsigned long len_in, unsigned long arg)
+{
+	return -EINVAL;
+}
+#endif
+
 int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which)
 {
 	return -EINVAL;
@@ -2289,6 +2438,9 @@
 			return -EINVAL;
 		error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
 		break;
+	case PR_SET_VMA:
+		error = prctl_set_vma(arg2, arg3, arg4, arg5);
+		break;
 	default:
 		error = -EINVAL;
 		break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5515d57..f33e30b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -309,6 +309,57 @@
 		.extra2		= &max_sched_granularity_ns,
 	},
 	{
+		.procname	= "sched_sync_hint_enable",
+		.data		= &sysctl_sched_sync_hint_enable,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_SCHED_WALT
+	{
+		.procname	= "sched_use_walt_cpu_util",
+		.data		= &sysctl_sched_use_walt_cpu_util,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_use_walt_task_util",
+		.data		= &sysctl_sched_use_walt_task_util,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_walt_init_task_load_pct",
+		.data		= &sysctl_sched_walt_init_task_load_pct,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_walt_cpu_high_irqload",
+		.data		= &sysctl_sched_walt_cpu_high_irqload,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "sched_initial_task_util",
+		.data		= &sysctl_sched_initial_task_util,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_cstate_aware",
+		.data		= &sysctl_sched_cstate_aware,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
 		.procname	= "sched_wakeup_granularity_ns",
 		.data		= &sysctl_sched_wakeup_granularity,
 		.maxlen		= sizeof(unsigned int),
@@ -451,6 +502,21 @@
 		.extra1		= &one,
 	},
 #endif
+#ifdef CONFIG_SCHED_TUNE
+	{
+		.procname	= "sched_cfs_boost",
+		.data		= &sysctl_sched_cfs_boost,
+		.maxlen		= sizeof(sysctl_sched_cfs_boost),
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+		.mode		= 0444,
+#else
+		.mode		= 0644,
+#endif
+		.proc_handler	= &sysctl_sched_cfs_boost_handler,
+		.extra1		= &zero,
+		.extra2		= &one_hundred,
+	},
+#endif
 #ifdef CONFIG_PROVE_LOCKING
 	{
 		.procname	= "prove_locking",
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b6bebe2..65aafad 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1009,6 +1009,18 @@
 	return ts->sleep_length;
 }
 
+/**
+ * tick_nohz_get_idle_calls - return the current idle calls counter value
+ *
+ * Called from the schedutil frequency scaling governor in scheduler context.
+ */
+unsigned long tick_nohz_get_idle_calls(void)
+{
+	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+	return ts->idle_calls;
+}
+
 static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 {
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e24e1f0..fb182aa 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -444,6 +444,35 @@
 }
 EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
 
+/**
+ * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
+ *
+ * To keep it NMI safe since we're accessing from tracing, we're not using a
+ * separate timekeeper with updates to monotonic clock and boot offset
+ * protected with seqlocks. This has the following minor side effects:
+ *
+ * (1) Its possible that a timestamp be taken after the boot offset is updated
+ * but before the timekeeper is updated. If this happens, the new boot offset
+ * is added to the old timekeeping making the clock appear to update slightly
+ * earlier:
+ *    CPU 0                                        CPU 1
+ *    timekeeping_inject_sleeptime64()
+ *    __timekeeping_inject_sleeptime(tk, delta);
+ *                                                 timestamp();
+ *    timekeeping_update(tk, TK_CLEAR_NTP...);
+ *
+ * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
+ * partially updated.  Since the tk->offs_boot update is a rare event, this
+ * should be a rare occurrence which postprocessing should be able to handle.
+ */
+u64 notrace ktime_get_boot_fast_ns(void)
+{
+	struct timekeeper *tk = &tk_core.timekeeper;
+
+	return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
+}
+EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
+
 /* Suspend-time cycles value for halted fast timekeeper. */
 static cycle_t cycles_at_suspend;
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2a96b06..da57689 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -72,6 +72,9 @@
 	select CONTEXT_SWITCH_TRACER
 	bool
 
+config GPU_TRACEPOINTS
+	bool
+
 config CONTEXT_SWITCH_TRACER
 	bool
 
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index e579808..b4eaf9c 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -67,6 +67,7 @@
 endif
 obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
 obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
+obj-$(CONFIG_GPU_TRACEPOINTS) += gpu-traces.o
 
 obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
 
diff --git a/kernel/trace/gpu-traces.c b/kernel/trace/gpu-traces.c
new file mode 100644
index 0000000..a4b3f00
--- /dev/null
+++ b/kernel/trace/gpu-traces.c
@@ -0,0 +1,23 @@
+/*
+ * GPU tracepoints
+ *
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/gpu.h>
+
+EXPORT_TRACEPOINT_SYMBOL(gpu_sched_switch);
+EXPORT_TRACEPOINT_SYMBOL(gpu_job_enqueue);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d477393..d75011b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1125,6 +1125,7 @@
 	{ trace_clock,			"perf",		1 },
 	{ ktime_get_mono_fast_ns,	"mono",		1 },
 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
+	{ ktime_get_boot_fast_ns,	"boot",		1 },
 	ARCH_TRACE_CLOCKS
 };
 
@@ -1603,6 +1604,7 @@
 
 #define SAVED_CMDLINES_DEFAULT 128
 #define NO_CMDLINE_MAP UINT_MAX
+static unsigned saved_tgids[SAVED_CMDLINES_DEFAULT];
 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 struct saved_cmdlines_buffer {
 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
@@ -1841,7 +1843,7 @@
 	}
 
 	set_cmdline(idx, tsk->comm);
-
+	saved_tgids[idx] = tsk->tgid;
 	arch_spin_unlock(&trace_cmdline_lock);
 
 	return 1;
@@ -1884,6 +1886,25 @@
 	preempt_enable();
 }
 
+int trace_find_tgid(int pid)
+{
+	unsigned map;
+	int tgid;
+
+	preempt_disable();
+	arch_spin_lock(&trace_cmdline_lock);
+	map = savedcmd->map_pid_to_cmdline[pid];
+	if (map != NO_CMDLINE_MAP)
+		tgid = saved_tgids[map];
+	else
+		tgid = -1;
+
+	arch_spin_unlock(&trace_cmdline_lock);
+	preempt_enable();
+
+	return tgid;
+}
+
 void tracing_record_cmdline(struct task_struct *tsk)
 {
 	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
@@ -2937,6 +2958,13 @@
 		    "#              | |       |          |         |\n");
 }
 
+static void print_func_help_header_tgid(struct trace_buffer *buf, struct seq_file *m)
+{
+	print_event_info(buf, m);
+	seq_puts(m, "#           TASK-PID    TGID   CPU#      TIMESTAMP  FUNCTION\n");
+	seq_puts(m, "#              | |        |      |          |         |\n");
+}
+
 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
 {
 	print_event_info(buf, m);
@@ -2949,6 +2977,18 @@
 		    "#              | |       |   ||||       |         |\n");
 }
 
+static void print_func_help_header_irq_tgid(struct trace_buffer *buf, struct seq_file *m)
+{
+	print_event_info(buf, m);
+	seq_puts(m, "#                                      _-----=> irqs-off\n");
+	seq_puts(m, "#                                     / _----=> need-resched\n");
+	seq_puts(m, "#                                    | / _---=> hardirq/softirq\n");
+	seq_puts(m, "#                                    || / _--=> preempt-depth\n");
+	seq_puts(m, "#                                    ||| /     delay\n");
+	seq_puts(m, "#           TASK-PID    TGID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
+	seq_puts(m, "#              | |        |      |   ||||       |         |\n");
+}
+
 void
 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 {
@@ -3262,9 +3302,15 @@
 	} else {
 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
 			if (trace_flags & TRACE_ITER_IRQ_INFO)
-				print_func_help_header_irq(iter->trace_buffer, m);
+				if (trace_flags & TRACE_ITER_TGID)
+					print_func_help_header_irq_tgid(iter->trace_buffer, m);
+				else
+					print_func_help_header_irq(iter->trace_buffer, m);
 			else
-				print_func_help_header(iter->trace_buffer, m);
+				if (trace_flags & TRACE_ITER_TGID)
+					print_func_help_header_tgid(iter->trace_buffer, m);
+				else
+					print_func_help_header(iter->trace_buffer, m);
 		}
 	}
 }
@@ -4597,6 +4643,50 @@
 }
 
 static ssize_t
+tracing_saved_tgids_read(struct file *file, char __user *ubuf,
+				size_t cnt, loff_t *ppos)
+{
+	char *file_buf;
+	char *buf;
+	int len = 0;
+	int pid;
+	int i;
+
+	file_buf = kmalloc(SAVED_CMDLINES_DEFAULT*(16+1+16), GFP_KERNEL);
+	if (!file_buf)
+		return -ENOMEM;
+
+	buf = file_buf;
+
+	for (i = 0; i < SAVED_CMDLINES_DEFAULT; i++) {
+		int tgid;
+		int r;
+
+		pid = savedcmd->map_cmdline_to_pid[i];
+		if (pid == -1 || pid == NO_CMDLINE_MAP)
+			continue;
+
+		tgid = trace_find_tgid(pid);
+		r = sprintf(buf, "%d %d\n", pid, tgid);
+		buf += r;
+		len += r;
+	}
+
+	len = simple_read_from_buffer(ubuf, cnt, ppos,
+				      file_buf, len);
+
+	kfree(file_buf);
+
+	return len;
+}
+
+static const struct file_operations tracing_saved_tgids_fops = {
+	.open	= tracing_open_generic,
+	.read	= tracing_saved_tgids_read,
+	.llseek	= generic_file_llseek,
+};
+
+static ssize_t
 tracing_set_trace_read(struct file *filp, char __user *ubuf,
 		       size_t cnt, loff_t *ppos)
 {
@@ -7229,6 +7319,9 @@
 	trace_create_file("trace_marker", 0220, d_tracer,
 			  tr, &tracing_mark_fops);
 
+	trace_create_file("saved_tgids", 0444, d_tracer,
+			  tr, &tracing_saved_tgids_fops);
+
 	trace_create_file("trace_clock", 0644, d_tracer, tr,
 			  &trace_clock_fops);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b0d8576..d4c6633 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -690,6 +690,7 @@
 
 extern void trace_find_cmdline(int pid, char comm[]);
 extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
+extern int trace_find_tgid(int pid);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern unsigned long ftrace_update_tot_cnt;
@@ -1009,7 +1010,8 @@
 		FUNCTION_FLAGS					\
 		FGRAPH_FLAGS					\
 		STACK_FLAGS					\
-		BRANCH_FLAGS
+		BRANCH_FLAGS					\
+		C(TGID,			"print-tgid"),
 
 /*
  * By defining C, we can make TRACE_FLAGS a list of bit names
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 01e7181..7461d51 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -65,6 +65,9 @@
 
 #define TRACE_GRAPH_INDENT	2
 
+/* Flag options */
+#define TRACE_GRAPH_PRINT_FLAT		0x80
+
 static unsigned int max_depth;
 
 static struct tracer_opt trace_opts[] = {
@@ -88,6 +91,8 @@
 	{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
 	/* Include time within nested functions */
 	{ TRACER_OPT(graph-time, TRACE_GRAPH_GRAPH_TIME) },
+	/* Use standard trace formatting rather than hierarchical */
+	{ TRACER_OPT(funcgraph-flat, TRACE_GRAPH_PRINT_FLAT) },
 	{ } /* Empty entry */
 };
 
@@ -1246,6 +1251,9 @@
 	int cpu = iter->cpu;
 	int ret;
 
+	if (flags & TRACE_GRAPH_PRINT_FLAT)
+		return TRACE_TYPE_UNHANDLED;
+
 	if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
 		per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
 		return TRACE_TYPE_HANDLED;
@@ -1303,13 +1311,6 @@
 	return print_graph_function_flags(iter, tracer_flags.val);
 }
 
-static enum print_line_t
-print_graph_function_event(struct trace_iterator *iter, int flags,
-			   struct trace_event *event)
-{
-	return print_graph_function(iter);
-}
-
 static void print_lat_header(struct seq_file *s, u32 flags)
 {
 	static const char spaces[] = "                "	/* 16 spaces */
@@ -1378,6 +1379,11 @@
 	struct trace_iterator *iter = s->private;
 	struct trace_array *tr = iter->tr;
 
+	if (flags & TRACE_GRAPH_PRINT_FLAT) {
+		trace_default_header(s);
+		return;
+	}
+
 	if (!(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
 		return;
 
@@ -1459,19 +1465,6 @@
 	return 0;
 }
 
-static struct trace_event_functions graph_functions = {
-	.trace		= print_graph_function_event,
-};
-
-static struct trace_event graph_trace_entry_event = {
-	.type		= TRACE_GRAPH_ENT,
-	.funcs		= &graph_functions,
-};
-
-static struct trace_event graph_trace_ret_event = {
-	.type		= TRACE_GRAPH_RET,
-	.funcs		= &graph_functions
-};
 
 static struct tracer graph_trace __tracer_data = {
 	.name		= "function_graph",
@@ -1548,16 +1541,6 @@
 {
 	max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
 
-	if (!register_trace_event(&graph_trace_entry_event)) {
-		pr_warn("Warning: could not register graph trace events\n");
-		return 1;
-	}
-
-	if (!register_trace_event(&graph_trace_ret_event)) {
-		pr_warn("Warning: could not register graph trace events\n");
-		return 1;
-	}
-
 	return register_tracer(&graph_trace);
 }
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3fc2042..0346759 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -530,11 +530,21 @@
 	unsigned long long t;
 	unsigned long secs, usec_rem;
 	char comm[TASK_COMM_LEN];
+	int tgid;
 
 	trace_find_cmdline(entry->pid, comm);
 
-	trace_seq_printf(s, "%16s-%-5d [%03d] ",
-			       comm, entry->pid, iter->cpu);
+	trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
+
+	if (tr->trace_flags & TRACE_ITER_TGID) {
+		tgid = trace_find_tgid(entry->pid);
+		if (tgid < 0)
+			trace_seq_puts(s, "(-----) ");
+		else
+			trace_seq_printf(s, "(%5d) ", tgid);
+	}
+
+	trace_seq_printf(s, "[%03d] ", iter->cpu);
 
 	if (tr->trace_flags & TRACE_ITER_IRQ_INFO)
 		trace_print_lat_fmt(s, entry);
@@ -849,6 +859,174 @@
 	.funcs		= &trace_fn_funcs,
 };
 
+/* TRACE_GRAPH_ENT */
+static enum print_line_t trace_graph_ent_trace(struct trace_iterator *iter, int flags,
+					struct trace_event *event)
+{
+	struct trace_seq *s = &iter->seq;
+	struct ftrace_graph_ent_entry *field;
+
+	trace_assign_type(field, iter->ent);
+
+	trace_seq_puts(s, "graph_ent: func=");
+	if (trace_seq_has_overflowed(s))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	if (!seq_print_ip_sym(s, field->graph_ent.func, flags))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	trace_seq_puts(s, "\n");
+	if (trace_seq_has_overflowed(s))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ent_raw(struct trace_iterator *iter, int flags,
+				      struct trace_event *event)
+{
+	struct ftrace_graph_ent_entry *field;
+
+	trace_assign_type(field, iter->ent);
+
+	trace_seq_printf(&iter->seq, "%lx %d\n",
+			      field->graph_ent.func,
+			      field->graph_ent.depth);
+	if (trace_seq_has_overflowed(&iter->seq))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ent_hex(struct trace_iterator *iter, int flags,
+				      struct trace_event *event)
+{
+	struct ftrace_graph_ent_entry *field;
+	struct trace_seq *s = &iter->seq;
+
+	trace_assign_type(field, iter->ent);
+
+	SEQ_PUT_HEX_FIELD(s, field->graph_ent.func);
+	SEQ_PUT_HEX_FIELD(s, field->graph_ent.depth);
+
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ent_bin(struct trace_iterator *iter, int flags,
+				      struct trace_event *event)
+{
+	struct ftrace_graph_ent_entry *field;
+	struct trace_seq *s = &iter->seq;
+
+	trace_assign_type(field, iter->ent);
+
+	SEQ_PUT_FIELD(s, field->graph_ent.func);
+	SEQ_PUT_FIELD(s, field->graph_ent.depth);
+
+	return TRACE_TYPE_HANDLED;
+}
+
+static struct trace_event_functions trace_graph_ent_funcs = {
+	.trace		= trace_graph_ent_trace,
+	.raw		= trace_graph_ent_raw,
+	.hex		= trace_graph_ent_hex,
+	.binary		= trace_graph_ent_bin,
+};
+
+static struct trace_event trace_graph_ent_event = {
+	.type		= TRACE_GRAPH_ENT,
+	.funcs		= &trace_graph_ent_funcs,
+};
+
+/* TRACE_GRAPH_RET */
+static enum print_line_t trace_graph_ret_trace(struct trace_iterator *iter, int flags,
+					struct trace_event *event)
+{
+	struct trace_seq *s = &iter->seq;
+	struct trace_entry *entry = iter->ent;
+	struct ftrace_graph_ret_entry *field;
+
+	trace_assign_type(field, entry);
+
+	trace_seq_puts(s, "graph_ret: func=");
+	if (trace_seq_has_overflowed(s))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	if (!seq_print_ip_sym(s, field->ret.func, flags))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	trace_seq_puts(s, "\n");
+	if (trace_seq_has_overflowed(s))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ret_raw(struct trace_iterator *iter, int flags,
+				      struct trace_event *event)
+{
+	struct ftrace_graph_ret_entry *field;
+
+	trace_assign_type(field, iter->ent);
+
+	trace_seq_printf(&iter->seq, "%lx %lld %lld %ld %d\n",
+			      field->ret.func,
+			      field->ret.calltime,
+			      field->ret.rettime,
+			      field->ret.overrun,
+			      field->ret.depth);
+	if (trace_seq_has_overflowed(&iter->seq))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ret_hex(struct trace_iterator *iter, int flags,
+				      struct trace_event *event)
+{
+	struct ftrace_graph_ret_entry *field;
+	struct trace_seq *s = &iter->seq;
+
+	trace_assign_type(field, iter->ent);
+
+	SEQ_PUT_HEX_FIELD(s, field->ret.func);
+	SEQ_PUT_HEX_FIELD(s, field->ret.calltime);
+	SEQ_PUT_HEX_FIELD(s, field->ret.rettime);
+	SEQ_PUT_HEX_FIELD(s, field->ret.overrun);
+	SEQ_PUT_HEX_FIELD(s, field->ret.depth);
+
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ret_bin(struct trace_iterator *iter, int flags,
+				      struct trace_event *event)
+{
+	struct ftrace_graph_ret_entry *field;
+	struct trace_seq *s = &iter->seq;
+
+	trace_assign_type(field, iter->ent);
+
+	SEQ_PUT_FIELD(s, field->ret.func);
+	SEQ_PUT_FIELD(s, field->ret.calltime);
+	SEQ_PUT_FIELD(s, field->ret.rettime);
+	SEQ_PUT_FIELD(s, field->ret.overrun);
+	SEQ_PUT_FIELD(s, field->ret.depth);
+
+	return TRACE_TYPE_HANDLED;
+}
+
+static struct trace_event_functions trace_graph_ret_funcs = {
+	.trace		= trace_graph_ret_trace,
+	.raw		= trace_graph_ret_raw,
+	.hex		= trace_graph_ret_hex,
+	.binary		= trace_graph_ret_bin,
+};
+
+static struct trace_event trace_graph_ret_event = {
+	.type		= TRACE_GRAPH_RET,
+	.funcs		= &trace_graph_ret_funcs,
+};
+
 /* TRACE_CTX an TRACE_WAKE */
 static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
 					     char *delim)
@@ -1291,6 +1469,8 @@
 
 static struct trace_event *events[] __initdata = {
 	&trace_fn_event,
+	&trace_graph_ent_event,
+	&trace_graph_ret_event,
 	&trace_ctx_event,
 	&trace_wake_event,
 	&trace_stack_event,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 58a22ca..ba7670c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -922,6 +922,15 @@
 	bool
 	default n
 
+config PANIC_ON_RT_THROTTLING
+	bool "Panic on RT throttling"
+	help
+	  Say Y here to enable the kernel to panic when a realtime
+	  runqueue is throttled. This may be useful for detecting
+	  and debugging RT throttling issues.
+
+	  Say N if unsure.
+
 config SCHEDSTATS
 	bool "Collect scheduler statistics"
 	depends on DEBUG_KERNEL && PROC_FS
diff --git a/mm/madvise.c b/mm/madvise.c
index 4a01c4b..6849e4e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -109,7 +109,7 @@
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma),
-			  vma->vm_userfaultfd_ctx);
+			  vma->vm_userfaultfd_ctx, vma_get_anon_name(vma));
 	if (*prev) {
 		vma = *prev;
 		goto success;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5cb5147..c1694ae 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -762,7 +762,8 @@
 			((vmstart - vma->vm_start) >> PAGE_SHIFT);
 		prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
 				 vma->anon_vma, vma->vm_file, pgoff,
-				 new_pol, vma->vm_userfaultfd_ctx);
+				 new_pol, vma->vm_userfaultfd_ctx,
+				 vma_get_anon_name(vma));
 		if (prev) {
 			vma = prev;
 			next = vma->vm_next;
diff --git a/mm/mlock.c b/mm/mlock.c
index f050569..9cdd063 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -529,7 +529,7 @@
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma),
-			  vma->vm_userfaultfd_ctx);
+			  vma->vm_userfaultfd_ctx, vma_get_anon_name(vma));
 	if (*prev) {
 		vma = *prev;
 		goto success;
diff --git a/mm/mmap.c b/mm/mmap.c
index 3f2314a..6441ef8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -970,7 +970,8 @@
  */
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
 				struct file *file, unsigned long vm_flags,
-				struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+				struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+				const char __user *anon_name)
 {
 	/*
 	 * VM_SOFTDIRTY should not prevent from VMA merging, if we
@@ -988,6 +989,8 @@
 		return 0;
 	if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
 		return 0;
+	if (vma_get_anon_name(vma) != anon_name)
+		return 0;
 	return 1;
 }
 
@@ -1020,9 +1023,10 @@
 can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
 		     struct anon_vma *anon_vma, struct file *file,
 		     pgoff_t vm_pgoff,
-		     struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+		     struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+		     const char __user *anon_name)
 {
-	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
+	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
 	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
 		if (vma->vm_pgoff == vm_pgoff)
 			return 1;
@@ -1041,9 +1045,10 @@
 can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
 		    struct anon_vma *anon_vma, struct file *file,
 		    pgoff_t vm_pgoff,
-		    struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+		    struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+		    const char __user *anon_name)
 {
-	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
+	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
 	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
 		pgoff_t vm_pglen;
 		vm_pglen = vma_pages(vma);
@@ -1054,9 +1059,9 @@
 }
 
 /*
- * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
- * whether that can be merged with its predecessor or its successor.
- * Or both (it neatly fills a hole).
+ * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name),
+ * figure out whether that can be merged with its predecessor or its
+ * successor.  Or both (it neatly fills a hole).
  *
  * In most cases - when called for mmap, brk or mremap - [addr,end) is
  * certain not to be mapped by the time vma_merge is called; but when
@@ -1098,7 +1103,8 @@
 			unsigned long end, unsigned long vm_flags,
 			struct anon_vma *anon_vma, struct file *file,
 			pgoff_t pgoff, struct mempolicy *policy,
-			struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+			struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+			const char __user *anon_name)
 {
 	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
 	struct vm_area_struct *area, *next;
@@ -1131,7 +1137,8 @@
 			mpol_equal(vma_policy(prev), policy) &&
 			can_vma_merge_after(prev, vm_flags,
 					    anon_vma, file, pgoff,
-					    vm_userfaultfd_ctx)) {
+					    vm_userfaultfd_ctx,
+					    anon_name)) {
 		/*
 		 * OK, it can.  Can we now merge in the successor as well?
 		 */
@@ -1140,7 +1147,8 @@
 				can_vma_merge_before(next, vm_flags,
 						     anon_vma, file,
 						     pgoff+pglen,
-						     vm_userfaultfd_ctx) &&
+						     vm_userfaultfd_ctx,
+						     anon_name) &&
 				is_mergeable_anon_vma(prev->anon_vma,
 						      next->anon_vma, NULL)) {
 							/* cases 1, 6 */
@@ -1163,7 +1171,8 @@
 			mpol_equal(policy, vma_policy(next)) &&
 			can_vma_merge_before(next, vm_flags,
 					     anon_vma, file, pgoff+pglen,
-					     vm_userfaultfd_ctx)) {
+					     vm_userfaultfd_ctx,
+					     anon_name)) {
 		if (prev && addr < prev->vm_end)	/* case 4 */
 			err = __vma_adjust(prev, prev->vm_start,
 					 addr, prev->vm_pgoff, NULL, next);
@@ -1673,7 +1682,7 @@
 	 * Can we just expand an old mapping?
 	 */
 	vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
-			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
+			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
 	if (vma)
 		goto out;
 
@@ -2732,6 +2741,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(do_munmap);
 
 int vm_munmap(unsigned long start, size_t len)
 {
@@ -2921,7 +2931,7 @@
 
 	/* Can we just expand an old private anonymous mapping? */
 	vma = vma_merge(mm, prev, addr, addr + len, flags,
-			NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
+			NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
 	if (vma)
 		goto out;
 
@@ -3089,7 +3099,7 @@
 		return NULL;	/* should never get here */
 	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
 			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-			    vma->vm_userfaultfd_ctx);
+			    vma->vm_userfaultfd_ctx, vma_get_anon_name(vma));
 	if (new_vma) {
 		/*
 		 * Source vma may have been merged into new_vma
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 6896f77..5471f35 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -352,7 +352,7 @@
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*pprev = vma_merge(mm, *pprev, start, end, newflags,
 			   vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-			   vma->vm_userfaultfd_ctx);
+			   vma->vm_userfaultfd_ctx, vma_get_anon_name(vma));
 	if (*pprev) {
 		vma = *pprev;
 		VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
diff --git a/mm/shmem.c b/mm/shmem.c
index 9442424..793a8cf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -4087,6 +4087,14 @@
 }
 EXPORT_SYMBOL_GPL(shmem_file_setup);
 
+void shmem_set_file(struct vm_area_struct *vma, struct file *file)
+{
+	if (vma->vm_file)
+		fput(vma->vm_file);
+	vma->vm_file = file;
+	vma->vm_ops = &shmem_vm_ops;
+}
+
 /**
  * shmem_zero_setup - setup a shared anonymous mapping
  * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
@@ -4106,10 +4114,7 @@
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	if (vma->vm_file)
-		fput(vma->vm_file);
-	vma->vm_file = file;
-	vma->vm_ops = &shmem_vm_ops;
+	shmem_set_file(vma, file);
 
 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
 			((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
diff --git a/net/Kconfig b/net/Kconfig
index 7b6cd34..28071fc 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -89,6 +89,12 @@
 
 endif # if INET
 
+config ANDROID_PARANOID_NETWORK
+	bool "Only allow certain groups to create sockets"
+	default y
+	help
+		none
+
 config NETWORK_SECMARK
 	bool "Security Marking"
 	help
@@ -258,10 +264,6 @@
 config HWBM
        bool
 
-config SOCK_CGROUP_DATA
-	bool
-	default n
-
 config CGROUP_NET_PRIO
 	bool "Network priority cgroup"
 	depends on CGROUPS
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 5d369817..ec313c9 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -106,11 +106,40 @@
 }
 EXPORT_SYMBOL(bt_sock_unregister);
 
+#ifdef CONFIG_PARANOID_NETWORK
+static inline int current_has_bt_admin(void)
+{
+	return !current_euid();
+}
+
+static inline int current_has_bt(void)
+{
+	return current_has_bt_admin();
+}
+# else
+static inline int current_has_bt_admin(void)
+{
+	return 1;
+}
+
+static inline int current_has_bt(void)
+{
+	return 1;
+}
+#endif
+
 static int bt_sock_create(struct net *net, struct socket *sock, int proto,
 			  int kern)
 {
 	int err;
 
+	if (proto == BTPROTO_RFCOMM || proto == BTPROTO_SCO ||
+			proto == BTPROTO_L2CAP) {
+		if (!current_has_bt())
+			return -EPERM;
+	} else if (!current_has_bt_admin())
+		return -EPERM;
+
 	if (net != &init_net)
 		return -EAFNOSUPPORT;
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 5f5e28f..04eea2f 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -48,11 +48,6 @@
 		return NETDEV_TX_OK;
 	}
 
-	u64_stats_update_begin(&brstats->syncp);
-	brstats->tx_packets++;
-	brstats->tx_bytes += skb->len;
-	u64_stats_update_end(&brstats->syncp);
-
 #ifdef CONFIG_NET_SWITCHDEV
 	skb->offload_fwd_mark = 0;
 #endif
@@ -61,6 +56,12 @@
 	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
 
+	u64_stats_update_begin(&brstats->syncp);
+	brstats->tx_packets++;
+	/* Exclude ETH_HLEN from byte stats for consistency with Rx chain */
+	brstats->tx_bytes += skb->len;
+	u64_stats_update_end(&brstats->syncp);
+
 	if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
 		goto out;
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index be4629c3..b6791d9 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -18,6 +18,11 @@
 #include <net/fib_rules.h>
 #include <net/ip_tunnels.h>
 
+static const struct fib_kuid_range fib_kuid_range_unset = {
+	KUIDT_INIT(0),
+	KUIDT_INIT(~0),
+};
+
 int fib_default_rule_add(struct fib_rules_ops *ops,
 			 u32 pref, u32 table, u32 flags)
 {
@@ -33,6 +38,7 @@
 	r->table = table;
 	r->flags = flags;
 	r->fr_net = ops->fro_net;
+	r->uid_range = fib_kuid_range_unset;
 
 	r->suppress_prefixlen = -1;
 	r->suppress_ifgroup = -1;
@@ -172,6 +178,34 @@
 }
 EXPORT_SYMBOL_GPL(fib_rules_unregister);
 
+static int uid_range_set(struct fib_kuid_range *range)
+{
+	return uid_valid(range->start) && uid_valid(range->end);
+}
+
+static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb)
+{
+	struct fib_rule_uid_range *in;
+	struct fib_kuid_range out;
+
+	in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]);
+
+	out.start = make_kuid(current_user_ns(), in->start);
+	out.end = make_kuid(current_user_ns(), in->end);
+
+	return out;
+}
+
+static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
+{
+	struct fib_rule_uid_range out = {
+		from_kuid_munged(current_user_ns(), range->start),
+		from_kuid_munged(current_user_ns(), range->end)
+	};
+
+	return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
+}
+
 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 			  struct flowi *fl, int flags,
 			  struct fib_lookup_arg *arg)
@@ -193,6 +227,10 @@
 	if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
 		goto out;
 
+	if (uid_lt(fl->flowi_uid, rule->uid_range.start) ||
+	    uid_gt(fl->flowi_uid, rule->uid_range.end))
+		goto out;
+
 	ret = ops->match(rule, fl, flags);
 out:
 	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
@@ -305,6 +343,10 @@
 		if (r->l3mdev != rule->l3mdev)
 			continue;
 
+		if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
+		    !uid_eq(r->uid_range.end, rule->uid_range.end))
+			continue;
+
 		if (!ops->compare(r, frh, tb))
 			continue;
 		return 1;
@@ -429,6 +471,21 @@
 	if (rule->l3mdev && rule->table)
 		goto errout_free;
 
+	if (tb[FRA_UID_RANGE]) {
+		if (current_user_ns() != net->user_ns) {
+			err = -EPERM;
+			goto errout_free;
+		}
+
+		rule->uid_range = nla_get_kuid_range(tb);
+
+		if (!uid_range_set(&rule->uid_range) ||
+		    !uid_lte(rule->uid_range.start, rule->uid_range.end))
+			goto errout_free;
+	} else {
+		rule->uid_range = fib_kuid_range_unset;
+	}
+
 	if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
 	    rule_exists(ops, frh, tb, rule)) {
 		err = -EEXIST;
@@ -497,6 +554,7 @@
 	struct fib_rules_ops *ops = NULL;
 	struct fib_rule *rule, *tmp;
 	struct nlattr *tb[FRA_MAX+1];
+	struct fib_kuid_range range;
 	int err = -EINVAL;
 
 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
@@ -516,6 +574,14 @@
 	if (err < 0)
 		goto errout;
 
+	if (tb[FRA_UID_RANGE]) {
+		range = nla_get_kuid_range(tb);
+		if (!uid_range_set(&range))
+			goto errout;
+	} else {
+		range = fib_kuid_range_unset;
+	}
+
 	list_for_each_entry(rule, &ops->rules_list, list) {
 		if (frh->action && (frh->action != rule->action))
 			continue;
@@ -552,6 +618,11 @@
 		    (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
 			continue;
 
+		if (uid_range_set(&range) &&
+		    (!uid_eq(rule->uid_range.start, range.start) ||
+		     !uid_eq(rule->uid_range.end, range.end)))
+			continue;
+
 		if (!ops->compare(rule, frh, tb))
 			continue;
 
@@ -619,7 +690,8 @@
 			 + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
 			 + nla_total_size(4) /* FRA_FWMARK */
 			 + nla_total_size(4) /* FRA_FWMASK */
-			 + nla_total_size_64bit(8); /* FRA_TUN_ID */
+			 + nla_total_size_64bit(8) /* FRA_TUN_ID */
+			 + nla_total_size(sizeof(struct fib_kuid_range));
 
 	if (ops->nlmsg_payload)
 		payload += ops->nlmsg_payload(rule);
@@ -679,7 +751,9 @@
 	    (rule->tun_id &&
 	     nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
 	    (rule->l3mdev &&
-	     nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)))
+	     nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
+	    (uid_range_set(&rule->uid_range) &&
+	     nla_put_uid_range(skb, &rule->uid_range)))
 		goto nla_put_failure;
 
 	if (rule->suppress_ifgroup != -1) {
diff --git a/net/core/filter.c b/net/core/filter.c
index e8c89d2..c385c55 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -26,6 +26,7 @@
 #include <linux/mm.h>
 #include <linux/fcntl.h>
 #include <linux/socket.h>
+#include <linux/sock_diag.h>
 #include <linux/in.h>
 #include <linux/inet.h>
 #include <linux/netdevice.h>
@@ -78,6 +79,10 @@
 	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
 		return -ENOMEM;
 
+	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
+	if (err)
+		return err;
+
 	err = security_sock_rcv_skb(sk, skb);
 	if (err)
 		return err;
@@ -85,7 +90,12 @@
 	rcu_read_lock();
 	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
-		unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
+		struct sock *save_sk = skb->sk;
+		unsigned int pkt_len;
+
+		skb->sk = sk;
+		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
+		skb->sk = save_sk;
 		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
 	}
 	rcu_read_unlock();
@@ -2533,6 +2543,36 @@
 	.arg5_type	= ARG_CONST_STACK_SIZE,
 };
 
+BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
+{
+	return skb->sk ? sock_gen_cookie(skb->sk) : 0;
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
+	.func           = bpf_get_socket_cookie,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
+{
+	struct sock *sk = sk_to_full_sk(skb->sk);
+	kuid_t kuid;
+
+	if (!sk || !sk_fullsock(sk))
+		return overflowuid;
+	kuid = sock_net_uid(sock_net(sk), sk);
+	return from_kuid_munged(sock_net(sk)->user_ns, kuid);
+}
+
+static const struct bpf_func_proto bpf_get_socket_uid_proto = {
+	.func           = bpf_get_socket_uid,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+};
+
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
 {
@@ -2554,6 +2594,10 @@
 	case BPF_FUNC_trace_printk:
 		if (capable(CAP_SYS_ADMIN))
 			return bpf_get_trace_printk_proto();
+	case BPF_FUNC_get_socket_cookie:
+		return &bpf_get_socket_cookie_proto;
+	case BPF_FUNC_get_socket_uid:
+		return &bpf_get_socket_uid_proto;
 	default:
 		return NULL;
 	}
@@ -2631,6 +2675,17 @@
 	}
 }
 
+static const struct bpf_func_proto *
+cg_skb_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_skb_load_bytes:
+		return &bpf_skb_load_bytes_proto;
+	default:
+		return sk_filter_func_proto(func_id);
+	}
+}
+
 static bool __is_valid_access(int off, int size, enum bpf_access_type type)
 {
 	if (off < 0 || off >= sizeof(struct __sk_buff))
@@ -2993,6 +3048,12 @@
 	.convert_ctx_access	= xdp_convert_ctx_access,
 };
 
+static const struct bpf_verifier_ops cg_skb_ops = {
+	.get_func_proto		= cg_skb_func_proto,
+	.is_valid_access	= sk_filter_is_valid_access,
+	.convert_ctx_access	= sk_filter_convert_ctx_access,
+};
+
 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
 	.ops	= &sk_filter_ops,
 	.type	= BPF_PROG_TYPE_SOCKET_FILTER,
@@ -3013,12 +3074,18 @@
 	.type	= BPF_PROG_TYPE_XDP,
 };
 
+static struct bpf_prog_type_list cg_skb_type __read_mostly = {
+	.ops	= &cg_skb_ops,
+	.type	= BPF_PROG_TYPE_CGROUP_SKB,
+};
+
 static int __init register_sk_filter_ops(void)
 {
 	bpf_register_prog_type(&sk_filter_type);
 	bpf_register_prog_type(&sched_cls_type);
 	bpf_register_prog_type(&sched_act_type);
 	bpf_register_prog_type(&xdp_type);
+	bpf_register_prog_type(&cg_skb_type);
 
 	return 0;
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 3041aa6..1d31d71 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1032,6 +1032,7 @@
 
 	union {
 		int val;
+		u64 val64;
 		struct linger ling;
 		struct timeval tm;
 	} v;
@@ -1262,6 +1263,13 @@
 		v.val = sk->sk_incoming_cpu;
 		break;
 
+
+	case SO_COOKIE:
+		lv = sizeof(u64);
+		if (len < lv)
+			return -EINVAL;
+		v.val64 = sock_gen_cookie(sk);
+		break;
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -2442,8 +2450,11 @@
 		sk->sk_type	=	sock->type;
 		sk->sk_wq	=	sock->wq;
 		sock->sk	=	sk;
-	} else
+		sk->sk_uid	=	SOCK_INODE(sock)->i_uid;
+	} else {
 		sk->sk_wq	=	NULL;
+		sk->sk_uid	=	make_kuid(sock_net(sk)->user_ns, 0);
+	}
 
 	rwlock_init(&sk->sk_callback_lock);
 	lockdep_set_class_and_name(&sk->sk_callback_lock,
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index d1d9faf..fb467db 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -19,7 +19,7 @@
 static DEFINE_MUTEX(sock_diag_table_mutex);
 static struct workqueue_struct *broadcast_wq;
 
-static u64 sock_gen_cookie(struct sock *sk)
+u64 sock_gen_cookie(struct sock *sk)
 {
 	while (1) {
 		u64 res = atomic64_read(&sk->sk_cookie);
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index bc6a6c8..a8b934a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -16,6 +16,7 @@
 
 obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
 obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
+obj-$(CONFIG_SYSFS) += sysfs_net_ipv4.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 689246d..0865b56 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -89,6 +89,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/random.h>
 #include <linux/slab.h>
+#include <linux/netfilter/xt_qtaguid.h>
 
 #include <asm/uaccess.h>
 
@@ -121,6 +122,19 @@
 #endif
 #include <net/l3mdev.h>
 
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+#include <linux/android_aid.h>
+
+static inline int current_has_network(void)
+{
+	return in_egroup_p(AID_INET) || capable(CAP_NET_RAW);
+}
+#else
+static inline int current_has_network(void)
+{
+	return 1;
+}
+#endif
 
 /* The inetsw table contains everything that inet_create needs to
  * build a new socket.
@@ -255,6 +269,9 @@
 	if (protocol < 0 || protocol >= IPPROTO_MAX)
 		return -EINVAL;
 
+	if (!current_has_network())
+		return -EACCES;
+
 	sock->state = SS_UNCONNECTED;
 
 	/* Look for the requested type/protocol pair. */
@@ -303,8 +320,7 @@
 	}
 
 	err = -EPERM;
-	if (sock->type == SOCK_RAW && !kern &&
-	    !ns_capable(net->user_ns, CAP_NET_RAW))
+	if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
 		goto out_rcu_unlock;
 
 	sock->ops = answer->ops;
@@ -397,6 +413,9 @@
 	if (sk) {
 		long timeout;
 
+#ifdef CONFIG_NETFILTER_XT_MATCH_QTAGUID
+		qtaguid_untag(sock, true);
+#endif
 		/* Applications forget to leave groups before exiting */
 		ip_mc_drop_socket(sk);
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cbe3fdb..8efc6a9 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -622,6 +622,7 @@
 	[RTA_FLOW]		= { .type = NLA_U32 },
 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
 	[RTA_ENCAP]		= { .type = NLA_NESTED },
+	[RTA_UID]		= { .type = NLA_U32 },
 };
 
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 172d3df..8a94cbc 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -425,6 +425,7 @@
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
 	fl4.flowi4_mark = mark;
+	fl4.flowi4_uid = sock_net_uid(net, NULL);
 	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
 	fl4.flowi4_proto = IPPROTO_ICMP;
 	fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
@@ -473,6 +474,7 @@
 		      param->replyopts.opt.opt.faddr : iph->saddr);
 	fl4->saddr = saddr;
 	fl4->flowi4_mark = mark;
+	fl4->flowi4_uid = sock_net_uid(net, NULL);
 	fl4->flowi4_tos = RT_TOS(tos);
 	fl4->flowi4_proto = IPPROTO_ICMP;
 	fl4->fl4_icmp_type = type;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1bcbb73..7a8c065 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -418,7 +418,7 @@
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
 			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
 			   ireq->ir_loc_addr, ireq->ir_rmt_port,
-			   htons(ireq->ir_num));
+			   htons(ireq->ir_num), sk->sk_uid);
 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
@@ -456,7 +456,7 @@
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
 			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
 			   ireq->ir_loc_addr, ireq->ir_rmt_port,
-			   htons(ireq->ir_num));
+			   htons(ireq->ir_num), sk->sk_uid);
 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 100c86f..5fcafc8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -74,6 +74,7 @@
 #include <net/checksum.h>
 #include <net/inetpeer.h>
 #include <net/lwtunnel.h>
+#include <linux/bpf-cgroup.h>
 #include <linux/igmp.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_bridge.h>
@@ -287,6 +288,13 @@
 static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	unsigned int mtu;
+	int ret;
+
+	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+	if (ret) {
+		kfree_skb(skb);
+		return ret;
+	}
 
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 	/* Policy lookup after SNAT yielded a new policy */
@@ -305,6 +313,20 @@
 	return ip_finish_output2(net, sk, skb);
 }
 
+static int ip_mc_finish_output(struct net *net, struct sock *sk,
+			       struct sk_buff *skb)
+{
+	int ret;
+
+	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+	if (ret) {
+		kfree_skb(skb);
+		return ret;
+	}
+
+	return dev_loopback_xmit(net, sk, skb);
+}
+
 int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
@@ -342,7 +364,7 @@
 			if (newskb)
 				NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
 					net, sk, newskb, NULL, newskb->dev,
-					dev_loopback_xmit);
+					ip_mc_finish_output);
 		}
 
 		/* Multicasts with ttl 0 must not go beyond the host */
@@ -358,7 +380,7 @@
 		if (newskb)
 			NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
 				net, sk, newskb, NULL, newskb->dev,
-				dev_loopback_xmit);
+				ip_mc_finish_output);
 	}
 
 	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
@@ -1600,7 +1622,8 @@
 			   RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
 			   ip_reply_arg_flowi_flags(arg),
 			   daddr, saddr,
-			   tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
+			   tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
+			   arg->uid);
 	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_key(net, &fl4);
 	if (IS_ERR(rt))
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 9adcd4b..8fa153c 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -798,7 +798,8 @@
 
 	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
 			   RT_SCOPE_UNIVERSE, sk->sk_protocol,
-			   inet_sk_flowi_flags(sk), faddr, saddr, 0, 0);
+			   inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
+			   sk->sk_uid);
 
 	security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_flow(net, &fl4, sk);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 59d8770..a860df2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -612,7 +612,7 @@
 			   hdrincl ? IPPROTO_RAW : sk->sk_protocol,
 			   inet_sk_flowi_flags(sk) |
 			    (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
-			   daddr, saddr, 0, 0);
+			   daddr, saddr, 0, 0, sk->sk_uid);
 
 	if (!hdrincl) {
 		rfv.msg = msg;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c42fb23..0bb1e3f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -510,7 +510,8 @@
 }
 EXPORT_SYMBOL(__ip_select_ident);
 
-static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
+static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
+			     const struct sock *sk,
 			     const struct iphdr *iph,
 			     int oif, u8 tos,
 			     u8 prot, u32 mark, int flow_flags)
@@ -526,19 +527,21 @@
 	flowi4_init_output(fl4, oif, mark, tos,
 			   RT_SCOPE_UNIVERSE, prot,
 			   flow_flags,
-			   iph->daddr, iph->saddr, 0, 0);
+			   iph->daddr, iph->saddr, 0, 0,
+			   sock_net_uid(net, sk));
 }
 
 static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
 			       const struct sock *sk)
 {
+	const struct net *net = dev_net(skb->dev);
 	const struct iphdr *iph = ip_hdr(skb);
 	int oif = skb->dev->ifindex;
 	u8 tos = RT_TOS(iph->tos);
 	u8 prot = iph->protocol;
 	u32 mark = skb->mark;
 
-	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
+	__build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
 }
 
 static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
@@ -555,7 +558,7 @@
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
 			   inet_sk_flowi_flags(sk),
-			   daddr, inet->inet_saddr, 0, 0);
+			   daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
 	rcu_read_unlock();
 }
 
@@ -807,6 +810,7 @@
 	struct rtable *rt;
 	struct flowi4 fl4;
 	const struct iphdr *iph = (const struct iphdr *) skb->data;
+	struct net *net = dev_net(skb->dev);
 	int oif = skb->dev->ifindex;
 	u8 tos = RT_TOS(iph->tos);
 	u8 prot = iph->protocol;
@@ -814,7 +818,7 @@
 
 	rt = (struct rtable *) dst;
 
-	__build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0);
+	__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
 	__ip_do_redirect(rt, skb, &fl4, true);
 }
 
@@ -1038,7 +1042,7 @@
 	if (!mark)
 		mark = IP4_REPLY_MARK(net, skb->mark);
 
-	__build_flow_key(&fl4, NULL, iph, oif,
+	__build_flow_key(net, &fl4, NULL, iph, oif,
 			 RT_TOS(iph->tos), protocol, mark, flow_flags);
 	rt = __ip_route_output_key(net, &fl4);
 	if (!IS_ERR(rt)) {
@@ -1054,7 +1058,7 @@
 	struct flowi4 fl4;
 	struct rtable *rt;
 
-	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+	__build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
 
 	if (!fl4.flowi4_mark)
 		fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
@@ -1073,6 +1077,7 @@
 	struct rtable *rt;
 	struct dst_entry *odst = NULL;
 	bool new = false;
+	struct net *net = sock_net(sk);
 
 	bh_lock_sock(sk);
 
@@ -1086,7 +1091,7 @@
 		goto out;
 	}
 
-	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
 
 	rt = (struct rtable *)odst;
 	if (odst->obsolete && !odst->ops->check(odst, 0)) {
@@ -1126,7 +1131,7 @@
 	struct flowi4 fl4;
 	struct rtable *rt;
 
-	__build_flow_key(&fl4, NULL, iph, oif,
+	__build_flow_key(net, &fl4, NULL, iph, oif,
 			 RT_TOS(iph->tos), protocol, mark, flow_flags);
 	rt = __ip_route_output_key(net, &fl4);
 	if (!IS_ERR(rt)) {
@@ -1141,9 +1146,10 @@
 	const struct iphdr *iph = (const struct iphdr *) skb->data;
 	struct flowi4 fl4;
 	struct rtable *rt;
+	struct net *net = sock_net(sk);
 
-	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
-	rt = __ip_route_output_key(sock_net(sk), &fl4);
+	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
+	rt = __ip_route_output_key(net, &fl4);
 	if (!IS_ERR(rt)) {
 		__ip_do_redirect(rt, skb, &fl4, false);
 		ip_rt_put(rt);
@@ -2531,6 +2537,11 @@
 	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
 		goto nla_put_failure;
 
+	if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
+	    nla_put_u32(skb, RTA_UID,
+			from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
+		goto nla_put_failure;
+
 	error = rt->dst.error;
 
 	if (rt_is_input_route(rt)) {
@@ -2583,6 +2594,7 @@
 	int mark;
 	struct sk_buff *skb;
 	u32 table_id = RT_TABLE_MAIN;
+	kuid_t uid;
 
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
 	if (err < 0)
@@ -2610,6 +2622,10 @@
 	dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
 	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
 	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
+	if (tb[RTA_UID])
+		uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
+	else
+		uid = (iif ? INVALID_UID : current_uid());
 
 	memset(&fl4, 0, sizeof(fl4));
 	fl4.daddr = dst;
@@ -2617,6 +2633,7 @@
 	fl4.flowi4_tos = rtm->rtm_tos;
 	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
 	fl4.flowi4_mark = mark;
+	fl4.flowi4_uid = uid;
 
 	if (iif) {
 		struct net_device *dev;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b596c41..b59d9cd 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -378,7 +378,7 @@
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
 			   inet_sk_flowi_flags(sk),
 			   opt->srr ? opt->faddr : ireq->ir_rmt_addr,
-			   ireq->ir_loc_addr, th->source, th->dest);
+			   ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
 	security_req_classify_flow(req, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_key(sock_net(sk), &fl4);
 	if (IS_ERR(rt)) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 024ab83..ce43dc7 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -152,6 +152,21 @@
 	return ret;
 }
 
+/* Validate changes from /proc interface. */
+static int proc_tcp_default_init_rwnd(struct ctl_table *ctl, int write,
+				      void __user *buffer,
+				      size_t *lenp, loff_t *ppos)
+{
+	int old_value = *(int *)ctl->data;
+	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+	int new_value = *(int *)ctl->data;
+
+	if (write && ret == 0 && (new_value < 3 || new_value > 100))
+		*(int *)ctl->data = old_value;
+
+	return ret;
+}
+
 static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
 				       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -633,6 +648,13 @@
 		.proc_handler	= proc_dointvec_ms_jiffies,
 	},
 	{
+		.procname       = "tcp_default_init_rwnd",
+		.data           = &sysctl_tcp_default_init_rwnd,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = proc_tcp_default_init_rwnd
+	},
+	{
 		.procname	= "icmp_msgs_per_sec",
 		.data		= &sysctl_icmp_msgs_per_sec,
 		.maxlen		= sizeof(int),
diff --git a/net/ipv4/sysfs_net_ipv4.c b/net/ipv4/sysfs_net_ipv4.c
new file mode 100644
index 0000000..0cbbf10
--- /dev/null
+++ b/net/ipv4/sysfs_net_ipv4.c
@@ -0,0 +1,88 @@
+/*
+ * net/ipv4/sysfs_net_ipv4.c
+ *
+ * sysfs-based networking knobs (so we can, unlike with sysctl, control perms)
+ *
+ * Copyright (C) 2008 Google, Inc.
+ *
+ * Robert Love <rlove@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <net/tcp.h>
+
+#define CREATE_IPV4_FILE(_name, _var) \
+static ssize_t _name##_show(struct kobject *kobj, \
+			    struct kobj_attribute *attr, char *buf) \
+{ \
+	return sprintf(buf, "%d\n", _var); \
+} \
+static ssize_t _name##_store(struct kobject *kobj, \
+			     struct kobj_attribute *attr, \
+			     const char *buf, size_t count) \
+{ \
+	int val, ret; \
+	ret = sscanf(buf, "%d", &val); \
+	if (ret != 1) \
+		return -EINVAL; \
+	if (val < 0) \
+		return -EINVAL; \
+	_var = val; \
+	return count; \
+} \
+static struct kobj_attribute _name##_attr = \
+	__ATTR(_name, 0644, _name##_show, _name##_store)
+
+CREATE_IPV4_FILE(tcp_wmem_min, sysctl_tcp_wmem[0]);
+CREATE_IPV4_FILE(tcp_wmem_def, sysctl_tcp_wmem[1]);
+CREATE_IPV4_FILE(tcp_wmem_max, sysctl_tcp_wmem[2]);
+
+CREATE_IPV4_FILE(tcp_rmem_min, sysctl_tcp_rmem[0]);
+CREATE_IPV4_FILE(tcp_rmem_def, sysctl_tcp_rmem[1]);
+CREATE_IPV4_FILE(tcp_rmem_max, sysctl_tcp_rmem[2]);
+
+static struct attribute *ipv4_attrs[] = {
+	&tcp_wmem_min_attr.attr,
+	&tcp_wmem_def_attr.attr,
+	&tcp_wmem_max_attr.attr,
+	&tcp_rmem_min_attr.attr,
+	&tcp_rmem_def_attr.attr,
+	&tcp_rmem_max_attr.attr,
+	NULL
+};
+
+static struct attribute_group ipv4_attr_group = {
+	.attrs = ipv4_attrs,
+};
+
+static __init int sysfs_ipv4_init(void)
+{
+	struct kobject *ipv4_kobject;
+	int ret;
+
+	ipv4_kobject = kobject_create_and_add("ipv4", kernel_kobj);
+	if (!ipv4_kobject)
+		return -ENOMEM;
+
+	ret = sysfs_create_group(ipv4_kobject, &ipv4_attr_group);
+	if (ret) {
+		kobject_put(ipv4_kobject);
+		return ret;
+	}
+
+	return 0;
+}
+
+subsys_initcall(sysfs_ipv4_init);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cd4f13dd..471b0b5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -100,6 +100,7 @@
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
 int sysctl_tcp_early_retrans __read_mostly = 3;
 int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
+int sysctl_tcp_default_init_rwnd __read_mostly = TCP_INIT_CWND * 2;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 82c1064..877a90e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -696,6 +696,7 @@
 		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
 
 	arg.tos = ip_hdr(skb)->tos;
+	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
 	local_bh_disable();
 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -716,7 +717,7 @@
    outside socket context is ugly, certainly. What can I do?
  */
 
-static void tcp_v4_send_ack(struct net *net,
+static void tcp_v4_send_ack(const struct sock *sk,
 			    struct sk_buff *skb, u32 seq, u32 ack,
 			    u32 win, u32 tsval, u32 tsecr, int oif,
 			    struct tcp_md5sig_key *key,
@@ -731,6 +732,7 @@
 #endif
 			];
 	} rep;
+	struct net *net = sock_net(sk);
 	struct ip_reply_arg arg;
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
@@ -780,6 +782,7 @@
 	if (oif)
 		arg.bound_dev_if = oif;
 	arg.tos = tos;
+	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
 	local_bh_disable();
 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -795,7 +798,7 @@
 	struct inet_timewait_sock *tw = inet_twsk(sk);
 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
-	tcp_v4_send_ack(sock_net(sk), skb,
+	tcp_v4_send_ack(sk, skb,
 			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 			tcp_time_stamp + tcptw->tw_ts_offset,
@@ -823,7 +826,7 @@
 	 * exception of <SYN> segments, MUST be right-shifted by
 	 * Rcv.Wind.Shift bits:
 	 */
-	tcp_v4_send_ack(sock_net(sk), skb, seq,
+	tcp_v4_send_ack(sk, skb, seq,
 			tcp_rsk(req)->rcv_nxt,
 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
 			tcp_time_stamp,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6f35cdd..5d88592 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -193,7 +193,7 @@
 	 * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a
 	 * limit when mss is larger than 1460.
 	 */
-	u32 init_rwnd = TCP_INIT_CWND * 2;
+	u32 init_rwnd = sysctl_tcp_default_init_rwnd;
 
 	if (mss > 1460)
 		init_rwnd = max((1460 * init_rwnd) / mss, 2U);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5d4b5e0..bff1745 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1018,7 +1018,8 @@
 		flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
 				   RT_SCOPE_UNIVERSE, sk->sk_protocol,
 				   flow_flags,
-				   faddr, saddr, dport, inet->inet_sport);
+				   faddr, saddr, dport, inet->inet_sport,
+				   sk->sk_uid);
 
 		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
 		rt = ip_route_output_flow(net, fl4, sk);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4ce7f91..6ba6a5d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -223,9 +223,11 @@
 	.accept_ra_rtr_pref	= 1,
 	.rtr_probe_interval	= 60 * HZ,
 #ifdef CONFIG_IPV6_ROUTE_INFO
+	.accept_ra_rt_info_min_plen = 0,
 	.accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
+	.accept_ra_rt_table	= 0,
 	.proxy_ndp		= 0,
 	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 	.disable_ipv6		= 0,
@@ -269,9 +271,11 @@
 	.accept_ra_rtr_pref	= 1,
 	.rtr_probe_interval	= 60 * HZ,
 #ifdef CONFIG_IPV6_ROUTE_INFO
+	.accept_ra_rt_info_min_plen = 0,
 	.accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
+	.accept_ra_rt_table	= 0,
 	.proxy_ndp		= 0,
 	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 	.disable_ipv6		= 0,
@@ -2204,6 +2208,31 @@
 		ipv6_regen_rndid(idev);
 }
 
+u32 addrconf_rt_table(const struct net_device *dev, u32 default_table) {
+	/* Determines into what table to put autoconf PIO/RIO/default routes
+	 * learned on this device.
+	 *
+	 * - If 0, use the same table for every device. This puts routes into
+	 *   one of RT_TABLE_{PREFIX,INFO,DFLT} depending on the type of route
+	 *   (but note that these three are currently all equal to
+	 *   RT6_TABLE_MAIN).
+	 * - If > 0, use the specified table.
+	 * - If < 0, put routes into table dev->ifindex + (-rt_table).
+	 */
+	struct inet6_dev *idev = in6_dev_get(dev);
+	u32 table;
+	int sysctl = idev->cnf.accept_ra_rt_table;
+	if (sysctl == 0) {
+		table = default_table;
+	} else if (sysctl > 0) {
+		table = (u32) sysctl;
+	} else {
+		table = (unsigned) dev->ifindex + (-sysctl);
+	}
+	in6_dev_put(idev);
+	return table;
+}
+
 /*
  *	Add prefix route.
  */
@@ -2213,7 +2242,7 @@
 		      unsigned long expires, u32 flags)
 {
 	struct fib6_config cfg = {
-		.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
+		.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_PREFIX),
 		.fc_metric = IP6_RT_PRIO_ADDRCONF,
 		.fc_ifindex = dev->ifindex,
 		.fc_expires = expires,
@@ -2246,7 +2275,7 @@
 	struct fib6_node *fn;
 	struct rt6_info *rt = NULL;
 	struct fib6_table *table;
-	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
+	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_PREFIX);
 
 	table = fib6_get_table(dev_net(dev), tb_id);
 	if (!table)
@@ -4958,9 +4987,11 @@
 	array[DEVCONF_RTR_PROBE_INTERVAL] =
 		jiffies_to_msecs(cnf->rtr_probe_interval);
 #ifdef CONFIG_IPV6_ROUTE_INFO
+	array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen;
 	array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
 #endif
 #endif
+	array[DEVCONF_ACCEPT_RA_RT_TABLE] = cnf->accept_ra_rt_table;
 	array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
 	array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@ -5933,6 +5964,13 @@
 	},
 #ifdef CONFIG_IPV6_ROUTE_INFO
 	{
+		.procname	= "accept_ra_rt_info_min_plen",
+		.data		= &ipv6_devconf.accept_ra_rt_info_min_plen,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
 		.procname	= "accept_ra_rt_info_max_plen",
 		.data		= &ipv6_devconf.accept_ra_rt_info_max_plen,
 		.maxlen		= sizeof(int),
@@ -5942,6 +5980,13 @@
 #endif
 #endif
 	{
+		.procname	= "accept_ra_rt_table",
+		.data		= &ipv6_devconf.accept_ra_rt_table,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
 		.procname	= "proxy_ndp",
 		.data		= &ipv6_devconf.proxy_ndp,
 		.maxlen		= sizeof(int),
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8885dba..ef7c08b 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -65,6 +65,20 @@
 #include <asm/uaccess.h>
 #include <linux/mroute6.h>
 
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+#include <linux/android_aid.h>
+
+static inline int current_has_network(void)
+{
+	return in_egroup_p(AID_INET) || capable(CAP_NET_RAW);
+}
+#else
+static inline int current_has_network(void)
+{
+	return 1;
+}
+#endif
+
 #include "ip6_offload.h"
 
 MODULE_AUTHOR("Cast of dozens");
@@ -121,6 +135,9 @@
 	if (protocol < 0 || protocol >= IPPROTO_MAX)
 		return -EINVAL;
 
+	if (!current_has_network())
+		return -EACCES;
+
 	/* Look for the requested type/protocol pair. */
 lookup_protocol:
 	err = -ESOCKTNOSUPPORT;
@@ -167,8 +184,7 @@
 	}
 
 	err = -EPERM;
-	if (sock->type == SOCK_RAW && !kern &&
-	    !ns_capable(net->user_ns, CAP_NET_RAW))
+	if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
 		goto out_rcu_unlock;
 
 	sock->ops = answer->ops;
@@ -695,6 +711,7 @@
 		fl6.flowi6_mark = sk->sk_mark;
 		fl6.fl6_dport = inet->inet_dport;
 		fl6.fl6_sport = inet->inet_sport;
+		fl6.flowi6_uid = sk->sk_uid;
 		security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
 		rcu_read_lock();
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 0edc44c..e742c4d 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -666,9 +666,10 @@
 		return 0;
 
 	if (type == NDISC_REDIRECT)
-		ip6_redirect(skb, net, skb->dev->ifindex, 0);
+		ip6_redirect(skb, net, skb->dev->ifindex, 0,
+			     sock_net_uid(net, NULL));
 	else
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
 	xfrm_state_put(x);
 
 	return 0;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 956af11..129b7a8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -54,6 +54,7 @@
 	fl6->fl6_dport = inet->inet_dport;
 	fl6->fl6_sport = inet->inet_sport;
 	fl6->flowlabel = np->flow_label;
+	fl6->flowi6_uid = sk->sk_uid;
 
 	if (!fl6->flowi6_oif)
 		fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6a924be..44a2010 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -478,9 +478,10 @@
 		return 0;
 
 	if (type == NDISC_REDIRECT)
-		ip6_redirect(skb, net, skb->dev->ifindex, 0);
+		ip6_redirect(skb, net, skb->dev->ifindex, 0,
+			     sock_net_uid(net, NULL));
 	else
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
 	xfrm_state_put(x);
 
 	return 0;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 305e2ed..477692f 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -166,15 +166,15 @@
  * to explore inner IPv6 header, eg. ICMPv6 error messages.
  *
  * If target header is found, its offset is set in *offset and return protocol
- * number. Otherwise, return -1.
+ * number. Otherwise, return -ENOENT or -EBADMSG.
  *
  * If the first fragment doesn't contain the final protocol header or
  * NEXTHDR_NONE it is considered invalid.
  *
  * Note that non-1st fragment is special case that "the protocol number
  * of last header" is "next header" field in Fragment header. In this case,
- * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
- * isn't NULL.
+ * *offset is meaningless. If fragoff is not NULL, the fragment offset is
+ * stored in *fragoff; if it is NULL, return -EINVAL.
  *
  * if flags is not NULL and it's a fragment, then the frag flag
  * IP6_FH_F_FRAG will be set. If it's an AH header, the
@@ -253,9 +253,12 @@
 				if (target < 0 &&
 				    ((!ipv6_ext_hdr(hp->nexthdr)) ||
 				     hp->nexthdr == NEXTHDR_NONE)) {
-					if (fragoff)
+					if (fragoff) {
 						*fragoff = _frag_off;
-					return hp->nexthdr;
+						return hp->nexthdr;
+					} else {
+						return -EINVAL;
+					}
 				}
 				if (!found)
 					return -ENOENT;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 2772004..17fa28f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -92,9 +92,10 @@
 	struct net *net = dev_net(skb->dev);
 
 	if (type == ICMPV6_PKT_TOOBIG)
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
 	else if (type == NDISC_REDIRECT)
-		ip6_redirect(skb, net, skb->dev->ifindex, 0);
+		ip6_redirect(skb, net, skb->dev->ifindex, 0,
+			     sock_net_uid(net, NULL));
 
 	if (!(type & ICMPV6_INFOMSG_MASK))
 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
@@ -486,6 +487,7 @@
 	fl6.flowi6_oif = iif;
 	fl6.fl6_icmp_type = type;
 	fl6.fl6_icmp_code = code;
+	fl6.flowi6_uid = sock_net_uid(net, NULL);
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
@@ -660,6 +662,7 @@
 	fl6.flowi6_oif = skb->dev->ifindex;
 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 	fl6.flowi6_mark = mark;
+	fl6.flowi6_uid = sock_net_uid(net, NULL);
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 798a095..10d1deb 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -88,6 +88,7 @@
 	fl6->flowi6_mark = ireq->ir_mark;
 	fl6->fl6_dport = ireq->ir_rmt_port;
 	fl6->fl6_sport = htons(ireq->ir_num);
+	fl6->flowi6_uid = sk->sk_uid;
 	security_req_classify_flow(req, flowi6_to_flowi(fl6));
 
 	dst = ip6_dst_lookup_flow(sk, fl6, final_p);
@@ -136,6 +137,7 @@
 	fl6->flowi6_mark = sk->sk_mark;
 	fl6->fl6_sport = inet->inet_sport;
 	fl6->fl6_dport = inet->inet_dport;
+	fl6->flowi6_uid = sk->sk_uid;
 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
 
 	rcu_read_lock();
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index caee553..a88aff0 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -562,6 +562,8 @@
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
 		fl6.flowi6_mark = skb->mark;
 
+	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
 	err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
 	if (err)
 		return -1;
@@ -621,6 +623,8 @@
 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
 		fl6.flowi6_mark = skb->mark;
 
+	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
 	if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
 		return -1;
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b723987..f8f64e0 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -39,6 +39,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 
+#include <linux/bpf-cgroup.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
 
@@ -66,9 +67,6 @@
 	struct in6_addr *nexthop;
 	int ret;
 
-	skb->protocol = htons(ETH_P_IPV6);
-	skb->dev = dev;
-
 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
@@ -131,6 +129,14 @@
 
 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+	int ret;
+
+	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+	if (ret) {
+		kfree_skb(skb);
+		return ret;
+	}
+
 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 	    dst_allfrag(skb_dst(skb)) ||
 	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
@@ -144,6 +150,9 @@
 	struct net_device *dev = skb_dst(skb)->dev;
 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+
 	if (unlikely(idev->cnf.disable_ipv6)) {
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 		kfree_skb(skb);
@@ -866,7 +875,6 @@
 	if (skb->sk && dst_allfrag(skb_dst(skb)))
 		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 
-	skb->dev = skb_dst(skb)->dev;
 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 	err = -EMSGSIZE;
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index f89516d..8855e89 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1275,6 +1275,8 @@
 			fl6.flowi6_mark = skb->mark;
 	}
 
+	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
@@ -1362,6 +1364,8 @@
 			fl6.flowi6_mark = skb->mark;
 	}
 
+	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index c2b2ee7..c88505d 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -618,9 +618,10 @@
 		return 0;
 
 	if (type == NDISC_REDIRECT)
-		ip6_redirect(skb, net, skb->dev->ifindex, 0);
+		ip6_redirect(skb, net, skb->dev->ifindex, 0,
+			     sock_net_uid(net, NULL));
 	else
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
 	xfrm_state_put(x);
 
 	return 0;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 1b9316e..54d165b 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -74,9 +74,10 @@
 		return 0;
 
 	if (type == NDISC_REDIRECT)
-		ip6_redirect(skb, net, skb->dev->ifindex, 0);
+		ip6_redirect(skb, net, skb->dev->ifindex, 0,
+			     sock_net_uid(net, NULL));
 	else
-		ip6_update_pmtu(skb, net, info, 0, 0);
+		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
 	xfrm_state_put(x);
 
 	return 0;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 505d048f..14750ba 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1395,6 +1395,8 @@
 			if (ri->prefix_len == 0 &&
 			    !in6_dev->cnf.accept_ra_defrtr)
 				continue;
+			if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
+				continue;
 			if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
 				continue;
 			rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d11c468..39970e2 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -26,6 +26,7 @@
 	struct flowi6 fl6 = {
 		.flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
 		.flowi6_mark = skb->mark,
+		.flowi6_uid = sock_net_uid(net, skb->sk),
 		.daddr = iph->daddr,
 		.saddr = iph->saddr,
 	};
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 9828681..2a965d4 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -113,6 +113,7 @@
 	fl6.daddr = *daddr;
 	fl6.flowi6_oif = oif;
 	fl6.flowi6_mark = sk->sk_mark;
+	fl6.flowi6_uid = sk->sk_uid;
 	fl6.fl6_icmp_type = user_icmph.icmp6_type;
 	fl6.fl6_icmp_code = user_icmph.icmp6_code;
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a4f979f..55d284a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -791,6 +791,7 @@
 	memset(&fl6, 0, sizeof(fl6));
 
 	fl6.flowi6_mark = sk->sk_mark;
+	fl6.flowi6_uid = sk->sk_uid;
 
 	ipc6.hlimit = -1;
 	ipc6.tclass = -1;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 27c93ba..daf5d28 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1417,7 +1417,7 @@
 }
 
 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
-		     int oif, u32 mark)
+		     int oif, u32 mark, kuid_t uid)
 {
 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
 	struct dst_entry *dst;
@@ -1429,6 +1429,7 @@
 	fl6.daddr = iph->daddr;
 	fl6.saddr = iph->saddr;
 	fl6.flowlabel = ip6_flowinfo(iph);
+	fl6.flowi6_uid = uid;
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
@@ -1445,7 +1446,7 @@
 	if (!oif && skb->dev)
 		oif = l3mdev_master_ifindex(skb->dev);
 
-	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark);
+	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
 
 	dst = __sk_dst_get(sk);
 	if (!dst || !dst->obsolete ||
@@ -1537,7 +1538,8 @@
 				flags, __ip6_route_redirect);
 }
 
-void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
+void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
+		  kuid_t uid)
 {
 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
 	struct dst_entry *dst;
@@ -1550,6 +1552,7 @@
 	fl6.daddr = iph->daddr;
 	fl6.saddr = iph->saddr;
 	fl6.flowlabel = ip6_flowinfo(iph);
+	fl6.flowi6_uid = uid;
 
 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
 	rt6_do_redirect(dst, NULL, skb);
@@ -1571,6 +1574,7 @@
 	fl6.flowi6_mark = mark;
 	fl6.daddr = msg->dest;
 	fl6.saddr = iph->daddr;
+	fl6.flowi6_uid = sock_net_uid(net, NULL);
 
 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
 	rt6_do_redirect(dst, NULL, skb);
@@ -1579,7 +1583,8 @@
 
 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
 {
-	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
+	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
+		     sk->sk_uid);
 }
 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
 
@@ -2354,8 +2359,7 @@
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev)
 {
-	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
-	int ifindex = dev->ifindex;
+	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
 	struct fib6_node *fn;
 	struct rt6_info *rt = NULL;
 	struct fib6_table *table;
@@ -2370,7 +2374,7 @@
 		goto out;
 
 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
-		if (rt->dst.dev->ifindex != ifindex)
+		if (rt->dst.dev->ifindex != dev->ifindex)
 			continue;
 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
 			continue;
@@ -2401,7 +2405,7 @@
 		.fc_nlinfo.nl_net = net,
 	};
 
-	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
+	cfg.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO),
 	cfg.fc_dst = *prefix;
 	cfg.fc_gateway = *gwaddr;
 
@@ -2417,7 +2421,7 @@
 
 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
 {
-	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
+	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_MAIN);
 	struct rt6_info *rt;
 	struct fib6_table *table;
 
@@ -2443,7 +2447,7 @@
 				     unsigned int pref)
 {
 	struct fib6_config cfg = {
-		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
+		.fc_table	= l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
 		.fc_metric	= IP6_RT_PRIO_USER,
 		.fc_ifindex	= dev->ifindex,
 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
@@ -2466,43 +2470,16 @@
 	return rt6_get_dflt_router(gwaddr, dev);
 }
 
-static void __rt6_purge_dflt_routers(struct fib6_table *table)
-{
-	struct rt6_info *rt;
-
-restart:
-	read_lock_bh(&table->tb6_lock);
-	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
-		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
-		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
-			dst_hold(&rt->dst);
-			read_unlock_bh(&table->tb6_lock);
-			ip6_del_rt(rt);
-			goto restart;
-		}
-	}
-	read_unlock_bh(&table->tb6_lock);
-
-	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
+int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
+	if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
+	    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
+		return -1;
+	return 0;
 }
 
 void rt6_purge_dflt_routers(struct net *net)
 {
-	struct fib6_table *table;
-	struct hlist_head *head;
-	unsigned int h;
-
-	rcu_read_lock();
-
-	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
-		head = &net->ipv6.fib_table_hash[h];
-		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
-			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
-				__rt6_purge_dflt_routers(table);
-		}
-	}
-
-	rcu_read_unlock();
+	fib6_clean_all(net, rt6_addrconf_purge, NULL);
 }
 
 static void rtmsg_to_fib6_config(struct net *net,
@@ -2821,6 +2798,7 @@
 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
 	[RTA_ENCAP]		= { .type = NLA_NESTED },
 	[RTA_EXPIRES]		= { .type = NLA_U32 },
+	[RTA_UID]		= { .type = NLA_U32 },
 	[RTA_TABLE]		= { .type = NLA_U32 },
 };
 
@@ -3401,6 +3379,12 @@
 	if (tb[RTA_MARK])
 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
 
+	if (tb[RTA_UID])
+		fl6.flowi6_uid = make_kuid(current_user_ns(),
+					   nla_get_u32(tb[RTA_UID]));
+	else
+		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
+
 	if (iif) {
 		struct net_device *dev;
 		int flags = 0;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 7a86433d..a67174e 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -228,6 +228,7 @@
 		fl6.flowi6_mark = ireq->ir_mark;
 		fl6.fl6_dport = ireq->ir_rmt_port;
 		fl6.fl6_sport = inet_sk(sk)->inet_sport;
+		fl6.flowi6_uid = sk->sk_uid;
 		security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
 		dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4953466..e8f3a386 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -238,6 +238,7 @@
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.fl6_dport = usin->sin6_port;
 	fl6.fl6_sport = inet->inet_sport;
+	fl6.flowi6_uid = sk->sk_uid;
 
 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
 	final_p = fl6_update_dst(&fl6, opt, &final);
@@ -835,6 +836,7 @@
 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
 	fl6.fl6_dport = t1->dest;
 	fl6.fl6_sport = t1->source;
+	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	/* Pass a socket to ip6_dst_lookup either it is for RST
@@ -1240,9 +1242,6 @@
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_do_rcv(sk, skb);
 
-	if (tcp_filter(sk, skb))
-		goto discard;
-
 	/*
 	 *	socket locking is here for SMP purposes as backlog rcv
 	 *	is currently called with bh processing disabled.
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4db5f54..c43ef0c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1162,6 +1162,7 @@
 		fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
 
 	fl6.flowi6_mark = sk->sk_mark;
+	fl6.flowi6_uid = sk->sk_uid;
 
 	if (msg->msg_controllen) {
 		opt = &opt_space;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 8d412b9..8b825d6 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -529,6 +529,7 @@
 	memset(&fl6, 0, sizeof(fl6));
 
 	fl6.flowi6_mark = sk->sk_mark;
+	fl6.flowi6_uid = sk->sk_uid;
 
 	ipc6.hlimit = -1;
 	ipc6.tclass = -1;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e8d56d9..177d3ae 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1316,6 +1316,8 @@
 	based on who created the socket: the user or group. It is also
 	possible to check whether a socket actually exists.
 
+	Conflicts with '"quota, tag, uid" match'
+
 config NETFILTER_XT_MATCH_POLICY
 	tristate 'IPsec "policy" match support'
 	depends on XFRM
@@ -1349,6 +1351,22 @@
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_QTAGUID
+	bool '"quota, tag, owner" match and stats support'
+        depends on NETFILTER_XT_MATCH_SOCKET
+	depends on NETFILTER_XT_MATCH_OWNER=n
+	help
+	  This option replaces the `owner' match. In addition to matching
+	  on uid, it keeps stats based on a tag assigned to a socket.
+	  The full tag is comprised of a UID and an accounting tag.
+	  The tags are assignable to sockets from user space (e.g. a download
+	  manager can assign the socket to another UID for accounting).
+	  Stats and control are done via /proc/net/xt_qtaguid/.
+	  It replaces owner as it takes the same arguments, but should
+	  really be recognized by the iptables tool.
+
+	  If unsure, say `N'.
+
 config NETFILTER_XT_MATCH_QUOTA
 	tristate '"quota" match support'
 	depends on NETFILTER_ADVANCED
@@ -1359,6 +1377,29 @@
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_QUOTA2
+	tristate '"quota2" match support'
+	depends on NETFILTER_ADVANCED
+	help
+	  This option adds a `quota2' match, which allows to match on a
+	  byte counter correctly and not per CPU.
+	  It allows naming the quotas.
+	  This is based on http://xtables-addons.git.sourceforge.net
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_QUOTA2_LOG
+	bool '"quota2" Netfilter LOG support'
+	depends on NETFILTER_XT_MATCH_QUOTA2
+	default n
+	help
+	  This option allows `quota2' to log ONCE when a quota limit
+	  is passed. It logs via NETLINK using the NETLINK_NFLOG family.
+	  It logs similarly to how ipt_ULOG would without data.
+
+	  If unsure, say `N'.
+
 config NETFILTER_XT_MATCH_RATEEST
 	tristate '"rateest" match support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c23c3c8..54ba5aa 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -167,7 +167,9 @@
 obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_QTAGUID) += xt_qtaguid_print.o xt_qtaguid.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA2) += xt_quota2.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 921c9bd..44c42a7 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -5,6 +5,7 @@
  * After timer expires a kevent will be sent.
  *
  * Copyright (C) 2004, 2010 Nokia Corporation
+ *
  * Written by Timo Teras <ext-timo.teras@nokia.com>
  *
  * Converted to x_tables and reworked for upstream inclusion
@@ -38,8 +39,17 @@
 #include <linux/netfilter/xt_IDLETIMER.h>
 #include <linux/kdev_t.h>
 #include <linux/kobject.h>
+#include <linux/skbuff.h>
 #include <linux/workqueue.h>
 #include <linux/sysfs.h>
+#include <linux/rtc.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/suspend.h>
+#include <linux/notifier.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
 
 struct idletimer_tg_attr {
 	struct attribute attr;
@@ -55,14 +65,110 @@
 	struct kobject *kobj;
 	struct idletimer_tg_attr attr;
 
+	struct timespec delayed_timer_trigger;
+	struct timespec last_modified_timer;
+	struct timespec last_suspend_time;
+	struct notifier_block pm_nb;
+
+	int timeout;
 	unsigned int refcnt;
+	bool work_pending;
+	bool send_nl_msg;
+	bool active;
+	uid_t uid;
 };
 
 static LIST_HEAD(idletimer_tg_list);
 static DEFINE_MUTEX(list_mutex);
+static DEFINE_SPINLOCK(timestamp_lock);
 
 static struct kobject *idletimer_tg_kobj;
 
+static bool check_for_delayed_trigger(struct idletimer_tg *timer,
+		struct timespec *ts)
+{
+	bool state;
+	struct timespec temp;
+	spin_lock_bh(&timestamp_lock);
+	timer->work_pending = false;
+	if ((ts->tv_sec - timer->last_modified_timer.tv_sec) > timer->timeout ||
+			timer->delayed_timer_trigger.tv_sec != 0) {
+		state = false;
+		temp.tv_sec = timer->timeout;
+		temp.tv_nsec = 0;
+		if (timer->delayed_timer_trigger.tv_sec != 0) {
+			temp = timespec_add(timer->delayed_timer_trigger, temp);
+			ts->tv_sec = temp.tv_sec;
+			ts->tv_nsec = temp.tv_nsec;
+			timer->delayed_timer_trigger.tv_sec = 0;
+			timer->work_pending = true;
+			schedule_work(&timer->work);
+		} else {
+			temp = timespec_add(timer->last_modified_timer, temp);
+			ts->tv_sec = temp.tv_sec;
+			ts->tv_nsec = temp.tv_nsec;
+		}
+	} else {
+		state = timer->active;
+	}
+	spin_unlock_bh(&timestamp_lock);
+	return state;
+}
+
+static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer)
+{
+	char iface_msg[NLMSG_MAX_SIZE];
+	char state_msg[NLMSG_MAX_SIZE];
+	char timestamp_msg[NLMSG_MAX_SIZE];
+	char uid_msg[NLMSG_MAX_SIZE];
+	char *envp[] = { iface_msg, state_msg, timestamp_msg, uid_msg, NULL };
+	int res;
+	struct timespec ts;
+	uint64_t time_ns;
+	bool state;
+
+	res = snprintf(iface_msg, NLMSG_MAX_SIZE, "INTERFACE=%s",
+		       iface);
+	if (NLMSG_MAX_SIZE <= res) {
+		pr_err("message too long (%d)", res);
+		return;
+	}
+
+	get_monotonic_boottime(&ts);
+	state = check_for_delayed_trigger(timer, &ts);
+	res = snprintf(state_msg, NLMSG_MAX_SIZE, "STATE=%s",
+			state ? "active" : "inactive");
+
+	if (NLMSG_MAX_SIZE <= res) {
+		pr_err("message too long (%d)", res);
+		return;
+	}
+
+	if (state) {
+		res = snprintf(uid_msg, NLMSG_MAX_SIZE, "UID=%u", timer->uid);
+		if (NLMSG_MAX_SIZE <= res)
+			pr_err("message too long (%d)", res);
+	} else {
+		res = snprintf(uid_msg, NLMSG_MAX_SIZE, "UID=");
+		if (NLMSG_MAX_SIZE <= res)
+			pr_err("message too long (%d)", res);
+	}
+
+	time_ns = timespec_to_ns(&ts);
+	res = snprintf(timestamp_msg, NLMSG_MAX_SIZE, "TIME_NS=%llu", time_ns);
+	if (NLMSG_MAX_SIZE <= res) {
+		timestamp_msg[0] = '\0';
+		pr_err("message too long (%d)", res);
+	}
+
+	pr_debug("putting nlmsg: <%s> <%s> <%s> <%s>\n", iface_msg, state_msg,
+		 timestamp_msg, uid_msg);
+	kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp);
+	return;
+
+
+}
+
 static
 struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
 {
@@ -83,6 +189,7 @@
 {
 	struct idletimer_tg *timer;
 	unsigned long expires = 0;
+	unsigned long now = jiffies;
 
 	mutex_lock(&list_mutex);
 
@@ -92,11 +199,15 @@
 
 	mutex_unlock(&list_mutex);
 
-	if (time_after(expires, jiffies))
+	if (time_after(expires, now))
 		return sprintf(buf, "%u\n",
-			       jiffies_to_msecs(expires - jiffies) / 1000);
+			       jiffies_to_msecs(expires - now) / 1000);
 
-	return sprintf(buf, "0\n");
+	if (timer->send_nl_msg)
+		return sprintf(buf, "0 %d\n",
+			jiffies_to_msecs(now - expires) / 1000);
+	else
+		return sprintf(buf, "0\n");
 }
 
 static void idletimer_tg_work(struct work_struct *work)
@@ -105,6 +216,9 @@
 						  work);
 
 	sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
+
+	if (timer->send_nl_msg)
+		notify_netlink_uevent(timer->attr.attr.name, timer);
 }
 
 static void idletimer_tg_expired(unsigned long data)
@@ -112,8 +226,55 @@
 	struct idletimer_tg *timer = (struct idletimer_tg *) data;
 
 	pr_debug("timer %s expired\n", timer->attr.attr.name);
-
+	spin_lock_bh(&timestamp_lock);
+	timer->active = false;
+	timer->work_pending = true;
 	schedule_work(&timer->work);
+	spin_unlock_bh(&timestamp_lock);
+}
+
+static int idletimer_resume(struct notifier_block *notifier,
+		unsigned long pm_event, void *unused)
+{
+	struct timespec ts;
+	unsigned long time_diff, now = jiffies;
+	struct idletimer_tg *timer = container_of(notifier,
+			struct idletimer_tg, pm_nb);
+	if (!timer)
+		return NOTIFY_DONE;
+	switch (pm_event) {
+	case PM_SUSPEND_PREPARE:
+		get_monotonic_boottime(&timer->last_suspend_time);
+		break;
+	case PM_POST_SUSPEND:
+		spin_lock_bh(&timestamp_lock);
+		if (!timer->active) {
+			spin_unlock_bh(&timestamp_lock);
+			break;
+		}
+		/* since jiffies are not updated when suspended now represents
+		 * the time it would have suspended */
+		if (time_after(timer->timer.expires, now)) {
+			get_monotonic_boottime(&ts);
+			ts = timespec_sub(ts, timer->last_suspend_time);
+			time_diff = timespec_to_jiffies(&ts);
+			if (timer->timer.expires > (time_diff + now)) {
+				mod_timer_pending(&timer->timer,
+						(timer->timer.expires - time_diff));
+			} else {
+				del_timer(&timer->timer);
+				timer->timer.expires = 0;
+				timer->active = false;
+				timer->work_pending = true;
+				schedule_work(&timer->work);
+			}
+		}
+		spin_unlock_bh(&timestamp_lock);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
 }
 
 static int idletimer_check_sysfs_name(const char *name, unsigned int size)
@@ -166,6 +327,21 @@
 	setup_timer(&info->timer->timer, idletimer_tg_expired,
 		    (unsigned long) info->timer);
 	info->timer->refcnt = 1;
+	info->timer->send_nl_msg = (info->send_nl_msg == 0) ? false : true;
+	info->timer->active = true;
+	info->timer->timeout = info->timeout;
+
+	info->timer->delayed_timer_trigger.tv_sec = 0;
+	info->timer->delayed_timer_trigger.tv_nsec = 0;
+	info->timer->work_pending = false;
+	info->timer->uid = 0;
+	get_monotonic_boottime(&info->timer->last_modified_timer);
+
+	info->timer->pm_nb.notifier_call = idletimer_resume;
+	ret = register_pm_notifier(&info->timer->pm_nb);
+	if (ret)
+		printk(KERN_WARNING "[%s] Failed to register pm notifier %d\n",
+				__func__, ret);
 
 	INIT_WORK(&info->timer->work, idletimer_tg_work);
 
@@ -182,6 +358,42 @@
 	return ret;
 }
 
+static void reset_timer(const struct idletimer_tg_info *info,
+			struct sk_buff *skb)
+{
+	unsigned long now = jiffies;
+	struct idletimer_tg *timer = info->timer;
+	bool timer_prev;
+
+	spin_lock_bh(&timestamp_lock);
+	timer_prev = timer->active;
+	timer->active = true;
+	/* timer_prev is used to guard overflow problem in time_before*/
+	if (!timer_prev || time_before(timer->timer.expires, now)) {
+		pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n",
+				timer->timer.expires, now);
+
+		/* Stores the uid resposible for waking up the radio */
+		if (skb && (skb->sk)) {
+			timer->uid = from_kuid_munged(current_user_ns(),
+					sock_i_uid(skb_to_full_sk(skb)));
+		}
+
+		/* checks if there is a pending inactive notification*/
+		if (timer->work_pending)
+			timer->delayed_timer_trigger = timer->last_modified_timer;
+		else {
+			timer->work_pending = true;
+			schedule_work(&timer->work);
+		}
+	}
+
+	get_monotonic_boottime(&timer->last_modified_timer);
+	mod_timer(&timer->timer,
+			msecs_to_jiffies(info->timeout * 1000) + now);
+	spin_unlock_bh(&timestamp_lock);
+}
+
 /*
  * The actual xt_tables plugin.
  */
@@ -189,15 +401,23 @@
 					 const struct xt_action_param *par)
 {
 	const struct idletimer_tg_info *info = par->targinfo;
+	unsigned long now = jiffies;
 
 	pr_debug("resetting timer %s, timeout period %u\n",
 		 info->label, info->timeout);
 
 	BUG_ON(!info->timer);
 
-	mod_timer(&info->timer->timer,
-		  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+	info->timer->active = true;
 
+	if (time_before(info->timer->timer.expires, now)) {
+		schedule_work(&info->timer->work);
+		pr_debug("Starting timer %s (Expired, Jiffies): %lu, %lu\n",
+			 info->label, info->timer->timer.expires, now);
+	}
+
+	/* TODO: Avoid modifying timers on each packet */
+	reset_timer(info, skb);
 	return XT_CONTINUE;
 }
 
@@ -206,7 +426,7 @@
 	struct idletimer_tg_info *info = par->targinfo;
 	int ret;
 
-	pr_debug("checkentry targinfo%s\n", info->label);
+	pr_debug("checkentry targinfo %s\n", info->label);
 
 	if (info->timeout == 0) {
 		pr_debug("timeout value is zero\n");
@@ -228,9 +448,7 @@
 	info->timer = __idletimer_tg_find_by_label(info->label);
 	if (info->timer) {
 		info->timer->refcnt++;
-		mod_timer(&info->timer->timer,
-			  msecs_to_jiffies(info->timeout * 1000) + jiffies);
-
+		reset_timer(info, NULL);
 		pr_debug("increased refcnt of timer %s to %u\n",
 			 info->label, info->timer->refcnt);
 	} else {
@@ -243,6 +461,7 @@
 	}
 
 	mutex_unlock(&list_mutex);
+
 	return 0;
 }
 
@@ -259,13 +478,14 @@
 
 		list_del(&info->timer->entry);
 		del_timer_sync(&info->timer->timer);
-		cancel_work_sync(&info->timer->work);
 		sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+		unregister_pm_notifier(&info->timer->pm_nb);
+		cancel_work_sync(&info->timer->work);
 		kfree(info->timer->attr.attr.name);
 		kfree(info->timer);
 	} else {
 		pr_debug("decreased refcnt of timer %s to %u\n",
-			 info->label, info->timer->refcnt);
+		info->label, info->timer->refcnt);
 	}
 
 	mutex_unlock(&list_mutex);
@@ -273,6 +493,7 @@
 
 static struct xt_target idletimer_tg __read_mostly = {
 	.name		= "IDLETIMER",
+	.revision	= 1,
 	.family		= NFPROTO_UNSPEC,
 	.target		= idletimer_tg_target,
 	.targetsize     = sizeof(struct idletimer_tg_info),
@@ -338,3 +559,4 @@
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("ipt_IDLETIMER");
 MODULE_ALIAS("ip6t_IDLETIMER");
+MODULE_ALIAS("arpt_IDLETIMER");
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
new file mode 100644
index 0000000..0e58149
--- /dev/null
+++ b/net/netfilter/xt_qtaguid.c
@@ -0,0 +1,3031 @@
+/*
+ * Kernel iptables module to track stats for packets based on user tags.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * There are run-time debug flags enabled via the debug_mask module param, or
+ * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
+ */
+#define DEBUG
+
+#include <linux/file.h>
+#include <linux/inetdevice.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_qtaguid.h>
+#include <linux/ratelimit.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/workqueue.h>
+#include <net/addrconf.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#endif
+
+#include <linux/netfilter/xt_socket.h>
+#include "xt_qtaguid_internal.h"
+#include "xt_qtaguid_print.h"
+#include "../../fs/proc/internal.h"
+
+/*
+ * We only use the xt_socket funcs within a similar context to avoid unexpected
+ * return values.
+ */
+#define XT_SOCKET_SUPPORTED_HOOKS \
+	((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
+
+
+static const char *module_procdirname = "xt_qtaguid";
+static struct proc_dir_entry *xt_qtaguid_procdir;
+
+static unsigned int proc_iface_perms = S_IRUGO;
+module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
+
+static struct proc_dir_entry *xt_qtaguid_stats_file;
+static unsigned int proc_stats_perms = S_IRUGO;
+module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
+
+static struct proc_dir_entry *xt_qtaguid_ctrl_file;
+
+/* Everybody can write. But proc_ctrl_write_limited is true by default which
+ * limits what can be controlled. See the can_*() functions.
+ */
+static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
+module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
+
+/* Limited by default, so the gid of the ctrl and stats proc entries
+ * will limit what can be done. See the can_*() functions.
+ */
+static bool proc_stats_readall_limited = true;
+static bool proc_ctrl_write_limited = true;
+
+module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
+		   S_IRUGO | S_IWUSR);
+module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
+		   S_IRUGO | S_IWUSR);
+
+/*
+ * Limit the number of active tags (via socket tags) for a given UID.
+ * Multiple processes could share the UID.
+ */
+static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
+module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
+
+/*
+ * After the kernel has initiallized this module, it is still possible
+ * to make it passive.
+ * Setting passive to Y:
+ *  - the iface stats handling will not act on notifications.
+ *  - iptables matches will never match.
+ *  - ctrl commands silently succeed.
+ *  - stats are always empty.
+ * This is mostly usefull when a bug is suspected.
+ */
+static bool module_passive;
+module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
+
+/*
+ * Control how qtaguid data is tracked per proc/uid.
+ * Setting tag_tracking_passive to Y:
+ *  - don't create proc specific structs to track tags
+ *  - don't check that active tag stats exceed some limits.
+ *  - don't clean up socket tags on process exits.
+ * This is mostly usefull when a bug is suspected.
+ */
+static bool qtu_proc_handling_passive;
+module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
+		   S_IRUGO | S_IWUSR);
+
+#define QTU_DEV_NAME "xt_qtaguid"
+
+uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
+module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
+
+/*---------------------------------------------------------------------------*/
+static const char *iface_stat_procdirname = "iface_stat";
+static struct proc_dir_entry *iface_stat_procdir;
+/*
+ * The iface_stat_all* will go away once userspace gets use to the new fields
+ * that have a format line.
+ */
+static const char *iface_stat_all_procfilename = "iface_stat_all";
+static struct proc_dir_entry *iface_stat_all_procfile;
+static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
+static struct proc_dir_entry *iface_stat_fmt_procfile;
+
+
+static LIST_HEAD(iface_stat_list);
+static DEFINE_SPINLOCK(iface_stat_list_lock);
+
+static struct rb_root sock_tag_tree = RB_ROOT;
+static DEFINE_SPINLOCK(sock_tag_list_lock);
+
+static struct rb_root tag_counter_set_tree = RB_ROOT;
+static DEFINE_SPINLOCK(tag_counter_set_list_lock);
+
+static struct rb_root uid_tag_data_tree = RB_ROOT;
+static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
+
+static struct rb_root proc_qtu_data_tree = RB_ROOT;
+/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
+
+static struct qtaguid_event_counts qtu_events;
+/*----------------------------------------------*/
+static bool can_manipulate_uids(void)
+{
+	/* root pwnd */
+	return in_egroup_p(xt_qtaguid_ctrl_file->gid)
+		|| unlikely(!from_kuid(&init_user_ns, current_fsuid())) || unlikely(!proc_ctrl_write_limited)
+		|| unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
+}
+
+static bool can_impersonate_uid(kuid_t uid)
+{
+	return uid_eq(uid, current_fsuid()) || can_manipulate_uids();
+}
+
+static bool can_read_other_uid_stats(kuid_t uid)
+{
+	/* root pwnd */
+	return in_egroup_p(xt_qtaguid_stats_file->gid)
+		|| unlikely(!from_kuid(&init_user_ns, current_fsuid())) || uid_eq(uid, current_fsuid())
+		|| unlikely(!proc_stats_readall_limited)
+		|| unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
+}
+
+static inline void dc_add_byte_packets(struct data_counters *counters, int set,
+				  enum ifs_tx_rx direction,
+				  enum ifs_proto ifs_proto,
+				  int bytes,
+				  int packets)
+{
+	counters->bpc[set][direction][ifs_proto].bytes += bytes;
+	counters->bpc[set][direction][ifs_proto].packets += packets;
+}
+
+static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
+{
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		struct tag_node *data = rb_entry(node, struct tag_node, node);
+		int result;
+		RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
+			 " node=%p data=%p\n", tag, node, data);
+		result = tag_compare(tag, data->tag);
+		RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
+			 " data.tag=0x%llx (uid=%u) res=%d\n",
+			 tag, data->tag, get_uid_from_tag(data->tag), result);
+		if (result < 0)
+			node = node->rb_left;
+		else if (result > 0)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
+static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		struct tag_node *this = rb_entry(*new, struct tag_node,
+						 node);
+		int result = tag_compare(data->tag, this->tag);
+		RB_DEBUG("qtaguid: %s(): tag=0x%llx"
+			 " (uid=%u)\n", __func__,
+			 this->tag,
+			 get_uid_from_tag(this->tag));
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else if (result > 0)
+			new = &((*new)->rb_right);
+		else
+			BUG();
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+}
+
+static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
+{
+	tag_node_tree_insert(&data->tn, root);
+}
+
+static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
+{
+	struct tag_node *node = tag_node_tree_search(root, tag);
+	if (!node)
+		return NULL;
+	return rb_entry(&node->node, struct tag_stat, tn.node);
+}
+
+static void tag_counter_set_tree_insert(struct tag_counter_set *data,
+					struct rb_root *root)
+{
+	tag_node_tree_insert(&data->tn, root);
+}
+
+static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
+							   tag_t tag)
+{
+	struct tag_node *node = tag_node_tree_search(root, tag);
+	if (!node)
+		return NULL;
+	return rb_entry(&node->node, struct tag_counter_set, tn.node);
+
+}
+
+static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
+{
+	tag_node_tree_insert(&data->tn, root);
+}
+
+static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
+{
+	struct tag_node *node = tag_node_tree_search(root, tag);
+	if (!node)
+		return NULL;
+	return rb_entry(&node->node, struct tag_ref, tn.node);
+}
+
+static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
+					     const struct sock *sk)
+{
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		struct sock_tag *data = rb_entry(node, struct sock_tag,
+						 sock_node);
+		if (sk < data->sk)
+			node = node->rb_left;
+		else if (sk > data->sk)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
+static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		struct sock_tag *this = rb_entry(*new, struct sock_tag,
+						 sock_node);
+		parent = *new;
+		if (data->sk < this->sk)
+			new = &((*new)->rb_left);
+		else if (data->sk > this->sk)
+			new = &((*new)->rb_right);
+		else
+			BUG();
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&data->sock_node, parent, new);
+	rb_insert_color(&data->sock_node, root);
+}
+
+static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
+{
+	struct rb_node *node;
+	struct sock_tag *st_entry;
+
+	node = rb_first(st_to_free_tree);
+	while (node) {
+		st_entry = rb_entry(node, struct sock_tag, sock_node);
+		node = rb_next(node);
+		CT_DEBUG("qtaguid: %s(): "
+			 "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
+			 st_entry->sk,
+			 st_entry->tag,
+			 get_uid_from_tag(st_entry->tag));
+		rb_erase(&st_entry->sock_node, st_to_free_tree);
+		sock_put(st_entry->sk);
+		kfree(st_entry);
+	}
+}
+
+static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
+						       const pid_t pid)
+{
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		struct proc_qtu_data *data = rb_entry(node,
+						      struct proc_qtu_data,
+						      node);
+		if (pid < data->pid)
+			node = node->rb_left;
+		else if (pid > data->pid)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
+static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
+				      struct rb_root *root)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		struct proc_qtu_data *this = rb_entry(*new,
+						      struct proc_qtu_data,
+						      node);
+		parent = *new;
+		if (data->pid < this->pid)
+			new = &((*new)->rb_left);
+		else if (data->pid > this->pid)
+			new = &((*new)->rb_right);
+		else
+			BUG();
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+}
+
+static void uid_tag_data_tree_insert(struct uid_tag_data *data,
+				     struct rb_root *root)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		struct uid_tag_data *this = rb_entry(*new,
+						     struct uid_tag_data,
+						     node);
+		parent = *new;
+		if (data->uid < this->uid)
+			new = &((*new)->rb_left);
+		else if (data->uid > this->uid)
+			new = &((*new)->rb_right);
+		else
+			BUG();
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+}
+
+static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
+						     uid_t uid)
+{
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		struct uid_tag_data *data = rb_entry(node,
+						     struct uid_tag_data,
+						     node);
+		if (uid < data->uid)
+			node = node->rb_left;
+		else if (uid > data->uid)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
+/*
+ * Allocates a new uid_tag_data struct if needed.
+ * Returns a pointer to the found or allocated uid_tag_data.
+ * Returns a PTR_ERR on failures, and lock is not held.
+ * If found is not NULL:
+ *   sets *found to true if not allocated.
+ *   sets *found to false if allocated.
+ */
+struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
+{
+	struct uid_tag_data *utd_entry;
+
+	/* Look for top level uid_tag_data for the UID */
+	utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
+	DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
+
+	if (found_res)
+		*found_res = utd_entry;
+	if (utd_entry)
+		return utd_entry;
+
+	utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
+	if (!utd_entry) {
+		pr_err("qtaguid: get_uid_data(%u): "
+		       "tag data alloc failed\n", uid);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	utd_entry->uid = uid;
+	utd_entry->tag_ref_tree = RB_ROOT;
+	uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
+	DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
+	return utd_entry;
+}
+
+/* Never returns NULL. Either PTR_ERR or a valid ptr. */
+static struct tag_ref *new_tag_ref(tag_t new_tag,
+				   struct uid_tag_data *utd_entry)
+{
+	struct tag_ref *tr_entry;
+	int res;
+
+	if (utd_entry->num_active_tags + 1 > max_sock_tags) {
+		pr_info("qtaguid: new_tag_ref(0x%llx): "
+			"tag ref alloc quota exceeded. max=%d\n",
+			new_tag, max_sock_tags);
+		res = -EMFILE;
+		goto err_res;
+
+	}
+
+	tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
+	if (!tr_entry) {
+		pr_err("qtaguid: new_tag_ref(0x%llx): "
+		       "tag ref alloc failed\n",
+		       new_tag);
+		res = -ENOMEM;
+		goto err_res;
+	}
+	tr_entry->tn.tag = new_tag;
+	/* tr_entry->num_sock_tags  handled by caller */
+	utd_entry->num_active_tags++;
+	tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
+	DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
+		 " inserted new tag ref %p\n",
+		 new_tag, tr_entry);
+	return tr_entry;
+
+err_res:
+	return ERR_PTR(res);
+}
+
+static struct tag_ref *lookup_tag_ref(tag_t full_tag,
+				      struct uid_tag_data **utd_res)
+{
+	struct uid_tag_data *utd_entry;
+	struct tag_ref *tr_entry;
+	bool found_utd;
+	uid_t uid = get_uid_from_tag(full_tag);
+
+	DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
+		 full_tag, uid);
+
+	utd_entry = get_uid_data(uid, &found_utd);
+	if (IS_ERR_OR_NULL(utd_entry)) {
+		if (utd_res)
+			*utd_res = utd_entry;
+		return NULL;
+	}
+
+	tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
+	if (utd_res)
+		*utd_res = utd_entry;
+	DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
+		 full_tag, utd_entry, tr_entry);
+	return tr_entry;
+}
+
+/* Never returns NULL. Either PTR_ERR or a valid ptr. */
+static struct tag_ref *get_tag_ref(tag_t full_tag,
+				   struct uid_tag_data **utd_res)
+{
+	struct uid_tag_data *utd_entry;
+	struct tag_ref *tr_entry;
+
+	DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
+		 full_tag);
+	tr_entry = lookup_tag_ref(full_tag, &utd_entry);
+	BUG_ON(IS_ERR_OR_NULL(utd_entry));
+	if (!tr_entry)
+		tr_entry = new_tag_ref(full_tag, utd_entry);
+
+	if (utd_res)
+		*utd_res = utd_entry;
+	DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
+		 full_tag, utd_entry, tr_entry);
+	return tr_entry;
+}
+
+/* Checks and maybe frees the UID Tag Data entry */
+static void put_utd_entry(struct uid_tag_data *utd_entry)
+{
+	/* Are we done with the UID tag data entry? */
+	if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
+		!utd_entry->num_pqd) {
+		DR_DEBUG("qtaguid: %s(): "
+			 "erase utd_entry=%p uid=%u "
+			 "by pid=%u tgid=%u uid=%u\n", __func__,
+			 utd_entry, utd_entry->uid,
+			 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
+		BUG_ON(utd_entry->num_active_tags);
+		rb_erase(&utd_entry->node, &uid_tag_data_tree);
+		kfree(utd_entry);
+	} else {
+		DR_DEBUG("qtaguid: %s(): "
+			 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
+			 __func__, utd_entry, utd_entry->num_active_tags,
+			 utd_entry->num_pqd);
+		BUG_ON(!(utd_entry->num_active_tags ||
+			 utd_entry->num_pqd));
+	}
+}
+
+/*
+ * If no sock_tags are using this tag_ref,
+ * decrements refcount of utd_entry, removes tr_entry
+ * from utd_entry->tag_ref_tree and frees.
+ */
+static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
+					struct uid_tag_data *utd_entry)
+{
+	DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
+		 tr_entry, tr_entry->tn.tag,
+		 get_uid_from_tag(tr_entry->tn.tag));
+	if (!tr_entry->num_sock_tags) {
+		BUG_ON(!utd_entry->num_active_tags);
+		utd_entry->num_active_tags--;
+		rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
+		DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
+		kfree(tr_entry);
+	}
+}
+
+static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
+{
+	struct rb_node *node;
+	struct tag_ref *tr_entry;
+	tag_t acct_tag;
+
+	DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
+		 full_tag, get_uid_from_tag(full_tag));
+	acct_tag = get_atag_from_tag(full_tag);
+	node = rb_first(&utd_entry->tag_ref_tree);
+	while (node) {
+		tr_entry = rb_entry(node, struct tag_ref, tn.node);
+		node = rb_next(node);
+		if (!acct_tag || tr_entry->tn.tag == full_tag)
+			free_tag_ref_from_utd_entry(tr_entry, utd_entry);
+	}
+}
+
+static ssize_t read_proc_u64(struct file *file, char __user *buf,
+			 size_t size, loff_t *ppos)
+{
+	uint64_t *valuep = PDE_DATA(file_inode(file));
+	char tmp[24];
+	size_t tmp_size;
+
+	tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
+	return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
+}
+
+static ssize_t read_proc_bool(struct file *file, char __user *buf,
+			  size_t size, loff_t *ppos)
+{
+	bool *valuep = PDE_DATA(file_inode(file));
+	char tmp[24];
+	size_t tmp_size;
+
+	tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
+	return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
+}
+
+static int get_active_counter_set(tag_t tag)
+{
+	int active_set = 0;
+	struct tag_counter_set *tcs;
+
+	MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
+		 " (uid=%u)\n",
+		 tag, get_uid_from_tag(tag));
+	/* For now we only handle UID tags for active sets */
+	tag = get_utag_from_tag(tag);
+	spin_lock_bh(&tag_counter_set_list_lock);
+	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
+	if (tcs)
+		active_set = tcs->active_set;
+	spin_unlock_bh(&tag_counter_set_list_lock);
+	return active_set;
+}
+
+/*
+ * Find the entry for tracking the specified interface.
+ * Caller must hold iface_stat_list_lock
+ */
+static struct iface_stat *get_iface_entry(const char *ifname)
+{
+	struct iface_stat *iface_entry;
+
+	/* Find the entry for tracking the specified tag within the interface */
+	if (ifname == NULL) {
+		pr_info("qtaguid: iface_stat: get() NULL device name\n");
+		return NULL;
+	}
+
+	/* Iterate over interfaces */
+	list_for_each_entry(iface_entry, &iface_stat_list, list) {
+		if (!strcmp(ifname, iface_entry->ifname))
+			goto done;
+	}
+	iface_entry = NULL;
+done:
+	return iface_entry;
+}
+
+/* This is for fmt2 only */
+static void pp_iface_stat_header(struct seq_file *m)
+{
+	seq_puts(m,
+		 "ifname "
+		 "total_skb_rx_bytes total_skb_rx_packets "
+		 "total_skb_tx_bytes total_skb_tx_packets "
+		 "rx_tcp_bytes rx_tcp_packets "
+		 "rx_udp_bytes rx_udp_packets "
+		 "rx_other_bytes rx_other_packets "
+		 "tx_tcp_bytes tx_tcp_packets "
+		 "tx_udp_bytes tx_udp_packets "
+		 "tx_other_bytes tx_other_packets\n"
+	);
+}
+
+static void pp_iface_stat_line(struct seq_file *m,
+			       struct iface_stat *iface_entry)
+{
+	struct data_counters *cnts;
+	int cnt_set = 0;   /* We only use one set for the device */
+	cnts = &iface_entry->totals_via_skb;
+	seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
+		   "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+		   iface_entry->ifname,
+		   dc_sum_bytes(cnts, cnt_set, IFS_RX),
+		   dc_sum_packets(cnts, cnt_set, IFS_RX),
+		   dc_sum_bytes(cnts, cnt_set, IFS_TX),
+		   dc_sum_packets(cnts, cnt_set, IFS_TX),
+		   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
+		   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
+		   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
+		   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
+		   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
+		   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
+		   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
+		   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
+		   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
+		   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
+		   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
+		   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
+}
+
+struct proc_iface_stat_fmt_info {
+	int fmt;
+};
+
+static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
+{
+	struct proc_iface_stat_fmt_info *p = m->private;
+	loff_t n = *pos;
+
+	/*
+	 * This lock will prevent iface_stat_update() from changing active,
+	 * and in turn prevent an interface from unregistering itself.
+	 */
+	spin_lock_bh(&iface_stat_list_lock);
+
+	if (unlikely(module_passive))
+		return NULL;
+
+	if (!n && p->fmt == 2)
+		pp_iface_stat_header(m);
+
+	return seq_list_start(&iface_stat_list, n);
+}
+
+static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	return seq_list_next(p, &iface_stat_list, pos);
+}
+
+static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
+{
+	spin_unlock_bh(&iface_stat_list_lock);
+}
+
+static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
+{
+	struct proc_iface_stat_fmt_info *p = m->private;
+	struct iface_stat *iface_entry;
+	struct rtnl_link_stats64 dev_stats, *stats;
+	struct rtnl_link_stats64 no_dev_stats = {0};
+
+
+	CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
+		 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
+
+	iface_entry = list_entry(v, struct iface_stat, list);
+
+	if (iface_entry->active) {
+		stats = dev_get_stats(iface_entry->net_dev,
+				      &dev_stats);
+	} else {
+		stats = &no_dev_stats;
+	}
+	/*
+	 * If the meaning of the data changes, then update the fmtX
+	 * string.
+	 */
+	if (p->fmt == 1) {
+		seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
+			   iface_entry->ifname,
+			   iface_entry->active,
+			   iface_entry->totals_via_dev[IFS_RX].bytes,
+			   iface_entry->totals_via_dev[IFS_RX].packets,
+			   iface_entry->totals_via_dev[IFS_TX].bytes,
+			   iface_entry->totals_via_dev[IFS_TX].packets,
+			   stats->rx_bytes, stats->rx_packets,
+			   stats->tx_bytes, stats->tx_packets
+			   );
+	} else {
+		pp_iface_stat_line(m, iface_entry);
+	}
+	return 0;
+}
+
+static const struct file_operations read_u64_fops = {
+	.read		= read_proc_u64,
+	.llseek		= default_llseek,
+};
+
+static const struct file_operations read_bool_fops = {
+	.read		= read_proc_bool,
+	.llseek		= default_llseek,
+};
+
+static void iface_create_proc_worker(struct work_struct *work)
+{
+	struct proc_dir_entry *proc_entry;
+	struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
+						   iface_work);
+	struct iface_stat *new_iface  = isw->iface_entry;
+
+	/* iface_entries are not deleted, so safe to manipulate. */
+	proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
+	if (IS_ERR_OR_NULL(proc_entry)) {
+		pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
+		kfree(isw);
+		return;
+	}
+
+	new_iface->proc_ptr = proc_entry;
+
+	proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
+			 &read_u64_fops,
+			 &new_iface->totals_via_dev[IFS_TX].bytes);
+	proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
+			 &read_u64_fops,
+			 &new_iface->totals_via_dev[IFS_RX].bytes);
+	proc_create_data("tx_packets", proc_iface_perms, proc_entry,
+			 &read_u64_fops,
+			 &new_iface->totals_via_dev[IFS_TX].packets);
+	proc_create_data("rx_packets", proc_iface_perms, proc_entry,
+			 &read_u64_fops,
+			 &new_iface->totals_via_dev[IFS_RX].packets);
+	proc_create_data("active", proc_iface_perms, proc_entry,
+			 &read_bool_fops, &new_iface->active);
+
+	IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
+		 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
+	kfree(isw);
+}
+
+/*
+ * Will set the entry's active state, and
+ * update the net_dev accordingly also.
+ */
+static void _iface_stat_set_active(struct iface_stat *entry,
+				   struct net_device *net_dev,
+				   bool activate)
+{
+	if (activate) {
+		entry->net_dev = net_dev;
+		entry->active = true;
+		IF_DEBUG("qtaguid: %s(%s): "
+			 "enable tracking. rfcnt=%d\n", __func__,
+			 entry->ifname,
+			 __this_cpu_read(*net_dev->pcpu_refcnt));
+	} else {
+		entry->active = false;
+		entry->net_dev = NULL;
+		IF_DEBUG("qtaguid: %s(%s): "
+			 "disable tracking. rfcnt=%d\n", __func__,
+			 entry->ifname,
+			 __this_cpu_read(*net_dev->pcpu_refcnt));
+
+	}
+}
+
+/* Caller must hold iface_stat_list_lock */
+static struct iface_stat *iface_alloc(struct net_device *net_dev)
+{
+	struct iface_stat *new_iface;
+	struct iface_stat_work *isw;
+
+	new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
+	if (new_iface == NULL) {
+		pr_err("qtaguid: iface_stat: create(%s): "
+		       "iface_stat alloc failed\n", net_dev->name);
+		return NULL;
+	}
+	new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
+	if (new_iface->ifname == NULL) {
+		pr_err("qtaguid: iface_stat: create(%s): "
+		       "ifname alloc failed\n", net_dev->name);
+		kfree(new_iface);
+		return NULL;
+	}
+	spin_lock_init(&new_iface->tag_stat_list_lock);
+	new_iface->tag_stat_tree = RB_ROOT;
+	_iface_stat_set_active(new_iface, net_dev, true);
+
+	/*
+	 * ipv6 notifier chains are atomic :(
+	 * No create_proc_read_entry() for you!
+	 */
+	isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
+	if (!isw) {
+		pr_err("qtaguid: iface_stat: create(%s): "
+		       "work alloc failed\n", new_iface->ifname);
+		_iface_stat_set_active(new_iface, net_dev, false);
+		kfree(new_iface->ifname);
+		kfree(new_iface);
+		return NULL;
+	}
+	isw->iface_entry = new_iface;
+	INIT_WORK(&isw->iface_work, iface_create_proc_worker);
+	schedule_work(&isw->iface_work);
+	list_add(&new_iface->list, &iface_stat_list);
+	return new_iface;
+}
+
+static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
+					       struct iface_stat *iface)
+{
+	struct rtnl_link_stats64 dev_stats, *stats;
+	bool stats_rewound;
+
+	stats = dev_get_stats(net_dev, &dev_stats);
+	/* No empty packets */
+	stats_rewound =
+		(stats->rx_bytes < iface->last_known[IFS_RX].bytes)
+		|| (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
+
+	IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
+		 "bytes rx/tx=%llu/%llu "
+		 "active=%d last_known=%d "
+		 "stats_rewound=%d\n", __func__,
+		 net_dev ? net_dev->name : "?",
+		 iface, net_dev,
+		 stats->rx_bytes, stats->tx_bytes,
+		 iface->active, iface->last_known_valid, stats_rewound);
+
+	if (iface->active && iface->last_known_valid && stats_rewound) {
+		pr_warn_once("qtaguid: iface_stat: %s(%s): "
+			     "iface reset its stats unexpectedly\n", __func__,
+			     net_dev->name);
+
+		iface->totals_via_dev[IFS_TX].bytes +=
+			iface->last_known[IFS_TX].bytes;
+		iface->totals_via_dev[IFS_TX].packets +=
+			iface->last_known[IFS_TX].packets;
+		iface->totals_via_dev[IFS_RX].bytes +=
+			iface->last_known[IFS_RX].bytes;
+		iface->totals_via_dev[IFS_RX].packets +=
+			iface->last_known[IFS_RX].packets;
+		iface->last_known_valid = false;
+		IF_DEBUG("qtaguid: %s(%s): iface=%p "
+			 "used last known bytes rx/tx=%llu/%llu\n", __func__,
+			 iface->ifname, iface, iface->last_known[IFS_RX].bytes,
+			 iface->last_known[IFS_TX].bytes);
+	}
+}
+
+/*
+ * Create a new entry for tracking the specified interface.
+ * Do nothing if the entry already exists.
+ * Called when an interface is configured with a valid IP address.
+ */
+static void iface_stat_create(struct net_device *net_dev,
+			      struct in_ifaddr *ifa)
+{
+	struct in_device *in_dev = NULL;
+	const char *ifname;
+	struct iface_stat *entry;
+	__be32 ipaddr = 0;
+	struct iface_stat *new_iface;
+
+	IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
+		 net_dev ? net_dev->name : "?",
+		 ifa, net_dev);
+	if (!net_dev) {
+		pr_err("qtaguid: iface_stat: create(): no net dev\n");
+		return;
+	}
+
+	ifname = net_dev->name;
+	if (!ifa) {
+		in_dev = in_dev_get(net_dev);
+		if (!in_dev) {
+			pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
+			       ifname);
+			return;
+		}
+		IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
+			 ifname, in_dev);
+		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+			IF_DEBUG("qtaguid: iface_stat: create(%s): "
+				 "ifa=%p ifa_label=%s\n",
+				 ifname, ifa, ifa->ifa_label);
+			if (!strcmp(ifname, ifa->ifa_label))
+				break;
+		}
+	}
+
+	if (!ifa) {
+		IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
+			 ifname);
+		goto done_put;
+	}
+	ipaddr = ifa->ifa_local;
+
+	spin_lock_bh(&iface_stat_list_lock);
+	entry = get_iface_entry(ifname);
+	if (entry != NULL) {
+		IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
+			 ifname, entry);
+		iface_check_stats_reset_and_adjust(net_dev, entry);
+		_iface_stat_set_active(entry, net_dev, true);
+		IF_DEBUG("qtaguid: %s(%s): "
+			 "tracking now %d on ip=%pI4\n", __func__,
+			 entry->ifname, true, &ipaddr);
+		goto done_unlock_put;
+	}
+
+	new_iface = iface_alloc(net_dev);
+	IF_DEBUG("qtaguid: iface_stat: create(%s): done "
+		 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
+done_unlock_put:
+	spin_unlock_bh(&iface_stat_list_lock);
+done_put:
+	if (in_dev)
+		in_dev_put(in_dev);
+}
+
+static void iface_stat_create_ipv6(struct net_device *net_dev,
+				   struct inet6_ifaddr *ifa)
+{
+	struct in_device *in_dev;
+	const char *ifname;
+	struct iface_stat *entry;
+	struct iface_stat *new_iface;
+	int addr_type;
+
+	IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
+		 ifa, net_dev, net_dev ? net_dev->name : "");
+	if (!net_dev) {
+		pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
+		return;
+	}
+	ifname = net_dev->name;
+
+	in_dev = in_dev_get(net_dev);
+	if (!in_dev) {
+		pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
+		       ifname);
+		return;
+	}
+
+	IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
+		 ifname, in_dev);
+
+	if (!ifa) {
+		IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
+			 ifname);
+		goto done_put;
+	}
+	addr_type = ipv6_addr_type(&ifa->addr);
+
+	spin_lock_bh(&iface_stat_list_lock);
+	entry = get_iface_entry(ifname);
+	if (entry != NULL) {
+		IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
+			 ifname, entry);
+		iface_check_stats_reset_and_adjust(net_dev, entry);
+		_iface_stat_set_active(entry, net_dev, true);
+		IF_DEBUG("qtaguid: %s(%s): "
+			 "tracking now %d on ip=%pI6c\n", __func__,
+			 entry->ifname, true, &ifa->addr);
+		goto done_unlock_put;
+	}
+
+	new_iface = iface_alloc(net_dev);
+	IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
+		 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
+
+done_unlock_put:
+	spin_unlock_bh(&iface_stat_list_lock);
+done_put:
+	in_dev_put(in_dev);
+}
+
+static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
+{
+	MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
+	return sock_tag_tree_search(&sock_tag_tree, sk);
+}
+
+static struct sock_tag *get_sock_stat(const struct sock *sk)
+{
+	struct sock_tag *sock_tag_entry;
+	MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
+	if (!sk)
+		return NULL;
+	spin_lock_bh(&sock_tag_list_lock);
+	sock_tag_entry = get_sock_stat_nl(sk);
+	spin_unlock_bh(&sock_tag_list_lock);
+	return sock_tag_entry;
+}
+
+static int ipx_proto(const struct sk_buff *skb,
+		     struct xt_action_param *par)
+{
+	int thoff = 0, tproto;
+
+	switch (par->family) {
+	case NFPROTO_IPV6:
+		tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
+		if (tproto < 0)
+			MT_DEBUG("%s(): transport header not found in ipv6"
+				 " skb=%p\n", __func__, skb);
+		break;
+	case NFPROTO_IPV4:
+		tproto = ip_hdr(skb)->protocol;
+		break;
+	default:
+		tproto = IPPROTO_RAW;
+	}
+	return tproto;
+}
+
+static void
+data_counters_update(struct data_counters *dc, int set,
+		     enum ifs_tx_rx direction, int proto, int bytes)
+{
+	switch (proto) {
+	case IPPROTO_TCP:
+		dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
+		break;
+	case IPPROTO_UDP:
+		dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
+		break;
+	case IPPROTO_IP:
+	default:
+		dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
+				    1);
+		break;
+	}
+}
+
+/*
+ * Update stats for the specified interface. Do nothing if the entry
+ * does not exist (when a device was never configured with an IP address).
+ * Called when an device is being unregistered.
+ */
+static void iface_stat_update(struct net_device *net_dev, bool stash_only)
+{
+	struct rtnl_link_stats64 dev_stats, *stats;
+	struct iface_stat *entry;
+
+	stats = dev_get_stats(net_dev, &dev_stats);
+	spin_lock_bh(&iface_stat_list_lock);
+	entry = get_iface_entry(net_dev->name);
+	if (entry == NULL) {
+		IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
+			 net_dev->name);
+		spin_unlock_bh(&iface_stat_list_lock);
+		return;
+	}
+
+	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
+		 net_dev->name, entry);
+	if (!entry->active) {
+		IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
+			 net_dev->name);
+		spin_unlock_bh(&iface_stat_list_lock);
+		return;
+	}
+
+	if (stash_only) {
+		entry->last_known[IFS_TX].bytes = stats->tx_bytes;
+		entry->last_known[IFS_TX].packets = stats->tx_packets;
+		entry->last_known[IFS_RX].bytes = stats->rx_bytes;
+		entry->last_known[IFS_RX].packets = stats->rx_packets;
+		entry->last_known_valid = true;
+		IF_DEBUG("qtaguid: %s(%s): "
+			 "dev stats stashed rx/tx=%llu/%llu\n", __func__,
+			 net_dev->name, stats->rx_bytes, stats->tx_bytes);
+		spin_unlock_bh(&iface_stat_list_lock);
+		return;
+	}
+	entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
+	entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
+	entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
+	entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
+	/* We don't need the last_known[] anymore */
+	entry->last_known_valid = false;
+	_iface_stat_set_active(entry, net_dev, false);
+	IF_DEBUG("qtaguid: %s(%s): "
+		 "disable tracking. rx/tx=%llu/%llu\n", __func__,
+		 net_dev->name, stats->rx_bytes, stats->tx_bytes);
+	spin_unlock_bh(&iface_stat_list_lock);
+}
+
+/*
+ * Update stats for the specified interface from the skb.
+ * Do nothing if the entry
+ * does not exist (when a device was never configured with an IP address).
+ * Called on each sk.
+ */
+static void iface_stat_update_from_skb(const struct sk_buff *skb,
+				       struct xt_action_param *par)
+{
+	struct iface_stat *entry;
+	const struct net_device *el_dev;
+	enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
+	int bytes = skb->len;
+	int proto;
+
+	if (!skb->dev) {
+		MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
+		el_dev = par->in ? : par->out;
+	} else {
+		const struct net_device *other_dev;
+		el_dev = skb->dev;
+		other_dev = par->in ? : par->out;
+		if (el_dev != other_dev) {
+			MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
+				 "par->(in/out)=%p %s\n",
+				 par->hooknum, el_dev, el_dev->name, other_dev,
+				 other_dev->name);
+		}
+	}
+
+	if (unlikely(!el_dev)) {
+		pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n",
+				   par->hooknum, __func__);
+		BUG();
+	} else {
+		proto = ipx_proto(skb, par);
+		MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
+			 par->hooknum, el_dev->name, el_dev->type,
+			 par->family, proto);
+	}
+
+	spin_lock_bh(&iface_stat_list_lock);
+	entry = get_iface_entry(el_dev->name);
+	if (entry == NULL) {
+		IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
+			 __func__, el_dev->name);
+		spin_unlock_bh(&iface_stat_list_lock);
+		return;
+	}
+
+	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
+		 el_dev->name, entry);
+
+	data_counters_update(&entry->totals_via_skb, 0, direction, proto,
+			     bytes);
+	spin_unlock_bh(&iface_stat_list_lock);
+}
+
+static void tag_stat_update(struct tag_stat *tag_entry,
+			enum ifs_tx_rx direction, int proto, int bytes)
+{
+	int active_set;
+	active_set = get_active_counter_set(tag_entry->tn.tag);
+	MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
+		 "dir=%d proto=%d bytes=%d)\n",
+		 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
+		 active_set, direction, proto, bytes);
+	data_counters_update(&tag_entry->counters, active_set, direction,
+			     proto, bytes);
+	if (tag_entry->parent_counters)
+		data_counters_update(tag_entry->parent_counters, active_set,
+				     direction, proto, bytes);
+}
+
+/*
+ * Create a new entry for tracking the specified {acct_tag,uid_tag} within
+ * the interface.
+ * iface_entry->tag_stat_list_lock should be held.
+ */
+static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
+					   tag_t tag)
+{
+	struct tag_stat *new_tag_stat_entry = NULL;
+	IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
+		 " (uid=%u)\n", __func__,
+		 iface_entry, tag, get_uid_from_tag(tag));
+	new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
+	if (!new_tag_stat_entry) {
+		pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
+		goto done;
+	}
+	new_tag_stat_entry->tn.tag = tag;
+	tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
+done:
+	return new_tag_stat_entry;
+}
+
+static void if_tag_stat_update(const char *ifname, uid_t uid,
+			       const struct sock *sk, enum ifs_tx_rx direction,
+			       int proto, int bytes)
+{
+	struct tag_stat *tag_stat_entry;
+	tag_t tag, acct_tag;
+	tag_t uid_tag;
+	struct data_counters *uid_tag_counters;
+	struct sock_tag *sock_tag_entry;
+	struct iface_stat *iface_entry;
+	struct tag_stat *new_tag_stat = NULL;
+	MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
+		"uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
+		 ifname, uid, sk, direction, proto, bytes);
+
+	spin_lock_bh(&iface_stat_list_lock);
+	iface_entry = get_iface_entry(ifname);
+	if (!iface_entry) {
+		pr_err_ratelimited("qtaguid: iface_stat: stat_update() "
+				   "%s not found\n", ifname);
+		spin_unlock_bh(&iface_stat_list_lock);
+		return;
+	}
+	/* It is ok to process data when an iface_entry is inactive */
+
+	MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
+		 ifname, iface_entry);
+
+	/*
+	 * Look for a tagged sock.
+	 * It will have an acct_uid.
+	 */
+	sock_tag_entry = get_sock_stat(sk);
+	if (sock_tag_entry) {
+		tag = sock_tag_entry->tag;
+		acct_tag = get_atag_from_tag(tag);
+		uid_tag = get_utag_from_tag(tag);
+	} else {
+		acct_tag = make_atag_from_value(0);
+		tag = combine_atag_with_uid(acct_tag, uid);
+		uid_tag = make_tag_from_uid(uid);
+	}
+	MT_DEBUG("qtaguid: iface_stat: stat_update(): "
+		 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
+		 tag, get_uid_from_tag(tag), iface_entry);
+	/* Loop over tag list under this interface for {acct_tag,uid_tag} */
+	spin_lock_bh(&iface_entry->tag_stat_list_lock);
+
+	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
+					      tag);
+	if (tag_stat_entry) {
+		/*
+		 * Updating the {acct_tag, uid_tag} entry handles both stats:
+		 * {0, uid_tag} will also get updated.
+		 */
+		tag_stat_update(tag_stat_entry, direction, proto, bytes);
+		goto unlock;
+	}
+
+	/* Loop over tag list under this interface for {0,uid_tag} */
+	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
+					      uid_tag);
+	if (!tag_stat_entry) {
+		/* Here: the base uid_tag did not exist */
+		/*
+		 * No parent counters. So
+		 *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
+		 */
+		new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
+		if (!new_tag_stat)
+			goto unlock;
+		uid_tag_counters = &new_tag_stat->counters;
+	} else {
+		uid_tag_counters = &tag_stat_entry->counters;
+	}
+
+	if (acct_tag) {
+		/* Create the child {acct_tag, uid_tag} and hook up parent. */
+		new_tag_stat = create_if_tag_stat(iface_entry, tag);
+		if (!new_tag_stat)
+			goto unlock;
+		new_tag_stat->parent_counters = uid_tag_counters;
+	} else {
+		/*
+		 * For new_tag_stat to be still NULL here would require:
+		 *  {0, uid_tag} exists
+		 *  and {acct_tag, uid_tag} doesn't exist
+		 *  AND acct_tag == 0.
+		 * Impossible. This reassures us that new_tag_stat
+		 * below will always be assigned.
+		 */
+		BUG_ON(!new_tag_stat);
+	}
+	tag_stat_update(new_tag_stat, direction, proto, bytes);
+unlock:
+	spin_unlock_bh(&iface_entry->tag_stat_list_lock);
+	spin_unlock_bh(&iface_stat_list_lock);
+}
+
+static int iface_netdev_event_handler(struct notifier_block *nb,
+				      unsigned long event, void *ptr) {
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+	if (unlikely(module_passive))
+		return NOTIFY_DONE;
+
+	IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
+		 "ev=0x%lx/%s netdev=%p->name=%s\n",
+		 event, netdev_evt_str(event), dev, dev ? dev->name : "");
+
+	switch (event) {
+	case NETDEV_UP:
+		iface_stat_create(dev, NULL);
+		atomic64_inc(&qtu_events.iface_events);
+		break;
+	case NETDEV_DOWN:
+	case NETDEV_UNREGISTER:
+		iface_stat_update(dev, event == NETDEV_DOWN);
+		atomic64_inc(&qtu_events.iface_events);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static int iface_inet6addr_event_handler(struct notifier_block *nb,
+					 unsigned long event, void *ptr)
+{
+	struct inet6_ifaddr *ifa = ptr;
+	struct net_device *dev;
+
+	if (unlikely(module_passive))
+		return NOTIFY_DONE;
+
+	IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
+		 "ev=0x%lx/%s ifa=%p\n",
+		 event, netdev_evt_str(event), ifa);
+
+	switch (event) {
+	case NETDEV_UP:
+		BUG_ON(!ifa || !ifa->idev);
+		dev = (struct net_device *)ifa->idev->dev;
+		iface_stat_create_ipv6(dev, ifa);
+		atomic64_inc(&qtu_events.iface_events);
+		break;
+	case NETDEV_DOWN:
+	case NETDEV_UNREGISTER:
+		BUG_ON(!ifa || !ifa->idev);
+		dev = (struct net_device *)ifa->idev->dev;
+		iface_stat_update(dev, event == NETDEV_DOWN);
+		atomic64_inc(&qtu_events.iface_events);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static int iface_inetaddr_event_handler(struct notifier_block *nb,
+					unsigned long event, void *ptr)
+{
+	struct in_ifaddr *ifa = ptr;
+	struct net_device *dev;
+
+	if (unlikely(module_passive))
+		return NOTIFY_DONE;
+
+	IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
+		 "ev=0x%lx/%s ifa=%p\n",
+		 event, netdev_evt_str(event), ifa);
+
+	switch (event) {
+	case NETDEV_UP:
+		BUG_ON(!ifa || !ifa->ifa_dev);
+		dev = ifa->ifa_dev->dev;
+		iface_stat_create(dev, ifa);
+		atomic64_inc(&qtu_events.iface_events);
+		break;
+	case NETDEV_DOWN:
+	case NETDEV_UNREGISTER:
+		BUG_ON(!ifa || !ifa->ifa_dev);
+		dev = ifa->ifa_dev->dev;
+		iface_stat_update(dev, event == NETDEV_DOWN);
+		atomic64_inc(&qtu_events.iface_events);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block iface_netdev_notifier_blk = {
+	.notifier_call = iface_netdev_event_handler,
+};
+
+static struct notifier_block iface_inetaddr_notifier_blk = {
+	.notifier_call = iface_inetaddr_event_handler,
+};
+
+static struct notifier_block iface_inet6addr_notifier_blk = {
+	.notifier_call = iface_inet6addr_event_handler,
+};
+
+static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
+	.start	= iface_stat_fmt_proc_start,
+	.next	= iface_stat_fmt_proc_next,
+	.stop	= iface_stat_fmt_proc_stop,
+	.show	= iface_stat_fmt_proc_show,
+};
+
+static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
+{
+	struct proc_iface_stat_fmt_info *s;
+
+	s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
+			sizeof(struct proc_iface_stat_fmt_info));
+	if (!s)
+		return -ENOMEM;
+
+	s->fmt = (uintptr_t)PDE_DATA(inode);
+	return 0;
+}
+
+static const struct file_operations proc_iface_stat_fmt_fops = {
+	.open		= proc_iface_stat_fmt_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
+{
+	int err;
+
+	iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
+	if (!iface_stat_procdir) {
+		pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
+		err = -1;
+		goto err;
+	}
+
+	iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
+						   proc_iface_perms,
+						   parent_procdir,
+						   &proc_iface_stat_fmt_fops,
+						   (void *)1 /* fmt1 */);
+	if (!iface_stat_all_procfile) {
+		pr_err("qtaguid: iface_stat: init "
+		       " failed to create stat_old proc entry\n");
+		err = -1;
+		goto err_zap_entry;
+	}
+
+	iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
+						   proc_iface_perms,
+						   parent_procdir,
+						   &proc_iface_stat_fmt_fops,
+						   (void *)2 /* fmt2 */);
+	if (!iface_stat_fmt_procfile) {
+		pr_err("qtaguid: iface_stat: init "
+		       " failed to create stat_all proc entry\n");
+		err = -1;
+		goto err_zap_all_stats_entry;
+	}
+
+
+	err = register_netdevice_notifier(&iface_netdev_notifier_blk);
+	if (err) {
+		pr_err("qtaguid: iface_stat: init "
+		       "failed to register dev event handler\n");
+		goto err_zap_all_stats_entries;
+	}
+	err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
+	if (err) {
+		pr_err("qtaguid: iface_stat: init "
+		       "failed to register ipv4 dev event handler\n");
+		goto err_unreg_nd;
+	}
+
+	err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
+	if (err) {
+		pr_err("qtaguid: iface_stat: init "
+		       "failed to register ipv6 dev event handler\n");
+		goto err_unreg_ip4_addr;
+	}
+	return 0;
+
+err_unreg_ip4_addr:
+	unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
+err_unreg_nd:
+	unregister_netdevice_notifier(&iface_netdev_notifier_blk);
+err_zap_all_stats_entries:
+	remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
+err_zap_all_stats_entry:
+	remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
+err_zap_entry:
+	remove_proc_entry(iface_stat_procdirname, parent_procdir);
+err:
+	return err;
+}
+
+static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
+				    struct xt_action_param *par)
+{
+	struct sock *sk;
+	unsigned int hook_mask = (1 << par->hooknum);
+
+	MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
+		 par->hooknum, par->family);
+
+	/*
+	 * Let's not abuse the the xt_socket_get*_sk(), or else it will
+	 * return garbage SKs.
+	 */
+	if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
+		return NULL;
+
+	switch (par->family) {
+	case NFPROTO_IPV6:
+		sk = xt_socket_lookup_slow_v6(dev_net(skb->dev), skb, par->in);
+		break;
+	case NFPROTO_IPV4:
+		sk = xt_socket_lookup_slow_v4(dev_net(skb->dev), skb, par->in);
+		break;
+	default:
+		return NULL;
+	}
+
+	if (sk) {
+		MT_DEBUG("qtaguid: %p->sk_proto=%u "
+			 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
+	}
+	return sk;
+}
+
+static void account_for_uid(const struct sk_buff *skb,
+			    const struct sock *alternate_sk, uid_t uid,
+			    struct xt_action_param *par)
+{
+	const struct net_device *el_dev;
+
+	if (!skb->dev) {
+		MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
+		el_dev = par->in ? : par->out;
+	} else {
+		const struct net_device *other_dev;
+		el_dev = skb->dev;
+		other_dev = par->in ? : par->out;
+		if (el_dev != other_dev) {
+			MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
+				"par->(in/out)=%p %s\n",
+				par->hooknum, el_dev, el_dev->name, other_dev,
+				other_dev->name);
+		}
+	}
+
+	if (unlikely(!el_dev)) {
+		pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
+	} else {
+		int proto = ipx_proto(skb, par);
+		MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
+			 par->hooknum, el_dev->name, el_dev->type,
+			 par->family, proto);
+
+		if_tag_stat_update(el_dev->name, uid,
+				skb->sk ? skb->sk : alternate_sk,
+				par->in ? IFS_RX : IFS_TX,
+				proto, skb->len);
+	}
+}
+
+static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_qtaguid_match_info *info = par->matchinfo;
+	const struct file *filp;
+	bool got_sock = false;
+	struct sock *sk;
+	kuid_t sock_uid;
+	bool res;
+	bool set_sk_callback_lock = false;
+
+	if (unlikely(module_passive))
+		return (info->match ^ info->invert) == 0;
+
+	MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
+		 par->hooknum, skb, par->in, par->out, par->family);
+
+	atomic64_inc(&qtu_events.match_calls);
+	if (skb == NULL) {
+		res = (info->match ^ info->invert) == 0;
+		goto ret_res;
+	}
+
+	switch (par->hooknum) {
+	case NF_INET_PRE_ROUTING:
+	case NF_INET_POST_ROUTING:
+		atomic64_inc(&qtu_events.match_calls_prepost);
+		iface_stat_update_from_skb(skb, par);
+		/*
+		 * We are done in pre/post. The skb will get processed
+		 * further alter.
+		 */
+		res = (info->match ^ info->invert);
+		goto ret_res;
+		break;
+	/* default: Fall through and do UID releated work */
+	}
+
+	sk = skb_to_full_sk(skb);
+	/*
+	 * When in TCP_TIME_WAIT the sk is not a "struct sock" but
+	 * "struct inet_timewait_sock" which is missing fields.
+	 * So we ignore it.
+	 */
+	if (sk && sk->sk_state == TCP_TIME_WAIT)
+		sk = NULL;
+	if (sk == NULL) {
+		/*
+		 * A missing sk->sk_socket happens when packets are in-flight
+		 * and the matching socket is already closed and gone.
+		 */
+		sk = qtaguid_find_sk(skb, par);
+		/*
+		 * TCP_NEW_SYN_RECV are not "struct sock" but "struct request_sock"
+		 * where we can get a pointer to a full socket to retrieve uid/gid.
+		 * When in TCP_TIME_WAIT, sk is a struct inet_timewait_sock
+		 * which is missing fields and does not contain any reference
+		 * to a full socket, so just ignore the socket.
+		 */
+		if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
+			sock_gen_put(sk);
+			sk = sk_to_full_sk(sk);
+		} else if (sk && (!sk_fullsock(sk) || sk->sk_state == TCP_TIME_WAIT)) {
+			sock_gen_put(sk);
+			sk = NULL;
+		} else {
+			/*
+			 * If we got the socket from the find_sk(), we will need to put
+			 * it back, as nf_tproxy_get_sock_v4() got it.
+			 */
+			got_sock = sk;
+		}
+		if (sk)
+			atomic64_inc(&qtu_events.match_found_sk_in_ct);
+		else
+			atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
+	} else {
+		atomic64_inc(&qtu_events.match_found_sk);
+	}
+	MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
+		 par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
+
+
+	if (sk == NULL) {
+		/*
+		 * Here, the qtaguid_find_sk() using connection tracking
+		 * couldn't find the owner, so for now we just count them
+		 * against the system.
+		 */
+		/*
+		 * TODO: unhack how to force just accounting.
+		 * For now we only do iface stats when the uid-owner is not
+		 * requested.
+		 */
+		if (!(info->match & XT_QTAGUID_UID))
+			account_for_uid(skb, sk, 0, par);
+		MT_DEBUG("qtaguid[%d]: leaving (sk=NULL)\n", par->hooknum);
+		res = (info->match ^ info->invert) == 0;
+		atomic64_inc(&qtu_events.match_no_sk);
+		goto put_sock_ret_res;
+	} else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
+		res = false;
+		goto put_sock_ret_res;
+	}
+	sock_uid = sk->sk_uid;
+	/*
+	 * TODO: unhack how to force just accounting.
+	 * For now we only do iface stats when the uid-owner is not requested
+	 */
+	if (!(info->match & XT_QTAGUID_UID))
+		account_for_uid(skb, sk, from_kuid(&init_user_ns, sock_uid), par);
+
+	/*
+	 * The following two tests fail the match when:
+	 *    id not in range AND no inverted condition requested
+	 * or id     in range AND    inverted condition requested
+	 * Thus (!a && b) || (a && !b) == a ^ b
+	 */
+	if (info->match & XT_QTAGUID_UID) {
+		kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min);
+		kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max);
+
+		if ((uid_gte(sk->sk_uid, uid_min) &&
+		     uid_lte(sk->sk_uid, uid_max)) ^
+		    !(info->invert & XT_QTAGUID_UID)) {
+			MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
+				 par->hooknum);
+			res = false;
+			goto put_sock_ret_res;
+		}
+	}
+	if (info->match & XT_QTAGUID_GID) {
+		kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min);
+		kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max);
+		set_sk_callback_lock = true;
+		read_lock_bh(&sk->sk_callback_lock);
+		MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
+			par->hooknum, sk, sk->sk_socket,
+			sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
+		filp = sk->sk_socket ? sk->sk_socket->file : NULL;
+		if (!filp) {
+			res = ((info->match ^ info->invert) & XT_QTAGUID_GID) == 0;
+			atomic64_inc(&qtu_events.match_no_sk_gid);
+			goto put_sock_ret_res;
+		}
+		MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
+			par->hooknum, filp ? from_kuid(&init_user_ns, filp->f_cred->fsuid) : -1);
+		if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
+				gid_lte(filp->f_cred->fsgid, gid_max)) ^
+			!(info->invert & XT_QTAGUID_GID)) {
+			MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
+				par->hooknum);
+			res = false;
+			goto put_sock_ret_res;
+		}
+	}
+	MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
+	res = true;
+
+put_sock_ret_res:
+	if (got_sock)
+		sock_gen_put(sk);
+	if (set_sk_callback_lock)
+		read_unlock_bh(&sk->sk_callback_lock);
+ret_res:
+	MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
+	return res;
+}
+
+#ifdef DDEBUG
+/*
+ * This function is not in xt_qtaguid_print.c because of locks visibility.
+ * The lock of sock_tag_list must be aquired before calling this function
+ */
+static void prdebug_full_state_locked(int indent_level, const char *fmt, ...)
+{
+	va_list args;
+	char *fmt_buff;
+	char *buff;
+
+	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+		return;
+
+	fmt_buff = kasprintf(GFP_ATOMIC,
+			     "qtaguid: %s(): %s {\n", __func__, fmt);
+	BUG_ON(!fmt_buff);
+	va_start(args, fmt);
+	buff = kvasprintf(GFP_ATOMIC,
+			  fmt_buff, args);
+	BUG_ON(!buff);
+	pr_debug("%s", buff);
+	kfree(fmt_buff);
+	kfree(buff);
+	va_end(args);
+
+	prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
+
+	spin_lock_bh(&uid_tag_data_tree_lock);
+	prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
+	prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
+	spin_unlock_bh(&uid_tag_data_tree_lock);
+
+	spin_lock_bh(&iface_stat_list_lock);
+	prdebug_iface_stat_list(indent_level, &iface_stat_list);
+	spin_unlock_bh(&iface_stat_list_lock);
+
+	pr_debug("qtaguid: %s(): }\n", __func__);
+}
+#else
+static void prdebug_full_state_locked(int indent_level, const char *fmt, ...) {}
+#endif
+
+struct proc_ctrl_print_info {
+	struct sock *sk; /* socket found by reading to sk_pos */
+	loff_t sk_pos;
+};
+
+static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct proc_ctrl_print_info *pcpi = m->private;
+	struct sock_tag *sock_tag_entry = v;
+	struct rb_node *node;
+
+	(*pos)++;
+
+	if (!v || v  == SEQ_START_TOKEN)
+		return NULL;
+
+	node = rb_next(&sock_tag_entry->sock_node);
+	if (!node) {
+		pcpi->sk = NULL;
+		sock_tag_entry = SEQ_START_TOKEN;
+	} else {
+		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
+		pcpi->sk = sock_tag_entry->sk;
+	}
+	pcpi->sk_pos = *pos;
+	return sock_tag_entry;
+}
+
+static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
+{
+	struct proc_ctrl_print_info *pcpi = m->private;
+	struct sock_tag *sock_tag_entry;
+	struct rb_node *node;
+
+	spin_lock_bh(&sock_tag_list_lock);
+
+	if (unlikely(module_passive))
+		return NULL;
+
+	if (*pos == 0) {
+		pcpi->sk_pos = 0;
+		node = rb_first(&sock_tag_tree);
+		if (!node) {
+			pcpi->sk = NULL;
+			return SEQ_START_TOKEN;
+		}
+		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
+		pcpi->sk = sock_tag_entry->sk;
+	} else {
+		sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) :
+						NULL) ?: SEQ_START_TOKEN;
+		if (*pos != pcpi->sk_pos) {
+			/* seq_read skipped a next call */
+			*pos = pcpi->sk_pos;
+			return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos);
+		}
+	}
+	return sock_tag_entry;
+}
+
+static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
+{
+	spin_unlock_bh(&sock_tag_list_lock);
+}
+
+/*
+ * Procfs reader to get all active socket tags using style "1)" as described in
+ * fs/proc/generic.c
+ */
+static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
+{
+	struct sock_tag *sock_tag_entry = v;
+	uid_t uid;
+
+	CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
+		 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
+
+	if (sock_tag_entry != SEQ_START_TOKEN) {
+		int sk_ref_count;
+		uid = get_uid_from_tag(sock_tag_entry->tag);
+		CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
+			 "pid=%u\n",
+			 sock_tag_entry->sk,
+			 sock_tag_entry->tag,
+			 uid,
+			 sock_tag_entry->pid
+			);
+		sk_ref_count = atomic_read(
+			&sock_tag_entry->sk->sk_refcnt);
+		seq_printf(m, "sock=%pK tag=0x%llx (uid=%u) pid=%u "
+			   "f_count=%d\n",
+			   sock_tag_entry->sk,
+			   sock_tag_entry->tag, uid,
+			   sock_tag_entry->pid, sk_ref_count);
+	} else {
+		seq_printf(m, "events: sockets_tagged=%llu "
+			   "sockets_untagged=%llu "
+			   "counter_set_changes=%llu "
+			   "delete_cmds=%llu "
+			   "iface_events=%llu "
+			   "match_calls=%llu "
+			   "match_calls_prepost=%llu "
+			   "match_found_sk=%llu "
+			   "match_found_sk_in_ct=%llu "
+			   "match_found_no_sk_in_ct=%llu "
+			   "match_no_sk=%llu "
+			   "match_no_sk_gid=%llu\n",
+			   (u64)atomic64_read(&qtu_events.sockets_tagged),
+			   (u64)atomic64_read(&qtu_events.sockets_untagged),
+			   (u64)atomic64_read(&qtu_events.counter_set_changes),
+			   (u64)atomic64_read(&qtu_events.delete_cmds),
+			   (u64)atomic64_read(&qtu_events.iface_events),
+			   (u64)atomic64_read(&qtu_events.match_calls),
+			   (u64)atomic64_read(&qtu_events.match_calls_prepost),
+			   (u64)atomic64_read(&qtu_events.match_found_sk),
+			   (u64)atomic64_read(&qtu_events.match_found_sk_in_ct),
+			   (u64)atomic64_read(&qtu_events.match_found_no_sk_in_ct),
+			   (u64)atomic64_read(&qtu_events.match_no_sk),
+			   (u64)atomic64_read(&qtu_events.match_no_sk_gid));
+
+		/* Count the following as part of the last item_index. No need
+		 * to lock the sock_tag_list here since it is already locked when
+		 * starting the seq_file operation
+		 */
+		prdebug_full_state_locked(0, "proc ctrl");
+	}
+
+	return 0;
+}
+
+/*
+ * Delete socket tags, and stat tags associated with a given
+ * accouting tag and uid.
+ */
+static int ctrl_cmd_delete(const char *input)
+{
+	char cmd;
+	int uid_int;
+	kuid_t uid;
+	uid_t entry_uid;
+	tag_t acct_tag;
+	tag_t tag;
+	int res, argc;
+	struct iface_stat *iface_entry;
+	struct rb_node *node;
+	struct sock_tag *st_entry;
+	struct rb_root st_to_free_tree = RB_ROOT;
+	struct tag_stat *ts_entry;
+	struct tag_counter_set *tcs_entry;
+	struct tag_ref *tr_entry;
+	struct uid_tag_data *utd_entry;
+
+	argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid_int);
+	uid = make_kuid(&init_user_ns, uid_int);
+	CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
+		 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
+		 acct_tag, uid_int);
+	if (argc < 2) {
+		res = -EINVAL;
+		goto err;
+	}
+	if (!valid_atag(acct_tag)) {
+		pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
+		res = -EINVAL;
+		goto err;
+	}
+	if (argc < 3) {
+		uid = current_fsuid();
+		uid_int = from_kuid(&init_user_ns, uid);
+	} else if (!can_impersonate_uid(uid)) {
+		pr_info("qtaguid: ctrl_delete(%s): "
+			"insufficient priv from pid=%u tgid=%u uid=%u\n",
+			input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
+		res = -EPERM;
+		goto err;
+	}
+
+	tag = combine_atag_with_uid(acct_tag, uid_int);
+	CT_DEBUG("qtaguid: ctrl_delete(%s): "
+		 "looking for tag=0x%llx (uid=%u)\n",
+		 input, tag, uid_int);
+
+	/* Delete socket tags */
+	spin_lock_bh(&sock_tag_list_lock);
+	spin_lock_bh(&uid_tag_data_tree_lock);
+	node = rb_first(&sock_tag_tree);
+	while (node) {
+		st_entry = rb_entry(node, struct sock_tag, sock_node);
+		entry_uid = get_uid_from_tag(st_entry->tag);
+		node = rb_next(node);
+		if (entry_uid != uid_int)
+			continue;
+
+		CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
+			 input, st_entry->tag, entry_uid);
+
+		if (!acct_tag || st_entry->tag == tag) {
+			rb_erase(&st_entry->sock_node, &sock_tag_tree);
+			/* Can't sockfd_put() within spinlock, do it later. */
+			sock_tag_tree_insert(st_entry, &st_to_free_tree);
+			tr_entry = lookup_tag_ref(st_entry->tag, NULL);
+			BUG_ON(tr_entry->num_sock_tags <= 0);
+			tr_entry->num_sock_tags--;
+			/*
+			 * TODO: remove if, and start failing.
+			 * This is a hack to work around the fact that in some
+			 * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
+			 * and are trying to work around apps
+			 * that didn't open the /dev/xt_qtaguid.
+			 */
+			if (st_entry->list.next && st_entry->list.prev)
+				list_del(&st_entry->list);
+		}
+	}
+	spin_unlock_bh(&uid_tag_data_tree_lock);
+	spin_unlock_bh(&sock_tag_list_lock);
+
+	sock_tag_tree_erase(&st_to_free_tree);
+
+	/* Delete tag counter-sets */
+	spin_lock_bh(&tag_counter_set_list_lock);
+	/* Counter sets are only on the uid tag, not full tag */
+	tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
+	if (tcs_entry) {
+		CT_DEBUG("qtaguid: ctrl_delete(%s): "
+			 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
+			 input,
+			 tcs_entry->tn.tag,
+			 get_uid_from_tag(tcs_entry->tn.tag),
+			 tcs_entry->active_set);
+		rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
+		kfree(tcs_entry);
+	}
+	spin_unlock_bh(&tag_counter_set_list_lock);
+
+	/*
+	 * If acct_tag is 0, then all entries belonging to uid are
+	 * erased.
+	 */
+	spin_lock_bh(&iface_stat_list_lock);
+	list_for_each_entry(iface_entry, &iface_stat_list, list) {
+		spin_lock_bh(&iface_entry->tag_stat_list_lock);
+		node = rb_first(&iface_entry->tag_stat_tree);
+		while (node) {
+			ts_entry = rb_entry(node, struct tag_stat, tn.node);
+			entry_uid = get_uid_from_tag(ts_entry->tn.tag);
+			node = rb_next(node);
+
+			CT_DEBUG("qtaguid: ctrl_delete(%s): "
+				 "ts tag=0x%llx (uid=%u)\n",
+				 input, ts_entry->tn.tag, entry_uid);
+
+			if (entry_uid != uid_int)
+				continue;
+			if (!acct_tag || ts_entry->tn.tag == tag) {
+				CT_DEBUG("qtaguid: ctrl_delete(%s): "
+					 "erase ts: %s 0x%llx %u\n",
+					 input, iface_entry->ifname,
+					 get_atag_from_tag(ts_entry->tn.tag),
+					 entry_uid);
+				rb_erase(&ts_entry->tn.node,
+					 &iface_entry->tag_stat_tree);
+				kfree(ts_entry);
+			}
+		}
+		spin_unlock_bh(&iface_entry->tag_stat_list_lock);
+	}
+	spin_unlock_bh(&iface_stat_list_lock);
+
+	/* Cleanup the uid_tag_data */
+	spin_lock_bh(&uid_tag_data_tree_lock);
+	node = rb_first(&uid_tag_data_tree);
+	while (node) {
+		utd_entry = rb_entry(node, struct uid_tag_data, node);
+		entry_uid = utd_entry->uid;
+		node = rb_next(node);
+
+		CT_DEBUG("qtaguid: ctrl_delete(%s): "
+			 "utd uid=%u\n",
+			 input, entry_uid);
+
+		if (entry_uid != uid_int)
+			continue;
+		/*
+		 * Go over the tag_refs, and those that don't have
+		 * sock_tags using them are freed.
+		 */
+		put_tag_ref_tree(tag, utd_entry);
+		put_utd_entry(utd_entry);
+	}
+	spin_unlock_bh(&uid_tag_data_tree_lock);
+
+	atomic64_inc(&qtu_events.delete_cmds);
+	res = 0;
+
+err:
+	return res;
+}
+
+static int ctrl_cmd_counter_set(const char *input)
+{
+	char cmd;
+	uid_t uid = 0;
+	tag_t tag;
+	int res, argc;
+	struct tag_counter_set *tcs;
+	int counter_set;
+
+	argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
+	CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
+		 "set=%d uid=%u\n", input, argc, cmd,
+		 counter_set, uid);
+	if (argc != 3) {
+		res = -EINVAL;
+		goto err;
+	}
+	if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
+		pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
+			input);
+		res = -EINVAL;
+		goto err;
+	}
+	if (!can_manipulate_uids()) {
+		pr_info("qtaguid: ctrl_counterset(%s): "
+			"insufficient priv from pid=%u tgid=%u uid=%u\n",
+			input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
+		res = -EPERM;
+		goto err;
+	}
+
+	tag = make_tag_from_uid(uid);
+	spin_lock_bh(&tag_counter_set_list_lock);
+	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
+	if (!tcs) {
+		tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
+		if (!tcs) {
+			spin_unlock_bh(&tag_counter_set_list_lock);
+			pr_err("qtaguid: ctrl_counterset(%s): "
+			       "failed to alloc counter set\n",
+			       input);
+			res = -ENOMEM;
+			goto err;
+		}
+		tcs->tn.tag = tag;
+		tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
+		CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
+			 "(uid=%u) set=%d\n",
+			 input, tag, get_uid_from_tag(tag), counter_set);
+	}
+	tcs->active_set = counter_set;
+	spin_unlock_bh(&tag_counter_set_list_lock);
+	atomic64_inc(&qtu_events.counter_set_changes);
+	res = 0;
+
+err:
+	return res;
+}
+
+static int ctrl_cmd_tag(const char *input)
+{
+	char cmd;
+	int sock_fd = 0;
+	kuid_t uid;
+	unsigned int uid_int = 0;
+	tag_t acct_tag = make_atag_from_value(0);
+	tag_t full_tag;
+	struct socket *el_socket;
+	int res, argc;
+	struct sock_tag *sock_tag_entry;
+	struct tag_ref *tag_ref_entry;
+	struct uid_tag_data *uid_tag_data_entry;
+	struct proc_qtu_data *pqd_entry;
+
+	/* Unassigned args will get defaulted later. */
+	argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid_int);
+	uid = make_kuid(&init_user_ns, uid_int);
+	CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
+		 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
+		 acct_tag, uid_int);
+	if (argc < 2) {
+		res = -EINVAL;
+		goto err;
+	}
+	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
+	if (!el_socket) {
+		pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
+			" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
+			input, sock_fd, res, current->pid, current->tgid,
+			from_kuid(&init_user_ns, current_fsuid()));
+		goto err;
+	}
+	CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->sk_refcnt=%d ->sk=%p\n",
+		 input, atomic_read(&el_socket->sk->sk_refcnt),
+		 el_socket->sk);
+	if (argc < 3) {
+		acct_tag = make_atag_from_value(0);
+	} else if (!valid_atag(acct_tag)) {
+		pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
+		res = -EINVAL;
+		goto err_put;
+	}
+	CT_DEBUG("qtaguid: ctrl_tag(%s): "
+		 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
+		 "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
+		 input, current->pid, current->tgid,
+		 from_kuid(&init_user_ns, current_uid()),
+		 from_kuid(&init_user_ns, current_euid()),
+		 from_kuid(&init_user_ns, current_fsuid()),
+		 from_kgid(&init_user_ns, xt_qtaguid_ctrl_file->gid),
+		 in_group_p(xt_qtaguid_ctrl_file->gid),
+		 in_egroup_p(xt_qtaguid_ctrl_file->gid));
+	if (argc < 4) {
+		uid = current_fsuid();
+		uid_int = from_kuid(&init_user_ns, uid);
+	} else if (!can_impersonate_uid(uid)) {
+		pr_info("qtaguid: ctrl_tag(%s): "
+			"insufficient priv from pid=%u tgid=%u uid=%u\n",
+			input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
+		res = -EPERM;
+		goto err_put;
+	}
+	full_tag = combine_atag_with_uid(acct_tag, uid_int);
+
+	spin_lock_bh(&sock_tag_list_lock);
+	spin_lock_bh(&uid_tag_data_tree_lock);
+	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
+	tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
+	if (IS_ERR(tag_ref_entry)) {
+		res = PTR_ERR(tag_ref_entry);
+		spin_unlock_bh(&uid_tag_data_tree_lock);
+		spin_unlock_bh(&sock_tag_list_lock);
+		goto err_put;
+	}
+	tag_ref_entry->num_sock_tags++;
+	if (sock_tag_entry) {
+		struct tag_ref *prev_tag_ref_entry;
+
+		CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
+			 "st@%p ...->sk_refcnt=%d\n",
+			 input, el_socket->sk, sock_tag_entry,
+			 atomic_read(&el_socket->sk->sk_refcnt));
+		prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
+						    &uid_tag_data_entry);
+		BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
+		BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
+		prev_tag_ref_entry->num_sock_tags--;
+		sock_tag_entry->tag = full_tag;
+	} else {
+		CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
+			 input, el_socket->sk);
+		sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
+					 GFP_ATOMIC);
+		if (!sock_tag_entry) {
+			pr_err("qtaguid: ctrl_tag(%s): "
+			       "socket tag alloc failed\n",
+			       input);
+			BUG_ON(tag_ref_entry->num_sock_tags <= 0);
+			tag_ref_entry->num_sock_tags--;
+			free_tag_ref_from_utd_entry(tag_ref_entry,
+						    uid_tag_data_entry);
+			spin_unlock_bh(&uid_tag_data_tree_lock);
+			spin_unlock_bh(&sock_tag_list_lock);
+			res = -ENOMEM;
+			goto err_put;
+		}
+		/*
+		 * Hold the sk refcount here to make sure the sk pointer cannot
+		 * be freed and reused
+		 */
+		sock_hold(el_socket->sk);
+		sock_tag_entry->sk = el_socket->sk;
+		sock_tag_entry->pid = current->tgid;
+		sock_tag_entry->tag = combine_atag_with_uid(acct_tag, uid_int);
+		pqd_entry = proc_qtu_data_tree_search(
+			&proc_qtu_data_tree, current->tgid);
+		/*
+		 * TODO: remove if, and start failing.
+		 * At first, we want to catch user-space code that is not
+		 * opening the /dev/xt_qtaguid.
+		 */
+		if (IS_ERR_OR_NULL(pqd_entry))
+			pr_warn_once(
+				"qtaguid: %s(): "
+				"User space forgot to open /dev/xt_qtaguid? "
+				"pid=%u tgid=%u uid=%u\n", __func__,
+				current->pid, current->tgid,
+				from_kuid(&init_user_ns, current_fsuid()));
+		else
+			list_add(&sock_tag_entry->list,
+				 &pqd_entry->sock_tag_list);
+
+		sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
+		atomic64_inc(&qtu_events.sockets_tagged);
+	}
+	spin_unlock_bh(&uid_tag_data_tree_lock);
+	spin_unlock_bh(&sock_tag_list_lock);
+	/* We keep the ref to the sk until it is untagged */
+	CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->sk_refcnt=%d\n",
+		 input, sock_tag_entry,
+		 atomic_read(&el_socket->sk->sk_refcnt));
+	sockfd_put(el_socket);
+	return 0;
+
+err_put:
+	CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->sk_refcnt=%d\n",
+		 input, atomic_read(&el_socket->sk->sk_refcnt) - 1);
+	/* Release the sock_fd that was grabbed by sockfd_lookup(). */
+	sockfd_put(el_socket);
+	return res;
+
+err:
+	CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
+	return res;
+}
+
+static int ctrl_cmd_untag(const char *input)
+{
+	char cmd;
+	int sock_fd = 0;
+	struct socket *el_socket;
+	int res, argc;
+
+	argc = sscanf(input, "%c %d", &cmd, &sock_fd);
+	CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
+		 input, argc, cmd, sock_fd);
+	if (argc < 2) {
+		res = -EINVAL;
+		return res;
+	}
+	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
+	if (!el_socket) {
+		pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
+			" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
+			input, sock_fd, res, current->pid, current->tgid,
+			from_kuid(&init_user_ns, current_fsuid()));
+		return res;
+	}
+	CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
+		 input, atomic_long_read(&el_socket->file->f_count),
+		 el_socket->sk);
+	res = qtaguid_untag(el_socket, false);
+	sockfd_put(el_socket);
+	return res;
+}
+
+int qtaguid_untag(struct socket *el_socket, bool kernel)
+{
+	int res;
+	pid_t pid;
+	struct sock_tag *sock_tag_entry;
+	struct tag_ref *tag_ref_entry;
+	struct uid_tag_data *utd_entry;
+	struct proc_qtu_data *pqd_entry;
+
+	spin_lock_bh(&sock_tag_list_lock);
+	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
+	if (!sock_tag_entry) {
+		spin_unlock_bh(&sock_tag_list_lock);
+		res = -EINVAL;
+		return res;
+	}
+	/*
+	 * The socket already belongs to the current process
+	 * so it can do whatever it wants to it.
+	 */
+	rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
+
+	tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
+	BUG_ON(!tag_ref_entry);
+	BUG_ON(tag_ref_entry->num_sock_tags <= 0);
+	spin_lock_bh(&uid_tag_data_tree_lock);
+	if (kernel)
+		pid = sock_tag_entry->pid;
+	else
+		pid = current->tgid;
+	pqd_entry = proc_qtu_data_tree_search(
+		&proc_qtu_data_tree, pid);
+	/*
+	 * TODO: remove if, and start failing.
+	 * At first, we want to catch user-space code that is not
+	 * opening the /dev/xt_qtaguid.
+	 */
+	if (IS_ERR_OR_NULL(pqd_entry) || !sock_tag_entry->list.next) {
+		pr_warn_once("qtaguid: %s(): "
+			     "User space forgot to open /dev/xt_qtaguid? "
+			     "pid=%u tgid=%u sk_pid=%u, uid=%u\n", __func__,
+			     current->pid, current->tgid, sock_tag_entry->pid,
+			     from_kuid(&init_user_ns, current_fsuid()));
+	} else {
+		list_del(&sock_tag_entry->list);
+	}
+	spin_unlock_bh(&uid_tag_data_tree_lock);
+	/*
+	 * We don't free tag_ref from the utd_entry here,
+	 * only during a cmd_delete().
+	 */
+	tag_ref_entry->num_sock_tags--;
+	spin_unlock_bh(&sock_tag_list_lock);
+	/*
+	 * Release the sock_fd that was grabbed at tag time.
+	 */
+	sock_put(sock_tag_entry->sk);
+	CT_DEBUG("qtaguid: done. st@%p ...->sk_refcnt=%d\n",
+		 sock_tag_entry,
+		 atomic_read(&el_socket->sk->sk_refcnt));
+
+	kfree(sock_tag_entry);
+	atomic64_inc(&qtu_events.sockets_untagged);
+
+	return 0;
+}
+
+static ssize_t qtaguid_ctrl_parse(const char *input, size_t count)
+{
+	char cmd;
+	ssize_t res;
+
+	CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
+		 input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
+
+	cmd = input[0];
+	/* Collect params for commands */
+	switch (cmd) {
+	case 'd':
+		res = ctrl_cmd_delete(input);
+		break;
+
+	case 's':
+		res = ctrl_cmd_counter_set(input);
+		break;
+
+	case 't':
+		res = ctrl_cmd_tag(input);
+		break;
+
+	case 'u':
+		res = ctrl_cmd_untag(input);
+		break;
+
+	default:
+		res = -EINVAL;
+		goto err;
+	}
+	if (!res)
+		res = count;
+err:
+	CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input, res);
+	return res;
+}
+
+#define MAX_QTAGUID_CTRL_INPUT_LEN 255
+static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
+				   size_t count, loff_t *offp)
+{
+	char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
+
+	if (unlikely(module_passive))
+		return count;
+
+	if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
+		return -EINVAL;
+
+	if (copy_from_user(input_buf, buffer, count))
+		return -EFAULT;
+
+	input_buf[count] = '\0';
+	return qtaguid_ctrl_parse(input_buf, count);
+}
+
+struct proc_print_info {
+	struct iface_stat *iface_entry;
+	int item_index;
+	tag_t tag; /* tag found by reading to tag_pos */
+	off_t tag_pos;
+	int tag_item_index;
+};
+
+static void pp_stats_header(struct seq_file *m)
+{
+	seq_puts(m,
+		 "idx iface acct_tag_hex uid_tag_int cnt_set "
+		 "rx_bytes rx_packets "
+		 "tx_bytes tx_packets "
+		 "rx_tcp_bytes rx_tcp_packets "
+		 "rx_udp_bytes rx_udp_packets "
+		 "rx_other_bytes rx_other_packets "
+		 "tx_tcp_bytes tx_tcp_packets "
+		 "tx_udp_bytes tx_udp_packets "
+		 "tx_other_bytes tx_other_packets\n");
+}
+
+static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
+			 int cnt_set)
+{
+	struct data_counters *cnts;
+	tag_t tag = ts_entry->tn.tag;
+	uid_t stat_uid = get_uid_from_tag(tag);
+	struct proc_print_info *ppi = m->private;
+	/* Detailed tags are not available to everybody */
+	if (!can_read_other_uid_stats(make_kuid(&init_user_ns,stat_uid))) {
+		CT_DEBUG("qtaguid: stats line: "
+			 "%s 0x%llx %u: insufficient priv "
+			 "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
+			 ppi->iface_entry->ifname,
+			 get_atag_from_tag(tag), stat_uid,
+			 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
+			 from_kgid(&init_user_ns,xt_qtaguid_stats_file->gid));
+		return 0;
+	}
+	ppi->item_index++;
+	cnts = &ts_entry->counters;
+	seq_printf(m, "%d %s 0x%llx %u %u "
+		"%llu %llu "
+		"%llu %llu "
+		"%llu %llu "
+		"%llu %llu "
+		"%llu %llu "
+		"%llu %llu "
+		"%llu %llu "
+		"%llu %llu\n",
+		ppi->item_index,
+		ppi->iface_entry->ifname,
+		get_atag_from_tag(tag),
+		stat_uid,
+		cnt_set,
+		dc_sum_bytes(cnts, cnt_set, IFS_RX),
+		dc_sum_packets(cnts, cnt_set, IFS_RX),
+		dc_sum_bytes(cnts, cnt_set, IFS_TX),
+		dc_sum_packets(cnts, cnt_set, IFS_TX),
+		cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
+		cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
+		cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
+		cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
+		cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
+		cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
+		cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
+		cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
+		cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
+		cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
+		cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
+		cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
+	return seq_has_overflowed(m) ? -ENOSPC : 1;
+}
+
+static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
+{
+	int ret;
+	int counter_set;
+	for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
+	     counter_set++) {
+		ret = pp_stats_line(m, ts_entry, counter_set);
+		if (ret < 0)
+			return false;
+	}
+	return true;
+}
+
+static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
+{
+	struct iface_stat *iface_entry;
+
+	if (!ptr)
+		return false;
+
+	list_for_each_entry(iface_entry, &iface_stat_list, list)
+		if (iface_entry == ptr)
+			return true;
+	return false;
+}
+
+static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
+{
+	spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
+	list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
+		spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
+		return;
+	}
+	ppi->iface_entry = NULL;
+}
+
+static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct proc_print_info *ppi = m->private;
+	struct tag_stat *ts_entry;
+	struct rb_node *node;
+
+	if (!v) {
+		pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
+		return NULL;
+	}
+
+	(*pos)++;
+
+	if (!ppi->iface_entry || unlikely(module_passive))
+		return NULL;
+
+	if (v == SEQ_START_TOKEN)
+		node = rb_first(&ppi->iface_entry->tag_stat_tree);
+	else
+		node = rb_next(&((struct tag_stat *)v)->tn.node);
+
+	while (!node) {
+		qtaguid_stats_proc_next_iface_entry(ppi);
+		if (!ppi->iface_entry)
+			return NULL;
+		node = rb_first(&ppi->iface_entry->tag_stat_tree);
+	}
+
+	ts_entry = rb_entry(node, struct tag_stat, tn.node);
+	ppi->tag = ts_entry->tn.tag;
+	ppi->tag_pos = *pos;
+	ppi->tag_item_index = ppi->item_index;
+	return ts_entry;
+}
+
+static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
+{
+	struct proc_print_info *ppi = m->private;
+	struct tag_stat *ts_entry = NULL;
+
+	spin_lock_bh(&iface_stat_list_lock);
+
+	if (*pos == 0) {
+		ppi->item_index = 1;
+		ppi->tag_pos = 0;
+		if (list_empty(&iface_stat_list)) {
+			ppi->iface_entry = NULL;
+		} else {
+			ppi->iface_entry = list_first_entry(&iface_stat_list,
+							    struct iface_stat,
+							    list);
+			spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
+		}
+		return SEQ_START_TOKEN;
+	}
+	if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) {
+		if (ppi->iface_entry) {
+			pr_err("qtaguid: %s(): iface_entry %p not found\n",
+			       __func__, ppi->iface_entry);
+			ppi->iface_entry = NULL;
+		}
+		return NULL;
+	}
+
+	spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
+
+	if (!ppi->tag_pos) {
+		/* seq_read skipped first next call */
+		ts_entry = SEQ_START_TOKEN;
+	} else {
+		ts_entry = tag_stat_tree_search(
+				&ppi->iface_entry->tag_stat_tree, ppi->tag);
+		if (!ts_entry) {
+			pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
+				__func__, ppi->tag);
+			return NULL;
+		}
+	}
+
+	if (*pos == ppi->tag_pos) { /* normal resume */
+		ppi->item_index = ppi->tag_item_index;
+	} else {
+		/* seq_read skipped a next call */
+		*pos = ppi->tag_pos;
+		ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos);
+	}
+
+	return ts_entry;
+}
+
+static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
+{
+	struct proc_print_info *ppi = m->private;
+	if (ppi->iface_entry)
+		spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
+	spin_unlock_bh(&iface_stat_list_lock);
+}
+
+/*
+ * Procfs reader to get all tag stats using style "1)" as described in
+ * fs/proc/generic.c
+ * Groups all protocols tx/rx bytes.
+ */
+static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
+{
+	struct tag_stat *ts_entry = v;
+
+	if (v == SEQ_START_TOKEN)
+		pp_stats_header(m);
+	else
+		pp_sets(m, ts_entry);
+
+	return 0;
+}
+
+/*------------------------------------------*/
+static int qtudev_open(struct inode *inode, struct file *file)
+{
+	struct uid_tag_data *utd_entry;
+	struct proc_qtu_data  *pqd_entry;
+	struct proc_qtu_data  *new_pqd_entry;
+	int res;
+	bool utd_entry_found;
+
+	if (unlikely(qtu_proc_handling_passive))
+		return 0;
+
+	DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
+		 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
+
+	spin_lock_bh(&uid_tag_data_tree_lock);
+
+	/* Look for existing uid data, or alloc one. */
+	utd_entry = get_uid_data(from_kuid(&init_user_ns, current_fsuid()), &utd_entry_found);
+	if (IS_ERR_OR_NULL(utd_entry)) {
+		res = PTR_ERR(utd_entry);
+		goto err_unlock;
+	}
+
+	/* Look for existing PID based proc_data */
+	pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
+					      current->tgid);
+	if (pqd_entry) {
+		pr_err("qtaguid: qtudev_open(): %u/%u %u "
+		       "%s already opened\n",
+		       current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
+		       QTU_DEV_NAME);
+		res = -EBUSY;
+		goto err_unlock_free_utd;
+	}
+
+	new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
+	if (!new_pqd_entry) {
+		pr_err("qtaguid: qtudev_open(): %u/%u %u: "
+		       "proc data alloc failed\n",
+		       current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
+		res = -ENOMEM;
+		goto err_unlock_free_utd;
+	}
+	new_pqd_entry->pid = current->tgid;
+	INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
+	new_pqd_entry->parent_tag_data = utd_entry;
+	utd_entry->num_pqd++;
+
+	proc_qtu_data_tree_insert(new_pqd_entry,
+				  &proc_qtu_data_tree);
+
+	spin_unlock_bh(&uid_tag_data_tree_lock);
+	DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
+		 from_kuid(&init_user_ns, current_fsuid()), new_pqd_entry);
+	file->private_data = new_pqd_entry;
+	return 0;
+
+err_unlock_free_utd:
+	if (!utd_entry_found) {
+		rb_erase(&utd_entry->node, &uid_tag_data_tree);
+		kfree(utd_entry);
+	}
+err_unlock:
+	spin_unlock_bh(&uid_tag_data_tree_lock);
+	return res;
+}
+
+static int qtudev_release(struct inode *inode, struct file *file)
+{
+	struct proc_qtu_data  *pqd_entry = file->private_data;
+	struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
+	struct sock_tag *st_entry;
+	struct rb_root st_to_free_tree = RB_ROOT;
+	struct list_head *entry, *next;
+	struct tag_ref *tr;
+
+	if (unlikely(qtu_proc_handling_passive))
+		return 0;
+
+	/*
+	 * Do not trust the current->pid, it might just be a kworker cleaning
+	 * up after a dead proc.
+	 */
+	DR_DEBUG("qtaguid: qtudev_release(): "
+		 "pid=%u tgid=%u uid=%u "
+		 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
+		 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
+		 pqd_entry, pqd_entry->pid, utd_entry,
+		 utd_entry->num_active_tags);
+
+	spin_lock_bh(&sock_tag_list_lock);
+	spin_lock_bh(&uid_tag_data_tree_lock);
+
+	list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
+		st_entry = list_entry(entry, struct sock_tag, list);
+		DR_DEBUG("qtaguid: %s(): "
+			 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
+			 __func__,
+			 st_entry, st_entry->sk,
+			 current->pid, current->tgid,
+			 pqd_entry->parent_tag_data->uid);
+
+		utd_entry = uid_tag_data_tree_search(
+			&uid_tag_data_tree,
+			get_uid_from_tag(st_entry->tag));
+		BUG_ON(IS_ERR_OR_NULL(utd_entry));
+		DR_DEBUG("qtaguid: %s(): "
+			 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
+			 st_entry->tag, utd_entry);
+		tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
+					 st_entry->tag);
+		BUG_ON(!tr);
+		BUG_ON(tr->num_sock_tags <= 0);
+		tr->num_sock_tags--;
+		free_tag_ref_from_utd_entry(tr, utd_entry);
+
+		rb_erase(&st_entry->sock_node, &sock_tag_tree);
+		list_del(&st_entry->list);
+		/* Can't sockfd_put() within spinlock, do it later. */
+		sock_tag_tree_insert(st_entry, &st_to_free_tree);
+
+		/*
+		 * Try to free the utd_entry if no other proc_qtu_data is
+		 * using it (num_pqd is 0) and it doesn't have active tags
+		 * (num_active_tags is 0).
+		 */
+		put_utd_entry(utd_entry);
+	}
+
+	rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
+	BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
+	pqd_entry->parent_tag_data->num_pqd--;
+	put_utd_entry(pqd_entry->parent_tag_data);
+	kfree(pqd_entry);
+	file->private_data = NULL;
+
+	spin_unlock_bh(&uid_tag_data_tree_lock);
+	spin_unlock_bh(&sock_tag_list_lock);
+
+
+	sock_tag_tree_erase(&st_to_free_tree);
+
+	spin_lock_bh(&sock_tag_list_lock);
+	prdebug_full_state_locked(0, "%s(): pid=%u tgid=%u", __func__,
+			   current->pid, current->tgid);
+	spin_unlock_bh(&sock_tag_list_lock);
+	return 0;
+}
+
+/*------------------------------------------*/
+static const struct file_operations qtudev_fops = {
+	.owner = THIS_MODULE,
+	.open = qtudev_open,
+	.release = qtudev_release,
+};
+
+static struct miscdevice qtu_device = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = QTU_DEV_NAME,
+	.fops = &qtudev_fops,
+	/* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
+};
+
+static const struct seq_operations proc_qtaguid_ctrl_seqops = {
+	.start = qtaguid_ctrl_proc_start,
+	.next = qtaguid_ctrl_proc_next,
+	.stop = qtaguid_ctrl_proc_stop,
+	.show = qtaguid_ctrl_proc_show,
+};
+
+static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
+{
+	return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
+				sizeof(struct proc_ctrl_print_info));
+}
+
+static const struct file_operations proc_qtaguid_ctrl_fops = {
+	.open		= proc_qtaguid_ctrl_open,
+	.read		= seq_read,
+	.write		= qtaguid_ctrl_proc_write,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+static const struct seq_operations proc_qtaguid_stats_seqops = {
+	.start = qtaguid_stats_proc_start,
+	.next = qtaguid_stats_proc_next,
+	.stop = qtaguid_stats_proc_stop,
+	.show = qtaguid_stats_proc_show,
+};
+
+static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
+{
+	return seq_open_private(file, &proc_qtaguid_stats_seqops,
+				sizeof(struct proc_print_info));
+}
+
+static const struct file_operations proc_qtaguid_stats_fops = {
+	.open		= proc_qtaguid_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+/*------------------------------------------*/
+static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
+{
+	int ret;
+	*res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
+	if (!*res_procdir) {
+		pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
+		ret = -ENOMEM;
+		goto no_dir;
+	}
+
+	xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
+						*res_procdir,
+						&proc_qtaguid_ctrl_fops,
+						NULL);
+	if (!xt_qtaguid_ctrl_file) {
+		pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
+			" file\n");
+		ret = -ENOMEM;
+		goto no_ctrl_entry;
+	}
+
+	xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
+						 *res_procdir,
+						 &proc_qtaguid_stats_fops,
+						 NULL);
+	if (!xt_qtaguid_stats_file) {
+		pr_err("qtaguid: failed to create xt_qtaguid/stats "
+			"file\n");
+		ret = -ENOMEM;
+		goto no_stats_entry;
+	}
+	/*
+	 * TODO: add support counter hacking
+	 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
+	 */
+	return 0;
+
+no_stats_entry:
+	remove_proc_entry("ctrl", *res_procdir);
+no_ctrl_entry:
+	remove_proc_entry("xt_qtaguid", NULL);
+no_dir:
+	return ret;
+}
+
+static struct xt_match qtaguid_mt_reg __read_mostly = {
+	/*
+	 * This module masquerades as the "owner" module so that iptables
+	 * tools can deal with it.
+	 */
+	.name       = "owner",
+	.revision   = 1,
+	.family     = NFPROTO_UNSPEC,
+	.match      = qtaguid_mt,
+	.matchsize  = sizeof(struct xt_qtaguid_match_info),
+	.me         = THIS_MODULE,
+};
+
+static int __init qtaguid_mt_init(void)
+{
+	if (qtaguid_proc_register(&xt_qtaguid_procdir)
+	    || iface_stat_init(xt_qtaguid_procdir)
+	    || xt_register_match(&qtaguid_mt_reg)
+	    || misc_register(&qtu_device))
+		return -1;
+	return 0;
+}
+
+/*
+ * TODO: allow unloading of the module.
+ * For now stats are permanent.
+ * Kconfig forces'y/n' and never an 'm'.
+ */
+
+module_init(qtaguid_mt_init);
+MODULE_AUTHOR("jpa <jpa@google.com>");
+MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_owner");
+MODULE_ALIAS("ip6t_owner");
+MODULE_ALIAS("ipt_qtaguid");
+MODULE_ALIAS("ip6t_qtaguid");
diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h
new file mode 100644
index 0000000..c705270
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_internal.h
@@ -0,0 +1,350 @@
+/*
+ * Kernel iptables module to track stats for packets based on user tags.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __XT_QTAGUID_INTERNAL_H__
+#define __XT_QTAGUID_INTERNAL_H__
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock_types.h>
+#include <linux/workqueue.h>
+
+/* Iface handling */
+#define IDEBUG_MASK (1<<0)
+/* Iptable Matching. Per packet. */
+#define MDEBUG_MASK (1<<1)
+/* Red-black tree handling. Per packet. */
+#define RDEBUG_MASK (1<<2)
+/* procfs ctrl/stats handling */
+#define CDEBUG_MASK (1<<3)
+/* dev and resource tracking */
+#define DDEBUG_MASK (1<<4)
+
+/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */
+#define DEFAULT_DEBUG_MASK 0
+
+/*
+ * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
+ * All undef: text size ~ 0x3030; all def: ~ 0x4404.
+ */
+#define IDEBUG
+#define MDEBUG
+#define RDEBUG
+#define CDEBUG
+#define DDEBUG
+
+#define MSK_DEBUG(mask, ...) do {                           \
+		if (unlikely(qtaguid_debug_mask & (mask)))  \
+			pr_debug(__VA_ARGS__);              \
+	} while (0)
+#ifdef IDEBUG
+#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
+#else
+#define IF_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+#ifdef MDEBUG
+#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
+#else
+#define MT_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+#ifdef RDEBUG
+#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
+#else
+#define RB_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+#ifdef CDEBUG
+#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
+#else
+#define CT_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+#ifdef DDEBUG
+#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
+#else
+#define DR_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+
+extern uint qtaguid_debug_mask;
+
+/*---------------------------------------------------------------------------*/
+/*
+ * Tags:
+ *
+ * They represent what the data usage counters will be tracked against.
+ * By default a tag is just based on the UID.
+ * The UID is used as the base for policing, and can not be ignored.
+ * So a tag will always at least represent a UID (uid_tag).
+ *
+ * A tag can be augmented with an "accounting tag" which is associated
+ * with a UID.
+ * User space can set the acct_tag portion of the tag which is then used
+ * with sockets: all data belonging to that socket will be counted against the
+ * tag. The policing is then based on the tag's uid_tag portion,
+ * and stats are collected for the acct_tag portion separately.
+ *
+ * There could be
+ * a:  {acct_tag=1, uid_tag=10003}
+ * b:  {acct_tag=2, uid_tag=10003}
+ * c:  {acct_tag=3, uid_tag=10003}
+ * d:  {acct_tag=0, uid_tag=10003}
+ * a, b, and c represent tags associated with specific sockets.
+ * d is for the totals for that uid, including all untagged traffic.
+ * Typically d is used with policing/quota rules.
+ *
+ * We want tag_t big enough to distinguish uid_t and acct_tag.
+ * It might become a struct if needed.
+ * Nothing should be using it as an int.
+ */
+typedef uint64_t tag_t;  /* Only used via accessors */
+
+#define TAG_UID_MASK 0xFFFFFFFFULL
+#define TAG_ACCT_MASK (~0xFFFFFFFFULL)
+
+static inline int tag_compare(tag_t t1, tag_t t2)
+{
+	return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
+}
+
+static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
+{
+	return acct_tag | uid;
+}
+static inline tag_t make_tag_from_uid(uid_t uid)
+{
+	return uid;
+}
+static inline uid_t get_uid_from_tag(tag_t tag)
+{
+	return tag & TAG_UID_MASK;
+}
+static inline tag_t get_utag_from_tag(tag_t tag)
+{
+	return tag & TAG_UID_MASK;
+}
+static inline tag_t get_atag_from_tag(tag_t tag)
+{
+	return tag & TAG_ACCT_MASK;
+}
+
+static inline bool valid_atag(tag_t tag)
+{
+	return !(tag & TAG_UID_MASK);
+}
+static inline tag_t make_atag_from_value(uint32_t value)
+{
+	return (uint64_t)value << 32;
+}
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Maximum number of socket tags that a UID is allowed to have active.
+ * Multiple processes belonging to the same UID contribute towards this limit.
+ * Special UIDs that can impersonate a UID also contribute (e.g. download
+ * manager, ...)
+ */
+#define DEFAULT_MAX_SOCK_TAGS 1024
+
+/*
+ * For now we only track 2 sets of counters.
+ * The default set is 0.
+ * Userspace can activate another set for a given uid being tracked.
+ */
+#define IFS_MAX_COUNTER_SETS 2
+
+enum ifs_tx_rx {
+	IFS_TX,
+	IFS_RX,
+	IFS_MAX_DIRECTIONS
+};
+
+/* For now, TCP, UDP, the rest */
+enum ifs_proto {
+	IFS_TCP,
+	IFS_UDP,
+	IFS_PROTO_OTHER,
+	IFS_MAX_PROTOS
+};
+
+struct byte_packet_counters {
+	uint64_t bytes;
+	uint64_t packets;
+};
+
+struct data_counters {
+	struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
+};
+
+static inline uint64_t dc_sum_bytes(struct data_counters *counters,
+				    int set,
+				    enum ifs_tx_rx direction)
+{
+	return counters->bpc[set][direction][IFS_TCP].bytes
+		+ counters->bpc[set][direction][IFS_UDP].bytes
+		+ counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
+}
+
+static inline uint64_t dc_sum_packets(struct data_counters *counters,
+				      int set,
+				      enum ifs_tx_rx direction)
+{
+	return counters->bpc[set][direction][IFS_TCP].packets
+		+ counters->bpc[set][direction][IFS_UDP].packets
+		+ counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
+}
+
+
+/* Generic X based nodes used as a base for rb_tree ops */
+struct tag_node {
+	struct rb_node node;
+	tag_t tag;
+};
+
+struct tag_stat {
+	struct tag_node tn;
+	struct data_counters counters;
+	/*
+	 * If this tag is acct_tag based, we need to count against the
+	 * matching parent uid_tag.
+	 */
+	struct data_counters *parent_counters;
+};
+
+struct iface_stat {
+	struct list_head list;  /* in iface_stat_list */
+	char *ifname;
+	bool active;
+	/* net_dev is only valid for active iface_stat */
+	struct net_device *net_dev;
+
+	struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
+	struct data_counters totals_via_skb;
+	/*
+	 * We keep the last_known, because some devices reset their counters
+	 * just before NETDEV_UP, while some will reset just before
+	 * NETDEV_REGISTER (which is more normal).
+	 * So now, if the device didn't do a NETDEV_UNREGISTER and we see
+	 * its current dev stats smaller that what was previously known, we
+	 * assume an UNREGISTER and just use the last_known.
+	 */
+	struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
+	/* last_known is usable when last_known_valid is true */
+	bool last_known_valid;
+
+	struct proc_dir_entry *proc_ptr;
+
+	struct rb_root tag_stat_tree;
+	spinlock_t tag_stat_list_lock;
+};
+
+/* This is needed to create proc_dir_entries from atomic context. */
+struct iface_stat_work {
+	struct work_struct iface_work;
+	struct iface_stat *iface_entry;
+};
+
+/*
+ * Track tag that this socket is transferring data for, and not necessarily
+ * the uid that owns the socket.
+ * This is the tag against which tag_stat.counters will be billed.
+ * These structs need to be looked up by sock and pid.
+ */
+struct sock_tag {
+	struct rb_node sock_node;
+	struct sock *sk;  /* Only used as a number, never dereferenced */
+	/* Used to associate with a given pid */
+	struct list_head list;   /* in proc_qtu_data.sock_tag_list */
+	pid_t pid;
+
+	tag_t tag;
+};
+
+struct qtaguid_event_counts {
+	/* Various successful events */
+	atomic64_t sockets_tagged;
+	atomic64_t sockets_untagged;
+	atomic64_t counter_set_changes;
+	atomic64_t delete_cmds;
+	atomic64_t iface_events;  /* Number of NETDEV_* events handled */
+
+	atomic64_t match_calls;   /* Number of times iptables called mt */
+	/* Number of times iptables called mt from pre or post routing hooks */
+	atomic64_t match_calls_prepost;
+	/*
+	 * match_found_sk_*: numbers related to the netfilter matching
+	 * function finding a sock for the sk_buff.
+	 * Total skbs processed is sum(match_found*).
+	 */
+	atomic64_t match_found_sk;   /* An sk was already in the sk_buff. */
+	/* The connection tracker had or didn't have the sk. */
+	atomic64_t match_found_sk_in_ct;
+	atomic64_t match_found_no_sk_in_ct;
+	/*
+	 * No sk could be found. No apparent owner. Could happen with
+	 * unsolicited traffic.
+	 */
+	atomic64_t match_no_sk;
+	/*
+	 * The file ptr in the sk_socket wasn't there and we couldn't get GID.
+	 * This might happen for traffic while the socket is being closed.
+	 */
+	atomic64_t match_no_sk_gid;
+};
+
+/* Track the set active_set for the given tag. */
+struct tag_counter_set {
+	struct tag_node tn;
+	int active_set;
+};
+
+/*----------------------------------------------*/
+/*
+ * The qtu uid data is used to track resources that are created directly or
+ * indirectly by processes (uid tracked).
+ * It is shared by the processes with the same uid.
+ * Some of the resource will be counted to prevent further rogue allocations,
+ * some will need freeing once the owner process (uid) exits.
+ */
+struct uid_tag_data {
+	struct rb_node node;
+	uid_t uid;
+
+	/*
+	 * For the uid, how many accounting tags have been set.
+	 */
+	int num_active_tags;
+	/* Track the number of proc_qtu_data that reference it */
+	int num_pqd;
+	struct rb_root tag_ref_tree;
+	/* No tag_node_tree_lock; use uid_tag_data_tree_lock */
+};
+
+struct tag_ref {
+	struct tag_node tn;
+
+	/*
+	 * This tracks the number of active sockets that have a tag on them
+	 * which matches this tag_ref.tn.tag.
+	 * A tag ref can live on after the sockets are untagged.
+	 * A tag ref can only be removed during a tag delete command.
+	 */
+	int num_sock_tags;
+};
+
+struct proc_qtu_data {
+	struct rb_node node;
+	pid_t pid;
+
+	struct uid_tag_data *parent_tag_data;
+
+	/* Tracks the sock_tags that need freeing upon this proc's death */
+	struct list_head sock_tag_list;
+	/* No spinlock_t sock_tag_list_lock; use the global one. */
+};
+
+/*----------------------------------------------*/
+#endif  /* ifndef __XT_QTAGUID_INTERNAL_H__ */
diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c
new file mode 100644
index 0000000..2a7190d
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_print.c
@@ -0,0 +1,566 @@
+/*
+ * Pretty printing Support for iptables xt_qtaguid module.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Most of the functions in this file just waste time if DEBUG is not defined.
+ * The matching xt_qtaguid_print.h will static inline empty funcs if the needed
+ * debug flags ore not defined.
+ * Those funcs that fail to allocate memory will panic as there is no need to
+ * hobble allong just pretending to do the requested work.
+ */
+
+#define DEBUG
+
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/net.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/spinlock_types.h>
+#include <net/sock.h>
+
+#include "xt_qtaguid_internal.h"
+#include "xt_qtaguid_print.h"
+
+#ifdef DDEBUG
+
+static void _bug_on_err_or_null(void *ptr)
+{
+	if (IS_ERR_OR_NULL(ptr)) {
+		pr_err("qtaguid: kmalloc failed\n");
+		BUG();
+	}
+}
+
+char *pp_tag_t(tag_t *tag)
+{
+	char *res;
+
+	if (!tag)
+		res = kasprintf(GFP_ATOMIC, "tag_t@null{}");
+	else
+		res = kasprintf(GFP_ATOMIC,
+				"tag_t@%p{tag=0x%llx, uid=%u}",
+				tag, *tag, get_uid_from_tag(*tag));
+	_bug_on_err_or_null(res);
+	return res;
+}
+
+char *pp_data_counters(struct data_counters *dc, bool showValues)
+{
+	char *res;
+
+	if (!dc)
+		res = kasprintf(GFP_ATOMIC, "data_counters@null{}");
+	else if (showValues)
+		res = kasprintf(
+			GFP_ATOMIC, "data_counters@%p{"
+			"set0{"
+			"rx{"
+			"tcp{b=%llu, p=%llu}, "
+			"udp{b=%llu, p=%llu},"
+			"other{b=%llu, p=%llu}}, "
+			"tx{"
+			"tcp{b=%llu, p=%llu}, "
+			"udp{b=%llu, p=%llu},"
+			"other{b=%llu, p=%llu}}}, "
+			"set1{"
+			"rx{"
+			"tcp{b=%llu, p=%llu}, "
+			"udp{b=%llu, p=%llu},"
+			"other{b=%llu, p=%llu}}, "
+			"tx{"
+			"tcp{b=%llu, p=%llu}, "
+			"udp{b=%llu, p=%llu},"
+			"other{b=%llu, p=%llu}}}}",
+			dc,
+			dc->bpc[0][IFS_RX][IFS_TCP].bytes,
+			dc->bpc[0][IFS_RX][IFS_TCP].packets,
+			dc->bpc[0][IFS_RX][IFS_UDP].bytes,
+			dc->bpc[0][IFS_RX][IFS_UDP].packets,
+			dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes,
+			dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets,
+			dc->bpc[0][IFS_TX][IFS_TCP].bytes,
+			dc->bpc[0][IFS_TX][IFS_TCP].packets,
+			dc->bpc[0][IFS_TX][IFS_UDP].bytes,
+			dc->bpc[0][IFS_TX][IFS_UDP].packets,
+			dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes,
+			dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets,
+			dc->bpc[1][IFS_RX][IFS_TCP].bytes,
+			dc->bpc[1][IFS_RX][IFS_TCP].packets,
+			dc->bpc[1][IFS_RX][IFS_UDP].bytes,
+			dc->bpc[1][IFS_RX][IFS_UDP].packets,
+			dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes,
+			dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets,
+			dc->bpc[1][IFS_TX][IFS_TCP].bytes,
+			dc->bpc[1][IFS_TX][IFS_TCP].packets,
+			dc->bpc[1][IFS_TX][IFS_UDP].bytes,
+			dc->bpc[1][IFS_TX][IFS_UDP].packets,
+			dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes,
+			dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets);
+	else
+		res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc);
+	_bug_on_err_or_null(res);
+	return res;
+}
+
+char *pp_tag_node(struct tag_node *tn)
+{
+	char *tag_str;
+	char *res;
+
+	if (!tn) {
+		res = kasprintf(GFP_ATOMIC, "tag_node@null{}");
+		_bug_on_err_or_null(res);
+		return res;
+	}
+	tag_str = pp_tag_t(&tn->tag);
+	res = kasprintf(GFP_ATOMIC,
+			"tag_node@%p{tag=%s}",
+			tn, tag_str);
+	_bug_on_err_or_null(res);
+	kfree(tag_str);
+	return res;
+}
+
+char *pp_tag_ref(struct tag_ref *tr)
+{
+	char *tn_str;
+	char *res;
+
+	if (!tr) {
+		res = kasprintf(GFP_ATOMIC, "tag_ref@null{}");
+		_bug_on_err_or_null(res);
+		return res;
+	}
+	tn_str = pp_tag_node(&tr->tn);
+	res = kasprintf(GFP_ATOMIC,
+			"tag_ref@%p{%s, num_sock_tags=%d}",
+			tr, tn_str, tr->num_sock_tags);
+	_bug_on_err_or_null(res);
+	kfree(tn_str);
+	return res;
+}
+
+char *pp_tag_stat(struct tag_stat *ts)
+{
+	char *tn_str;
+	char *counters_str;
+	char *parent_counters_str;
+	char *res;
+
+	if (!ts) {
+		res = kasprintf(GFP_ATOMIC, "tag_stat@null{}");
+		_bug_on_err_or_null(res);
+		return res;
+	}
+	tn_str = pp_tag_node(&ts->tn);
+	counters_str = pp_data_counters(&ts->counters, true);
+	parent_counters_str = pp_data_counters(ts->parent_counters, false);
+	res = kasprintf(GFP_ATOMIC,
+			"tag_stat@%p{%s, counters=%s, parent_counters=%s}",
+			ts, tn_str, counters_str, parent_counters_str);
+	_bug_on_err_or_null(res);
+	kfree(tn_str);
+	kfree(counters_str);
+	kfree(parent_counters_str);
+	return res;
+}
+
+char *pp_iface_stat(struct iface_stat *is)
+{
+	char *res;
+	if (!is) {
+		res = kasprintf(GFP_ATOMIC, "iface_stat@null{}");
+	} else {
+		struct data_counters *cnts = &is->totals_via_skb;
+		res = kasprintf(GFP_ATOMIC, "iface_stat@%p{"
+				"list=list_head{...}, "
+				"ifname=%s, "
+				"total_dev={rx={bytes=%llu, "
+				"packets=%llu}, "
+				"tx={bytes=%llu, "
+				"packets=%llu}}, "
+				"total_skb={rx={bytes=%llu, "
+				"packets=%llu}, "
+				"tx={bytes=%llu, "
+				"packets=%llu}}, "
+				"last_known_valid=%d, "
+				"last_known={rx={bytes=%llu, "
+				"packets=%llu}, "
+				"tx={bytes=%llu, "
+				"packets=%llu}}, "
+				"active=%d, "
+				"net_dev=%p, "
+				"proc_ptr=%p, "
+				"tag_stat_tree=rb_root{...}}",
+				is,
+				is->ifname,
+				is->totals_via_dev[IFS_RX].bytes,
+				is->totals_via_dev[IFS_RX].packets,
+				is->totals_via_dev[IFS_TX].bytes,
+				is->totals_via_dev[IFS_TX].packets,
+				dc_sum_bytes(cnts, 0, IFS_RX),
+				dc_sum_packets(cnts, 0, IFS_RX),
+				dc_sum_bytes(cnts, 0, IFS_TX),
+				dc_sum_packets(cnts, 0, IFS_TX),
+				is->last_known_valid,
+				is->last_known[IFS_RX].bytes,
+				is->last_known[IFS_RX].packets,
+				is->last_known[IFS_TX].bytes,
+				is->last_known[IFS_TX].packets,
+				is->active,
+				is->net_dev,
+				is->proc_ptr);
+	}
+	_bug_on_err_or_null(res);
+	return res;
+}
+
+char *pp_sock_tag(struct sock_tag *st)
+{
+	char *tag_str;
+	char *res;
+
+	if (!st) {
+		res = kasprintf(GFP_ATOMIC, "sock_tag@null{}");
+		_bug_on_err_or_null(res);
+		return res;
+	}
+	tag_str = pp_tag_t(&st->tag);
+	res = kasprintf(GFP_ATOMIC, "sock_tag@%p{"
+			"sock_node=rb_node{...}, "
+			"sk=%p (f_count=%d), list=list_head{...}, "
+			"pid=%u, tag=%s}",
+			st, st->sk, atomic_read(
+				&st->sk->sk_refcnt),
+			st->pid, tag_str);
+	_bug_on_err_or_null(res);
+	kfree(tag_str);
+	return res;
+}
+
+char *pp_uid_tag_data(struct uid_tag_data *utd)
+{
+	char *res;
+
+	if (!utd)
+		res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}");
+	else
+		res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{"
+				"uid=%u, num_active_acct_tags=%d, "
+				"num_pqd=%d, "
+				"tag_node_tree=rb_root{...}, "
+				"proc_qtu_data_tree=rb_root{...}}",
+				utd, utd->uid,
+				utd->num_active_tags, utd->num_pqd);
+	_bug_on_err_or_null(res);
+	return res;
+}
+
+char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
+{
+	char *parent_tag_data_str;
+	char *res;
+
+	if (!pqd) {
+		res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}");
+		_bug_on_err_or_null(res);
+		return res;
+	}
+	parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data);
+	res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{"
+			"node=rb_node{...}, pid=%u, "
+			"parent_tag_data=%s, "
+			"sock_tag_list=list_head{...}}",
+			pqd, pqd->pid, parent_tag_data_str
+		);
+	_bug_on_err_or_null(res);
+	kfree(parent_tag_data_str);
+	return res;
+}
+
+/*------------------------------------------*/
+void prdebug_sock_tag_tree(int indent_level,
+			   struct rb_root *sock_tag_tree)
+{
+	struct rb_node *node;
+	struct sock_tag *sock_tag_entry;
+	char *str;
+
+	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+		return;
+
+	if (RB_EMPTY_ROOT(sock_tag_tree)) {
+		str = "sock_tag_tree=rb_root{}";
+		pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+		return;
+	}
+
+	str = "sock_tag_tree=rb_root{";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+	indent_level++;
+	for (node = rb_first(sock_tag_tree);
+	     node;
+	     node = rb_next(node)) {
+		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
+		str = pp_sock_tag(sock_tag_entry);
+		pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
+		kfree(str);
+	}
+	indent_level--;
+	str = "}";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_sock_tag_list(int indent_level,
+			   struct list_head *sock_tag_list)
+{
+	struct sock_tag *sock_tag_entry;
+	char *str;
+
+	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+		return;
+
+	if (list_empty(sock_tag_list)) {
+		str = "sock_tag_list=list_head{}";
+		pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+		return;
+	}
+
+	str = "sock_tag_list=list_head{";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+	indent_level++;
+	list_for_each_entry(sock_tag_entry, sock_tag_list, list) {
+		str = pp_sock_tag(sock_tag_entry);
+		pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
+		kfree(str);
+	}
+	indent_level--;
+	str = "}";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_proc_qtu_data_tree(int indent_level,
+				struct rb_root *proc_qtu_data_tree)
+{
+	char *str;
+	struct rb_node *node;
+	struct proc_qtu_data *proc_qtu_data_entry;
+
+	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+		return;
+
+	if (RB_EMPTY_ROOT(proc_qtu_data_tree)) {
+		str = "proc_qtu_data_tree=rb_root{}";
+		pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+		return;
+	}
+
+	str = "proc_qtu_data_tree=rb_root{";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+	indent_level++;
+	for (node = rb_first(proc_qtu_data_tree);
+	     node;
+	     node = rb_next(node)) {
+		proc_qtu_data_entry = rb_entry(node,
+					       struct proc_qtu_data,
+					       node);
+		str = pp_proc_qtu_data(proc_qtu_data_entry);
+		pr_debug("%*d: %s,\n", indent_level*2, indent_level,
+			 str);
+		kfree(str);
+		indent_level++;
+		prdebug_sock_tag_list(indent_level,
+				      &proc_qtu_data_entry->sock_tag_list);
+		indent_level--;
+
+	}
+	indent_level--;
+	str = "}";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
+{
+	char *str;
+	struct rb_node *node;
+	struct tag_ref *tag_ref_entry;
+
+	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+		return;
+
+	if (RB_EMPTY_ROOT(tag_ref_tree)) {
+		str = "tag_ref_tree{}";
+		pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+		return;
+	}
+
+	str = "tag_ref_tree{";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+	indent_level++;
+	for (node = rb_first(tag_ref_tree);
+	     node;
+	     node = rb_next(node)) {
+		tag_ref_entry = rb_entry(node,
+					 struct tag_ref,
+					 tn.node);
+		str = pp_tag_ref(tag_ref_entry);
+		pr_debug("%*d: %s,\n", indent_level*2, indent_level,
+			 str);
+		kfree(str);
+	}
+	indent_level--;
+	str = "}";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_uid_tag_data_tree(int indent_level,
+			       struct rb_root *uid_tag_data_tree)
+{
+	char *str;
+	struct rb_node *node;
+	struct uid_tag_data *uid_tag_data_entry;
+
+	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+		return;
+
+	if (RB_EMPTY_ROOT(uid_tag_data_tree)) {
+		str = "uid_tag_data_tree=rb_root{}";
+		pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+		return;
+	}
+
+	str = "uid_tag_data_tree=rb_root{";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+	indent_level++;
+	for (node = rb_first(uid_tag_data_tree);
+	     node;
+	     node = rb_next(node)) {
+		uid_tag_data_entry = rb_entry(node, struct uid_tag_data,
+					      node);
+		str = pp_uid_tag_data(uid_tag_data_entry);
+		pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
+		kfree(str);
+		if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) {
+			indent_level++;
+			prdebug_tag_ref_tree(indent_level,
+					     &uid_tag_data_entry->tag_ref_tree);
+			indent_level--;
+		}
+	}
+	indent_level--;
+	str = "}";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_tag_stat_tree(int indent_level,
+				  struct rb_root *tag_stat_tree)
+{
+	char *str;
+	struct rb_node *node;
+	struct tag_stat *ts_entry;
+
+	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+		return;
+
+	if (RB_EMPTY_ROOT(tag_stat_tree)) {
+		str = "tag_stat_tree{}";
+		pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+		return;
+	}
+
+	str = "tag_stat_tree{";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+	indent_level++;
+	for (node = rb_first(tag_stat_tree);
+	     node;
+	     node = rb_next(node)) {
+		ts_entry = rb_entry(node, struct tag_stat, tn.node);
+		str = pp_tag_stat(ts_entry);
+		pr_debug("%*d: %s\n", indent_level*2, indent_level,
+			 str);
+		kfree(str);
+	}
+	indent_level--;
+	str = "}";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_iface_stat_list(int indent_level,
+			     struct list_head *iface_stat_list)
+{
+	char *str;
+	struct iface_stat *iface_entry;
+
+	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+		return;
+
+	if (list_empty(iface_stat_list)) {
+		str = "iface_stat_list=list_head{}";
+		pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+		return;
+	}
+
+	str = "iface_stat_list=list_head{";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+	indent_level++;
+	list_for_each_entry(iface_entry, iface_stat_list, list) {
+		str = pp_iface_stat(iface_entry);
+		pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+		kfree(str);
+
+		spin_lock_bh(&iface_entry->tag_stat_list_lock);
+		if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) {
+			indent_level++;
+			prdebug_tag_stat_tree(indent_level,
+					      &iface_entry->tag_stat_tree);
+			indent_level--;
+		}
+		spin_unlock_bh(&iface_entry->tag_stat_list_lock);
+	}
+	indent_level--;
+	str = "}";
+	pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+#endif  /* ifdef DDEBUG */
+/*------------------------------------------*/
+static const char * const netdev_event_strings[] = {
+	"netdev_unknown",
+	"NETDEV_UP",
+	"NETDEV_DOWN",
+	"NETDEV_REBOOT",
+	"NETDEV_CHANGE",
+	"NETDEV_REGISTER",
+	"NETDEV_UNREGISTER",
+	"NETDEV_CHANGEMTU",
+	"NETDEV_CHANGEADDR",
+	"NETDEV_GOING_DOWN",
+	"NETDEV_CHANGENAME",
+	"NETDEV_FEAT_CHANGE",
+	"NETDEV_BONDING_FAILOVER",
+	"NETDEV_PRE_UP",
+	"NETDEV_PRE_TYPE_CHANGE",
+	"NETDEV_POST_TYPE_CHANGE",
+	"NETDEV_POST_INIT",
+	"NETDEV_UNREGISTER_BATCH",
+	"NETDEV_RELEASE",
+	"NETDEV_NOTIFY_PEERS",
+	"NETDEV_JOIN",
+};
+
+const char *netdev_evt_str(int netdev_event)
+{
+	if (netdev_event < 0
+	    || netdev_event >= ARRAY_SIZE(netdev_event_strings))
+		return "bad event num";
+	return netdev_event_strings[netdev_event];
+}
diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h
new file mode 100644
index 0000000..b63871a
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_print.h
@@ -0,0 +1,120 @@
+/*
+ * Pretty printing Support for iptables xt_qtaguid module.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __XT_QTAGUID_PRINT_H__
+#define __XT_QTAGUID_PRINT_H__
+
+#include "xt_qtaguid_internal.h"
+
+#ifdef DDEBUG
+
+char *pp_tag_t(tag_t *tag);
+char *pp_data_counters(struct data_counters *dc, bool showValues);
+char *pp_tag_node(struct tag_node *tn);
+char *pp_tag_ref(struct tag_ref *tr);
+char *pp_tag_stat(struct tag_stat *ts);
+char *pp_iface_stat(struct iface_stat *is);
+char *pp_sock_tag(struct sock_tag *st);
+char *pp_uid_tag_data(struct uid_tag_data *qtd);
+char *pp_proc_qtu_data(struct proc_qtu_data *pqd);
+
+/*------------------------------------------*/
+void prdebug_sock_tag_list(int indent_level,
+			   struct list_head *sock_tag_list);
+void prdebug_sock_tag_tree(int indent_level,
+			   struct rb_root *sock_tag_tree);
+void prdebug_proc_qtu_data_tree(int indent_level,
+				struct rb_root *proc_qtu_data_tree);
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree);
+void prdebug_uid_tag_data_tree(int indent_level,
+			       struct rb_root *uid_tag_data_tree);
+void prdebug_tag_stat_tree(int indent_level,
+			   struct rb_root *tag_stat_tree);
+void prdebug_iface_stat_list(int indent_level,
+			     struct list_head *iface_stat_list);
+
+#else
+
+/*------------------------------------------*/
+static inline char *pp_tag_t(tag_t *tag)
+{
+	return NULL;
+}
+static inline char *pp_data_counters(struct data_counters *dc, bool showValues)
+{
+	return NULL;
+}
+static inline char *pp_tag_node(struct tag_node *tn)
+{
+	return NULL;
+}
+static inline char *pp_tag_ref(struct tag_ref *tr)
+{
+	return NULL;
+}
+static inline char *pp_tag_stat(struct tag_stat *ts)
+{
+	return NULL;
+}
+static inline char *pp_iface_stat(struct iface_stat *is)
+{
+	return NULL;
+}
+static inline char *pp_sock_tag(struct sock_tag *st)
+{
+	return NULL;
+}
+static inline char *pp_uid_tag_data(struct uid_tag_data *qtd)
+{
+	return NULL;
+}
+static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
+{
+	return NULL;
+}
+
+/*------------------------------------------*/
+static inline
+void prdebug_sock_tag_list(int indent_level,
+			   struct list_head *sock_tag_list)
+{
+}
+static inline
+void prdebug_sock_tag_tree(int indent_level,
+			   struct rb_root *sock_tag_tree)
+{
+}
+static inline
+void prdebug_proc_qtu_data_tree(int indent_level,
+				struct rb_root *proc_qtu_data_tree)
+{
+}
+static inline
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
+{
+}
+static inline
+void prdebug_uid_tag_data_tree(int indent_level,
+			       struct rb_root *uid_tag_data_tree)
+{
+}
+static inline
+void prdebug_tag_stat_tree(int indent_level,
+			   struct rb_root *tag_stat_tree)
+{
+}
+static inline
+void prdebug_iface_stat_list(int indent_level,
+			     struct list_head *iface_stat_list)
+{
+}
+#endif
+/*------------------------------------------*/
+const char *netdev_evt_str(int netdev_event);
+#endif  /* ifndef __XT_QTAGUID_PRINT_H__ */
diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c
new file mode 100644
index 0000000..834594a
--- /dev/null
+++ b/net/netfilter/xt_quota2.c
@@ -0,0 +1,401 @@
+/*
+ * xt_quota2 - enhanced xt_quota that can count upwards and in packets
+ * as a minimal accounting match.
+ * by Jan Engelhardt <jengelh@medozas.de>, 2008
+ *
+ * Originally based on xt_quota.c:
+ * 	netfilter module to enforce network quotas
+ * 	Sam Johnston <samj@samj.net>
+ *
+ *	This program is free software; you can redistribute it and/or modify
+ *	it under the terms of the GNU General Public License; either
+ *	version 2 of the License, as published by the Free Software Foundation.
+ */
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_quota2.h>
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+/* For compatibility, these definitions are copied from the
+ * deprecated header file <linux/netfilter_ipv4/ipt_ULOG.h> */
+#define ULOG_MAC_LEN	80
+#define ULOG_PREFIX_LEN	32
+
+/* Format of the ULOG packets passed through netlink */
+typedef struct ulog_packet_msg {
+	unsigned long mark;
+	long timestamp_sec;
+	long timestamp_usec;
+	unsigned int hook;
+	char indev_name[IFNAMSIZ];
+	char outdev_name[IFNAMSIZ];
+	size_t data_len;
+	char prefix[ULOG_PREFIX_LEN];
+	unsigned char mac_len;
+	unsigned char mac[ULOG_MAC_LEN];
+	unsigned char payload[0];
+} ulog_packet_msg_t;
+#endif
+
+/**
+ * @lock:	lock to protect quota writers from each other
+ */
+struct xt_quota_counter {
+	u_int64_t quota;
+	spinlock_t lock;
+	struct list_head list;
+	atomic_t ref;
+	char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)];
+	struct proc_dir_entry *procfs_entry;
+};
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+/* Harald's favorite number +1 :D From ipt_ULOG.C */
+static int qlog_nl_event = 112;
+module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(event_num,
+		 "Event number for NETLINK_NFLOG message. 0 disables log."
+		 "111 is what ipt_ULOG uses.");
+static struct sock *nflognl;
+#endif
+
+static LIST_HEAD(counter_list);
+static DEFINE_SPINLOCK(counter_list_lock);
+
+static struct proc_dir_entry *proc_xt_quota;
+static unsigned int quota_list_perms = S_IRUGO | S_IWUSR;
+static kuid_t quota_list_uid = KUIDT_INIT(0);
+static kgid_t quota_list_gid = KGIDT_INIT(0);
+module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR);
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+static void quota2_log(unsigned int hooknum,
+		       const struct sk_buff *skb,
+		       const struct net_device *in,
+		       const struct net_device *out,
+		       const char *prefix)
+{
+	ulog_packet_msg_t *pm;
+	struct sk_buff *log_skb;
+	size_t size;
+	struct nlmsghdr *nlh;
+
+	if (!qlog_nl_event)
+		return;
+
+	size = NLMSG_SPACE(sizeof(*pm));
+	size = max(size, (size_t)NLMSG_GOODSIZE);
+	log_skb = alloc_skb(size, GFP_ATOMIC);
+	if (!log_skb) {
+		pr_err("xt_quota2: cannot alloc skb for logging\n");
+		return;
+	}
+
+	nlh = nlmsg_put(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event,
+			sizeof(*pm), 0);
+	if (!nlh) {
+		pr_err("xt_quota2: nlmsg_put failed\n");
+		kfree_skb(log_skb);
+		return;
+	}
+	pm = nlmsg_data(nlh);
+	if (skb->tstamp.tv64 == 0)
+		__net_timestamp((struct sk_buff *)skb);
+	pm->data_len = 0;
+	pm->hook = hooknum;
+	if (prefix != NULL)
+		strlcpy(pm->prefix, prefix, sizeof(pm->prefix));
+	else
+		*(pm->prefix) = '\0';
+	if (in)
+		strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name));
+	else
+		pm->indev_name[0] = '\0';
+
+	if (out)
+		strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
+	else
+		pm->outdev_name[0] = '\0';
+
+	NETLINK_CB(log_skb).dst_group = 1;
+	pr_debug("throwing 1 packets to netlink group 1\n");
+	netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC);
+}
+#else
+static void quota2_log(unsigned int hooknum,
+		       const struct sk_buff *skb,
+		       const struct net_device *in,
+		       const struct net_device *out,
+		       const char *prefix)
+{
+}
+#endif  /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */
+
+static ssize_t quota_proc_read(struct file *file, char __user *buf,
+			   size_t size, loff_t *ppos)
+{
+	struct xt_quota_counter *e = PDE_DATA(file_inode(file));
+	char tmp[24];
+	size_t tmp_size;
+
+	spin_lock_bh(&e->lock);
+	tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", e->quota);
+	spin_unlock_bh(&e->lock);
+	return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
+}
+
+static ssize_t quota_proc_write(struct file *file, const char __user *input,
+                            size_t size, loff_t *ppos)
+{
+	struct xt_quota_counter *e = PDE_DATA(file_inode(file));
+	char buf[sizeof("18446744073709551616")];
+
+	if (size > sizeof(buf))
+		size = sizeof(buf);
+	if (copy_from_user(buf, input, size) != 0)
+		return -EFAULT;
+	buf[sizeof(buf)-1] = '\0';
+
+	spin_lock_bh(&e->lock);
+	e->quota = simple_strtoull(buf, NULL, 0);
+	spin_unlock_bh(&e->lock);
+	return size;
+}
+
+static const struct file_operations q2_counter_fops = {
+	.read		= quota_proc_read,
+	.write		= quota_proc_write,
+	.llseek		= default_llseek,
+};
+
+static struct xt_quota_counter *
+q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon)
+{
+	struct xt_quota_counter *e;
+	unsigned int size;
+
+	/* Do not need all the procfs things for anonymous counters. */
+	size = anon ? offsetof(typeof(*e), list) : sizeof(*e);
+	e = kmalloc(size, GFP_KERNEL);
+	if (e == NULL)
+		return NULL;
+
+	e->quota = q->quota;
+	spin_lock_init(&e->lock);
+	if (!anon) {
+		INIT_LIST_HEAD(&e->list);
+		atomic_set(&e->ref, 1);
+		strlcpy(e->name, q->name, sizeof(e->name));
+	}
+	return e;
+}
+
+/**
+ * q2_get_counter - get ref to counter or create new
+ * @name:	name of counter
+ */
+static struct xt_quota_counter *
+q2_get_counter(const struct xt_quota_mtinfo2 *q)
+{
+	struct proc_dir_entry *p;
+	struct xt_quota_counter *e = NULL;
+	struct xt_quota_counter *new_e;
+
+	if (*q->name == '\0')
+		return q2_new_counter(q, true);
+
+	/* No need to hold a lock while getting a new counter */
+	new_e = q2_new_counter(q, false);
+	if (new_e == NULL)
+		goto out;
+
+	spin_lock_bh(&counter_list_lock);
+	list_for_each_entry(e, &counter_list, list)
+		if (strcmp(e->name, q->name) == 0) {
+			atomic_inc(&e->ref);
+			spin_unlock_bh(&counter_list_lock);
+			kfree(new_e);
+			pr_debug("xt_quota2: old counter name=%s", e->name);
+			return e;
+		}
+	e = new_e;
+	pr_debug("xt_quota2: new_counter name=%s", e->name);
+	list_add_tail(&e->list, &counter_list);
+	/* The entry having a refcount of 1 is not directly destructible.
+	 * This func has not yet returned the new entry, thus iptables
+	 * has not references for destroying this entry.
+	 * For another rule to try to destroy it, it would 1st need for this
+	 * func* to be re-invoked, acquire a new ref for the same named quota.
+	 * Nobody will access the e->procfs_entry either.
+	 * So release the lock. */
+	spin_unlock_bh(&counter_list_lock);
+
+	/* create_proc_entry() is not spin_lock happy */
+	p = e->procfs_entry = proc_create_data(e->name, quota_list_perms,
+	                      proc_xt_quota, &q2_counter_fops, e);
+
+	if (IS_ERR_OR_NULL(p)) {
+		spin_lock_bh(&counter_list_lock);
+		list_del(&e->list);
+		spin_unlock_bh(&counter_list_lock);
+		goto out;
+	}
+	proc_set_user(p, quota_list_uid, quota_list_gid);
+	return e;
+
+ out:
+	kfree(e);
+	return NULL;
+}
+
+static int quota_mt2_check(const struct xt_mtchk_param *par)
+{
+	struct xt_quota_mtinfo2 *q = par->matchinfo;
+
+	pr_debug("xt_quota2: check() flags=0x%04x", q->flags);
+
+	if (q->flags & ~XT_QUOTA_MASK)
+		return -EINVAL;
+
+	q->name[sizeof(q->name)-1] = '\0';
+	if (*q->name == '.' || strchr(q->name, '/') != NULL) {
+		printk(KERN_ERR "xt_quota.3: illegal name\n");
+		return -EINVAL;
+	}
+
+	q->master = q2_get_counter(q);
+	if (q->master == NULL) {
+		printk(KERN_ERR "xt_quota.3: memory alloc failure\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void quota_mt2_destroy(const struct xt_mtdtor_param *par)
+{
+	struct xt_quota_mtinfo2 *q = par->matchinfo;
+	struct xt_quota_counter *e = q->master;
+
+	if (*q->name == '\0') {
+		kfree(e);
+		return;
+	}
+
+	spin_lock_bh(&counter_list_lock);
+	if (!atomic_dec_and_test(&e->ref)) {
+		spin_unlock_bh(&counter_list_lock);
+		return;
+	}
+
+	list_del(&e->list);
+	remove_proc_entry(e->name, proc_xt_quota);
+	spin_unlock_bh(&counter_list_lock);
+	kfree(e);
+}
+
+static bool
+quota_mt2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	struct xt_quota_mtinfo2 *q = (void *)par->matchinfo;
+	struct xt_quota_counter *e = q->master;
+	bool ret = q->flags & XT_QUOTA_INVERT;
+
+	spin_lock_bh(&e->lock);
+	if (q->flags & XT_QUOTA_GROW) {
+		/*
+		 * While no_change is pointless in "grow" mode, we will
+		 * implement it here simply to have a consistent behavior.
+		 */
+		if (!(q->flags & XT_QUOTA_NO_CHANGE)) {
+			e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
+		}
+		ret = true;
+	} else {
+		if (e->quota >= skb->len) {
+			if (!(q->flags & XT_QUOTA_NO_CHANGE))
+				e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
+			ret = !ret;
+		} else {
+			/* We are transitioning, log that fact. */
+			if (e->quota) {
+				quota2_log(par->hooknum,
+					   skb,
+					   par->in,
+					   par->out,
+					   q->name);
+			}
+			/* we do not allow even small packets from now on */
+			e->quota = 0;
+		}
+	}
+	spin_unlock_bh(&e->lock);
+	return ret;
+}
+
+static struct xt_match quota_mt2_reg[] __read_mostly = {
+	{
+		.name       = "quota2",
+		.revision   = 3,
+		.family     = NFPROTO_IPV4,
+		.checkentry = quota_mt2_check,
+		.match      = quota_mt2,
+		.destroy    = quota_mt2_destroy,
+		.matchsize  = sizeof(struct xt_quota_mtinfo2),
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "quota2",
+		.revision   = 3,
+		.family     = NFPROTO_IPV6,
+		.checkentry = quota_mt2_check,
+		.match      = quota_mt2,
+		.destroy    = quota_mt2_destroy,
+		.matchsize  = sizeof(struct xt_quota_mtinfo2),
+		.me         = THIS_MODULE,
+	},
+};
+
+static int __init quota_mt2_init(void)
+{
+	int ret;
+	pr_debug("xt_quota2: init()");
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+	nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, NULL);
+	if (!nflognl)
+		return -ENOMEM;
+#endif
+
+	proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net);
+	if (proc_xt_quota == NULL)
+		return -EACCES;
+
+	ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
+	if (ret < 0)
+		remove_proc_entry("xt_quota", init_net.proc_net);
+	pr_debug("xt_quota2: init() %d", ret);
+	return ret;
+}
+
+static void __exit quota_mt2_exit(void)
+{
+	xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
+	remove_proc_entry("xt_quota", init_net.proc_net);
+}
+
+module_init(quota_mt2_init);
+module_exit(quota_mt2_exit);
+MODULE_DESCRIPTION("Xtables: countdown quota match; up counter");
+MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_quota2");
+MODULE_ALIAS("ip6t_quota2");
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index b10ade2..a52fbaf 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -144,13 +144,14 @@
 	}
 }
 
-static struct sock *xt_socket_lookup_slow_v4(struct net *net,
+struct sock *xt_socket_lookup_slow_v4(struct net *net,
 					     const struct sk_buff *skb,
 					     const struct net_device *indev)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct sk_buff *data_skb = NULL;
 	int doff = 0;
+	struct sock *sk = skb->sk;
 	__be32 uninitialized_var(daddr), uninitialized_var(saddr);
 	__be16 uninitialized_var(dport), uninitialized_var(sport);
 	u8 uninitialized_var(protocol);
@@ -205,9 +206,16 @@
 	}
 #endif
 
-	return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
-				     daddr, sport, dport, indev);
+	if (sk)
+		atomic_inc(&sk->sk_refcnt);
+	else
+		sk = xt_socket_get_sock_v4(dev_net(skb->dev), data_skb, doff,
+					   protocol, saddr, daddr, sport,
+					   dport, indev);
+
+	return sk;
 }
+EXPORT_SYMBOL(xt_socket_lookup_slow_v4);
 
 static bool
 socket_match(const struct sk_buff *skb, struct xt_action_param *par,
@@ -239,8 +247,7 @@
 		    transparent)
 			pskb->mark = sk->sk_mark;
 
-		if (sk != skb->sk)
-			sock_gen_put(sk);
+		sock_gen_put(sk);
 
 		if (wildcard || !transparent)
 			sk = NULL;
@@ -344,10 +351,11 @@
 	return NULL;
 }
 
-static struct sock *xt_socket_lookup_slow_v6(struct net *net,
+struct sock *xt_socket_lookup_slow_v6(struct net *net,
 					     const struct sk_buff *skb,
 					     const struct net_device *indev)
 {
+	struct sock *sk = skb->sk;
 	__be16 uninitialized_var(dport), uninitialized_var(sport);
 	const struct in6_addr *daddr = NULL, *saddr = NULL;
 	struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -387,9 +395,16 @@
 		return NULL;
 	}
 
-	return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
-				     sport, dport, indev);
+	if (sk)
+		atomic_inc(&sk->sk_refcnt);
+	else
+		sk = xt_socket_get_sock_v6(dev_net(skb->dev), data_skb, doff,
+					   tproto, saddr, daddr, sport, dport,
+					   indev);
+
+	return sk;
 }
+EXPORT_SYMBOL(xt_socket_lookup_slow_v6);
 
 static bool
 socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index 868f1ad..8463a6d 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -10,6 +10,11 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called rfkill.
 
+config RFKILL_PM
+	bool "Power off on suspend"
+	depends on RFKILL && PM
+	default y
+
 # LED trigger support
 config RFKILL_LEDS
 	bool
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 884027f..a9a7128 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -796,8 +796,7 @@
 }
 EXPORT_SYMBOL(rfkill_resume_polling);
 
-#ifdef CONFIG_PM_SLEEP
-static int rfkill_suspend(struct device *dev)
+static __maybe_unused int rfkill_suspend(struct device *dev)
 {
 	struct rfkill *rfkill = to_rfkill(dev);
 
@@ -807,7 +806,7 @@
 	return 0;
 }
 
-static int rfkill_resume(struct device *dev)
+static __maybe_unused int rfkill_resume(struct device *dev)
 {
 	struct rfkill *rfkill = to_rfkill(dev);
 	bool cur;
@@ -827,17 +826,13 @@
 }
 
 static SIMPLE_DEV_PM_OPS(rfkill_pm_ops, rfkill_suspend, rfkill_resume);
-#define RFKILL_PM_OPS (&rfkill_pm_ops)
-#else
-#define RFKILL_PM_OPS NULL
-#endif
 
 static struct class rfkill_class = {
 	.name		= "rfkill",
 	.dev_release	= rfkill_release,
 	.dev_groups	= rfkill_dev_groups,
 	.dev_uevent	= rfkill_dev_uevent,
-	.pm		= RFKILL_PM_OPS,
+	.pm		= IS_ENABLED(CONFIG_RFKILL_PM) ? &rfkill_pm_ops : NULL,
 };
 
 bool rfkill_blocked(struct rfkill *rfkill)
diff --git a/net/socket.c b/net/socket.c
index d9e2989c..7d47cb7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -534,8 +534,22 @@
 	return used;
 }
 
+static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	int err = simple_setattr(dentry, iattr);
+
+	if (!err && (iattr->ia_valid & ATTR_UID)) {
+		struct socket *sock = SOCKET_I(d_inode(dentry));
+
+		sock->sk->sk_uid = iattr->ia_uid;
+	}
+
+	return err;
+}
+
 static const struct inode_operations sockfs_inode_ops = {
 	.listxattr = sockfs_listxattr,
+	.setattr = sockfs_setattr,
 };
 
 /**
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 435f904..38caa6a 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -70,7 +70,7 @@
 MODULE_PARM_DESC(bss_entries_limit,
                  "limit to number of scan BSS entries (per wiphy, default 1000)");
 
-#define IEEE80211_SCAN_RESULT_EXPIRE	(30 * HZ)
+#define IEEE80211_SCAN_RESULT_EXPIRE	(7 * HZ)
 
 static void bss_free(struct cfg80211_internal_bss *bss)
 {
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 44ac85f..d0ca0db 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -241,7 +241,7 @@
 
 	.uinfo = {
 		.auth = {
-			.icv_truncbits = 96,
+			.icv_truncbits = 128,
 			.icv_fullbits = 256,
 		}
 	},
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index f6f91c3..a120e93 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1740,6 +1740,10 @@
 	struct sk_buff *skb;
 	int err;
 
+	err = verify_policy_dir(dir);
+	if (err)
+		return ERR_PTR(err);
+
 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
@@ -2261,6 +2265,10 @@
 	int n = 0;
 	struct net *net = sock_net(skb->sk);
 
+	err = verify_policy_dir(pi->dir);
+	if (err)
+		return err;
+
 	if (attrs[XFRMA_MIGRATE] == NULL)
 		return -EINVAL;
 
@@ -2376,6 +2384,11 @@
 {
 	struct net *net = &init_net;
 	struct sk_buff *skb;
+	int err;
+
+	err = verify_policy_dir(dir);
+	if (err)
+		return err;
 
 	skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC);
 	if (skb == NULL)
@@ -3033,6 +3046,11 @@
 
 static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
+	int err;
+
+	err = verify_policy_dir(dir);
+	if (err)
+		return err;
 
 	switch (c->event) {
 	case XFRM_MSG_NEWPOLICY:
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 72c58675..b2cdced 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -22,6 +22,7 @@
 hostprogs-y += map_perf_test
 hostprogs-y += test_overhead
 hostprogs-y += test_cgrp2_array_pin
+hostprogs-y += test_cgrp2_attach
 hostprogs-y += xdp1
 hostprogs-y += xdp2
 hostprogs-y += test_current_task_under_cgroup
@@ -50,6 +51,7 @@
 map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
 test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
 test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
+test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o
 xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
index 9969e35..9cbc786 100644
--- a/samples/bpf/libbpf.c
+++ b/samples/bpf/libbpf.c
@@ -104,6 +104,29 @@
 	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 }
 
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
+		    unsigned int flags)
+{
+	union bpf_attr attr = {
+		.target_fd = target_fd,
+		.attach_bpf_fd = prog_fd,
+		.attach_type = type,
+		.attach_flags  = flags;
+	};
+
+	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+{
+	union bpf_attr attr = {
+		.target_fd = target_fd,
+		.attach_type = type,
+	};
+
+	return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
 int bpf_obj_pin(int fd, const char *pathname)
 {
 	union bpf_attr attr = {
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index ac6edb6..b06cf5a 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -15,6 +15,10 @@
 		  const struct bpf_insn *insns, int insn_len,
 		  const char *license, int kern_version);
 
+int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
+		    unsigned int flags);
+int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
+
 int bpf_obj_pin(int fd, const char *pathname);
 int bpf_obj_get(const char *pathname);
 
diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c
new file mode 100644
index 0000000..9de4896
--- /dev/null
+++ b/samples/bpf/test_cgrp2_attach.c
@@ -0,0 +1,147 @@
+/* eBPF example program:
+ *
+ * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
+ *
+ * - Loads eBPF program
+ *
+ *   The eBPF program accesses the map passed in to store two pieces of
+ *   information. The number of invocations of the program, which maps
+ *   to the number of packets received, is stored to key 0. Key 1 is
+ *   incremented on each iteration by the number of bytes stored in
+ *   the skb.
+ *
+ * - Detaches any eBPF program previously attached to the cgroup
+ *
+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
+ *
+ * - Every second, reads map[0] and map[1] to see how many bytes and
+ *   packets were seen on any socket of tasks in the given cgroup.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+
+enum {
+	MAP_KEY_PACKETS,
+	MAP_KEY_BYTES,
+};
+
+static int prog_load(int map_fd, int verdict)
+{
+	struct bpf_insn prog[] = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), /* save r6 so it's not clobbered by BPF_CALL */
+
+		/* Count packets */
+		BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
+		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+		BPF_LD_MAP_FD(BPF_REG_1, map_fd), /* load map fd to r1 */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+		BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+		/* Count bytes */
+		BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
+		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+		BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+		BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
+		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+		BPF_EXIT_INSN(),
+	};
+
+	return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB,
+			     prog, sizeof(prog), "GPL", 0);
+}
+
+static int usage(const char *argv0)
+{
+	printf("Usage: %s <cg-path> <egress|ingress> [drop]\n", argv0);
+	return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+	int cg_fd, map_fd, prog_fd, key, ret;
+	long long pkt_cnt, byte_cnt;
+	enum bpf_attach_type type;
+	int verdict = 1;
+
+	if (argc < 3)
+		return usage(argv[0]);
+
+	if (strcmp(argv[2], "ingress") == 0)
+		type = BPF_CGROUP_INET_INGRESS;
+	else if (strcmp(argv[2], "egress") == 0)
+		type = BPF_CGROUP_INET_EGRESS;
+	else
+		return usage(argv[0]);
+
+	if (argc > 3 && strcmp(argv[3], "drop") == 0)
+		verdict = 0;
+
+	cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
+	if (cg_fd < 0) {
+		printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY,
+				sizeof(key), sizeof(byte_cnt),
+				256, 0);
+	if (map_fd < 0) {
+		printf("Failed to create map: '%s'\n", strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	prog_fd = prog_load(map_fd, verdict);
+	printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+
+	if (prog_fd < 0) {
+		printf("Failed to load prog: '%s'\n", strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	ret = bpf_prog_detach(cg_fd, type);
+	printf("bpf_prog_detach() returned '%s' (%d)\n", strerror(errno), errno);
+
+	ret = bpf_prog_attach(prog_fd, cg_fd, type, 0);
+	if (ret < 0) {
+		printf("Failed to attach prog to cgroup: '%s'\n",
+		       strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	while (1) {
+		key = MAP_KEY_PACKETS;
+		assert(bpf_lookup_elem(map_fd, &key, &pkt_cnt) == 0);
+
+		key = MAP_KEY_BYTES;
+		assert(bpf_lookup_elem(map_fd, &key, &byte_cnt) == 0);
+
+		printf("cgroup received %lld packets, %lld bytes\n",
+		       pkt_cnt, byte_cnt);
+		sleep(1);
+	}
+
+	return EXIT_SUCCESS;
+}
diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean
index 50616ea..2e70c6f 100644
--- a/scripts/Makefile.clean
+++ b/scripts/Makefile.clean
@@ -11,7 +11,7 @@
 
 # The filename Kbuild has precedence over Makefile
 kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
-include $(if $(wildcard $(kbuild-dir)/Kbuild), $(kbuild-dir)/Kbuild, $(kbuild-dir)/Makefile)
+-include $(if $(wildcard $(kbuild-dir)/Kbuild), $(kbuild-dir)/Kbuild, $(kbuild-dir)/Makefile)
 
 # Figure out what we need to build from the various variables
 # ==========================================================================
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 4e02d51..45b5f558 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -314,6 +314,12 @@
 
 dtc-tmp = $(subst $(comma),_,$(dot-target).dts.tmp)
 
+# cat
+# ---------------------------------------------------------------------------
+# Concatentate multiple files together
+quiet_cmd_cat = CAT     $@
+cmd_cat = (cat $(filter-out FORCE,$^) > $@) || (rm -f $@; false)
+
 # Bzip2
 # ---------------------------------------------------------------------------
 
diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst
index 07650ee..6f4c3f5 100644
--- a/scripts/Makefile.modinst
+++ b/scripts/Makefile.modinst
@@ -29,7 +29,7 @@
 INSTALL_MOD_DIR ?= extra
 ext-mod-dir = $(INSTALL_MOD_DIR)$(subst $(patsubst %/,%,$(KBUILD_EXTMOD)),,$(@D))
 
-modinst_dir = $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D))
+modinst_dir ?= $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D))
 
 $(modules):
 	$(call cmd,modules_install,$(MODLIB)/$(modinst_dir))
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 5517164..77bc368 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2529,6 +2529,7 @@
 # Check for git id commit length and improperly formed commit descriptions
 		if ($in_commit_log && !$commit_log_possible_stack_dump &&
 		    $line !~ /^\s*(?:Link|Patchwork|http|https|BugLink):/i &&
+		    $line !~ /^This reverts commit [0-9a-f]{7,40}/ &&
 		    ($line =~ /\bcommit\s+[0-9a-f]{5,}\b/i ||
 		     ($line =~ /(?:\s|^)[0-9a-f]{12,40}(?:[\s"'\(\[]|$)/i &&
 		      $line !~ /[\<\[][0-9a-f]{12,40}[\>\]]/i &&
diff --git a/security/Kconfig b/security/Kconfig
index 32f36b4..80a2934 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -18,6 +18,15 @@
 
 	  If you are unsure how to answer this question, answer N.
 
+config SECURITY_PERF_EVENTS_RESTRICT
+	bool "Restrict unprivileged use of performance events"
+	depends on PERF_EVENTS
+	help
+	  If you say Y here, the kernel.perf_event_paranoid sysctl
+	  will be set to 3 by default, and no unprivileged use of the
+	  perf_event_open syscall will be permitted unless it is
+	  changed.
+
 config SECURITY
 	bool "Enable different security models"
 	depends on SYSFS
diff --git a/security/commoncap.c b/security/commoncap.c
index 8df676f..a8e4aac 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -31,6 +31,10 @@
 #include <linux/binfmts.h>
 #include <linux/personality.h>
 
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+#include <linux/android_aid.h>
+#endif
+
 /*
  * If a non-root user executes a setuid-root binary in
  * !secure(SECURE_NOROOT) mode, then we raise capabilities.
@@ -73,6 +77,13 @@
 {
 	struct user_namespace *ns = targ_ns;
 
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+	if (cap == CAP_NET_RAW && in_egroup_p(AID_NET_RAW))
+		return 0;
+	if (cap == CAP_NET_ADMIN && in_egroup_p(AID_NET_ADMIN))
+		return 0;
+#endif
+
 	/* See if cred has the capability in the target user namespace
 	 * by examining the target user namespace and all of the target
 	 * user namespace's parents.
diff --git a/security/inode.c b/security/inode.c
index c83db05..b4531f2 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -100,7 +100,7 @@
 	dir = d_inode(parent);
 
 	inode_lock(dir);
-	dentry = lookup_one_len(name, parent, strlen(name));
+	dentry = lookup_one_len2(name, mount, parent, strlen(name));
 	if (IS_ERR(dentry))
 		goto out;
 
diff --git a/security/security.c b/security/security.c
index 112df16..ef1d365 100644
--- a/security/security.c
+++ b/security/security.c
@@ -508,6 +508,7 @@
 		return 0;
 	return call_int_hook(path_chown, 0, path, uid, gid);
 }
+EXPORT_SYMBOL(security_path_chown);
 
 int security_path_chroot(const struct path *path)
 {
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 9bd6f97..dbf3c52 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -479,6 +479,7 @@
 		!strcmp(sb->s_type->name, "sysfs") ||
 		!strcmp(sb->s_type->name, "pstore") ||
 		!strcmp(sb->s_type->name, "debugfs") ||
+		!strcmp(sb->s_type->name, "tracefs") ||
 		!strcmp(sb->s_type->name, "rootfs");
 }
 
@@ -797,6 +798,7 @@
 		sbsec->flags |= SE_SBPROC | SE_SBGENFS;
 
 	if (!strcmp(sb->s_type->name, "debugfs") ||
+	    !strcmp(sb->s_type->name, "tracefs") ||
 	    !strcmp(sb->s_type->name, "sysfs") ||
 	    !strcmp(sb->s_type->name, "pstore"))
 		sbsec->flags |= SE_SBGENFS;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f09c70b..b2d5be9 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -73,6 +73,8 @@
 	BPF_PROG_LOAD,
 	BPF_OBJ_PIN,
 	BPF_OBJ_GET,
+	BPF_PROG_ATTACH,
+	BPF_PROG_DETACH,
 };
 
 enum bpf_map_type {
@@ -96,8 +98,23 @@
 	BPF_PROG_TYPE_TRACEPOINT,
 	BPF_PROG_TYPE_XDP,
 	BPF_PROG_TYPE_PERF_EVENT,
+	BPF_PROG_TYPE_CGROUP_SKB,
 };
 
+enum bpf_attach_type {
+	BPF_CGROUP_INET_INGRESS,
+	BPF_CGROUP_INET_EGRESS,
+	__MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
+ * to the given target_fd cgroup the descendent cgroup will be able to
+ * override effective bpf program that was inherited from this cgroup
+ */
+#define BPF_F_ALLOW_OVERRIDE	(1U << 0)
+
 #define BPF_PSEUDO_MAP_FD	1
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -141,6 +158,13 @@
 		__aligned_u64	pathname;
 		__u32		bpf_fd;
 	};
+
+	struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+		__u32		target_fd;	/* container object to attach to */
+		__u32		attach_bpf_fd;	/* eBPF program to attach */
+		__u32		attach_type;
+		__u32		attach_flags;
+	};
 } __attribute__((aligned(8)));
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -426,6 +450,67 @@
 	 */
 	BPF_FUNC_set_hash_invalid,
 
+	/**
+	 * int bpf_get_numa_node_id()
+	 *     Return: Id of current NUMA node.
+	 */
+	BPF_FUNC_get_numa_node_id,
+
+	/**
+	 * int bpf_skb_change_head()
+	 *     Grows headroom of skb and adjusts MAC header offset accordingly.
+	 *     Will extends/reallocae as required automatically.
+	 *     May change skb data pointer and will thus invalidate any check
+	 *     performed for direct packet access.
+	 *     @skb: pointer to skb
+	 *     @len: length of header to be pushed in front
+	 *     @flags: Flags (unused for now)
+	 *     Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_head,
+
+	/**
+	 * int bpf_xdp_adjust_head(xdp_md, delta)
+	 *     Adjust the xdp_md.data by delta
+	 *     @xdp_md: pointer to xdp_md
+	 *     @delta: An positive/negative integer to be added to xdp_md.data
+	 *     Return: 0 on success or negative on error
+	 */
+	BPF_FUNC_xdp_adjust_head,
+
+	/**
+	 * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+	 *     Copy a NUL terminated string from unsafe address. In case the string
+	 *     length is smaller than size, the target is not padded with further NUL
+	 *     bytes. In case the string length is larger than size, just count-1
+	 *     bytes are copied and the last byte is set to NUL.
+	 *     @dst: destination address
+	 *     @size: maximum number of bytes to copy, including the trailing NUL
+	 *     @unsafe_ptr: unsafe address
+	 *     Return:
+	 *       > 0 length of the string including the trailing NUL on success
+	 *       < 0 error
+	 */
+	BPF_FUNC_probe_read_str,
+
+	/**
+	 * u64 bpf_bpf_get_socket_cookie(skb)
+	 *     Get the cookie for the socket stored inside sk_buff.
+	 *     @skb: pointer to skb
+	 *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
+	 *     field is missing inside sk_buff
+	 */
+	BPF_FUNC_get_socket_cookie,
+
+	/**
+	 * u32 bpf_get_socket_uid(skb)
+	 *     Get the owner uid of the socket stored inside sk_buff.
+	 *     @skb: pointer to skb
+	 *     Return: uid of the socket owner on success or 0 if the socket pointer
+	 *     inside sk_buff is NULL
+	 */
+	BPF_FUNC_get_socket_uid,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/tools/include/uapi/linux/hw_breakpoint.h b/tools/include/uapi/linux/hw_breakpoint.h
index b04000a..2b65efd 100644
--- a/tools/include/uapi/linux/hw_breakpoint.h
+++ b/tools/include/uapi/linux/hw_breakpoint.h
@@ -4,7 +4,11 @@
 enum {
 	HW_BREAKPOINT_LEN_1 = 1,
 	HW_BREAKPOINT_LEN_2 = 2,
+	HW_BREAKPOINT_LEN_3 = 3,
 	HW_BREAKPOINT_LEN_4 = 4,
+	HW_BREAKPOINT_LEN_5 = 5,
+	HW_BREAKPOINT_LEN_6 = 6,
+	HW_BREAKPOINT_LEN_7 = 7,
 	HW_BREAKPOINT_LEN_8 = 8,
 };